# EfficientNetB0 Model

In [2]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf
import librosa.display
#!pip uninstall speechbrain
!pip install speechbrain
from google.colab import drive
drive.mount('/content/drive')
dataset_path = '/content/drive/MyDrive/dev-clean.tar.gz'
# extract the dataset
import tarfile
import os

# Directory to save extracted files and spectrograms
extracted_dir = '/content/extracted'
os.makedirs(extracted_dir, exist_ok=True)

# Extract the dataset
try:
    with tarfile.open(dataset_path, 'r:gz') as tar:
        tar.extractall(path=extracted_dir)
        print(f"Extraction completed successfully to {extracted_dir}")
except tarfile.ReadError:
    print("Error: Unable to read the tar file. It might be corrupted.")
except EOFError:
    print("Error: The file seems to be incomplete or corrupted.")


Mounted at /content/drive
Extraction completed successfully to /content/extracted


In [None]:
import tensorflow as tf


In [None]:
# Training configuration
# Model input shape; EfficientNet expects 3 channels (RGB).
IMG_SIZE = (128, 128, 3)
# Number of samples per training batch.
BATCH_SIZE = 32
# Number of passes over the training dataset.
EPOCHS = 20
# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 0.0001

# Load an audio file and turn it into a normalized 3-channel Mel spectrogram
def load_and_preprocess_audio(audio_path, sr=22050, n_mels=128, hop_length=512,
                              target_frames=None, top_db=80.0):
    """Load an audio file and return its Mel spectrogram as a 3-channel image.

    The waveform is loaded at `sr`, converted to a Mel power spectrogram,
    expressed in dB relative to its own maximum, and mapped to [0, 1].
    No noise is added here; noisy inputs must be produced separately.

    Args:
        audio_path: Path of the audio file, passed to `librosa.load`.
        sr: Sampling rate the audio is loaded at.
        n_mels: Number of Mel bands (height of the returned image).
        hop_length: Hop length between successive spectrogram frames.
        target_frames: If given, the time axis is zero-padded at the end or
            truncated to exactly this many frames, so the result has a fixed
            width (e.g. the width a fixed-input model expects). `None`
            (default) keeps the natural length.
        top_db: Dynamic range in dB kept by `librosa.power_to_db`; must be a
            positive number. The same value is used for the [0, 1] mapping.

    Returns:
        Array of shape (n_mels, frames, 3) with values in [0, 1]; the three
        channels are identical copies of the spectrogram.
    """
    waveform, _ = librosa.load(audio_path, sr=sr)
    mel_power = librosa.feature.melspectrogram(
        y=waveform, sr=sr, n_mels=n_mels, hop_length=hop_length
    )

    # With ref=np.max the loudest bin is 0 dB and values are floored at -top_db,
    # so shifting and dividing by top_db maps the spectrogram onto [0, 1].
    mel_db = librosa.power_to_db(mel_power, ref=np.max, top_db=top_db)
    mel_unit = np.clip((mel_db + top_db) / top_db, 0, 1)

    if target_frames is not None:
        n_frames = mel_unit.shape[1]
        if n_frames < target_frames:
            # Pad with 0, which is the normalized value of the -top_db floor.
            mel_unit = np.pad(
                mel_unit, ((0, 0), (0, target_frames - n_frames)), mode='constant'
            )
        else:
            mel_unit = mel_unit[:, :target_frames]

    # Replicate the single channel three times to mimic an RGB image.
    return np.stack([mel_unit] * 3, axis=-1)



In [None]:
 Denoising autoencoder model definition with EfficientNet
def create_denoising_autoencoder(input_shape=IMG_SIZE):
    # Encoder using EfficientNetB0
    base_model = tf.keras.applications.EfficientNetB0(include_top=False, weights='imagenet', input_shape=input_shape)
    base_model.trainable = False  # Freeze the base model

    inputs = tf.keras.layers.Input(shape=input_shape)
    x = base_model(inputs)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    encoded = tf.keras.layers.Dense(32, activation='relu')(x)

    # Decoder
    x = tf.keras.layers.Dense(64, activation='relu')(encoded)
    x = tf.keras.layers.Dense(np.prod((32, 32, 128)), activation='relu')(x)  # Adjust to output shape
    x = tf.keras.layers.Reshape((32, 32, 128))(x)  # Reshape to 3D tensor

    x = tf.keras.layers.Conv2DTranspose(128, (3, 3), strides=2, activation='relu', padding='same')(x)
    x = tf.keras.layers.Conv2DTranspose(64, (3, 3), strides=2, activation='relu', padding='same')(x)
    outputs = tf.keras.layers.Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)  # Output layer

    model = tf.keras.Model(inputs, outputs)
    return model


In [None]:

# Build the autoencoder with its default input shape, then set up training:
# Adam optimizer at LEARNING_RATE, mean squared error as the loss.
autoencoder = create_denoising_autoencoder()
autoencoder.compile(
    loss='mse',
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
)

In [None]:

# Build the training pipeline from (input, target) pairs.
# `mel_spectrograms_noisy` (inputs) and `mel_spectrograms_original` (targets)
# are not defined in this cell and must already exist in the kernel.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((mel_spectrograms_noisy, mel_spectrograms_original))
    .shuffle(1024)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Fit the autoencoder for EPOCHS passes over the dataset.
autoencoder.fit(train_dataset, epochs=EPOCHS)


# Write the trained model to disk in HDF5 format.
autoencoder.save('efficientnet_denoising_autoencoder.h5')