<a href="https://colab.research.google.com/github/RafaJBZ/Audio-denoising-autoencoder/blob/main/denoisening_dae_audio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Fri Mar  1 20:24:11 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off | 00000000:00:04.0 Off |                    0 |
| N/A   31C    P0              49W / 400W |  17091MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [11]:
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

Your runtime has 89.6 gigabytes of available RAM

You are using a high-RAM runtime!


In [12]:
from tensorflow.keras.layers import Input, Dense, MaxPooling2D, Reshape, Conv2D, Flatten, Conv2DTranspose, UpSampling2D, Cropping2D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import numpy as np
import tensorflow as tf
import os
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

In [13]:
x_train = np.load('/content/drive/MyDrive/ITESO/Eight Semester-Primavera 2024/Deep Learning/Audio-denoising-autoencoder/data/noise_on_audio/processed_data/train/clean_hybrid_representations.npy')
x_train_noisy = np.load('/content/drive/MyDrive/ITESO/Eight Semester-Primavera 2024/Deep Learning/Audio-denoising-autoencoder/data/noise_on_audio/processed_data/train/noisy_hybrid_representations.npy')

In [14]:
x_train.shape, x_train_noisy.shape

((36000, 148, 109), (36000, 148, 109))

In [15]:
x_train.shape[1:], x_train_noisy.shape[1:]

((148, 109), (148, 109))

In [16]:
x_train = np.expand_dims(x_train, axis=-1)
x_train_noisy = np.expand_dims(x_train_noisy, axis=-1)
x_train.shape[1:], x_train_noisy.shape[1:]

((148, 109, 1), (148, 109, 1))

In [17]:
def create_autoencoder(input_shape):
    input_layer = Input(shape=input_shape) # (148, 109, 1)

    x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_layer) # (148, 109, 32)
    x = MaxPooling2D((2, 2), padding='same')(x) # (74, 55, 32)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x) # (74, 55, 64)
    x = MaxPooling2D((2, 2), padding='same')(x) # (37, 28, 64)
    x = Flatten()(x) # (66304)

    latent_space = Dense(128, activation='relu')(x) # (128)

    x = Dense(37 * 28 * 64, activation='relu')(latent_space) # (37 * 28 * 64) = 66304
    x = Reshape((37, 28, 64))(x) # (37, 28, 64)
    x = Conv2DTranspose(64, (3, 3), activation='relu', padding='same')(x) # (37, 28, 64)
    x = UpSampling2D((2, 2))(x) # (74, 56, 64)
    x = Cropping2D(cropping=((0, 0), (0, 1)))(x) # (74, 55, 64)
    x = Conv2DTranspose(32, (3, 3), activation='relu', padding='same')(x) # (74, 55, 32)
    x = UpSampling2D((2, 2))(x) #  # (148, 110, 32)
    x = Cropping2D(cropping=((0, 0), (0, 1)))(x) # (148, 109, 32)
    output_layer = Conv2DTranspose(1, (3, 3), activation='sigmoid', padding='same')(x) # (148, 109, 1)

    autoencoder = Model(inputs=input_layer, outputs=output_layer)
    autoencoder.compile(optimizer='adam', loss='mean_squared_error')

    return autoencoder

# Create autoencoder model
input_shape = x_train.shape[1:]
autoencoder = create_autoencoder(input_shape)
print(autoencoder.summary())

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 148, 109, 1)]     0         
                                                                 
 conv2d_2 (Conv2D)           (None, 148, 109, 32)      320       
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 74, 55, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_3 (Conv2D)           (None, 74, 55, 64)        18496     
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 37, 28, 64)        0         
 g2D)                                                            
                                                                 
 flatten_1 (Flatten)         (None, 66304)             0   

In [18]:
early_stopping = EarlyStopping(
    monitor='val_loss',  # Monitor validation loss
    patience=6,           # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True  # Restore the best weights observed during training
)
# Define model checkpointing
checkpoint_path = "/content/drive/MyDrive/ITESO/Eight Semester-Primavera 2024/Deep Learning/Audio-denoising-autoencoder/data/noise_on_audio/processed_data/autoencoder_model_audio.keras"
model_checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,   # Filepath to save the model
    monitor='val_loss',         # Monitor validation loss
    save_best_only=True,        # Save only the best model based on the monitored quantity
    verbose=1                   # Print messages about the saving process
)

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=5, min_lr=0.001)

# Train the autoencoder with early stopping and model checkpointing
history = autoencoder.fit(
    x_train_noisy,
    x_train,
    epochs=200,               # Increase the maximum number of epochs
    batch_size=32,
    validation_data=(x_train_noisy, x_train),
    callbacks=[early_stopping, model_checkpoint, reduce_lr]  # Pass the EarlyStopping and ModelCheckpoint callbacks to the fit method
)

Epoch 1/200
Epoch 1: val_loss improved from inf to 1233.45642, saving model to /content/drive/MyDrive/ITESO/Eight Semester-Primavera 2024/Deep Learning/Audio-denoising-autoencoder/data/noise_on_audio/processed_data/autoencoder_model_audio.keras
Epoch 2/200
Epoch 2: val_loss improved from 1233.45642 to 1233.39771, saving model to /content/drive/MyDrive/ITESO/Eight Semester-Primavera 2024/Deep Learning/Audio-denoising-autoencoder/data/noise_on_audio/processed_data/autoencoder_model_audio.keras
Epoch 3/200
Epoch 3: val_loss improved from 1233.39771 to 1233.38049, saving model to /content/drive/MyDrive/ITESO/Eight Semester-Primavera 2024/Deep Learning/Audio-denoising-autoencoder/data/noise_on_audio/processed_data/autoencoder_model_audio.keras
Epoch 4/200
Epoch 4: val_loss improved from 1233.38049 to 1233.36646, saving model to /content/drive/MyDrive/ITESO/Eight Semester-Primavera 2024/Deep Learning/Audio-denoising-autoencoder/data/noise_on_audio/processed_data/autoencoder_model_audio.keras