<a href="https://colab.research.google.com/github/Pooja-Informatics/symmetrical-orchid/blob/main/speech_enhancement.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Wave-U-Net Architecture for Speech Enhancement

**Install modules and libraries**

In [None]:
!pip install librosa tensorflow keras numpy matplotlib pandas
!pip install soundfile


**Import**

In [None]:
import librosa
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import soundfile as sf
import os


**Dataset**

In [None]:
from google.colab import drive

# Mount Google Drive so the dataset folders below are reachable from the runtime.
drive.mount('/content/drive', force_remount=True)

# Clean and noisy training folders get their own names so that neither path
# is lost by assigning both to the same variable.
clean_train_path = '/content/drive/MyDrive/Data_training/train_data/clean_trainset_wav'
noisy_train_path = '/content/drive/MyDrive/Data_training/train_data/noisy_trainset_wav'

# Backward-compatible alias: `filePath` keeps pointing at the noisy training folder.
filePath = noisy_train_path

Mounted at /content/drive


**Function to convert a folder of audio files into a list of audio arrays**

In [None]:
import os
import librosa
# Use the librosa library to load every audio file in a folder into a list of arrays.

def convert_audio_folder_to_list(folder_path, sr=22050):
    """Load every audio file in a folder into a list of waveform arrays.

    Args:
        folder_path: Directory that contains the audio files.
        sr: Target sampling rate forwarded to ``librosa.load``. The default,
            22050, is librosa's own default, so existing callers get the same
            result as before. Pass ``None`` to keep each file's native rate.

    Returns:
        A list with one array per regular file in ``folder_path``, ordered by
        file name. An empty folder yields an empty list.
    """
    audio_list = []

    # os.listdir gives entries in arbitrary order; sorting makes the result
    # reproducible and lets it line up with another folder listed the same way.
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)

        # Sub-directories cannot be decoded as audio, so skip them.
        if not os.path.isfile(file_path):
            continue

        # librosa.load returns (samples, sampling_rate); only the samples are kept.
        audio_data, _ = librosa.load(file_path, sr=sr)
        audio_list.append(audio_data)

    return audio_list

# Folder on the mounted Drive that holds the noisy training recordings.
folder_path = "/content/drive/MyDrive/Data_training/train_data/noisy_trainset_wav"

# Load every file in that folder; the result is a list with one array per file.
audio_list = convert_audio_folder_to_list(folder_path)

# Print each loaded array in turn.
for audio_data in audio_list:
    print(audio_data)

# The output shows one 1-dimensional NumPy array per audio file.
# Each array is a sequence of float sample values.


[0.01245543 0.02198051 0.02311831 ... 0.09252393 0.11061627 0.04900526]
[-0.00696815 -0.01127945 -0.00957985 ...  0.00731695  0.00189796
  0.        ]
[-0.00474634 -0.00490906 -0.00352744 ...  0.00466625  0.00630934
  0.00461349]
[-0.00248011 -0.00380439 -0.00414199 ...  0.00536412  0.00697804
  0.00536193]
[-0.05526794 -0.05634881 -0.08667254 ...  0.00172743  0.00282566
  0.00107446]


**Generators**

In [None]:
import os
import random
import numpy as np
import soundfile as sf

# Folders that audio_generator joins with each file name to locate the audio
# file and its annotation file.
# NOTE(review): these look like placeholder values — confirm they point at real folders.
audio_folder = 'audio_files_folder'
annotations_folder = 'annotations_folder'

def audio_generator(files, batch_size=32):
    """Endlessly yield random ``(inputs, targets)`` batches.

    Every batch is drawn with ``np.random.choice``, i.e. sampled with
    replacement, so ``files`` may hold fewer entries than ``batch_size``.

    Args:
        files: Sequence of audio file names, relative to ``audio_folder``.
            The annotation for ``name.ext`` is read from
            ``annotations_folder/name.txt``.
        batch_size: Number of files drawn for each batch.

    Yields:
        ``(batch_x, batch_y)``: NumPy arrays built from the preprocessed audio
        and the preprocessed annotations of the sampled files.

    Raises:
        ValueError: On the first iteration when ``files`` is empty.
    """
    # Fail with a clear message instead of the generic error from np.random.choice.
    if len(files) == 0:
        raise ValueError("audio_generator needs at least one file to sample from")

    while True:
        # Extract a random batch (with replacement)
        batch = np.random.choice(files, size=batch_size)

        # Collect the inputs and outputs of this batch
        batch_x = []
        batch_y = []

        for f in batch:
            # Load the audio file; the sampling rate returned by sf.read is not used
            audio_path = os.path.join(audio_folder, f)
            audio, _ = sf.read(audio_path)

            # Derive the annotation name from the file stem. splitext handles
            # extensions of any length, whereas slicing off four characters
            # only works for three-letter extensions such as ".wav".
            stem, _ext = os.path.splitext(f)
            annotation_path = os.path.join(annotations_folder, stem + '.txt')
            annotation = np.loadtxt(annotation_path)

            # NOTE(review): preprocess_audio / preprocess_annotation are not
            # defined in any visible cell — confirm where they come from.
            processed_audio = preprocess_audio(audio)
            processed_annotation = preprocess_annotation(annotation)

            batch_x.append(processed_audio)
            batch_y.append(processed_annotation)

        # Stack the per-file results into batch arrays
        batch_x = np.array(batch_x)
        batch_y = np.array(batch_y)

        yield batch_x, batch_y


**Train and test generators**


In [None]:
# Number of files drawn for every batch produced by the generators
batch_size = 32

# Split the file list into a training part and a testing part.
# NOTE(review): `all_files` and `split` are not defined in any visible cell —
# confirm where they are set before running this cell on a fresh kernel.
train_files, test_files = all_files[:split], all_files[split:]

# One endless batch generator per subset
train_generator = audio_generator(train_files, batch_size=batch_size)
test_generator = audio_generator(test_files, batch_size=batch_size)

**Mean absolute error metric**

In [None]:
import tensorflow.keras.backend as K

def mean_audio(y_true, y_pred):
    """Mean absolute difference between target and prediction.

    Written with TensorFlow ops so it can be passed to ``model.compile`` as a
    metric: Keras calls metric functions with symbolic tensors, which NumPy
    functions cannot process.

    Args:
        y_true: Target values.
        y_pred: Predicted values, broadcastable against ``y_true``.

    Returns:
        Scalar holding the mean of ``|y_true - y_pred|`` over all elements.
    """
    return tf.reduce_mean(tf.abs(y_true - y_pred))


**Model**

In [None]:
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model

def unet(sz=(256, )):
    """Build and compile a 2-D U-Net style encoder/decoder.

    The encoder applies six stages of two 3x3 convolutions followed by 2x
    max-pooling, doubling the filter count each stage (8 up to 256). The
    decoder mirrors it with six stride-2 transposed convolutions, each
    concatenated with the matching encoder output along the channel axis.

    Args:
        sz: Input shape handed to ``Input``. The ``Conv2D`` / ``MaxPooling2D``
            stack needs a ``(height, width, channels)`` shape, and because of
            the six poolings height and width must be divisible by 64 for the
            skip connections to line up.
            NOTE(review): the default ``(256, )`` is not such a shape; the
            notebook calls this with ``(256, 256, 3)`` — confirm the default.

    Returns:
        A compiled Keras ``Model`` with a single-channel sigmoid output,
        the ``rmsprop`` optimizer, binary cross-entropy loss, and
        ``mean_audio`` as metric.
    """
    x = Input(sz)
    inputs = x

    # Downsampling: remember each stage's output for the skip connections
    f = 8
    skips = []

    for _ in range(6):
        x = Conv2D(f, 3, activation='relu', padding='same')(x)
        x = Conv2D(f, 3, activation='relu', padding='same')(x)
        skips.append(x)
        x = MaxPooling2D()(x)
        f = f * 2
    # Filter count of the first transposed convolution; halved at every later stage
    ff2 = 64

    # Bottleneck: deepest convolutions, then the first upsampling step,
    # joined with the last (deepest) encoder output
    j = len(skips) - 1
    x = Conv2D(f, 3, activation='relu', padding='same')(x)
    x = Conv2D(f, 3, activation='relu', padding='same')(x)
    x = Conv2DTranspose(ff2, 2, strides=(2, 2), padding='same')(x)
    x = Concatenate(axis=3)([x, skips[j]])
    j = j - 1

    # Upsampling: walk back through the remaining five encoder outputs
    for _ in range(5):
        ff2 = ff2 // 2
        f = f // 2
        x = Conv2D(f, 3, activation='relu', padding='same')(x)
        x = Conv2D(f, 3, activation='relu', padding='same')(x)
        x = Conv2DTranspose(ff2, 2, strides=(2, 2), padding='same')(x)
        x = Concatenate(axis=3)([x, skips[j]])
        j = j - 1

    # Reconstruction: final convolutions and 1x1 projection to one channel
    x = Conv2D(f, 3, activation='relu', padding='same')(x)
    x = Conv2D(f, 3, activation='relu', padding='same')(x)
    outputs = Conv2D(1, 1, activation='sigmoid')(x)

    # Model creation. The metric is mean_audio, defined in this notebook;
    # the name `temp` used here before is not defined anywhere and raised
    # a NameError as soon as the model was built.
    model = Model(inputs=[inputs], outputs=[outputs])
    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=[mean_audio])

    return model


In [None]:
# Build and compile the network for inputs of shape (256, 256, 3).
model = unet(sz=(256, 256, 3))


**Callbacks**

In [None]:
def build_callbacks():
    """Create the list of callbacks passed to ``model.fit``.

    Returns:
        A list that starts with a ``ModelCheckpoint`` writing weights to
        ``wave_unet.h5`` (only when the monitored value improves), followed by
        a ``PlotLearning()`` instance when a ``PlotLearning`` class is defined
        in the notebook.
    """
    # ModelCheckpoint is reached through the `keras` module imported at the top
    # of the notebook; the bare name is not imported anywhere.
    # NOTE(review): newer Keras releases may require a ".weights.h5" suffix when
    # save_weights_only=True — confirm against the installed version.
    checkpointer = keras.callbacks.ModelCheckpoint(filepath='wave_unet.h5', verbose=0, save_best_only=True, save_weights_only=True)
    callbacks = [checkpointer]

    # PlotLearning is not defined in any visible cell. Add it only when it
    # exists so training still runs on a fresh kernel without it.
    plot_learning = globals().get('PlotLearning')
    if plot_learning is not None:
        callbacks.append(plot_learning())

    return callbacks


**Training**

In [None]:
# Batches per epoch. Floor division gives 0 when a split holds fewer files
# than batch_size, which would leave an epoch with no steps, so use at least
# one step per epoch.
train_steps = max(1, len(train_files) // batch_size)
test_steps = max(1, len(test_files) // batch_size)

# Keep the returned History object so the loss and metric curves can be
# inspected after training.
history = model.fit(train_generator,
                    epochs=30,
                    steps_per_epoch=train_steps,
                    validation_data=test_generator,
                    validation_steps=test_steps,
                    callbacks=build_callbacks(),
                    verbose=0)


**Testing**

In [None]:
from google.colab import drive

# Mount Google Drive so the test-set folders below are reachable from the runtime.
drive.mount('/content/drive', force_remount=True)

# Clean and noisy test folders get their own names so that neither path
# is lost by assigning both to the same variable.
clean_test_path = '/content/drive/MyDrive/Data_training/train_data/clean_testset_wav'
noisy_test_path = '/content/drive/MyDrive/Data_training/train_data/noisy_testset_wav'

# Backward-compatible alias: `filePath` keeps pointing at the noisy test folder.
filePath = noisy_test_path

Mounted at /content/drive
