In [6]:
#This is one of the model architectures we tried. Trained on kaggle

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
import tensorflow as tf
# Abort early when no GPU is attached: the training cells below are far too slow on CPU.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')




In [3]:
import glob
# The training shards are spread over three input datasets; gather them into one flat list.
train2filenames = glob.glob(os.path.join('../input/trainrecords3/TrainRecords', 'trainrecords*'))
train3filenames = glob.glob(os.path.join('../input/train-records-57k', 'trainrecords*'))
trainfilenames = [
    *glob.glob(os.path.join('../input/trainrecords2/TrainRecords2', 'trainrecords*')),
    *train2filenames,
    *train3filenames,
]

# Validation shards live in a single dataset.
valfilenames = glob.glob(os.path.join('../input/val-records', 'valrecords*'))

#Create the list of validation and Training files to create datasets to train the model

In [4]:
def readtfrecord(record):
    """Parse one serialized TFRecord example into a (noisy input, clean target) pair.

    Args:
        record: a scalar string tensor holding one serialized example.

    Returns:
        A tuple ``(noise_stft_mag_features, clean_stft_magnitude)`` of float32
        tensors reshaped to ``(129, 8, 1)`` and ``(129, 1, 1)`` respectively.

    Only the two features that are returned are parsed. Earlier versions also
    decoded and reshaped "noise_stft_phase", but the result was never used,
    so that dead work has been removed.
    """
    keys_to_features = {
        'noise_stft_mag_features': tf.io.FixedLenFeature([], tf.string),
        'clean_stft_magnitude': tf.io.FixedLenFeature([], tf.string),
    }

    features = tf.io.parse_single_example(record, keys_to_features)

    # Both features are stored as raw float32 bytes.
    noise_stft_mag_features = tf.io.decode_raw(features['noise_stft_mag_features'], tf.float32)
    clean_stft_magnitude = tf.io.decode_raw(features['clean_stft_magnitude'], tf.float32)

    # Restore the shapes used for the model input and the training target.
    noise_stft_mag_features = tf.reshape(noise_stft_mag_features, (129, 8, 1), name="noise_stft_mag_features")
    clean_stft_magnitude = tf.reshape(clean_stft_magnitude, (129, 1, 1), name="clean_stft_magnitude")

    return noise_stft_mag_features, clean_stft_magnitude

#A function to read and parse data from the TF records in an appropriate manner to feed to the model

In [5]:
import tensorflow as tf
# Streaming training pipeline over every TFRecord shard listed in trainfilenames.
train_dataset = tf.data.TFRecordDataset(trainfilenames)
# Parse each serialized example into an (input, target) pair; parsing runs in parallel.
train_dataset = train_dataset.map(readtfrecord, num_parallel_calls=tf.data.experimental.AUTOTUNE)
# Shuffle within a sliding buffer of 8192 examples.
train_dataset = train_dataset.shuffle(8192)
# Repeat the data indefinitely; the length of an epoch is set by steps_per_epoch in model.fit.
train_dataset = train_dataset.repeat()
# Group examples into batches of 512.
train_dataset = train_dataset.batch(512)
# Prepare upcoming batches in the background while the current one is being consumed.
train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)


In [6]:
# Sanity check: how many training TFRecord shards did the glob patterns find?
train_file_count = len(trainfilenames)
print(train_file_count)

182661


In [7]:
# Validation pipeline: same parsing as the training data, but no shuffling or repetition.
# valfilenames is already a flat list of paths, so it is passed directly,
# exactly as trainfilenames is for the training dataset.
val_dataset = tf.data.TFRecordDataset(valfilenames)
val_dataset = val_dataset.map(readtfrecord)
# The former repeat(1) was a no-op (one pass is the default) and has been dropped.
val_dataset = val_dataset.batch(512)

In [None]:
#30 epochs took around 9 hours to train so training for around 210 epochs was done by saving and loading models at intervals of 30 epochs
%%time
%load_ext tensorboard
%tensorboard --logdir logs
import datetime
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True, baseline=None)

logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, update_freq='batch')
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath='model.h5',  monitor='val_loss', save_best_only=True)
   
train_dataset=train_dataset.apply(tf.data.experimental.ignore_errors())
                                                         
model.fit(train_dataset,
         steps_per_epoch=300,
         validation_data=val_dataset,
         epochs=30,
        callbacks=[early_stopping_callback, tensorboard_callback, checkpoint_callback]
       )

In [8]:
#Loading model
from tensorflow.keras.layers import Conv2D, Input, LeakyReLU, Flatten, Dense, Reshape, Conv2DTranspose, BatchNormalization, Activation
from tensorflow.keras import Model, Sequential, models
# Resume from the checkpoint of the first 30-epoch run and show its architecture.
checkpoint_path = '../input/modeltest/Model/Model/model.h5'
model = models.load_model(checkpoint_path)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 129, 8, 1)]       0         
_________________________________________________________________
zero_padding2d (ZeroPadding2 (None, 141, 8, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 129, 1, 12)        1260      
_________________________________________________________________
activation (Activation)      (None, 129, 1, 12)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 129, 1, 12)        48        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 129, 1, 16)        2128      
_________________________________________________________________
activation_1 (Activation)    (None, 129, 1, 16)        0     

In [None]:
#Training next iteration
%%time
%load_ext tensorboard
%tensorboard --logdir logs
import datetime
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True, baseline=None)

logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, update_freq='batch')
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath='model2.h5',  monitor='val_loss', save_best_only=True)
   
train_dataset=train_dataset.apply(tf.data.experimental.ignore_errors())
                                                         
model.fit(train_dataset,
         steps_per_epoch=300,
         validation_data=val_dataset,
         epochs=30,
        callbacks=[early_stopping_callback, tensorboard_callback, checkpoint_callback]
       )

In [9]:
#Loading model
from tensorflow.keras.layers import Conv2D, Input, LeakyReLU, Flatten, Dense, Reshape, Conv2DTranspose, BatchNormalization, Activation
from tensorflow.keras import Model, Sequential, models
# Resume from the checkpoint of the second 30-epoch run and show its architecture.
checkpoint_path = '../input/modeltest/Model/Model/model2.h5'
model = models.load_model(checkpoint_path)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 129, 8, 1)]       0         
_________________________________________________________________
zero_padding2d (ZeroPadding2 (None, 141, 8, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 129, 1, 12)        1260      
_________________________________________________________________
activation (Activation)      (None, 129, 1, 12)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 129, 1, 12)        48        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 129, 1, 16)        2128      
_________________________________________________________________
activation_1 (Activation)    (None, 129, 1, 16)        0     

In [None]:
#Training next iteration
%%time
%load_ext tensorboard
%tensorboard --logdir logs
import datetime
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True, baseline=None)

logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, update_freq='batch')
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath='model3.h5',  monitor='val_loss', save_best_only=True)
   
train_dataset=train_dataset.apply(tf.data.experimental.ignore_errors())
                                                         
model.fit(train_dataset,
         steps_per_epoch=300,
         validation_data=val_dataset,
         epochs=30,
        callbacks=[early_stopping_callback, tensorboard_callback, checkpoint_callback]
       )

In [10]:
#Loading model
from tensorflow.keras.layers import Conv2D, Input, LeakyReLU, Flatten, Dense, Reshape, Conv2DTranspose, BatchNormalization, Activation
from tensorflow.keras import Model, Sequential, models
# Resume from the checkpoint of the third 30-epoch run and show its architecture.
checkpoint_path = '../input/modeltest/Model/Model/model3.h5'
model = models.load_model(checkpoint_path)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 129, 8, 1)]       0         
_________________________________________________________________
zero_padding2d (ZeroPadding2 (None, 141, 8, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 129, 1, 12)        1260      
_________________________________________________________________
activation (Activation)      (None, 129, 1, 12)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 129, 1, 12)        48        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 129, 1, 16)        2128      
_________________________________________________________________
activation_1 (Activation)    (None, 129, 1, 16)        0     

In [None]:
#Training next iteration
%%time
%load_ext tensorboard
%tensorboard --logdir logs
import datetime
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True, baseline=None)

logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, update_freq='batch')
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath='model4.h5',  monitor='val_loss', save_best_only=True)
   
train_dataset=train_dataset.apply(tf.data.experimental.ignore_errors())
                                                         
model.fit(train_dataset,
         steps_per_epoch=300,
         validation_data=val_dataset,
         epochs=30,
        callbacks=[early_stopping_callback, tensorboard_callback, checkpoint_callback]
       )

In [11]:
#Loading model
from tensorflow.keras.layers import Conv2D, Input, LeakyReLU, Flatten, Dense, Reshape, Conv2DTranspose, BatchNormalization, Activation
from tensorflow.keras import Model, Sequential, models
# Resume from the checkpoint of the fourth 30-epoch run and show its architecture.
checkpoint_path = '../input/modeltest/Model/Model/model4.h5'
model = models.load_model(checkpoint_path)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 129, 8, 1)]       0         
_________________________________________________________________
zero_padding2d (ZeroPadding2 (None, 141, 8, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 129, 1, 12)        1260      
_________________________________________________________________
activation (Activation)      (None, 129, 1, 12)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 129, 1, 12)        48        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 129, 1, 16)        2128      
_________________________________________________________________
activation_1 (Activation)    (None, 129, 1, 16)        0     

In [None]:
#Training next iteration
%%time
%load_ext tensorboard
%tensorboard --logdir logs
import datetime
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True, baseline=None)

logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, update_freq='batch')
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath='model5.h5',  monitor='val_loss', save_best_only=True)
   
train_dataset=train_dataset.apply(tf.data.experimental.ignore_errors())
                                                         
model.fit(train_dataset,
         steps_per_epoch=300,
         validation_data=val_dataset,
         epochs=30,
        callbacks=[early_stopping_callback, tensorboard_callback, checkpoint_callback]
       )

In [12]:
#Loading model
from tensorflow.keras.layers import Conv2D, Input, LeakyReLU, Flatten, Dense, Reshape, Conv2DTranspose, BatchNormalization, Activation
from tensorflow.keras import Model, Sequential, models
# Resume from the checkpoint of the fifth 30-epoch run and show its architecture.
checkpoint_path = '../input/modeltest/Model/Model/model5.h5'
model = models.load_model(checkpoint_path)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 129, 8, 1)]       0         
_________________________________________________________________
zero_padding2d (ZeroPadding2 (None, 141, 8, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 129, 1, 12)        1260      
_________________________________________________________________
activation (Activation)      (None, 129, 1, 12)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 129, 1, 12)        48        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 129, 1, 16)        2128      
_________________________________________________________________
activation_1 (Activation)    (None, 129, 1, 16)        0     

In [None]:
#Training next iteration
%%time
%load_ext tensorboard
%tensorboard --logdir logs
import datetime
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True, baseline=None)

logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, update_freq='batch')
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath='model6.h5',  monitor='val_loss', save_best_only=True)
   
train_dataset=train_dataset.apply(tf.data.experimental.ignore_errors())
                                                         
model.fit(train_dataset,
         steps_per_epoch=300,
         validation_data=val_dataset,
         epochs=30,
        callbacks=[early_stopping_callback, tensorboard_callback, checkpoint_callback]
       )

In [14]:
#Loading model
from tensorflow.keras.layers import Conv2D, Input, LeakyReLU, Flatten, Dense, Reshape, Conv2DTranspose, BatchNormalization, Activation
from tensorflow.keras import Model, Sequential, models
# Resume from the checkpoint of the sixth 30-epoch run and show its architecture.
checkpoint_path = '../input/modeltest/model6.h5'
model = models.load_model(checkpoint_path)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 129, 8, 1)]       0         
_________________________________________________________________
zero_padding2d (ZeroPadding2 (None, 141, 8, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 129, 1, 12)        1260      
_________________________________________________________________
activation (Activation)      (None, 129, 1, 12)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 129, 1, 12)        48        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 129, 1, 16)        2128      
_________________________________________________________________
activation_1 (Activation)    (None, 129, 1, 16)        0     

In [None]:
#Training next iteration
%%time
%load_ext tensorboard
%tensorboard --logdir logs
import datetime
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True, baseline=None)

logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, update_freq='batch')
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath='model7.h5',  monitor='val_loss', save_best_only=True)
   
train_dataset=train_dataset.apply(tf.data.experimental.ignore_errors())
                                                         
model.fit(train_dataset,
         steps_per_epoch=300,
         validation_data=val_dataset,
         epochs=30,
        callbacks=[early_stopping_callback, tensorboard_callback, checkpoint_callback]
       )

In [1]:
#Testing model on a random file
import librosa
import IPython.display as ipd
import librosa.display
import scipy

In [7]:
# Load the clean speech clip resampled to 16 kHz. The sample rate is passed by keyword
# because newer librosa releases no longer accept it positionally.
cleanAudio, sr = librosa.load('../input/testformodel/common_voice_en_17405975.mp3', sr=16000)
# Scale so that the peak absolute amplitude becomes 1/3.
div_fac = 1 / np.max(np.abs(cleanAudio)) / 3.0
cleanAudio = cleanAudio * div_fac
ipd.Audio(data=cleanAudio, rate=sr)
#Reading and prepping the clean audio file



In [8]:
# Load the noise clip resampled to 16 kHz. The sample rate is passed by keyword
# because newer librosa releases no longer accept it positionally.
noiseAudio, sr = librosa.load('../input/testformodel/17578-5-0-3.wav', sr=16000)
# Scale so that the peak absolute amplitude becomes 1/3.
div_fac = 1 / np.max(np.abs(noiseAudio)) / 3.0
noiseAudio = noiseAudio * div_fac
ipd.Audio(data=noiseAudio, rate=sr)
#Reading and prepping the noise file to be added

In [9]:
# If the noise clip is not strictly longer than the speech, keep doubling it until it is.
while len(noiseAudio) <= len(cleanAudio):
    noiseAudio = np.append(noiseAudio, noiseAudio)

# The mixing below used to sit inside the `if` that guarded the loop, so cleanNoisyAudio was
# never defined when the noise clip was already longer than the speech. It now always runs.

# Take a random noise segment with the same length as the speech clip.
ind = np.random.randint(0, noiseAudio.size - cleanAudio.size)
noiseSegment = noiseAudio[ind: ind + cleanAudio.size]

# Scale the noise so its energy equals the speech energy, then add it to the speech.
audiop = np.sum(cleanAudio ** 2)
noisep = np.sum(noiseSegment ** 2)
cleanNoisyAudio = cleanAudio + np.sqrt(audiop / noisep) * noiseSegment

ipd.Audio(data=cleanNoisyAudio, rate=16000)

In [10]:
# Spectrogram of the noisy audio: 256-point FFT, hop of 64 samples, periodic Hamming window.
# window='hamming' yields the same window as scipy.signal.hamming(256, sym=False), but does not
# depend on that alias, which recent SciPy releases removed from the scipy.signal namespace.
noise_stft_features = librosa.stft(cleanNoisyAudio, n_fft=256, win_length=256, hop_length=64, window='hamming', center=True)
# Keep the phase of the noisy signal; it is reused when converting predictions back to audio.
noisyPhase = np.angle(noise_stft_features)

# The model works on magnitudes only.
noise_stft_features = np.abs(noise_stft_features)

# Standardise with the global mean / std of this spectrogram; both are needed again to undo it.
mean = np.mean(noise_stft_features)
std = np.std(noise_stft_features)
noise_stft_features = (noise_stft_features - mean) / std

In [11]:
# Prepend a copy of the first 7 frames so that every original frame ends a full 8-frame window.
noisySTFT = np.concatenate([noise_stft_features[:, 0:7], noise_stft_features], axis=1)

# One (129, 8) window per original frame, stacked along the last axis.
num_windows = noisySTFT.shape[1] - 7
stftSegments = np.zeros((129, 8, num_windows))
for start, stop in zip(range(num_windows), range(8, num_windows + 8)):
    stftSegments[:, :, start] = noisySTFT[:, start:stop]
predictors = stftSegments
#Transform into the form expected as input by the model

In [12]:
# Move the window axis to the front and append a channel axis:
# (bins, frames_per_window, windows) -> (windows, bins, frames_per_window, 1), as float32.
predictors = np.moveaxis(predictors, -1, 0)[..., np.newaxis].astype(np.float32)
print('predictors.shape:', predictors.shape)
#Transform into the form expected as input by the model

predictors.shape: (1087, 129, 8, 1)


In [13]:
from tensorflow.keras.layers import Conv2D, Input, LeakyReLU, Flatten, Dense, Reshape, Conv2DTranspose, BatchNormalization, Activation
from tensorflow.keras import Model, Sequential, models
# Load the checkpoint saved after each of the seven training runs, in training order.
model1, model2, model3, model4, model5, model6, model7 = (
    models.load_model(checkpoint_path)
    for checkpoint_path in (
        '../input/modeltest/Model/Model/model.h5',
        '../input/modeltest/Model/Model/model2.h5',
        '../input/modeltest/Model/Model/model3.h5',
        '../input/modeltest/Model/Model/model4.h5',
        '../input/modeltest/Model/Model/model5.h5',
        '../input/modeltest/model6.h5',
        '../input/modeltest/model7.h5',
    )
)
#Load every saved checkpoint so that they can be compared to find the best one

In [21]:
# Run every checkpoint on the same noisy input so that their outputs can be compared.
Pred1, Pred2, Pred3, Pred4, Pred5, Pred6, Pred7 = (
    checkpoint.predict(predictors)
    for checkpoint in (model1, model2, model3, model4, model5, model6, model7)
)
#Model outputs

In [22]:
def revert_features_to_audio(features, phase, cleanMean, cleanStd):
    """Convert standardised STFT magnitudes back into a time-domain waveform.

    Args:
        features: predicted magnitudes, one row per frame, e.g. shape
            ``(frames, bins, 1, 1)``; trailing axes are flattened.
        phase: phase of the noisy STFT, shape ``(bins, frames)``.
        cleanMean: mean that was subtracted when standardising the magnitudes.
        cleanStd: standard deviation the magnitudes were divided by.

    Returns:
        The waveform produced by ``librosa.istft`` using the same window
        length (256), hop (64) and periodic Hamming window as the analysis.
    """
    # Undo the (x - mean) / std standardisation.
    magnitude = cleanStd * np.asarray(features) + cleanMean

    # (frames, bins, ...) -> (frames, bins) -> (bins, frames), matching the layout of `phase`.
    magnitude = magnitude.reshape(magnitude.shape[0], -1).T

    # Combine the predicted magnitude with the noisy phase into a complex spectrogram.
    spectrum = magnitude * np.exp(1j * phase)

    # window='hamming' is equivalent to scipy.signal.hamming(256, sym=False) without relying
    # on that alias, which recent SciPy releases removed from the scipy.signal namespace.
    return librosa.istft(spectrum, win_length=256, hop_length=64, window='hamming', center=True)
#Inverses the stft feature received from the model into audio

In [23]:
# Rebuild a waveform from checkpoint 1's prediction and the noisy phase, then play it.
Audioop1 = revert_features_to_audio(features=Pred1, phase=noisyPhase, cleanMean=mean, cleanStd=std)
ipd.Audio(Audioop1, rate=16000)
#Play audio output of model 1

In [24]:
# Rebuild a waveform from checkpoint 2's prediction and the noisy phase, then play it.
Audioop2 = revert_features_to_audio(features=Pred2, phase=noisyPhase, cleanMean=mean, cleanStd=std)
ipd.Audio(Audioop2, rate=16000)
#Play audio output of model 2


In [25]:
# Rebuild a waveform from checkpoint 3's prediction and the noisy phase, then play it.
Audioop3 = revert_features_to_audio(features=Pred3, phase=noisyPhase, cleanMean=mean, cleanStd=std)
ipd.Audio(Audioop3, rate=16000)
#Play audio output of model 3

In [26]:

# Rebuild a waveform from checkpoint 4's prediction and the noisy phase, then play it.
Audioop4 = revert_features_to_audio(features=Pred4, phase=noisyPhase, cleanMean=mean, cleanStd=std)
ipd.Audio(Audioop4, rate=16000)
#Play audio output of model 4


In [27]:
# Rebuild a waveform from checkpoint 5's prediction and the noisy phase, then play it.
Audioop5 = revert_features_to_audio(features=Pred5, phase=noisyPhase, cleanMean=mean, cleanStd=std)
ipd.Audio(Audioop5, rate=16000)
#Play audio output of model 5

In [28]:
# Rebuild a waveform from checkpoint 6's prediction and the noisy phase, then play it.
Audioop6 = revert_features_to_audio(features=Pred6, phase=noisyPhase, cleanMean=mean, cleanStd=std)
ipd.Audio(Audioop6, rate=16000)
#Play audio output of model 6

In [29]:
# Rebuild a waveform from checkpoint 7's prediction and the noisy phase, then play it.
Audioop7 = revert_features_to_audio(features=Pred7, phase=noisyPhase, cleanMean=mean, cleanStd=std)
ipd.Audio(Audioop7, rate=16000)
#Play audio output of model 7