In [None]:
import numpy as np
import librosa
import os
import glob
import gc
import tensorflow as tf
from tensorflow import keras
import json

from google.colab import drive
# Mount Google Drive inside the Colab VM so the project folder can be read and written.
drive.mount('/content/gdrive')
# Project root on Drive; data and model checkpoint paths in this notebook are built by appending to it.
driveLoc='/content/gdrive/My Drive/MLSP Project/'

Mounted at /content/gdrive


### Model parameters and important functions

In [None]:
# Number of feature maps held in the training buffer for one training round.
training_samples = 4000
# Number of feature maps held in the validation buffer for one training round.
validation_samples = 400
# Fraction of each buffer's labels that are marked bonafide (used to size the label arrays).
ratio_of_bonafide_samples = 0.5
# The training loop stops once its index exceeds data_iterations * (number of spoof training files).
data_iterations = 10
# Epochs passed to every model.fit call.
epochs_per_iteration = 5
# Mini-batch size passed to every model.fit call.
size_of_batch = 128
# Learning rate. Only referenced by commented-out code here; the training cell rebinds it
# from model1's optimizer when the step size is halved.
alpha = 0.001 #learning_rate

In [None]:
# Class label for genuine speech; the training cell labels spoofed speech as 1 - bonafide_label.
bonafide_label = 0 #not-fake

# GenerateFeatures pads/crops every signal to time_duration * desired_sampling_rate samples.
desired_sampling_rate = 16000 # in Hz
time_duration = 4 # in seconds

# STFT window length and hop, given in milliseconds here and converted to samples at the end of this cell.
window_size = 108 #in ms
window_shift = 10 #in ms
# Not referenced by the visible cells: GenerateFeatures passes window_size
# (1728 samples after the conversion below) as n_fft.
pointFFT = 1728

# Folder holding the file lists and the normalisation statistics.
data_loc = driveLoc + 'main/'

# NOTE(review): presumably the number of files in each dataset split; none of these
# four values is read by the visible cells -- confirm before relying on them.
training_bonafide_samples = 2580
training_spoof_samples = 22786

validation_bonafide_samples = 2548
validation_spoof_samples = 22284

# Not referenced by the visible cells.
samples_per_folder = 500

# Rebind the two window settings from milliseconds to sample counts
# (108 ms -> 1728 samples, 10 ms -> 160 samples at 16 kHz).
window_size = int(window_size*desired_sampling_rate/1000) # converting to samples
window_shift = int(window_shift*desired_sampling_rate/1000) # converting to samples

In [None]:
# Element-wise normalisation statistics, stored as text files under data_loc.
mean_path = data_loc+'training_data_mean.txt'
stddev_path = data_loc+'training_data_stddev.txt'
training_data_mean = np.loadtxt(mean_path)
training_data_stddev = np.loadtxt(stddev_path)

# The feature-map dimensions used throughout the notebook come from the mean array's shape.
dim1, dim2 = training_data_mean.shape
print(dim1, dim2)

def GenerateFeatures(filename):
    """Return the normalised log-magnitude spectrogram of one audio file.

    The audio is decoded at ``desired_sampling_rate``, repeated end-to-end until it
    is at least ``time_duration`` seconds long, cropped to exactly that length,
    transformed with an STFT (``n_fft = window_size``, ``hop_length = window_shift``)
    and standardised element-wise with ``training_data_mean`` and
    ``training_data_stddev``.

    Parameters
    ----------
    filename : str
        Path of the audio file, handed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        ``(log|STFT| - training_data_mean) / training_data_stddev``.

    Raises
    ------
    ValueError
        If the file decodes to zero samples (it cannot be looped to the target length).
    """
    target_length = time_duration*desired_sampling_rate

    # Decode at the rate the crop length and the STFT window sizes are expressed in.
    # With sr=None a file stored at another rate would be cropped to the wrong duration.
    signal, _ = librosa.load(filename, sr = desired_sampling_rate)

    current_length = np.shape(signal)[0]
    if current_length == 0:
        # Repeating an empty signal never reaches the target length.
        raise ValueError('No audio samples could be read from {}'.format(filename))

    if current_length < target_length:
        # Loop the clip often enough to cover the target length (ceil division);
        # after cropping this equals the result of repeated doubling.
        repeats = -(-target_length // current_length)
        signal = np.tile(signal, repeats)
    signal = signal[:target_length]

    # NOTE: an STFT magnitude of exactly zero yields -inf here.
    spectrogram = np.log(np.abs(librosa.stft(signal, n_fft = window_size, hop_length = window_shift)))
    return (spectrogram-training_data_mean)/training_data_stddev

865 401


### Pre-processing

Creating Bonafide Training Data

In [None]:
# Read the JSON file that lists the bonafide training files.
with open(data_loc + "bonafide_train_set_files.txt", "r") as fp:
    bonafide_train_files = json.load(fp)

# One float32 feature map per listed file, filled in below.
feature_shape = (len(bonafide_train_files), dim1, dim2)
bonafide_train_data = np.zeros(feature_shape, dtype = np.float32)

for file_index, file_path in enumerate(bonafide_train_files):
    bonafide_train_data[file_index,:,:] = GenerateFeatures(file_path)
    # Progress message after every thousandth file.
    if (file_index+1)%1000 == 0:
        print('Generated {} bonafide features...'.format(file_index+1))

# The path list is no longer needed once the features are in memory.
del bonafide_train_files
gc.collect()

Generated 1000 bonafide features...
Generated 2000 bonafide features...
Generated 3000 bonafide features...
Generated 4000 bonafide features...


11

Loading other file lists

In [None]:
def _read_file_list(list_name):
    """Return the JSON content of the file ``list_name`` located under ``data_loc``."""
    with open(data_loc + list_name, "r") as fp:
        return json.load(fp)

# Spoof training files plus both validation splits; their features are generated during training.
spoof_train_files = _read_file_list("spoof_train_set_files.txt")
bonafide_valid_files = _read_file_list("bonafide_valid_set_files.txt")
spoof_valid_files = _read_file_list("spoof_valid_set_files.txt")

### Model

In [None]:
# Each feature map is treated as a single-channel image with the channel axis last.
img_shape = (dim1, dim2, 1)
# Show the channel-axis convention Keras is configured with.
print(keras.backend.image_data_format())

channels_last


In [None]:
# NOTE: this architecture definition is disabled (every line is commented out).
# The training section restores its models with keras.models.load_model instead,
# so nothing in this cell runs. It is kept as a record of the layer stack.
# def CreateModel(dropout_rate, kernel_no):
#     model = keras.models.Sequential()
#     # initializer = tf.keras.initializers.GlorotUniform()
#     opt = keras.optimizers.Adam(learning_rate=alpha)

#     # input-processing block
#     model.add(keras.layers.Conv2D(kernel_no, kernel_size=(5, 5), input_shape=img_shape, 
#                                 activation='relu', strides = 2, padding = 'same'))
#     model.add(keras.layers.BatchNormalization())
#     model.add(keras.layers.MaxPool2D(pool_size=(2, 2), strides=2))

#     # convolutional-block (1)
#     model.add(keras.layers.Conv2D(kernel_no, kernel_size=(1, 1), activation='relu'))
#     model.add(keras.layers.BatchNormalization())
#     model.add(keras.layers.Conv2D(kernel_no, kernel_size=(3, 3), activation='relu'))
#     model.add(keras.layers.BatchNormalization())
#     model.add(keras.layers.MaxPool2D(pool_size=(2, 2), strides=2))

#     # convolutional-block (2)
#     model.add(keras.layers.Conv2D(kernel_no, kernel_size=(1, 1), activation='relu'))
#     model.add(keras.layers.BatchNormalization())
#     model.add(keras.layers.Conv2D(kernel_no, kernel_size=(3, 3), activation='relu'))
#     model.add(keras.layers.BatchNormalization())
#     model.add(keras.layers.MaxPool2D(pool_size=(2, 2), strides=2))

#     # convolutional-block (3)
#     model.add(keras.layers.Conv2D(kernel_no, kernel_size=(1, 1), activation='relu'))
#     model.add(keras.layers.BatchNormalization())
#     model.add(keras.layers.Conv2D(kernel_no, kernel_size=(3, 3), activation='relu'))
#     model.add(keras.layers.BatchNormalization())
#     model.add(keras.layers.MaxPool2D(pool_size=(2, 2), strides=2))

#     # convolutional-block (4)
#     model.add(keras.layers.Conv2D(kernel_no, kernel_size=(1, 1), activation='relu'))
#     model.add(keras.layers.BatchNormalization())
#     model.add(keras.layers.Conv2D(kernel_no, kernel_size=(3, 3), activation='relu'))
#     model.add(keras.layers.BatchNormalization())
#     model.add(keras.layers.MaxPool2D(pool_size=(2, 2), strides=2))

#     model.add( keras.layers.Flatten() )

#     # classification-block
#     model.add(keras.layers.Dropout(dropout_rate))
#     model.add( keras.layers.Dense(64))
#     model.add( keras.layers.Dense(64, activation='relu'))
#     model.add(keras.layers.BatchNormalization())
#     model.add(keras.layers.Dropout(dropout_rate))
#     model.add( keras.layers.Dense(32))

#     # output layer
#     model.add( keras.layers.Dense(2, activation='softmax') )

#     model.compile(loss='sparse_categorical_crossentropy',
#                   optimizer=opt,
#                   metrics=['accuracy'])
#     return model

In [None]:
# NOTE: construction of fresh models is disabled. The arguments are
# (dropout_rate, kernel_no): five dropout rates for each of 4 and 8 kernels.
# model1 = CreateModel(0.1, 4)
# model2 = CreateModel(0.15, 4)
# model3 = CreateModel(0.2, 4)
# model4 = CreateModel(0.25, 4)
# model5 = CreateModel(0.3, 4)
# model6 = CreateModel(0.1, 8)
# model7 = CreateModel(0.15, 8)
# model8 = CreateModel(0.2, 8)
# model9 = CreateModel(0.25, 8)
# model10 = CreateModel(0.3, 8)

### Training

In [None]:
# Checkpoint folders (relative to driveLoc) from which the ten models are restored, in model order.
checkpoint_dirs = (
    'Models/17_june01/13',
    'Models/17_june02/13',
    'Models/17_june03/13',
    'Models/17_june04/13',
    'Models/17_june05/13',
    'Models/17_june06/13',
    'Models/17_june07/13',
    'Models/17_june08/13',
    'Models/17_june09/13',
    'Models/17_june10/13',
)
restored_models = [keras.models.load_model(driveLoc+checkpoint) for checkpoint in checkpoint_dirs]

# The training cell refers to the models by these individual names.
(model1, model2, model3, model4, model5,
 model6, model7, model8, model9, model10) = restored_models

# Optional learning-rate override, left disabled:
# for restored in restored_models:
#     keras.backend.set_value(restored.optimizer.learning_rate, alpha/2)

# Report the learning rate carried by each restored optimizer.
for restored in restored_models:
    print(keras.backend.eval(restored.optimizer.lr))

0.000125
0.000125
0.000125
0.000125
0.000125
0.000125
0.000125
0.000125
0.000125
0.000125


### Training Happens Below

In [None]:
# Buffers reused by every training round: spoof rows first in training_data,
# bonafide rows first in validation_data.
training_data = np.zeros((training_samples, dim1, dim2, 1), dtype = np.float32)
validation_data = np.zeros((validation_samples, dim1, dim2, 1), dtype = np.float32)

len_spoof_train_file = len(spoof_train_files)
len_bonafide_valid_file = len(bonafide_valid_files)
len_spoof_valid_file = len(spoof_valid_files)
bon_train_samples = np.shape(bonafide_train_data)[0]
training_bonafide_number = int(ratio_of_bonafide_samples*training_samples)
validation_bonafide_number = int(ratio_of_bonafide_samples*validation_samples)

# Split points are derived from the same bonafide counts as the labels, so data rows
# and labels stay aligned for any ratio_of_bonafide_samples (a fixed samples//2 split
# only matches the labels when the ratio is exactly 0.5).
training_fill = training_samples - training_bonafide_number        # spoof rows per training round
validation_fill = validation_bonafide_number                       # bonafide rows per validation set
validation_spoof_number = validation_samples - validation_bonafide_number
if training_fill < 1:
    raise ValueError('ratio_of_bonafide_samples leaves no room for spoof training samples')

# Labels depend only on the counts above, so they are built once instead of every round.
training_labels = np.append((1-bonafide_label)*np.ones(training_fill),
                            bonafide_label*np.ones(training_bonafide_number))
validation_labels = np.append(bonafide_label*np.ones(validation_bonafide_number),
                              (1-bonafide_label)*np.ones(validation_spoof_number))

# All ten models are trained, saved and re-tuned identically.
models = [model1, model2, model3, model4, model5,
          model6, model7, model8, model9, model10]

# Hard-coded resume state.
# NOTE(review): presumably copied from the previous session's progress -- confirm before re-running.
j = 1333      # read offset into bonafide_train_data
k = 233       # read offset into the validation file lists

count = 130   # training rounds completed so far
i = 39999     # index of the last spoof training sample already processed
print('Generating Data for training process no. {}...'.format(count+1))
while  i <  data_iterations*len_spoof_train_file:
    i+=1
    training_data[i%training_fill,:,:,0] = GenerateFeatures(spoof_train_files[i%len_spoof_train_file])
    if (i+1)%training_fill != 0:
        continue  # spoof part of the buffer is not full yet

    print("Last data point was generated with index {}".format(i))

    # Fill the rest of the training buffer with pre-computed bonafide features, cycling through them.
    for p in range(training_bonafide_number):
        training_data[training_fill+p,:,:,0] = bonafide_train_data[(j+p)%bon_train_samples, :, :]
    j += training_bonafide_number

    # Fresh validation features for this round: bonafide rows first, then spoof rows.
    for p in range(validation_bonafide_number):
        validation_data[p,:,:,0] = GenerateFeatures(bonafide_valid_files[(k+p)%len_bonafide_valid_file])
    for p in range(validation_spoof_number):
        validation_data[validation_bonafide_number+p,:,:,0] = GenerateFeatures(spoof_valid_files[(k+p)%len_spoof_valid_file])
    k += max(validation_bonafide_number, validation_spoof_number)

    for model_number, net in enumerate(models, start=1):
        print("Training Model {:02d}...".format(model_number))
        net.fit(x = training_data, y = training_labels, batch_size = size_of_batch, epochs = epochs_per_iteration,
                validation_data = (validation_data, validation_labels), shuffle = True)

    # Checkpoint every 10th round into each model's own folder.
    if (count+1)%10 == 0:
        for model_number, net in enumerate(models, start=1):
            net.save(driveLoc+'Models/17_june{:02d}/'.format(model_number)+str((count+1)//10))

    # Halve the learning rate of every model every 20th round, using model1's current rate as reference.
    if (count+1)%20 == 0:
        alpha = keras.backend.eval(model1.optimizer.lr)
        print('Updating step size to {}...'.format(alpha/2))
        for net in models:
            keras.backend.set_value(net.optimizer.learning_rate, alpha/2)

    count+=1
    print('Generating Data for training process no. {}...'.format(count+1))

    gc.collect()

Generating Data for training process no. 131...
Last data point was generated with index 41999
Training Model 01...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training Model 02...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training Model 03...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training Model 04...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training Model 05...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training Model 06...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training Model 07...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training Model 08...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training Model 09...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training Model 10...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Generating Data for training process no. 132...
Last data point was generated with index 43999
Training Model 01...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training Model 02...
Epoch 1/

In [None]:
# Drop both feature buffers, then run a garbage-collection pass.
del training_data, validation_data
gc.collect()

190