In [None]:
import numpy as np
import librosa
import os
import glob
import gc
import tensorflow as tf
from tensorflow import keras
import json

from google.colab import drive
# Mount Google Drive inside the Colab VM; every project path used below is
# built by concatenating onto driveLoc (note the trailing slash).
drive.mount('/content/gdrive')
driveLoc='/content/gdrive/My Drive/MLSP Project/'

Mounted at /content/gdrive


In [None]:
# Root folder of the CQCC feature files; the split lists and the
# normalisation statistics are read from / written to this folder as well.
data_loc = driveLoc + 'CQCC/'

# Loop bounds used when globbing 'train/<label>_<n>/' and 'dev/<label>_<n>/'
# while the data-set split is created.
training_bonafide_folders, training_spoof_folders = 6, 46
validation_bonafide_folders, validation_spoof_folders = 6, 45

# NOTE(review): not referenced by any visible cell; presumably the number of
# feature files stored per folder - confirm before relying on it.
samples_per_folder = 500

### Training Hyperparameters

In [None]:
# Sizes of the in-memory training / validation buffers filled for each
# training process.
training_samples, validation_samples = 4000, 400

# Fraction of each buffer that is labelled bonafide.
ratio_of_bonafide_samples = 0.5

# The spoof training list is cycled `data_iterations` times in total, and
# every filled buffer is trained on for `epochs_per_iteration` epochs.
data_iterations, epochs_per_iteration = 10, 10

size_of_batch = 128
alpha = 1e-3  # learning rate handed to Adam in CreateModel

In [None]:
# Per-coefficient normalisation statistics, stored as comma-separated text.
# Their common 2-D shape (dim1, dim2) also fixes the shape of every feature
# buffer allocated later in the notebook.
training_data_mean, training_data_stddev = (
    np.loadtxt(data_loc + stats_file, delimiter=',')
    for stats_file in ('train_mean_CQCC.txt', 'train_stddev_CQCC.txt')
)
dim1, dim2 = training_data_mean.shape
print(dim1, dim2)

def GenerateFeatures(filename):
    """Load one comma-separated feature file and standardise it.

    Parameters
    ----------
    filename : str
        Path of a text file readable by ``np.loadtxt`` with ``','`` as the
        delimiter.

    Returns
    -------
    numpy.ndarray
        ``(features - training_data_mean) / training_data_stddev``, using the
        module-level statistics; the file contents must therefore broadcast
        against those two arrays.
    """
    raw = np.loadtxt(filename, delimiter=',')
    centered = raw - training_data_mean
    return centered / training_data_stddev

90 469


### Preprocessing

Creating Bonafide Training Data

In [None]:
# The bonafide training list is small enough to be normalised once and kept
# in memory as a single float32 tensor of shape (n_files, dim1, dim2).
with open(data_loc + "bonafide_train_set_files.txt", "r") as fp:
    bonafide_train_files = json.load(fp)

n_bonafide_train = len(bonafide_train_files)
bonafide_train_data = np.zeros((n_bonafide_train, dim1, dim2), dtype = np.float32)

for n_done, feature_path in enumerate(bonafide_train_files, start=1):
    bonafide_train_data[n_done - 1, :, :] = GenerateFeatures(feature_path)
    # Progress report every 500 files.
    if n_done % 500 == 0:
        print('Generated {} bonafide features...'.format(n_done))

# The path list is no longer needed once the tensor is filled.
del bonafide_train_files
gc.collect()

Generated 500 bonafide features...
Generated 1000 bonafide features...
Generated 1500 bonafide features...
Generated 2000 bonafide features...
Generated 2500 bonafide features...
Generated 3000 bonafide features...
Generated 3500 bonafide features...
Generated 4000 bonafide features...


152

In [None]:
def _load_file_list(list_name):
    """Return the JSON-encoded list of feature-file paths stored in data_loc."""
    with open(data_loc + list_name, "r") as fp:
        return json.load(fp)

# Only the paths are loaded here; these features are normalised on the fly
# while the training buffers are filled.
spoof_train_files = _load_file_list("spoof_train_set_files.txt")
bonafide_valid_files = _load_file_list("bonafide_valid_set_files.txt")
spoof_valid_files = _load_file_list("spoof_valid_set_files.txt")

### Model Creation and Loading

In [None]:
# Show the backend's image data format; img_shape below places the single
# channel last, after the (dim1, dim2) feature map.
print(keras.backend.image_data_format())
img_shape = (dim1, dim2, 1)

channels_last


In [None]:
def CreateModel(dropout_rate, kernel_no):
    """Build and compile the CNN that classifies a feature map into 2 classes.

    Parameters
    ----------
    dropout_rate : float
        Rate used by both Dropout layers of the classification head.
    kernel_no : int
        Number of filters of every Conv2D layer.

    Returns
    -------
    keras.models.Sequential
        Model compiled with Adam, sparse categorical cross-entropy and an
        accuracy metric. The learning rate is the module-level ``alpha`` and
        the input shape is the module-level ``img_shape``, both read when
        this function is called.
    """
    layers = keras.layers
    model = keras.models.Sequential()
    optimizer = keras.optimizers.Adam(learning_rate=alpha)

    def add_conv_bn(kernel_size, **conv_options):
        # Every convolution is ReLU-activated and followed by batch norm.
        model.add(layers.Conv2D(kernel_no, kernel_size=kernel_size,
                                activation='relu', **conv_options))
        model.add(layers.BatchNormalization())

    # Input-processing block: the only strided, 'same'-padded convolution.
    add_conv_bn((3, 3), input_shape=img_shape, strides=2, padding='same')

    # Three convolutional blocks; they differ only in the kernel size of
    # their first convolution.
    for leading_kernel in ((4, 4), (4, 4), (2, 2)):
        add_conv_bn(leading_kernel)
        add_conv_bn((3, 3))
        model.add(layers.MaxPool2D(pool_size=(2, 2), strides=2))

    model.add(layers.Flatten())

    # Classification head. The first Dense(64) and the Dense(32) carry no
    # activation, exactly as in the original architecture.
    for head_layer in (
        layers.Dropout(dropout_rate),
        layers.Dense(64),
        layers.Dense(64, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(dropout_rate),
        layers.Dense(32),
        layers.Dense(2, activation='softmax'),  # output layer
    ):
        model.add(head_layer)

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model

In [None]:
# Small grid: five dropout rates crossed with two filter counts.
# model1..model5 use 4 filters, model6..model10 use 8 filters.
dropout_rates = (0.1, 0.15, 0.2, 0.25, 0.3)
model1, model2, model3, model4, model5 = [
    CreateModel(rate, 4) for rate in dropout_rates
]
model6, model7, model8, model9, model10 = [
    CreateModel(rate, 8) for rate in dropout_rates
]

In [None]:
# Layer-by-layer summary of a model built with 4 filters (model1).
model1.summary()

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_78 (Conv2D)           (None, 45, 235, 4)        40        
_________________________________________________________________
batch_normalization_87 (Batc (None, 45, 235, 4)        16        
_________________________________________________________________
conv2d_79 (Conv2D)           (None, 42, 232, 4)        260       
_________________________________________________________________
batch_normalization_88 (Batc (None, 42, 232, 4)        16        
_________________________________________________________________
conv2d_80 (Conv2D)           (None, 40, 230, 4)        148       
_________________________________________________________________
batch_normalization_89 (Batc (None, 40, 230, 4)        16        
_________________________________________________________________
max_pooling2d_33 (MaxPooling (None, 20, 115, 4)      

In [None]:
# Layer-by-layer summary of a model built with 8 filters (model6).
model6.summary()

In [None]:
# Print the current optimizer learning rate of every model, in model order.
for compiled_model in (model1, model2, model3, model4, model5,
                       model6, model7, model8, model9, model10):
    print(keras.backend.eval(compiled_model.optimizer.lr))

0.001
0.001
0.001
0.001
0.001
0.001
0.001
0.001
0.001
0.001


In [None]:
# Replace the freshly built models with checkpoint number 3 of each run.
# Model N is read from 'Models/21_juneNN/3' (NN zero-padded to two digits).
(model1, model2, model3, model4, model5,
 model6, model7, model8, model9, model10) = [
    keras.models.load_model(driveLoc + 'Models/21_june{:02d}/3'.format(model_no))
    for model_no in range(1, 11)
]

### Training

In [None]:
# Training driver.
#
# Spoof training files are streamed from disk in chunks of `training_fill`
# files. Each time a chunk is complete ("one training process"):
#   1. the second half of the training buffer is filled with the next
#      `training_fill` bonafide samples (cycling through bonafide_train_data),
#   2. the validation buffer is refilled (first half bonafide, second half
#      spoof, both lists cycled with the shared cursor `k`),
#   3. all ten models are fitted on exactly the same buffers,
#   4. checkpoints are written every 10th process and the learning rate is
#      halved after every 20th process from process 40 onwards.

# Position in this list (1-based) selects the 'Models/21_juneNN/' folder.
models = [model1, model2, model3, model4, model5,
          model6, model7, model8, model9, model10]

training_data = np.zeros((training_samples, dim1, dim2, 1), dtype = np.float32)
validation_data = np.zeros((validation_samples, dim1, dim2, 1), dtype = np.float32)

training_fill = training_samples//2
validation_fill = validation_samples//2
len_spoof_train_file = len(spoof_train_files)
len_bonafide_valid_file = len(bonafide_valid_files)
len_spoof_valid_file = len(spoof_valid_files)
bon_train_samples = np.shape(bonafide_train_data)[0]
training_bonafide_number = int(ratio_of_bonafide_samples*training_samples)
validation_bonafide_number = int(ratio_of_bonafide_samples*validation_samples)

# The buffers are always filled half spoof / half bonafide, whereas the label
# vectors are sized from ratio_of_bonafide_samples. Fail loudly if the two
# layouts disagree instead of training on mislabelled samples.
assert training_bonafide_number == training_fill == training_samples - training_fill, \
    "training labels do not match the half/half layout of training_data"
assert validation_bonafide_number == validation_fill == validation_samples - validation_fill, \
    "validation labels do not match the half/half layout of validation_data"

bonafide_label = 0
spoof_label = 1 - bonafide_label

# The label layout never changes between processes, so build it once:
# training buffer = [spoof | bonafide], validation buffer = [bonafide | spoof].
training_labels = np.append(spoof_label*np.ones(training_samples-training_bonafide_number),
                            bonafide_label*np.ones(training_bonafide_number))
validation_labels = np.append(bonafide_label*np.ones(validation_bonafide_number),
                              spoof_label*np.ones(validation_samples-validation_bonafide_number))

j = 0  # cursor into bonafide_train_data
k = 0  # shared cursor into both validation file lists

# Resume state. `count` processes are treated as already completed and `i` is
# the index of the FIRST spoof file consumed by this run.
count = 30
i = 20000
first_spoof_index = i

checkpoint_every = 10       # save all models every N-th process
lr_halving_every = 20       # halve the step size every N-th process ...
reduced_epochs_after = 40   # ... and train shorter once this many are done
reduced_epochs = 5

print('Generating Data for training process no. {}...'.format(count+1))
for i in range(first_spoof_index, data_iterations*len_spoof_train_file):
    # Slots are counted from the start of this run so that the first chunk is
    # always completely filled, whatever the resume index is. (The former
    # increment-before-use loop left slot 0 all-zero in the first chunk.)
    slot = (i - first_spoof_index) % training_fill
    training_data[slot,:,:,0] = GenerateFeatures(spoof_train_files[i%len_spoof_train_file])
    if slot != training_fill - 1:
        continue

    print("Last data point was generated with index {}".format(i))
    for p in range(training_fill):
        training_data[p+training_fill,:,:,0] = bonafide_train_data[(j+p)%bon_train_samples, :, :]
    j += training_fill

    for p in range(validation_fill):
        validation_data[p,:,:,0] = GenerateFeatures(bonafide_valid_files[k%len_bonafide_valid_file])
        validation_data[p+validation_fill,:,:,0] = GenerateFeatures(spoof_valid_files[k%len_spoof_valid_file])
        k += 1

    process_no = count + 1
    # Derived from the process number rather than by overwriting the global
    # epochs_per_iteration, so a run resumed past process 40 keeps the
    # shorter schedule and the configuration cell is never clobbered.
    if process_no <= reduced_epochs_after:
        epochs = epochs_per_iteration
    else:
        epochs = reduced_epochs

    for model_no, model in enumerate(models, start=1):
        print("Training Model {:02d}...".format(model_no))
        model.fit(x = training_data, y = training_labels, batch_size = size_of_batch, epochs = epochs,
                  validation_data = (validation_data, validation_labels), shuffle = True)

    if process_no % checkpoint_every == 0:
        for model_no, model in enumerate(models, start=1):
            model.save(driveLoc + 'Models/21_june{:02d}/{}'.format(model_no, process_no//checkpoint_every))

    if count > 20 and process_no % lr_halving_every == 0:
        # All models share one schedule; the first model's rate is the
        # reference. Kept in a local so the global hyper-parameter `alpha`
        # is not overwritten.
        current_lr = keras.backend.eval(models[0].optimizer.learning_rate)
        print('Updating step size to {}...'.format(current_lr/2))
        for model in models:
            keras.backend.set_value(model.optimizer.learning_rate, current_lr/2)

    count += 1
    print('Generating Data for training process no. {}...'.format(count+1))
    gc.collect()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training Model 10...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/MLSP Project/Models/21_june01/9/assets
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/MLSP Project/Models/21_june02/9/assets
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/MLSP Project/Models/21_june03/9/assets
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/MLSP Project/Models/21_june04/9/assets
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/MLSP Project/Models/21_june05/9/assets
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/MLSP Project/Models/21_june06/9/assets
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/MLSP Project/Models/21_june07/9/assets
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/MLSP Project/Models/21_june08/9/assets
INFO:tensorflow:Assets written t

### Creating Data Set

In [None]:
def _glob_feature_files(split, label, folder_count):
    """List '*.txt' files in '<split>/<label>_<n>/' for n = 0..folder_count."""
    matches = []
    for folder_no in range(folder_count + 1):
        matches.extend(glob.glob(data_loc + split + '/' + label + '_' + str(folder_no) + '/*.txt'))
    return matches

# Files from the 'train' and 'dev' folders are pooled per class.
bonafide_files = []
spoof_files = []

print('loading bonafide training file names...')
bonafide_files += _glob_feature_files('train', 'bonafide', training_bonafide_folders)

print('loading spoof training file names...')
spoof_files += _glob_feature_files('train', 'spoof', training_spoof_folders)

print('loading validation bonafide file names...')
bonafide_files += _glob_feature_files('dev', 'bonafide', validation_bonafide_folders)

print('loading validation spoof file names...')
spoof_files += _glob_feature_files('dev', 'spoof', validation_spoof_folders)

loading bonafide training file names...
loading spoof training file names...
loading validation bonafide file names...
loading validation spoof file names...


In [None]:
import random

# The split written here is what the loading cells read back, so it has to be
# reproducible: glob returns files in a filesystem-dependent order, therefore
# sort first and shuffle with a fixed seed. Re-running this cell on the same
# files then rewrites the same split.
# WARNING: the split files are overwritten. Models trained on an earlier
# (unseeded) split may have seen files that land in the new valid/test sets.
SPLIT_SEED = 0
BONAFIDE_HELD_OUT = 500   # bonafide files in the test set and in the valid set
SPOOF_HELD_OUT = 4400     # spoof files in the test set and in the valid set

bonafide_files.sort()
spoof_files.sort()
split_rng = random.Random(SPLIT_SEED)
split_rng.shuffle(bonafide_files)
split_rng.shuffle(spoof_files)

# Test and validation sets are cut first; training gets the remainder, which
# must not be empty.
assert len(bonafide_files) > 2*BONAFIDE_HELD_OUT, "not enough bonafide files for a training set"
assert len(spoof_files) > 2*SPOOF_HELD_OUT, "not enough spoof files for a training set"


def _dump_file_list(list_name, file_list):
    """Write `file_list` as JSON to `list_name` inside data_loc."""
    with open(data_loc + list_name, "w") as fp:
        json.dump(file_list, fp)


bonafide_test_set = bonafide_files[0:BONAFIDE_HELD_OUT]
spoof_test_set = spoof_files[0:SPOOF_HELD_OUT]
_dump_file_list("bonafide_test_set_files.txt", bonafide_test_set)
_dump_file_list("spoof_test_set_files.txt", spoof_test_set)

bonafide_valid_set = bonafide_files[BONAFIDE_HELD_OUT:2*BONAFIDE_HELD_OUT]
spoof_valid_set = spoof_files[SPOOF_HELD_OUT:2*SPOOF_HELD_OUT]
_dump_file_list("bonafide_valid_set_files.txt", bonafide_valid_set)
_dump_file_list("spoof_valid_set_files.txt", spoof_valid_set)

bonafide_train_set = bonafide_files[2*BONAFIDE_HELD_OUT:]
spoof_train_set = spoof_files[2*SPOOF_HELD_OUT:]
_dump_file_list("bonafide_train_set_files.txt", bonafide_train_set)
_dump_file_list("spoof_train_set_files.txt", spoof_train_set)

### Extracting files to exploit caching

In [None]:
# Print one ready-to-paste Colab shell command per spoof training archive
# (indices 0..45); nothing is extracted by this cell itself.
unzip_command = "!unzip -o '/content/gdrive/My Drive/MLSP Project/CQCC/train/spoof_{}.zip' -d '/content/gdrive/My Drive/MLSP Project/CQCC/train/'"
for archive_no in range(46):
    print(unzip_command.format(archive_no))