STAGE 1 DATA PREPROCESSING

In [None]:
#GET THE DATA FIRST

In [None]:
import tensorflow as tf
import librosa as lr
import numpy as np
import matplotlib.pyplot as plt
import json
import random
import os
from IPython.display import display, Audio
from sklearn.model_selection import train_test_split

In [None]:
# Rebuild the Keras tokenizer from the JSON stored in ../util/tokenizer.txt.
# Only the file read needs the open handle; the tokenizer is built afterwards.
with open('../util/tokenizer.txt','r') as infile:
    data = json.load(infile)
tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(data)

# Text-branch settings: number of entries in the tokenizer's word index and
# the value later passed as `input_length` to the Embedding layer.
VOCAB_SIZE = len(tokenizer.word_index)
TEXT_PADDING = 8

# Audio windowing settings, both expressed in milliseconds.
WINDOW_SIZE = 1000
SLIDE_STRIDE = 300

# Training hyper-parameters.
LEARNING_RATE = 0.0001
BATCH_SIZE = 64
EPOCHS = 3

In [None]:
#EXTRACT INPUTS AND TARGETS

In [None]:
# Read the four pre-computed arrays (MFCC features, transcripts, clip
# identifiers and labels) in one pass; unpacking drives the lazy map in order.
X_mfccs, X_transcripts, clips, Y = map(np.load, (
    '../util/X_mfccs_error_4000.npy',
    '../util/X_transcripts_error_4000.npy',
    '../util/X_clips_error_4000.npy',
    '../util/labels_error_4000.npy',
))
print(X_mfccs.shape)

In [None]:
#CREATE TRAIN AND TEST SPLITS

In [None]:
# Hold out 10% of the examples for testing. All four arrays are split with the
# same shuffle so features, transcripts, clips and labels stay aligned.
# A fixed random_state makes the split (and therefore the evaluation and the
# saved false-positive / false-negative files) reproducible across runs.
SPLIT_SEED = 42
(X_train_mfcc, X_test_mfcc,
 X_train_transcript, X_test_transcript,
 clips_train, clips_test,
 y_train, y_test) = train_test_split(
    X_mfccs, X_transcripts, clips, Y,
    test_size=0.1,
    random_state=SPLIT_SEED,
)
print(X_train_mfcc.shape)
print(X_train_transcript.shape)
print(clips_train.shape)
print(y_train.shape)

In [None]:
# Standardize the MFCC features using statistics computed over the training
# examples only (axis 0), then apply the same shift/scale to the test set.
mu = np.mean(X_train_mfcc,axis=0)
std_dev = np.std(X_train_mfcc,axis=0)
# A feature that is constant across the training set has zero deviation;
# dividing by it would yield NaN/inf. Scale such features by 1 instead so
# they simply become 0 after centering.
std_dev = np.where(std_dev == 0, np.ones_like(std_dev), std_dev)
X_train_mfcc -= mu
X_test_mfcc -= mu
X_train_mfcc /= std_dev
X_test_mfcc /= std_dev
print(X_train_mfcc.shape)

In [None]:
# Append a trailing singleton axis to both splits (used as the channel
# dimension by the Conv2D layers built below).
X_train_mfcc = np.expand_dims(X_train_mfcc, axis=-1)
X_test_mfcc = np.expand_dims(X_test_mfcc, axis=-1)

In [None]:
# Sanity check of the per-example and full training shapes. Only the last
# expression of a cell is displayed, so the first one is printed explicitly.
print(X_train_mfcc[0].shape)
X_train_mfcc.shape

STAGE 2 BUILDING AND COMPILING THE MODEL

In [None]:
#BUILD THE MODEL

In [None]:
# MFCC (signal) branch: three Conv2D -> BatchNorm -> MaxPool stages followed
# by two Dense+Dropout layers. The branch output is `drop2`.
# The input shape is declared once on the Input layer; Conv2D does not need an
# `input_shape` argument when it is called on an existing tensor.
mfcc_input = tf.keras.layers.Input(shape=X_train_mfcc[0].shape, name='mfcc_input')

#conv layer 1
conv1 = tf.keras.layers.Conv2D(128,(3,3),
                                 activation='relu',
                                 kernel_regularizer=tf.keras.regularizers.l2(0.001))(mfcc_input)
norm1 = tf.keras.layers.BatchNormalization()(conv1)
pool1 = tf.keras.layers.MaxPool2D((3,3),strides=(2,2),padding='same')(norm1)

#conv layer 2
conv2 = tf.keras.layers.Conv2D(64,(3,3),
                                 activation='relu',
                                 kernel_regularizer=tf.keras.regularizers.l2(0.001))(pool1)
norm2 = tf.keras.layers.BatchNormalization()(conv2)
pool2 = tf.keras.layers.MaxPool2D((3,3),strides=(2,2),padding='same')(norm2)

#conv layer 3
conv3 = tf.keras.layers.Conv2D(32,(2,2),
                                 activation='relu',
                                 kernel_regularizer=tf.keras.regularizers.l2(0.001))(pool2)
norm3 = tf.keras.layers.BatchNormalization()(conv3)
pool3 = tf.keras.layers.MaxPool2D((2,2),strides=(2,2),padding='same')(norm3)

#flatten the feature maps and pass them through the dense head
flat = tf.keras.layers.Flatten()(pool3)
dense1 = tf.keras.layers.Dense(64,activation = 'relu')(flat)
drop1 = tf.keras.layers.Dropout(0.25)(dense1)

dense2 = tf.keras.layers.Dense(64,activation = 'relu')(drop1)
drop2 = tf.keras.layers.Dropout(0.25)(dense2)



In [None]:
# Transcript (semantic) branch: Embedding -> two stacked LSTMs -> two
# Dense+Dropout layers. The branch output is `lstm_drop_4`.
trans_input = tf.keras.layers.Input(shape=X_train_transcript[0].shape, name='trans_input')
# Keras Tokenizer word indices start at 1 (0 is reserved for padding), so the
# largest index equals len(word_index). Embedding's input_dim must be
# "maximum index + 1", hence VOCAB_SIZE + 1; with VOCAB_SIZE alone the last
# word in the vocabulary would be an out-of-range lookup.
emb = tf.keras.layers.Embedding(VOCAB_SIZE + 1, 128, input_length=TEXT_PADDING)(trans_input)

#LSTM Layer 1 (returns the full sequence so it can feed the second LSTM)
lstm1 = tf.keras.layers.LSTM(128,activation='relu',return_sequences = True)(emb)
lstm_drop_1 = tf.keras.layers.Dropout(0.25)(lstm1)

#LSTM Layer 2 (returns only the final output)
lstm2 = tf.keras.layers.LSTM(128,activation='relu')(lstm_drop_1)
lstm_drop_2 = tf.keras.layers.Dropout(0.25)(lstm2)

#Dense 1
lstm_dense_1 = tf.keras.layers.Dense(64,activation='relu')(lstm_drop_2)
lstm_drop_3 = tf.keras.layers.Dropout(0.25)(lstm_dense_1)

#Dense 2
lstm_dense_2 = tf.keras.layers.Dense(64,activation='relu')(lstm_drop_3)
lstm_drop_4 = tf.keras.layers.Dropout(0.25)(lstm_dense_2)

In [None]:
# Fuse the two branches: join the MFCC and transcript feature vectors and
# map them to a single sigmoid unit (binary classification output).
concat = tf.keras.layers.Concatenate()([drop2, lstm_drop_4])
output = tf.keras.layers.Dense(1, activation='sigmoid')(concat)

# Two-input functional model: [MFCC tensor, transcript tokens] -> probability.
model = tf.keras.Model(
    inputs=[mfcc_input, trans_input],
    outputs=output,
)

In [None]:
#COMPILE

In [None]:
# Compile with Adam and binary cross-entropy. The step size comes from the
# shared LEARNING_RATE constant rather than a duplicated literal, so tuning it
# in the config cell actually affects this model.
op = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(loss='binary_crossentropy',optimizer=op, metrics=['accuracy'])
model.summary()

In [None]:
#FIT

In [None]:
# Train on both inputs; Keras holds out 20% of the training data for validation.
result = model.fit(
    x=[X_train_mfcc, X_train_transcript],
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.2,
)

In [None]:
# Pull the per-epoch accuracy curves out of the training history and build a
# 1-based epoch axis for plotting.
history = result.history
tr_acc, val_acc = history['accuracy'], history['val_accuracy']
epochs = range(1, 1 + len(tr_acc))

In [None]:
# Training vs. validation accuracy per epoch, drawn through the explicit
# Figure/Axes interface.
fig, ax = plt.subplots(figsize=(12,9))
ax.plot(epochs, tr_acc, 'bo', label='Training acc')
ax.plot(epochs, val_acc, 'b', label='Validation acc')
ax.set_title('Training and Val Accuracy for Signals + Semantics')
ax.set_xlabel('epochs')
ax.set_ylabel('accuracy')
ax.legend(loc='lower right')
# y-axis is clipped to [0.6, 1]; values below 0.6 will not be visible.
ax.set_ylim(0.6, 1)
plt.show()

In [None]:
#INFERENCE:
# Show the shape of the held-out MFCC tensor instead of dumping the raw array.
X_test_mfcc.shape

In [None]:
# Loss and accuracy on the held-out test split.
results = model.evaluate(
    x=[X_test_mfcc, X_test_transcript],
    y=y_test,
)

In [None]:
#ERROR ANALYSIS:

In [None]:
# Predict on the test split, then partition the test positions by label:
# label == 0 -> invalid_indexes, anything else -> valid_indexes.
predictions = model.predict([X_test_mfcc,X_test_transcript])
test_positions = range(len(y_test))
invalid_indexes = [i for i in test_positions if y_test[i] == 0]
valid_indexes = [i for i in test_positions if not (y_test[i] == 0)]

In [None]:
def get_pred(val):
    """Round `val` to the nearest whole number (scalars or arrays).

    Delegates to NumPy rounding, which sends exact halves to the nearest
    even value (so 0.5 becomes 0.0).
    """
    rounded = np.round(val)
    return rounded
# Score the model on the test examples collected in `valid_indexes`
# (label != 0). Misclassified clips are printed and kept in `false_neg_clips`.
print("PERFORMANCE ON POSITIVE EXAMPLES:")
total_pos = len(valid_indexes)
total_right_pops = 0
false_neg_clips = []
for i in valid_indexes:
    pred, label = get_pred(predictions[i]), get_pred(y_test[i])
    if(pred == label):
        total_right_pops += 1
    else:
        print(clips_test[i])
        false_neg_clips.append(clips_test[i])
        print("Prediction: ",pred,". Label: ", label)

print("TOTAL POSITIVE EXMPLES: ", total_pos)
print("TOTAL CORRECT POSITIVE:", total_right_pops)
# A test split without any positive example would otherwise raise
# ZeroDivisionError; report NaN in that case.
print("POSITIVE EXAMPLES ACC: ", total_right_pops/total_pos if total_pos else float('nan'))

In [None]:
# Score the model on the test examples collected in `invalid_indexes`
# (label == 0). Misclassified clips are printed and kept in `false_pos_clips`.
print("PERFORMANCE ON NEGATIVE EXAMPLES:")
total_neg = len(invalid_indexes)
total_right_neg = 0
false_pos_clips = []
for i in invalid_indexes:
    pred, label = get_pred(predictions[i]), get_pred(y_test[i])
    if(pred == label):
        total_right_neg += 1
    else:
        print(clips_test[i])
        false_pos_clips.append(clips_test[i])
        print("Prediction: ", pred,". Label: ",label)
print("TOTAL NEGATIVE EXMPLES: ", total_neg)
print("TOTAL CORRECT NEGATIVE:", total_right_neg)
# A test split without any negative example would otherwise raise
# ZeroDivisionError; report NaN in that case.
print("NEGATIVE EXAMPLES ACC: ", total_right_neg/total_neg if total_neg else float('nan'))

In [None]:
# Combine the per-class tallies into overall test accuracy.
total_examples = total_neg + total_pos
total_correct = total_right_neg + total_right_pops
print("TOTAL EXMPLES: ", total_examples)
print("TOTAL CORRECT :", total_correct)
print("ACC: ", total_correct/total_examples)

In [None]:
# Persist the misclassified clips for later inspection: convert each list to
# an array and write it out straight away.
false_pos_clips = np.array(false_pos_clips)
np.save('../util/false_positives_4000.npy', false_pos_clips)

false_neg_clips = np.array(false_neg_clips)
np.save('../util/false_negatives_4000.npy', false_neg_clips)

In [None]:
#OLD MODEL

In [None]:
# Earlier single-input CNN kept for reference.
# NOTE: this rebinds `model`, replacing the two-input model built above.
model = tf.keras.Sequential()

#conv layer 1
# The notebook never defines `X_train`; the 4-D MFCC training tensor is
# `X_train_mfcc`, so its per-example shape is used as the input shape.
model.add(tf.keras.layers.Conv2D(64,(3,3),
                                 activation='relu',
                                 input_shape=X_train_mfcc[0].shape,
                                 kernel_regularizer=tf.keras.regularizers.l2(0.001)))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.MaxPool2D((3,3),strides=(2,2),padding='same'))

#conv layer 2
model.add(tf.keras.layers.Conv2D(32,(3,3),
                                 activation='relu',
                                 kernel_regularizer=tf.keras.regularizers.l2(0.001)))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.MaxPool2D((3,3),strides=(2,2),padding='same'))

#conv layer 3
model.add(tf.keras.layers.Conv2D(32,(2,2),
                                 activation='relu',
                                 kernel_regularizer=tf.keras.regularizers.l2(0.001)))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.MaxPool2D((2,2),strides=(2,2),padding='same'))

#flatten output, first dense block
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(64,activation = 'relu'))
model.add(tf.keras.layers.Dropout(0.3))

#second dense block
model.add(tf.keras.layers.Dense(64,activation = 'relu'))
model.add(tf.keras.layers.Dropout(0.3))

#output layer
model.add(tf.keras.layers.Dense(1,activation='sigmoid'))

In [None]:
# Compile the model currently bound to `model` with Adam at the configured
# learning rate, binary cross-entropy loss and accuracy as the metric.
op = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(
    optimizer=op,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

TRAIN OLD MODEL

In [None]:
# `X_train` is not defined anywhere in this notebook; the MFCC training tensor
# is `X_train_mfcc`, which is what a single-input Conv2D model consumes.
model.fit(X_train_mfcc,y_train, epochs = EPOCHS, batch_size = BATCH_SIZE,validation_split = 0.2)