
# Best Performing Model

We load the fully trained convolutional neural network that was trained in the previous Jupyter notebook, alongside the logistic regression and Naive Bayes models.

In [2]:
import csv
import os

import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.layers import Conv2D, MaxPooling2D
from keras.models import Sequential
from pydub import AudioSegment
from scipy import signal, stats
from scipy.io import wavfile
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences


# Restore the previously saved Keras model from './model12' and print its
# layer-by-layer summary (output shapes and parameter counts).
model = tf.keras.models.load_model('./model12')
model.summary()

2021-12-09 20:44:57.197833: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential_46"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_137 (Conv2D)          (None, 125, 22, 32)       832       
_________________________________________________________________
conv2d_138 (Conv2D)          (None, 121, 18, 32)       25600     
_________________________________________________________________
batch_normalization_47 (Batc (None, 121, 18, 32)       128       
_________________________________________________________________
activation_35 (Activation)   (None, 121, 18, 32)       0         
_________________________________________________________________
max_pooling2d_113 (MaxPoolin (None, 60, 9, 32)         0         
_________________________________________________________________
dropout_36 (Dropout)         (None, 60, 9, 32)         0         
_________________________________________________________________
conv2d_139 (Conv2D)          (None, 58, 7, 64)       

### Loading the test spectrograms 

In [16]:
# Filename prefixes for the audio clips: the loaders build each clip's path as
# prefix + str(index) + '.wav'.
PATH = './train/train_new/train_'
TEST_PATH = './test/test_new/test_'

def load_speeches_test(path, n_files=24750):
    """Read consecutively numbered test clips into a list of sample arrays.

    Parameters
    ----------
    path : str
        Filename prefix; clip ``i`` is read from ``path + str(i) + '.wav'``.
    n_files : int, optional
        Number of clips to read, indexed ``0 .. n_files - 1``. Defaults to
        24750, the count this notebook was written for.

    Returns
    -------
    list
        One sample array per clip, in index order. The sample rate returned by
        ``wavfile.read`` is discarded.
    """
    all_waves = []
    for i in range(n_files):
        _, samples = wavfile.read(f'{path}{i}.wav')
        all_waves.append(samples)
    return all_waves
def get_spectrograms(waves, sample_rate=8000):
    """Compute a spectrogram for every waveform with ``scipy.signal.spectrogram``.

    Parameters
    ----------
    waves : iterable of array_like
        One 1-D sample array per clip.
    sample_rate : int or float, optional
        Sampling frequency passed to ``signal.spectrogram``. Defaults to 8000,
        the value previously hard-coded here.

    Returns
    -------
    tuple of (list, list, list)
        ``(freqs, tims, spectros)``: per-clip frequency bins, segment times and
        spectrogram arrays, in the same order as ``waves``.
    """
    spectros = []
    freqs = []
    tims = []
    for wav in waves:
        frequencies, times, spectrogram = signal.spectrogram(wav, sample_rate)
        freqs.append(frequencies)
        tims.append(times)
        spectros.append(spectrogram)
    return freqs, tims, spectros

# Read every test clip and compute one spectrogram per clip.
test_waves = load_speeches_test(TEST_PATH)
_, _, test_spectros = get_spectrograms(test_waves)

# Stack into a (n_clips, 129, 26, 1) array; the trailing axis is the single
# input channel of the first Conv2D layer. The clip count is taken from the
# loaded data rather than repeated as a literal, so it cannot drift from the
# loader (the training cell reshapes the same way with len(all_waves)).
test_spectros = np.array(test_spectros)
test_spectros = test_spectros.reshape(len(test_waves), 129, 26, 1)



### Load training data to retrain our model after every prediction

In [19]:
def load_speeches(path, n_files=18000, labels_csv='train.csv'):
    """Read the training clips and their labels.

    Parameters
    ----------
    path : str
        Filename prefix; clip ``i`` is read from ``path + str(i) + '.wav'``.
    n_files : int, optional
        Number of clips (and labels) to load, indexed ``0 .. n_files - 1``.
        Defaults to 18000, the count this notebook was written for.
    labels_csv : str, optional
        CSV file whose second column holds the label of clip ``i`` in row ``i``.
        Defaults to ``'train.csv'``.

    Returns
    -------
    tuple of (list, list)
        ``(all_waves, labels)``: one sample array and one label per clip.

    Raises
    ------
    ValueError
        If the CSV has fewer than ``n_files`` rows.
    """
    # Read the cheap label file first so a short CSV fails before any audio is read.
    data = pd.read_csv(labels_csv)
    if len(data) < n_files:
        raise ValueError(
            f'{labels_csv} has {len(data)} rows but {n_files} labels were requested'
        )
    labels = list(data.iloc[:n_files, 1])

    all_waves = []
    for i in range(n_files):
        _, samples = wavfile.read(f'{path}{i}.wav')
        all_waves.append(samples)
    return all_waves, labels
def append_43(all_waves, labels, intersection, path=None, label=43):
    """Append selected test clips to the training data with a fixed label.

    Both ``all_waves`` and ``labels`` are extended in place and also returned.

    Parameters
    ----------
    all_waves : list
        Sample arrays collected so far; one array is appended per selected clip.
    labels : list
        Labels matching ``all_waves``; ``label`` is appended per selected clip.
    intersection : iterable of int
        Indices of the test clips to append.
    path : str, optional
        Filename prefix of the clips (``path + str(i) + '.wav'``). Defaults to
        the notebook-level ``TEST_PATH``, as before.
    label : int, optional
        Label assigned to every appended clip. Defaults to 43.

    Returns
    -------
    tuple of (list, list)
        The same ``all_waves`` and ``labels`` objects that were passed in.
    """
    if path is None:
        # Resolved at call time so the default follows the notebook global.
        path = TEST_PATH
    for i in intersection:
        _, samples = wavfile.read(f'{path}{i}.wav')
        all_waves.append(samples)
        labels.append(label)
    return all_waves, labels
# Indices of test clips to add to the training set with label 43
# (presumably saved by an earlier run of the intersection cell at the end of
# this notebook -- TODO confirm where intersection.txt comes from).
intersection = np.loadtxt("./intersection.txt").astype(int)

# Training clips plus the selected test clips labelled 43.
all_waves,labels = load_speeches(PATH)
all_waves, labels = append_43(all_waves, labels, intersection)
# Map raw labels to consecutive integers, then one-hot encode them.
# NOTE(review): the literal 6 is the one-hot width; assumes there are at most 6
# distinct labels -- confirm against train.csv.
labelencoder = LabelEncoder().fit(labels)
encoded_labels = tf.keras.utils.to_categorical(labelencoder.transform(labels), 6)

# One spectrogram per clip, stacked and given a trailing single-channel axis.
freqs,tims,spectros = get_spectrograms(all_waves)
spectros = np.array(spectros) # each spectros[i] has shape (129, 26)
spectros = spectros.reshape(len(all_waves), 129, 26, 1)
# Hold out 15% for validation; the fixed random_state makes the split repeatable.
X, X_test, Y, Y_test = train_test_split(spectros, encoded_labels, test_size=0.15, random_state=98)

We used this fully trained model to continuously generate predictions, keeping only those made when validation accuracy was at least 99.99%, and terminating after 51 predictions

In [20]:
N_PREDICTION_RUNS = 51            # number of prediction files to produce
VAL_ACCURACY_THRESHOLD = 0.9999   # keep predictions only after an epoch at least this accurate
PREDICTIONS_DIR = './predictions'

# np.savetxt does not create missing directories; without this a fresh checkout
# fails only after the first (expensive) qualifying epoch.
os.makedirs(PREDICTIONS_DIR, exist_ok=True)

# One epoch up front so there is a validation accuracy to inspect.
history = model.fit(X, Y, batch_size=128, epochs=1, validation_data=(X_test, Y_test))

# Each pass either saves one set of test predictions (when the latest epoch met
# the threshold) or skips, and then trains for one more epoch.
# NOTE: there is no cap on skipped epochs, so this runs until enough epochs
# reach the threshold.
i = 0
while i < N_PREDICTION_RUNS:
    if history.history['val_accuracy'][-1] < VAL_ACCURACY_THRESHOLD:
        print('skipped, too low')
    else:
        predictions = model.predict(test_spectros)
        # Most probable class per clip, mapped back to the original label values.
        predicted_labels = labelencoder.inverse_transform(np.argmax(predictions, axis=1))
        np.savetxt(f"{PREDICTIONS_DIR}/prediction{i}.txt", predicted_labels)
        print(f'iteration {i}')
        i += 1
    history = model.fit(X, Y, batch_size=128, epochs=1, validation_data=(X_test, Y_test))

2021-12-09 20:57:12.365069: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)


skipped, too low
skipped, too low
 12/135 [=>............................] - ETA: 1:29 - loss: 0.0044 - accuracy: 0.9993

KeyboardInterrupt: 

Because the validation-accuracy threshold is so high, this loop can take a long time (I interrupted it early). We proceed by loading the predictions we already obtained from an overnight training run.

In [22]:
# Load the 51 saved prediction files and place them side by side, one column
# per run -> shape (24750, 51). All columns are joined in a single call instead
# of growing the array with np.append, which copies everything on each iteration.
all_predictions = np.hstack([
    np.loadtxt(f"./predictions_last/prediction{run}.txt").astype(int).reshape(24750, 1)
    for run in range(51)
])

all_predictions.shape


(24750, 51)

Checking where predictions aren't all the same and analyzing statistics

In [26]:
# Most common label per clip across all prediction runs.
modes = stats.mode(all_predictions, axis=1)[0].flatten() #shape:(24750,)

# Boolean matrix: True where a run's prediction equals that clip's modal label.
matches_mode = all_predictions == modes[:, np.newaxis]

# NB: despite its name, find_discrepancies is True where ALL runs agree.
find_discrepancies = matches_mode.all(axis=1)
discrepancies = np.argwhere(~find_discrepancies)
print(f'We have {len(discrepancies)} predictions that disagree ')

# Fraction of runs that voted for the mode, for every clip. Computed over all
# rows and all columns of all_predictions; the earlier range(24570) literal
# silently left the last 180 clips out of the counts below.
percentage_equals_mode = matches_mode.sum(axis=1) / all_predictions.shape[1]
print(f'Predictions where mode is less than 98% of all predictions : {np.argwhere(percentage_equals_mode<0.98).shape[0]}')
print(f'Predictions where mode is less than 70% of all predictions : {np.argwhere(percentage_equals_mode<0.7).shape[0]}')

We have 209 predictions that disagree 
Predictions where mode is less than 98% of all predictions : 83
Predictions where mode is less than 70% of all predictions : 4


In [29]:
# Column vector of clip IDs 0 .. 24749.
array = np.array(range(24750))[:, np.newaxis]
# Pair every ID with its modal label -> (24750, 2) integer array.
inverse_predictions = np.append(array, modes[:, np.newaxis], axis=1)
# Final ID/Label table.
df = pd.DataFrame(data=inverse_predictions, columns=['ID', 'Label'])
df

Unnamed: 0,ID,Label
0,0,31
1,1,32
2,2,43
3,3,43
4,4,42
...,...,...
24745,24745,32
24746,24746,32
24747,24747,41
24748,24748,41


Lastly, we regenerate the intersection of 43 labels to be replaced - should approach 2250

In [32]:
# Clip IDs that EVERY prediction run labelled 43. Checking all columns of
# all_predictions keeps the result in line with the "51 models" message below;
# the previous loop over range(1, 26) only intersected the first 26 runs.
agree_on_43 = (all_predictions == 43).all(axis=1)
intersection = np.argwhere(agree_on_43).flatten()
print(f'This is how many 43 labels that 51 models agree on: { intersection.shape[0]}')
print('IDs of 43 predictions:')
intersection

This is how many 43 labels that 51 models agree on: 2233
IDs of 43 predictions:


array([    2,     3,     7, ..., 24711, 24731, 24742])