# Speech - Attention

In [1]:
import numpy as np
import pandas as pd

from keras.models import Model, load_model

from keras.layers import Input, Activation, Concatenate, Permute, Reshape, Flatten, Lambda, Dot, Softmax
from keras.layers import Add, Dropout, BatchNormalization, Conv2D, Reshape, MaxPooling2D, Dense, CuDNNLSTM, Bidirectional
from keras import backend as K
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler
from keras import backend as K
from keras import optimizers
from keras_radam import RAdam

import numpy as np
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
from scipy.io import wavfile
import plotly.graph_objects as go
import plotly
from plotly.subplots import make_subplots
import winsound
import time
from keras.utils.vis_utils import plot_model

from IAHOS import IAHOS
from extraction_performances import extraction_performances
from hyperparams_initialization import hyperparams_initialization
from plots import plot_IAHOS,plot_confusion_matrix
from plots import plot_training_accuracy,plot_validation_accuracy,plot_test_scores
from plots import plot_output_NN

import keras
from keras import backend as K
from keras.models import Sequential,Input,Model,load_model
from keras.layers import Conv2D,Conv1D,MaxPooling2D,AveragePooling1D,MaxPooling1D
from keras.layers import Dense,Flatten,Dropout
from keras import initializers,optimizers,backend as k
from keras_radam import RAdam
from keras.callbacks import EarlyStopping,ModelCheckpoint
from sklearn.metrics import confusion_matrix,accuracy_score
import tensorflow as tf
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

Using TensorFlow backend.


In [2]:
def Classifier(network_input_shape, number_of_classes, optimizer, rnn_func = CuDNNLSTM):
    """Build and compile the convolutional / recurrent / attention classifier.

    Parameters
    ----------
    network_input_shape : sequence of two numbers
        Sizes of the two non-batch axes of one input sample; a trailing
        channel axis of size 1 is appended by the model itself.
    number_of_classes : int
        Width of the final softmax layer.
    optimizer : str
        One of 'radam', 'sgd', 'rmsprop', 'adagrad', 'adadelta', 'adam',
        'adamax'. Any other value selects Nadam.
    rnn_func : callable
        Recurrent layer constructor wrapped by ``Bidirectional``.

    Returns
    -------
    A compiled Keras ``Model`` (categorical cross-entropy loss, accuracy
    metric).
    """
    first_axis = int(network_input_shape[0])
    second_axis = int(network_input_shape[1])
    inputs = Input(shape=(first_axis, second_axis, 1))

    # Swap the two non-batch axes so the second input axis becomes the
    # sequence axis for everything that follows.
    net = Permute((2, 1, 3))(inputs)

    # Two (5, 1) convolutions along the sequence axis; the second one reduces
    # the channels back to 1 so the channel axis can be squeezed away.
    net = Conv2D(10, (5, 1), activation='relu', padding='same')(net)
    net = BatchNormalization()(net)
    net = Conv2D(1, (5, 1), activation='relu', padding='same')(net)
    net = BatchNormalization()(net)

    net = Lambda(lambda q: K.squeeze(q, -1), name='squeeze_last_dim')(net)

    # Two stacked bidirectional recurrent layers -> [b_s, seq_len, vec_dim]
    for _ in range(2):
        net = Bidirectional(rnn_func(64, return_sequences=True))(net)

    # NOTE(review): the query is taken from the hard-coded sequence step 49,
    # so the sequence axis must have at least 50 steps -- confirm this matches
    # the datasets in use.
    anchor_step = Lambda(lambda q: q[:, 49])(net)  # [b_s, vec_dim]
    query = Dense(128)(anchor_step)

    # Dot-product attention: score every step against the query ...
    attention_scores = Dot(axes=[1, 2])([query, net])
    attention_weights = Softmax(name='attSoftmax')(attention_scores)  # [b_s, seq_len]

    # ... and collapse the sequence into its attention-weighted sum.
    context_vector = Dot(axes=[1, 1])([attention_weights, net])  # [b_s, vec_dim]

    hidden = Dense(64, activation='relu')(context_vector)
    hidden = Dense(32)(hidden)

    output = Dense(number_of_classes, activation='softmax', name='output')(hidden)

    model = Model(inputs=[inputs], outputs=[output])

    # Optimizer choice: factories are lazy so only the selected optimizer is
    # ever instantiated.
    optimizer_factories = {
        'radam': lambda: RAdam(),
        'sgd': lambda: keras.optimizers.SGD(lr=0.01, momentum=0.0, decay=0.0, nesterov=False),
        'rmsprop': lambda: keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0),
        'adagrad': lambda: keras.optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0),
        'adadelta': lambda: keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0),
        'adam': lambda: keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                                              epsilon=None, decay=0.0, amsgrad=False),
        'adamax': lambda: keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999,
                                                  epsilon=None, decay=0.0),
    }

    def nadam_factory():
        # Fallback for every name not listed above (including 'nadam').
        return keras.optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999,
                                      epsilon=None, schedule_decay=0.004)

    chosen_optimizer = optimizer_factories.get(optimizer, nadam_factory)()

    model.compile(optimizer=chosen_optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

In [3]:
def is_a_number(s):
    """Return True when ``int(s)`` succeeds, False when it raises ValueError."""
    try:
        int(s)
    except ValueError:
        return False
    else:
        return True

In [4]:
# Name of the network; used as a sub-folder of the results directory.
network = "Attention"
# Feature sets to process; each name is the filename prefix of the
# corresponding .npy dataset files and a sub-folder of the network results.
features = ["filter_banks", "mfcc", "mfcc_deltas"]
# Root folder holding the .npy datasets (one sub-folder per dataset version).
dataset_folder = './dataset/'
# Root folder under which models, accuracy histories and test results are written.
results_folder = "./results/"
# Dataset version identifier; sub-folder name under both roots above.
version_folder = "1567874784415"

In [None]:
# Train one classifier per feature set (and per optimizer) and store the best
# checkpoint plus the accuracy histories in a fresh, numbered run folder.
for feature in features:

    print("### " + feature.upper() + " ###")

    # Results live in <results>/<version>/<network>/<feature>/<run>/ where
    # <run> is one more than the highest numeric sub-folder already present.
    dir_name = results_folder + version_folder + "/" + network + "/" + feature
    os.makedirs(dir_name, exist_ok=True)

    existing_runs = [int(f) for f in os.listdir(dir_name) if is_a_number(f)]
    new_folder_number = max(existing_runs) + 1 if existing_runs else 1
    output_folder = dir_name + "/" + str(new_folder_number)
    os.mkdir(output_folder)

    print(" - dataset loading")
    # Dataset loading (the test split is not needed for training and is
    # loaded by the evaluation cell instead).
    folder = dataset_folder + version_folder + "/" + feature
    training_set = np.load(folder + '_training_set.npy')
    validation_set = np.load(folder + '_validation_set.npy')
    training_labels = np.load(folder + '_training_labels.npy')
    validation_labels = np.load(folder + '_validation_labels.npy')

    # Append the single channel axis expected by the network input.
    training_set = training_set.reshape((-1, training_set.shape[1], training_set.shape[2], 1))
    validation_set = validation_set.reshape((-1, validation_set.shape[1], validation_set.shape[2], 1))

    # Training and validation shuffling (samples and labels share one permutation)
    order = np.random.permutation(len(training_set))
    training_set = training_set[order]
    training_labels = training_labels[order]

    order = np.random.permutation(len(validation_set))
    validation_set = validation_set[order]
    validation_labels = validation_labels[order]

    # Training
    training_accuracy = []
    validation_accuracy = []
    # Named so that it does not rebind the imported `keras.optimizers` module.
    optimizer_names = ['radam']

    network_input = (training_set.shape[1], training_set.shape[2])
    epochs = 50
    for optimizer in tqdm(optimizer_names):

        mc = ModelCheckpoint(output_folder + '/best_model_' + optimizer + '.h5', monitor='val_loss',
                             mode='min', verbose=0, save_best_only=True)
        classifier = Classifier(network_input, training_labels.shape[1], optimizer)
        history = classifier.fit(training_set, training_labels,
                                 validation_data=(validation_set, validation_labels),
                                 epochs=epochs, batch_size=1024, verbose=2, callbacks=[mc])

        # Older Keras reports 'acc'/'val_acc', newer releases 'accuracy'/'val_accuracy'.
        logs = history.history
        training_accuracy.append(logs['acc'] if 'acc' in logs else logs['accuracy'])
        validation_accuracy.append(logs['val_acc'] if 'val_acc' in logs else logs['val_accuracy'])

        # Each file holds the histories of all optimizers trained so far in
        # this run; the last row belongs to `optimizer`.
        np.save(output_folder + "/training_accuracy_" + optimizer + '.npy', training_accuracy)
        np.save(output_folder + "/validation_accuracy_" + optimizer + '.npy', validation_accuracy)

        # Drop the finished model's graph before the next model is built,
        # including after the last optimizer so nothing leaks into the next feature.
        del classifier
        K.clear_session()

In [5]:
# Evaluate the saved best models on the test split of every feature set,
# write one summary line per model to test_results.txt and plot the
# confusion matrices.

# Run sub-folder to evaluate. Both the weights and the validation history are
# read from this same run so that the reported epoch belongs to the model
# actually being scored.
run_to_evaluate = "1"

network_folder = results_folder + version_folder + "/" + network + "/"
# Named so that it does not rebind the imported `keras.optimizers` module.
optimizer_names = ['radam']

# Class names are shared by all feature sets, so load them once.
words_name = np.load(dataset_folder + version_folder + "/" + 'words_name.npy')

# The context manager guarantees the results file is flushed and closed even
# if loading or inference fails half-way through.
with open(network_folder + "test_results.txt", "w") as results_file:
    for feature in features:
        print("### " + feature.upper() + " ###")
        folder = dataset_folder + version_folder + "/" + feature
        run_folder = network_folder + feature + "/" + run_to_evaluate

        test_set = np.load(folder + '_test_set.npy')
        test_labels = np.load(folder + '_test_labels.npy')
        # Append the single channel axis expected by the network input.
        test_set = test_set.reshape((-1, test_set.shape[1], test_set.shape[2], 1))

        results_file.write("### " + feature.upper() + " ###\n")

        network_input = [test_set.shape[1], test_set.shape[2]]
        test_scores = []
        for optimizer in optimizer_names:
            classifier = Classifier(network_input, test_labels.shape[1], optimizer)
            classifier.load_weights(run_folder + '/best_model_' + str(optimizer) + '.h5')

            start_time = int(round(time.time() * 1000))
            predictions = classifier.predict(test_set)
            end_time = int(round(time.time() * 1000))

            # One-hot / probability rows -> class indices. Kept as float64,
            # the dtype the previous loop-based code handed to
            # plot_confusion_matrix.
            new_test_labels = np.argmax(test_labels, axis=1).astype(np.float64)
            y_pred = np.argmax(predictions, axis=1).astype(np.float64)

            score = accuracy_score(y_true=new_test_labels, y_pred=y_pred, normalize=True)
            test_scores.append(score)

            # The history file saved for an optimizer contains the histories
            # of every optimizer trained up to and including it; its last row
            # is this optimizer's own validation curve.
            validation_history = np.load(run_folder + "/validation_accuracy_" + optimizer + ".npy")[-1]
            epoch = np.argmax(validation_history)

            summary = optimizer + " score = " + str(score) + " in epoch " + str(epoch)
            print(summary)
            results_file.write(summary + ", inference time = " + (str(end_time - start_time)) + " ms \n")

            plot_confusion_matrix(new_test_labels, y_pred, words_name, feature, optimizer, network_folder)

            # Free the model's graph before the next model is built.
            del classifier
            K.clear_session()

### FILTER_BANKS ###
radam score = 0.8781720430107527 in epoch 40
### MFCC ###
radam score = 0.89 in epoch 47
### MFCC_DELTAS ###
radam score = 0.8878494623655914 in epoch 41


<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>