# Build a MLP model

# Load the data preprocessed in the previous notebook

In [1]:
%store -r x_train
%store -r x_test
%store -r y_train
%store -r y_test
%store -r yy
%store -r le

# Architecture

In [3]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics

In [4]:
num_labels = yy.shape[1]

# Construct the modedl
model = Sequential()

model.add(Dense(256, input_shape=(40, )))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(num_labels))
model.add(Activation('softmax'))

## Compile the model

In [5]:
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

## Display architecture

In [8]:
model.summary()

# Calculate pre-training accuracy
score = model.evaluate(x_test, y_test, verbose=0)
accuracy = 100*score[1]

print(f'Pre-training accuracy: {accuracy}%')

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               10496     
                                                                 
 activation (Activation)     (None, 256)               0         
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 256)               65792     
                                                                 
 activation_1 (Activation)   (None, 256)               0         
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense_2 (Dense)             (None, 10)                2

# Training

In [9]:
from keras.callbacks import ModelCheckpoint
from datetime import datetime

num_epochs = 100
num_batch_size = 32

checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.basic_mlp.hdf5',
                               verbose=1, save_best_only=True)
start = datetime.now()

model.fit(x_train,
          y_train,
          batch_size=num_batch_size,
          epochs=num_epochs,
          validation_data=(x_test, y_test), callbacks=[checkpointer], verbose=1)

duration = datetime.now() - start
print(f'Training completed in time: {duration}')

Epoch 1/100
Epoch 00001: val_loss improved from inf to 2.22190, saving model to saved_models\weights.best.basic_mlp.hdf5
Epoch 2/100
Epoch 00002: val_loss improved from 2.22190 to 2.12806, saving model to saved_models\weights.best.basic_mlp.hdf5
Epoch 3/100
Epoch 00003: val_loss improved from 2.12806 to 1.92011, saving model to saved_models\weights.best.basic_mlp.hdf5
Epoch 4/100
Epoch 00004: val_loss improved from 1.92011 to 1.77915, saving model to saved_models\weights.best.basic_mlp.hdf5
Epoch 5/100
Epoch 00005: val_loss improved from 1.77915 to 1.62822, saving model to saved_models\weights.best.basic_mlp.hdf5
Epoch 6/100
Epoch 00006: val_loss improved from 1.62822 to 1.53800, saving model to saved_models\weights.best.basic_mlp.hdf5
Epoch 7/100
Epoch 00007: val_loss improved from 1.53800 to 1.40326, saving model to saved_models\weights.best.basic_mlp.hdf5
Epoch 8/100
Epoch 00008: val_loss improved from 1.40326 to 1.30746, saving model to saved_models\weights.best.basic_mlp.hdf5
Epoc

## Test the model

In [10]:
score = model.evaluate(x_train, y_train, verbose=0)
print(f'Training acurracy: {score[1]}')

score = model.evaluate(x_test, y_test, verbose=0)
print(f'Training acurracy: {score[1]}')

Training acurracy: 0.9454545378684998
Training acurracy: 0.8900973200798035


## Predictions

In [12]:
import librosa
import numpy as np

In [13]:
def extract_feature(file_name):

    try:
        audio_data, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40)
        mfccsscaled = np.mean(mfccs.T,axis=0)

    except Exception as e:
        print("Error encountered while parsing file: ", file_name)
        return None, None

    return np.array([mfccsscaled])

In [32]:
def print_prediction(file_name):
    prediction_feature = extract_feature(file_name)

    predicted_vector = np.argmax(model.predict(prediction_feature))
    predicted_class = le.inverse_transform(predicted_vector)
    print("The predicted class is:", predicted_class[0], '\n')

    predicted_proba_vector = model.predict_proba(prediction_feature)
    predicted_proba = predicted_proba_vector[0]
    for i in range(len(predicted_proba)):
        category = le.inverse_transform(np.array([i]))
        print(category[0], "\t\t : ", format(predicted_proba[i], '.32f') )

# Validation with samples

In [31]:
file_name = 'samples/100852-0-0-0.wav'
print_prediction(file_name)

ValueError: y should be a 1d array, got an array of shape () instead.