In [1]:
import h5py
import librosa
import numpy as np
from sklearn.model_selection import train_test_split

# Location of the HDF5 dataset; it is read for its 'gender', 'label' and
# 'mfcc' arrays.
DATASET_PATH = 'E:/dataset_6ms.h5'

print('loading data from disk...')
# Open the file in a context manager so the handle is released as soon as the
# selected rows have been copied into memory. Indexing an h5py dataset returns
# NumPy arrays, so X and y remain usable after the file is closed.
with h5py.File(DATASET_PATH, 'r') as file:
    gender_labels = file['gender'][...].squeeze().astype(str)

    # Keep only the rows whose gender label is 'female'.
    indexes = np.where(gender_labels == 'female')
    y = file['label'][indexes]
    X = file['mfcc'][indexes]
print('data loaded! calculating mfcc delta features for data...')

# Extend every sample with its delta and delta-delta coefficients (librosa),
# stacked below the original MFCC rows, so axis 1 becomes three times larger.
X_deltas = np.zeros((X.shape[0], X.shape[1]*3, X.shape[2]))
print(X_deltas.shape)
for i in range(X.shape[0]):
    X_delta = librosa.feature.delta(X[i])
    X_delta2 = librosa.feature.delta(X[i], order=2)
    # Row order per sample: MFCC, then delta, then delta-delta.
    X_deltas[i] = np.vstack([X[i], X_delta, X_delta2])

print("done.")
print("creating test train split...")
# Hold out 33% of the samples; the fixed random_state keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X_deltas, y, test_size=0.33, random_state=32)
print(f"{X_train.shape}, {X_test.shape}, {y_train.shape}, {y_test.shape}")

loading data from disk...
data loaded! calculating mfcc delta features for data...
(24133, 39, 50)
done.
creating test train split...
(16169, 39, 50), (7964, 39, 50), (16169, 1), (7964, 1)


In [17]:

import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.models import Model
from keras.layers import Dense, BatchNormalization, GlobalAveragePooling2D, GlobalMaxPooling2D, Add
from keras.layers import Flatten, Dropout, LeakyReLU, Permute, Activation
from keras.layers import Conv2D, MaxPooling2D, Reshape, AveragePooling2D
from keras.layers import Conv1D, Layer, Input, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.optimizers import adam_v2
import tensorflow as tf
import datetime
# import rms prop from keras

# Timestamped run name, so every run gets its own checkpoint directory.
timestr = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
name = "".join(['mfcc_resnet-', timestr])
# '{epoch:04d}' is left unformatted here; it is a placeholder in the
# checkpoint file name handed to ModelCheckpoint below.
checkpoint_path = "".join(["checkpoints/", name, "/cp-{epoch:04d}.ckpt"])

# Adam optimizer with its default settings.
opt = adam_v2.Adam()

# Stop training once the validation loss has not improved for 15 epochs.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=15,
    verbose=1,
)

# Checkpoint callback, available under both names.
# NOTE(review): the visible model.fit call only passes `es`, so this callback
# looks unused - confirm whether that is intentional.
cp_callback = ModelCheckpoint(filepath=checkpoint_path, verbose=0)
cp = cp_callback


def res_block(x_in, channels_in, kernel: tuple, downsample=False):
    """Build a two-convolution residual block on top of ``x_in``.

    The main path is Conv2D -> BatchNormalization -> LeakyReLU, applied twice
    with ``padding='same'``. Its output is added to a shortcut taken from
    ``x_in``.

    Args:
        x_in: Input Keras tensor with the channel axis last.
        channels_in: Number of filters used by every convolution in the block,
            and therefore the channel count of the returned tensor.
        kernel: Kernel size of the two main-path convolutions.
        downsample: When true, the first convolution and the shortcut use a
            stride of 2, which reduces both spatial dimensions.

    Returns:
        The tensor produced by adding the main path to the shortcut.

    The shortcut is the unmodified input when its shape already matches the
    main path. Otherwise (downsampling, or an input whose channel count is
    not ``channels_in``) it is projected with a 1x1 convolution so the two
    branches can be added.
    """
    stride = 2 if downsample else 1

    x = Conv2D(channels_in, kernel, padding='same', strides=stride)(x_in)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.08)(x)

    x = Conv2D(channels_in, kernel, padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.08)(x)

    shortcut = x_in
    if downsample or x_in.shape[-1] != channels_in:
        # Match the shortcut to the main path's spatial size and channel count.
        shortcut = Conv2D(kernel_size=1, strides=stride,
                          filters=channels_in, padding='same')(x_in)

    return Add()([x, shortcut])


# The shape of a single training sample defines the network input.
feature_shape = X_train[0].shape
print(feature_shape)

inputs = Input(shape=feature_shape)
# Append a channel axis, then swap the first two feature axes.
initial_layer = Reshape(tuple(feature_shape[:2]) + (1,))(inputs)
x_transpose = Permute((2, 1, 3))(initial_layer)

x = BatchNormalization()(x_transpose)

# Main path: three stages, each a 1x1 convolution that sets the channel
# count, a downsampling residual block with (3, 5) kernels, and dropout.
for n_filters in (32, 64, 128):
    x = Conv2D(n_filters, (1, 1), activation='linear')(x)
    x = res_block(x, n_filters, (3, 5), True)
    x = Dropout(0.2)(x)

# Side path: three downsampling residual blocks with 1x1 kernels, starting
# from the permuted input (before batch normalization), so that its output
# shape lines up with the main path.
x_inital = x_transpose
for stage in range(3):
    x_inital = res_block(x_inital, 128, (1, 1), True)

# Merge both paths; the printed shapes must agree for the addition to work.
print (x_inital.shape, x.shape)
x = Add()([x, x_inital])

# Summarise each channel by the sum of its global average and global maximum.
x_avg = GlobalAveragePooling2D()(x)
x_global = GlobalMaxPooling2D()(x)
x = Add()([x_avg, x_global])

# Classification head ending in a 6-way softmax.
x = Flatten()(x)
x = Dense(128, activation="relu")(x)
x = Dropout(0.2)(x)
x = Dense(64, activation="relu")(x)
out = Dense(6, activation="softmax")(x)

model = Model(inputs=inputs, outputs=out)

model.summary()
# Sparse loss/metric: the targets are class indices rather than one-hot rows.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['sparse_categorical_accuracy'],
)


(39, 50)
(None, 7, 5, 128) (None, 7, 5, 128)
Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 39, 50)]     0           []                               
                                                                                                  
 reshape_1 (Reshape)            (None, 39, 50, 1)    0           ['input_2[0][0]']                
                                                                                                  
 permute_1 (Permute)            (None, 50, 39, 1)    0           ['reshape_1[0][0]']              
                                                                                                  
 batch_normalization_13 (BatchN  (None, 50, 39, 1)   4           ['permute_1[0][0]']              
 ormalization)                                 

In [18]:
# Train for at most 200 epochs in batches of 1000; the early-stopping callback
# may end the run sooner. The returned History object is kept in `cnnhistory`.
# Note that the held-out split (X_test, y_test) is used as validation data.
cnnhistory = model.fit(
    X_train,
    y_train,
    batch_size=1000,
    epochs=200,
    validation_data=(X_test, y_test),
    shuffle=True,
    callbacks=[es],
    verbose=1,
)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 41: early stopping


In [None]:
# Per-epoch metrics recorded by model.fit.
history = cnnhistory.history
acc = history['sparse_categorical_accuracy']
val_acc = history['val_sparse_categorical_accuracy']
loss = history['loss']
val_loss = history['val_loss']
epochs = range(1, len(acc) + 1)  # 1-based epoch numbers for the x axis

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15,5), dpi=80)

# One row per panel:
# (axes, training curve, validation curve, training label, validation label,
#  validation line style, y-axis label, title)
panels = [
    (ax[0], acc, val_acc, 'Training Acc', 'Validation Acc', '-b',
     'Accuracy', 'Training and Validation accuracy'),
    (ax[1], loss, val_loss, 'Training Loss', 'Validation Loss', '-g',
     'Loss', 'Training and Validation Loss'),
]
for panel, train_curve, val_curve, train_label, val_label, val_style, y_label, title in panels:
    panel.plot(epochs, train_curve, '--r', label=train_label)
    panel.plot(epochs, val_curve, val_style, label=val_label)
    panel.set_xlabel('Epochs')
    panel.set_ylabel(y_label)
    panel.legend()
    panel.set_title(title)

print("Model Accuracy Metrics: ")
plt.show()
plt.close()

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score
# Display names for the six classes.
# NOTE(review): assumes class index i corresponds to labels[i] - confirm
# against how the dataset encodes its labels.
labels = ['neutral','calm','happy','sad','angry','fearful']

# Predicted class = index of the largest softmax output for each test sample.
y_pred = model.predict(X_test).argmax(axis=1)
# y_test is a column vector of shape (N, 1); flatten it to match y_pred.
y_true = np.ravel(y_test)
print("Model Accuracy: ", accuracy_score(y_true, y_pred))

## Display the visualization of the Confusion Matrix.
# from_predictions expects (y_true, y_pred) in that order: rows are the true
# classes and columns the predicted ones. Passing ax draws directly onto the
# prepared 8x8 figure, so no second figure is created.
fig, ax = plt.subplots(figsize=(8,8))
cm_display = ConfusionMatrixDisplay.from_predictions(
    y_true, y_pred, display_labels=labels, ax=ax)


In [13]:
from tensorflow import keras
# Load the previously saved model from its HDF5 file and print its layer
# summary.
old = keras.models.load_model(filepath='cnn_63.h5')
old.summary()

Model: "sequential_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape_16 (Reshape)        (None, 39, 50, 1)         0         
                                                                 
 conv2d_56 (Conv2D)          (None, 39, 50, 32)        320       
                                                                 
 batch_normalization_45 (Bat  (None, 39, 50, 32)       128       
 chNormalization)                                                
                                                                 
 leaky_re_lu_71 (LeakyReLU)  (None, 39, 50, 32)        0         
                                                                 
 max_pooling2d_18 (MaxPoolin  (None, 19, 25, 32)       0         
 g2D)                                                            
                                                                 
 dropout_73 (Dropout)        (None, 19, 25, 32)      

In [16]:
from IPython.display import Image 
from keras.utils import vis_utils

# Write a diagram of `model` (with layer names and tensor shapes) to model.png.
# This needs both pydot and graphviz installed; without them Keras only prints
# an installation hint.
vis_utils.plot_model(
    model,
    to_file='model.png',
    show_shapes=True,
    show_layer_names=True,
)

# Image('model.png')




You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.
