In [47]:
import os
import numpy as np

In [48]:
import pathlib
# Recursively gather every *.wav file found below the Data directory.
dataPath = pathlib.Path('Data')
allPaths = list(dataPath.rglob('*.wav'))

In [49]:
def pathToLabel(path):
    """Return the first known emotion name contained in the lowercased path.

    The candidate names are tried in a fixed order against the whole path
    string (directory parts included). ``None`` is returned when none of
    them occurs in it.
    """
    candidates = ('angry', 'disgust', 'fear', 'happy', 'neutral', 'surprise', 'sad')
    lowered = str(path).lower()
    return next((name for name in candidates if name in lowered), None)

In [50]:
# Integer class id for every emotion name; ids follow the order listed here.
labels = {
    name: index
    for index, name in enumerate(
        ('angry', 'disgust', 'fear', 'happy', 'neutral', 'surprise', 'sad')
    )
}
def labelToInt(label):
    """Look up the integer class id of an emotion name.

    Raises ``KeyError`` when ``label`` is not a key of ``labels``.
    """
    class_id = labels[label]
    return class_id

In [51]:
# Emotion name for every audio file, in the same order as allPaths.
labeledPaths = np.array([pathToLabel(wavPath) for wavPath in allPaths])

In [52]:
# Integer class id for every file, in the same order as labeledPaths.
y = np.array([labelToInt(name) for name in labeledPaths])

In [53]:
import librosa
# Decode the first two files in allPaths with librosa.load's default settings.
# Each call returns the audio samples together with their sampling rate.
data, sampling_rate = librosa.load(allPaths[0])
data2, sr2 = librosa.load(allPaths[1])

In [78]:
# Compute one mel spectrogram per audio file. Files that cannot be loaded or
# transformed are skipped, and their labels are removed from y afterwards so
# that x and y stay aligned.
# NOTE(review): allPaths itself is not trimmed, so after a failure positions
# in x no longer match positions in allPaths. Re-running this cell after a
# failure would also delete from y a second time; rebuild y first.
x = []
failedIdx = []
for i, wavPath in enumerate(allPaths):
    try:
        a, sr = librosa.load(wavPath)
        feat = librosa.feature.melspectrogram(y=a, sr=sr)
    except Exception as err:  # not a bare except: Ctrl-C must still interrupt
        # Only remember the position here. Deleting from y inside the loop
        # would shift every later index and drop the wrong labels.
        failedIdx.append(i)
        print('Path '+str(wavPath)+' could not be processed: '+repr(err))
    else:
        x.append(feat)
if failedIdx:
    # All indices refer to the y that is passed in, so a single call removes
    # exactly the labels of the files that failed.
    y = np.delete(y, failedIdx)

In [55]:
# Average length of the first row over all entries of x.
cnt = sum(len(spec[0]) for spec in x)
print(cnt/len(x))

89.01821428571428


In [56]:
# Longest first-row length among the entries of x (0 when x is empty).
mx = max((len(spec[0]) for spec in x), default=0)
print(mx)

129


In [57]:
length = 100  # default number of entries every row is cut or padded to
def pad(x, target_length=None):
    """Force every row of ``x`` to exactly ``target_length`` entries.

    Rows that are too long are cut after ``target_length`` entries; rows
    that are too short are filled up with trailing zeros.

    Parameters
    ----------
    x : iterable of 1-D sequences
        The rows to resize, e.g. a 2-D array.
    target_length : int, optional
        Number of entries per output row. When omitted, the module-level
        ``length`` is used (read at call time).

    Returns
    -------
    numpy.ndarray
        One output row per input row, each ``target_length`` entries long.
    """
    if target_length is None:
        target_length = length
    newX = []
    for row in x:
        row = np.asarray(row)
        missing = target_length - len(row)
        if missing <= 0:
            newX.append(row[:target_length])
        else:
            # Pad with zeros of the row's own dtype. Concatenating a plain
            # list of Python ints would upcast float32 rows to float64, so
            # padded and truncated inputs would come back with different dtypes.
            newX.append(np.concatenate((row, np.zeros(missing, dtype=row.dtype))))
    return np.array(newX)

In [58]:
# Pad or truncate every entry of x, then collect the results in one array.
padX = np.array([pad(spec) for spec in x])

In [59]:
# Shape of the first padded entry.
padX[0].shape

(128, 100)

In [60]:
# Shape of the whole padded array.
padX.shape

(2800, 128, 100)

In [61]:
from sklearn.model_selection import train_test_split
# Keep the first and the last sample out of every split: they are the
# examples that are predicted and played back further down.
# 80% of the rest is used for training; the remaining 20% is halved into a
# test set and a validation set.
xTrain, xTest, yTrain, yTest = train_test_split(padX[1:-1], y[1:-1], test_size=.2, random_state=50)
# Seed this split as well; without random_state the test/validation
# membership changed on every run.
xTest, xVal, yTest, yVal = train_test_split(xTest, yTest, test_size=.5, random_state=50)

In [75]:
import tensorflow as tf
from tensorflow.keras import layers
# Layer stack: Bi-LSTM -> two Conv1D/Conv1D/MaxPooling1D stages -> Bi-LSTM
# -> Dropout -> two Dense(32) layers -> 7-way softmax output.
# No activation argument is passed to the Conv1D or the Dense(32) layers.
model = tf.keras.Sequential([
    layers.Input((128,100)),

    layers.Bidirectional(layers.LSTM(128, return_sequences=True)),

    layers.Conv1D(128, 3),
    layers.Conv1D(128, 3),
    layers.MaxPooling1D(),

    layers.Conv1D(64, 3),
    layers.Conv1D(64, 3),
    layers.MaxPooling1D(),

    layers.Bidirectional(layers.LSTM(64)),
    layers.Dropout(.2),

    layers.Dense(32),
    layers.Dense(32),

    layers.Dense(7, activation="softmax"),
])
# Sparse categorical cross-entropy: the targets are integer class ids.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_11 (Bidirect  (None, 128, 256)          234496    
 ional)                                                          
                                                                 
 conv1d_49 (Conv1D)          (None, 126, 128)          98432     
                                                                 
 conv1d_50 (Conv1D)          (None, 124, 128)          49280     
                                                                 
 max_pooling1d_23 (MaxPooli  (None, 62, 128)           0         
 ng1D)                                                           
                                                                 
 conv1d_51 (Conv1D)          (None, 60, 64)            24640     
                                                                 
 conv1d_52 (Conv1D)          (None, 58, 64)          

In [76]:
# Train for 10 epochs on the training split; the validation split is passed
# as validation_data. The returned History object is the cell's output.
model.fit(xTrain,yTrain,epochs=10,validation_data=(xVal,yVal))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x246b37a25f0>

In [77]:
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
# Score the trained model on the test split.
testPreds = model.predict(xTest)
# Index of the highest score in every prediction row = predicted class id.
maxedTestPreds = np.argmax(testPreds, axis=1)
# scikit-learn metrics take (y_true, y_pred) in that order.
print("Accuracy "+str(accuracy_score(yTest,maxedTestPreds)))
confusion_matrix(yTest,maxedTestPreds)

Accuracy 0.9892857142857143


array([[41,  0,  0,  0,  0,  0,  0],
       [ 0, 44,  0,  0,  0,  1,  0],
       [ 0,  0, 38,  0,  0,  0,  0],
       [ 0,  0,  0, 44,  0,  0,  0],
       [ 0,  0,  0,  0, 27,  0,  0],
       [ 0,  2,  0,  0,  0, 36,  0],
       [ 0,  0,  0,  0,  0,  0, 47]], dtype=int64)

In [65]:
# Show the emotion-name -> class-id table for reading the predicted ids.
labels

{'angry': 0,
 'disgust': 1,
 'fear': 2,
 'happy': 3,
 'neutral': 4,
 'surprise': 5,
 'sad': 6}

In [66]:
# Predict the first and the last padded sample; both were sliced off before
# the data was split.
preds = model.predict(np.array([padX[0],padX[-1]]))
# Predicted class id per sample, as plain Python ints.
argMaxedPreds = np.argmax(preds, axis=1).tolist()
# Build the message from the actual predictions instead of hard-coding the
# expected emotions, so it cannot contradict the ids shown below.
intToLabel = {idx: name for name, idx in labels.items()}
predictedNames = [intToLabel[idx].capitalize() for idx in argMaxedPreds]
print("Predicted "+" and ".join(predictedNames)+" from data outside the train and val sets")
argMaxedPreds

Predicted Angry and Sad from data outside the train and val sets


[0, 6]

In [67]:
from IPython.display import Audio
# The printed label is a fixed string; it is not read from the model output.
print("Predicted Angry")
# Audio display object for the first file in allPaths.
Audio(allPaths[0])

Predicted Angry


In [68]:
# The printed label is a fixed string; it is not read from the model output.
print("Predicted Sad")
# Audio display object for the last file in allPaths.
Audio(allPaths[-1])

Predicted Sad
