In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
import os
import librosa
# import librosa.display
# import matplotlib.pyplot as plt
# from IPython.display import Audio

In [2]:
# load data
data = "AudioWAV/"

dir_list = os.listdir(data)
# print(dir_list)

In [3]:
emotion = []
file_path = []

for file in dir_list:
    part = file.split('_')
    if part[2] == 'ANG':
        emotion.append(0)
    elif part[2] == 'DIS':
        emotion.append(1)
    elif part[2] == 'FEA':
        emotion.append(2)
    elif part[2] == 'HAP':
        emotion.append(3)
    elif part[2] == 'NEU':
        emotion.append(4)
    elif part[2] == 'SAD':
        emotion.append(5)
    else:
        emotion.append(6)
    file_path.append(data + file)

In [4]:
# create dataframe
emotion_df = pd.DataFrame(emotion, columns = ['label'])
# print(emotion_df)
path_df = pd.DataFrame(file_path,columns = ['path'])
# print(path_df)
data_df = pd.concat([emotion_df,path_df],axis=1)
print(data_df)
data_df.to_csv('data_test.csv',index=False)

      label                          path
0         0  AudioWAV/1001_DFA_ANG_XX.wav
1         1  AudioWAV/1001_DFA_DIS_XX.wav
2         2  AudioWAV/1001_DFA_FEA_XX.wav
3         3  AudioWAV/1001_DFA_HAP_XX.wav
4         4  AudioWAV/1001_DFA_NEU_XX.wav
...     ...                           ...
7437      1  AudioWAV/1091_WSI_DIS_XX.wav
7438      2  AudioWAV/1091_WSI_FEA_XX.wav
7439      3  AudioWAV/1091_WSI_HAP_XX.wav
7440      4  AudioWAV/1091_WSI_NEU_XX.wav
7441      5  AudioWAV/1091_WSI_SAD_XX.wav

[7442 rows x 2 columns]


In [5]:
# ----------------------------------------------------------
# data visualization

# IF WE HAVE TIME, WE COULD DO THIS PART
# -----------------------------------------------------------

In [6]:
# Extract feature(MFCC,MEL,CONTRAST,CHROMA)

df = pd.DataFrame(columns=['feature'])
counter = 0
for label, path in enumerate(data_df.path):
    # https: // librosa.org / doc / main / generated / librosa.load.html
    data, sample_rate = librosa.load(path,res_type='kaiser_fast')
    sample_rate = np.array(sample_rate)

    # STFT
    stft = np.abs(librosa.stft(data))
    result = np.array([])

    # spectral contrast
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    result = np.hstack((result, contrast))

    # Chroma
    chroma_stft = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    result = np.hstack((result, chroma_stft))
    df.loc[counter] = [chroma_stft]

    # MFCC
    mfcc = np.mean(librosa.feature.mfcc(y=data,sr=sample_rate).T,axis=0)
    result = np.hstack((result, mfcc))

    # Root Mean Square Value
    rms = np.mean(librosa.feature.rms(y=data).T, axis=0)
    result = np.hstack((result, rms))

    # MEL
    mel = np.mean(librosa.feature.melspectrogram(data, sr=sample_rate).T, axis=0)
    result = np.hstack((result, mel))
    
#     # Tonnetz
#     Tonnetz = np.mean(librosa.feature.tonnetz(data, sr=22050, chroma=None).T, axis=0)
#     result = np.hstack((result, Tonnetz))
    
    df.loc[counter] = [result]
    counter = counter +1
    
df = pd.DataFrame(df['feature'].values.tolist())
feature = pd.concat([emotion_df, df], axis=1)
feature = feature.to_csv('feature_test.csv',index=False)
df = df.fillna(0)
print(df)
print(data_df.path.shape)



            0          1          2          3          4          5    \
0     20.665973  13.837704  17.602001  16.122210  19.265245  18.448060   
1     19.175850  13.283503  16.042372  14.837284  18.154683  17.212843   
2     20.424459  15.250732  17.076782  15.769927  18.830277  17.358493   
3     19.758952  14.592866  17.149728  15.582128  19.181371  17.833759   
4     18.518519  13.345799  17.008969  15.237777  18.579113  18.544686   
...         ...        ...        ...        ...        ...        ...   
7437  16.382885  14.767572  15.895687  15.811464  16.937577  16.794025   
7438  17.276781  15.676635  16.071182  15.034707  17.490497  16.381528   
7439  16.099104  14.620276  17.171196  16.677368  17.334266  17.110930   
7440  16.011852  14.242217  16.271848  16.366024  17.018290  17.100947   
7441  15.688974  13.864524  15.947184  16.232368  17.207940  16.606170   

            6         7         8         9    ...           158  \
0     46.351879  0.659947  0.701333  0.6900

In [7]:
x_train, x_test, y_train, y_test = train_test_split(df, emotion_df, test_size=0.2, shuffle=True)
# scaling data with sklearn's RobustScaler
scaler = RobustScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)
print(y_test)

(5953, 168)
(1489, 168)
(5953, 1)
(1489, 1)
      label
4173      1
6445      5
3814      3
12        2
3683      2
...     ...
1987      4
5462      5
124       2
148       2
7192      2

[1489 rows x 1 columns]


In [8]:
# Add a dimension, in order to makes the data and the model compatible
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)
print(x_train.shape)
print(x_test.shape)

(5953, 168, 1)
(1489, 168, 1)


In [9]:
import keras
from keras.models import Sequential
from keras.layers import Conv1D, Activation, Dropout, MaxPooling1D, Flatten, Dense, BatchNormalization

In [10]:
model = Sequential()
model.add(Conv1D(256, 8, padding='same', input_shape=(x_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))

model.add(Conv1D(256, kernel_size=5, strides=1, padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))

model.add(Conv1D(128, kernel_size=5, strides=1, padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Dropout(0.2))

model.add(Conv1D(64, kernel_size=5, strides=1, padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))

model.add(Flatten())
model.add(Dense(units=32))
model.add(Activation('relu'))
model.add(Dropout(0.3))

model.add(Dense(units=8, activation='sigmoid'))
opt = keras.optimizers.Adam(lr=0.0001)
model.compile(optimizer=opt, loss = 'sparse_categorical_crossentropy', metrics=['accuracy'])
print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 168, 256)          2304      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 84, 256)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 84, 256)           327936    
_________________________________________________________________
activation (Activation)      (None, 84, 256)           0         
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 42, 256)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 42, 128)           163968    
_________________________________________________________________
activation_1 (Activation)    (None, 42, 128)           0

In [11]:
cnnhistory = model.fit(x_train, y_train, epochs=50, verbose=0)
print("Accuracy of our model on test data : ", model.evaluate(x_test,y_test)[1]*100 , "%")

# score = model.evaluate(x_test, y_test, verbose=0)
# print("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Accuracy of our model on test data :  44.99664306640625 %


In [14]:
# from sklearn.preprocessing import OneHotEncoder
import numpy as np
pred_test = model.predict(x_test)
print(pred_test.shape)
# encoder = OneHotEncoder()
y_pred = np.array(y_pred)
print(y_pred)
y_test = np.array(y_test)
print(y_test)
# y_test = encoder.inverse_transform(y_test)
# df = pd.DataFrame(columns=['Predicted Labels', 'Actual Labels'])

df['Predicted Labels'] = y_pred.flatten()
df['Actual Labels'] = y_test.flatten()

(1489, 8)
[[2.0966530e-03 4.7179511e-01 3.0835739e-01 ... 9.7553122e-01
  6.0040139e-07 4.8222745e-08]
 [6.7973435e-03 2.4422112e-01 3.1934974e-01 ... 9.5826435e-01
  9.2439586e-06 8.8424650e-07]
 [8.1042838e-01 1.2272149e-01 1.4944679e-01 ... 8.0990791e-03
  1.5937710e-05 8.5048150e-06]
 ...
 [4.5298070e-02 3.8749814e-01 5.3516394e-01 ... 5.0236952e-01
  5.7893991e-04 2.2125244e-04]
 [8.3762705e-01 1.3875446e-01 7.4188411e-03 ... 1.0635257e-03
  6.4342816e-09 2.0609896e-08]
 [1.9649923e-02 4.9435955e-01 6.6419554e-01 ... 8.5258037e-01
  1.9936298e-05 5.8121936e-06]]
[[1]
 [5]
 [3]
 ...
 [2]
 [2]
 [2]]


ValueError: Length of values (11912) does not match length of index (7442)

In [None]:
import matplotlib.pyplot as plt

epochs = [i for i in range(50)]
fig , ax = plt.subplots(1,2)
train_acc = cnnhistory.history['accuracy']
train_loss = cnnhistory.history['loss']
test_acc = cnnhistory.history['val_accuracy']
test_loss = cnnhistory.history['val_loss']

plt.show()