In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import numpy, scipy as sklearn, librosa, urllib
import librosa.display
from IPython.display import Audio
import json 
import seaborn as sns
from sklearn.cluster import KMeans
import csv
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix, recall_score, precision_score, f1_score
import keras
from sklearn.decomposition import PCA

from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler

from itertools import cycle
from sklearn.manifold import TSNE
from sklearn.metrics import roc_curve, auc, silhouette_score,roc_auc_score, precision_recall_fscore_support
from tqdm import tqdm


In [38]:
def get_data_df():
    data_df = pd.read_csv("RAVDESS/data.csv")
    data_df.drop(data_df[data_df['label'] == "calm"].index, inplace = True)
    data_df = data_df.reset_index()
    return data_df


In [25]:
#add white noise to the original signal
def noise_addition(data,noise_percentage_factor=0.035):
    noise = np.random.normal(0, data.std(), data.size)
    augmented_data = data + noise * noise_percentage_factor
    return augmented_data

#lower the pitch of the original signal
def pitch_scaling(data, sr, num_semitones=-2):
    return librosa.effects.pitch_shift(y = data,sr = sr,n_steps = num_semitones)

#increase the pitch of the original signal
def pitch_scaling2(data, sr, num_semitones=2):
    return librosa.effects.pitch_shift(y = data,sr = sr,n_steps = num_semitones)



In [26]:
#mfccs and deltas extraction
import math
def extract_mfccs_delta(path):
    y,sr=librosa.load(path)  
    duration=librosa.get_duration(y=y,sr=sr)
    #reduce the duration of files longer than 3 seconds
    if(duration > 3.0):
        y,sr=librosa.load(path, offset=(duration/2)-1.5 , duration = 3)
    else:
        y,sr=librosa.load(path)

    mfcc= librosa.feature.mfcc( y=y,  sr=sr, n_mfcc=13 )
    
    return mfcc

def extract_mfccs_delta_with_noise(path):
    y, sr = librosa.load(path)
    duration=librosa.get_duration(y=y,sr=sr)
    #reduce the duration of files longer than 3 seconds
    if(duration > 3.0):
        y,sr=librosa.load(path, offset=(duration/2)-1.5 , duration = 3)
        y=noise_addition(y)
    else:
        y,sr=librosa.load(path)
        y=noise_addition(y)
    mfcc= librosa.feature.mfcc( y=y,  sr=sr, n_mfcc=13 )
    
    return mfcc


def extract_mfccs_delta_with_pitch_scaling(path):
    y, sr = librosa.load(path)
    duration=librosa.get_duration(y=y,sr=sr)
    #reduce the duration of files longer than 3 seconds
    if(duration > 3.0):
        y,sr=librosa.load(path, offset=(duration/2)-1.5 , duration = 3)
        y=pitch_scaling(y,sr)
    else:
        y,sr=librosa.load(path)
        y=pitch_scaling(y,sr)
    mfcc= librosa.feature.mfcc( y=y,  sr=sr, n_mfcc=13 )
    
    return mfcc

def extract_mfccs_delta_with_pitch_scaling2(path):
    y, sr = librosa.load(path)
    duration=librosa.get_duration(y=y,sr=sr)
    #reduce the duration of files longer than 3 seconds
    if(duration > 3.0):
        y,sr=librosa.load(path, offset=(duration/2)-1.5 , duration = 3)
        y=pitch_scaling2(y,sr)
    else:
        y,sr=librosa.load(path)
        y=pitch_scaling2(y,sr)
    mfcc= librosa.feature.mfcc( y=y,  sr=sr, n_mfcc=13 )
    
    return mfcc

In [27]:
#log-mel spectrogram extraction
def extract_logmel(path):
    y, sr = librosa.load(path)
    duration=librosa.get_duration(y=y,sr=sr)
    #reduce the duration of files longer than 3 seconds
    if(duration > 3.0):
        y,sr=librosa.load(path, offset=(duration/2)-1.5 , duration = 3)
    else:
        y, sr = librosa.load(path)
    ps = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=60)
    ps_db= librosa.power_to_db(ps)
    return ps_db

def extract_logmel_with_noise(path):
  y, sr = librosa.load(path)
  duration=librosa.get_duration(y=y,sr=sr)
  #reduce the duration of files longer than 3 seconds
  if(duration > 3.0):
      y,sr=librosa.load(path, offset=(duration/2)-1.5 , duration = 3)
      y=noise_addition(y)
  else:
      y, sr = librosa.load(path)
      y=noise_addition(y)
  ps = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=60)
  ps_db= librosa.power_to_db(ps)
  return ps_db

def extract_logmel_with_pitch_scaling(path):
  y, sr = librosa.load(path)
  duration=librosa.get_duration(y=y,sr=sr)
  #reduce the duration of files longer than 3 seconds
  if(duration > 3.0):
      y,sr=librosa.load(path, offset=(duration/2)-1.5 , duration = 3)
      y=pitch_scaling(y,sr)
  else:
      y, sr = librosa.load(path)
      y=pitch_scaling(y,sr)
  ps = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=60)
  ps_db= librosa.power_to_db(ps)
  return ps_db
  

def extract_logmel_with_pitch_scaling2(path):
  y, sr = librosa.load(path)
  duration=librosa.get_duration(y=y,sr=sr)
  #reduce the duration of files longer than 3 seconds
  if(duration > 3.0):
      y,sr=librosa.load(path, offset=(duration/2)-1.5 , duration = 3)
      y=pitch_scaling2(y,sr)
  else:
      y, sr = librosa.load(path)
      y=pitch_scaling2(y,sr)
  ps = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=60)
  ps_db= librosa.power_to_db(ps)
  return ps_db

In [39]:
not_aug_df = get_data_df()

for i in range(0,13):
    not_aug_df["mfcc_"+str(i)]= None
    
for i in range(0,13):
    not_aug_df["delta_"+str(i)]= None
    
for i in range(0,60):
    not_aug_df["logmel_"+str(i)]= None

for  (index ,path) in tqdm(zip(not_aug_df.index,not_aug_df.file_name)):
        mfccs=extract_mfccs_delta('RAVDESS/'+path)
        delta=librosa.feature.delta(mfccs)
        logmel=extract_logmel('RAVDESS/'+path)
        #add zero padding
        if mfccs.shape[1]<130:
          mfccs=librosa.util.pad_center(mfccs, size=130, axis=1)
          delta=librosa.util.pad_center(delta, size=130, axis=1)
          logmel=librosa.util.pad_center(logmel, size=130, axis=1)
        for i in range(0,13):
            not_aug_df.at[index, "mfcc_"+str(i)] = mfccs[i]
        for j in range(0,13):
            not_aug_df.at[index, "delta_"+str(j)] = delta[j]
        for z in range(0,60):
            not_aug_df.at[index, "logmel_"+str(z)] = logmel[z]


0it [00:00, ?it/s]

1248it [00:16, 77.18it/s]


In [40]:
pitch_df = get_data_df()
pitch_df= pd.concat([pitch_df]*2, ignore_index=True)   
for i in range(0,13):
    pitch_df["mfcc_"+str(i)]= None

for i in range(0,13):
    pitch_df["delta_"+str(i)]= None

for i in range(0,60):
    pitch_df["logmel_"+str(i)]= None

for  (index ,path) in tqdm(zip(pitch_df[0:len(pitch_df)//2].index,pitch_df[0:len(pitch_df)//2].file_name)):
        mfccs=extract_mfccs_delta_with_pitch_scaling('RAVDESS/'+path)
        logmel=extract_logmel_with_pitch_scaling('RAVDESS/'+path)
        delta=librosa.feature.delta(mfccs)
        #add zero padding
        if mfccs.shape[1]<130:
          mfccs=librosa.util.pad_center(mfccs, size=130, axis=1)
          delta=librosa.util.pad_center(delta, size=130, axis=1)
          logmel=librosa.util.pad_center(logmel, size=130, axis=1)
        for i in range(0,13):
            pitch_df.at[index, "mfcc_"+str(i)] = mfccs[i]
        for j in range(0,13):
            pitch_df.at[index, "delta_"+str(j)] = delta[j]
        for z in range(0,60):
            pitch_df.at[index, "logmel_"+str(z)] = logmel[z]
    
for  (index ,path) in tqdm(zip(pitch_df[len(pitch_df)//2:].index,pitch_df[len(pitch_df)//2:].file_name)):
        mfccs=extract_mfccs_delta_with_pitch_scaling2('RAVDESS/'+path)
        logmel=extract_logmel_with_pitch_scaling2('RAVDESS/'+path)
        delta=librosa.feature.delta(mfccs)
        #add zero padding
        if mfccs.shape[1]<130:
          mfccs=librosa.util.pad_center(mfccs, size=130, axis=1)
          delta=librosa.util.pad_center(delta, size=130, axis=1)
          logmel=librosa.util.pad_center(logmel, size=130, axis=1)
        for i in range(0,13):
            pitch_df.at[index, "mfcc_"+str(i)] = mfccs[i]
        for j in range(0,13):
            pitch_df.at[index, "delta_"+str(j)] = delta[j]
        for z in range(0,60):
            pitch_df.at[index, "logmel_"+str(z)] = logmel[z]


#join the two datasets
frames = [ not_aug_df, pitch_df]
semi_aug_df=pd.concat(frames ,ignore_index=True)

1248it [00:53, 23.23it/s]
1248it [01:02, 19.98it/s]


In [41]:
noise_df = get_data_df()
for i in range(0,13):
    noise_df["mfcc_"+str(i)]= None
for i in range(0,13):
    noise_df["delta_"+str(i)]= None
for i in range(0,60):
    noise_df["logmel_"+str(i)]= None


for  (index ,path) in tqdm(zip(noise_df.index,noise_df.file_name)):
        mfccs=extract_mfccs_delta_with_noise('RAVDESS/'+path)
        logmel=extract_logmel_with_noise('RAVDESS/'+path)
        delta=librosa.feature.delta(mfccs)
        #add zero padding
        if mfccs.shape[1]<130:
          mfccs=librosa.util.pad_center(mfccs, size=130, axis=1)
          delta=librosa.util.pad_center(delta, size=130, axis=1)
          logmel=librosa.util.pad_center(logmel, size=130, axis=1)
        for i in range(0,13):
            noise_df.at[index, "mfcc_"+str(i)] = mfccs[i]
        for j in range(0,13):
            noise_df.at[index, "delta_"+str(j)] = delta[j]
        for z in range(0,60):
            noise_df.at[index, "logmel_"+str(z)] = logmel[z]

frames = [ semi_aug_df, noise_df]
aug_df=pd.concat(frames,ignore_index=True)


1248it [00:24, 50.71it/s]


In [42]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
le.fit(not_aug_df['label'])
not_aug_df['label_id']=le.transform(not_aug_df['label'])
semi_aug_df['label_id']=le.transform(semi_aug_df['label'])
aug_df['label_id']=le.transform(aug_df['label'])


In [43]:
not_aug_df.to_pickle('RAVDESS/not_aug_df.pkl')
semi_aug_df.to_pickle('RAVDESS/semi_aug_df.pkl')
aug_df.to_pickle('RAVDESS/aug_df.pkl')

In [7]:
not_aug_df=pd.read_pickle('RAVDESS/not_aug_df.pkl')
semi_aug_df=pd.read_pickle('RAVDESS/semi_aug_df.pkl')
aug_df=pd.read_pickle('RAVDESS/aug_df.pkl')

In [8]:
scaler = StandardScaler()

#extraction of the log mel specrogram from the datasets - not aug
X_logmel_k = np.array(not_aug_df.loc[:, ['logmel' in i for i in not_aug_df.columns]])
X_logmel_k=np.array(X_logmel_k.tolist())
X_logmel_k=scaler.fit_transform(X_logmel_k.reshape(-1, X_logmel_k.shape[-1])).reshape(X_logmel_k.shape)
Y_logmel_k=not_aug_df['label']

#reshape the data from 3D to 2D - not aug
X_logmel_k=X_logmel_k.reshape(X_logmel_k.shape[0],X_logmel_k.shape[1]*X_logmel_k.shape[2])

In [9]:
#extraction of labels_id from datasets
Y_not_aug=not_aug_df['label_id']
Y_semi_aug=semi_aug_df['label_id']
Y_aug=aug_df['label_id']

#take the log mel spectrogram from the datasets
X_logmel = np.array(not_aug_df.loc[:, ['logmel' in i for i in not_aug_df.columns]])
X_logmel=np.array(X_logmel.tolist())
X_logmel_semi_aug = np.array(semi_aug_df.loc[:, ['logmel' in i for i in semi_aug_df.columns]])
X_logmel_semi_aug=np.array(X_logmel_semi_aug.tolist())
X_logmel_aug = np.array(aug_df.loc[:, ['logmel' in i for i in aug_df.columns]])
X_logmel_aug=np.array(X_logmel_aug.tolist())


X_logmel = np.reshape(X_logmel, (X_logmel.shape[0],X_logmel.shape[1],X_logmel.shape[2],1))
X_logmel_semi_aug= np.reshape(X_logmel_semi_aug, (X_logmel_semi_aug.shape[0],X_logmel_semi_aug.shape[1],X_logmel_semi_aug.shape[2],1))
X_logmel_aug= np.reshape(X_logmel_aug, (X_logmel_aug.shape[0],X_logmel_aug.shape[1],X_logmel_aug.shape[2],1))

In [10]:
def get_train_val_test(X,Y):
    X = scaler.fit_transform(X.reshape(-1, X.shape[-1])).reshape(X.shape)
    Y = keras.utils.to_categorical(Y.to_numpy())
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=22)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=22)
    return X_train, X_val, X_test, y_train,  y_val, y_test


In [11]:
def get_cnn(input_shape):
    model = keras.Sequential()

    model.add(keras.layers.Input(shape=input_shape))

    model.add(keras.layers.Conv2D(256, 3, activation='relu' ))
    model.add(keras.layers.MaxPooling2D(padding='same'))
    model.add(keras.layers.Dropout(rate=0.3))

    model.add(keras.layers.Conv2D(128, 3, activation='relu'))
    model.add(keras.layers.MaxPooling2D(padding='same'))
    model.add(keras.layers.Dropout(rate=0.3))


    model.add(keras.layers.Conv2D(64, 3, activation='relu'))
    model.add(keras.layers.MaxPooling2D(padding='same'))
    model.add(keras.layers.Dropout(rate=0.3))

    model.add(keras.layers.GlobalAveragePooling2D())
    model.add(keras.layers.Dense(1024, activation='relu'))
    
    model.add(keras.layers.Dense(256, activation='relu'))
    model.add(keras.layers.Dense(64, activation='relu'))

    model.add(keras.layers.Dense(7, activation='softmax'))

    optimzer = keras.optimizers.Adam()
    model.compile(loss='categorical_crossentropy', optimizer=optimzer, metrics=['accuracy'])

    return model

### TRAINING mel

In [18]:
from datetime import datetime  
name = datetime.now().strftime("models/augmented_vs_clean/SER_RAVDESS_clean_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=10,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]

X_train, X_val, X_test, y_train,  y_val, y_test = get_train_val_test(X_logmel,Y_not_aug)

model = get_cnn((X_train.shape[1:]))
# model.summary()
history = model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=256,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - accuracy: 0.1251 - loss: 1.9436
Epoch 1: val_loss improved from inf to 1.88091, saving model to models/augmented_vs_clean/SER_RAVDESS_clean_07_10_2024_16_32_30.keras
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 5s/step - accuracy: 0.1264 - loss: 1.9423 - val_accuracy: 0.2800 - val_loss: 1.8809
Epoch 2/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - accuracy: 0.2319 - loss: 1.8309
Epoch 2: val_loss improved from 1.88091 to 1.77193, saving model to models/augmented_vs_clean/SER_RAVDESS_clean_07_10_2024_16_32_30.keras
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 4s/step - accuracy: 0.2318 - loss: 1.8272 - val_accuracy: 0.3022 - val_loss: 1.7719
Epoch 3/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5s/step - accuracy: 0.2512 - loss: 1.7790
Epoch 3: val_loss improved from 1.77193 to 1.73725, saving model to models/augmente

In [20]:
print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.3228 - loss: 1.6525
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 0.3228 - loss: 1.6525
Loss : 1.6669886112213135, Accuracy : 0.3199999928474426


In [23]:
from datetime import datetime  
name = datetime.now().strftime("models/augmented_vs_clean/SER_RAVDESS_semi_aug_scaled_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=5,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]

X_train, X_val, X_test, y_train,  y_val, y_test = get_train_val_test(X_logmel_semi_aug,Y_semi_aug)

model = get_cnn((X_train.shape[1:]))
# model.summary()

history = model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=256,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.2163 - loss: 1.9007
Epoch 1: val_loss improved from inf to 1.68281, saving model to models/augmented_vs_clean/SER_RAVDESS_semi_aug_scaled_07_10_2024_16_53_06.keras
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 4s/step - accuracy: 0.2175 - loss: 1.8966 - val_accuracy: 0.2938 - val_loss: 1.6828
Epoch 2/1000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.2411 - loss: 1.7922
Epoch 2: val_loss did not improve from 1.68281
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 4s/step - accuracy: 0.2410 - loss: 1.7913 - val_accuracy: 0.2493 - val_loss: 1.7419
Epoch 3/1000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.2684 - loss: 1.7389
Epoch 3: val_loss improved from 1.68281 to 1.67930, saving model to models/augmented_vs_clean/SER_RAVDESS_semi_aug_scaled_07_10_2024_16_53_06.keras
[1m11

In [25]:
from datetime import datetime  
name = datetime.now().strftime("models/augmented_vs_clean/SER_RAVDESS_aug_scaled_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=5,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]

X_train, X_val, X_test, y_train,  y_val, y_test = get_train_val_test(X_logmel_aug,Y_aug)

model = get_cnn((X_train.shape[1:]))
# model.summary()

history = model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=256,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.2047 - loss: 1.8803
Epoch 1: val_loss improved from inf to 1.78322, saving model to models/augmented_vs_clean/SER_RAVDESS_aug_scaled_07_10_2024_18_06_56.keras
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 2s/step - accuracy: 0.2062 - loss: 1.8775 - val_accuracy: 0.2581 - val_loss: 1.7832
Epoch 2/1000
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.2587 - loss: 1.7653
Epoch 2: val_loss improved from 1.78322 to 1.74484, saving model to models/augmented_vs_clean/SER_RAVDESS_aug_scaled_07_10_2024_18_06_56.keras
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 3s/step - accuracy: 0.2590 - loss: 1.7650 - val_accuracy: 0.2625 - val_loss: 1.7448
Epoch 3/1000
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.2849 - loss: 1.7334
Epoch 3: val_loss did not improve from 1.74484
[1m15/15[0m [

In [26]:
import keras
from keras import layers, models

def get_model(input_shape):
    inputs = layers.Input(shape=input_shape)
    encoder = layers.LSTM(128)(inputs)
    drop = layers.Dropout(0.3)(encoder)
    hidden = layers.Dense(32, activation='relu')(drop)
    outputs = layers.Dense(7, activation='softmax')(hidden)
    
    model = models.Model(inputs, outputs)
    optimizer = keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

    return model

In [34]:
X_train,X_val,X_test,y_train,y_val,y_test = get_train_val_test(X_logmel.squeeze(3),Y_not_aug)
LSTM_model = get_model(X_train.shape[1:])
# LSTM_model.summary()


In [36]:
X_test.shape

(125, 60, 130)

In [None]:


from datetime import datetime  
name = datetime.now().strftime("models/RAVDESS_lstm_clean_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=20,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]


LSTM_history = LSTM_model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       verbose=1,
                       callbacks=callbacks)


print(f"Loss : {LSTM_model.evaluate(X_test,y_test)[0]}, Accuracy : {LSTM_model.evaluate(X_test,y_test)[1]}")

In [28]:
X_train,X_val,X_test,y_train,y_val,y_test = get_train_val_test(X_logmel_semi_aug.squeeze(3),Y_semi_aug)
LSTM_model = get_model(X_train.shape[1:])
# LSTM_model.summary()


from datetime import datetime  
name = datetime.now().strftime("models/RAVDESS_lstm_semi_aug_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=20,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]


LSTM_history = LSTM_model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       verbose=1,
                       callbacks=callbacks)


print(f"Loss : {LSTM_model.evaluate(X_test,y_test)[0]}, Accuracy : {LSTM_model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m82/85[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - accuracy: 0.2589 - loss: 1.7925
Epoch 1: val_loss improved from inf to 1.50034, saving model to models/RAVDESS_lstm_semi_aug_07_10_2024_18_33_53.keras
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - accuracy: 0.2617 - loss: 1.7879 - val_accuracy: 0.4199 - val_loss: 1.5003
Epoch 2/1000
[1m82/85[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - accuracy: 0.4409 - loss: 1.4692
Epoch 2: val_loss improved from 1.50034 to 1.37527, saving model to models/RAVDESS_lstm_semi_aug_07_10_2024_18_33_53.keras
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.4403 - loss: 1.4687 - val_accuracy: 0.4659 - val_loss: 1.3753
Epoch 3/1000
[1m82/85[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - accuracy: 0.4593 - loss: 1.3786
Epoch 3: val_loss improved from 1.37527 to 1.28430, saving model to models/RAVDESS_lstm_semi_au

In [29]:
X_train,X_val,X_test,y_train,y_val,y_test = get_train_val_test(X_logmel_aug.squeeze(3),Y_aug)
LSTM_model = get_model(X_train.shape[1:])
# LSTM_model.summary()


from datetime import datetime  
name = datetime.now().strftime("models/RAVDESS_lstm_aug_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=20,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]


LSTM_history = LSTM_model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       verbose=1,
                       callbacks=callbacks)


print(f"Loss : {LSTM_model.evaluate(X_test,y_test)[0]}, Accuracy : {LSTM_model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m110/113[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - accuracy: 0.2925 - loss: 1.8095
Epoch 1: val_loss improved from inf to 1.56622, saving model to models/RAVDESS_lstm_aug_07_10_2024_18_35_30.keras
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - accuracy: 0.2937 - loss: 1.8061 - val_accuracy: 0.4016 - val_loss: 1.5662
Epoch 2/1000
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.4113 - loss: 1.5116
Epoch 2: val_loss improved from 1.56622 to 1.41713, saving model to models/RAVDESS_lstm_aug_07_10_2024_18_35_30.keras
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.4114 - loss: 1.5115 - val_accuracy: 0.4472 - val_loss: 1.4171
Epoch 3/1000
[1m110/113[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - accuracy: 0.4665 - loss: 1.3647
Epoch 3: val_loss improved from 1.41713 to 1.34637, saving model to models/RAVDESS_lstm_aug_07_

In [30]:
from sklearn.svm import SVC
X_train,X_val,X_test,y_train,y_val,y_test = get_train_val_test(X_logmel.squeeze(3),Y_not_aug)
X_train = X_train.reshape(X_train.shape[0],-1)
X_test = X_test.reshape(X_test.shape[0],-1)


SVC_model = SVC(kernel = 'rbf', gamma = 'auto', probability = True, verbose=True)


SVC_history = SVC_model.fit(X_train, np.argmax(y_train,axis=-1))
SVC_model.score(X_test,np.argmax(y_test,axis=-1))

[LibSVM]

0.552

In [31]:
from sklearn.svm import SVC
X_train,X_val,X_test,y_train,y_val,y_test = get_train_val_test(X_logmel_semi_aug.squeeze(3),Y_semi_aug)
X_train = X_train.reshape(X_train.shape[0],-1)
X_test = X_test.reshape(X_test.shape[0],-1)


SVC_model = SVC(kernel = 'rbf', gamma = 'auto', probability = True, verbose=True)


SVC_history = SVC_model.fit(X_train, np.argmax(y_train,axis=-1))
SVC_model.score(X_test,np.argmax(y_test,axis=-1))

[LibSVM]

0.712

In [32]:
from sklearn.svm import SVC
X_train,X_val,X_test,y_train,y_val,y_test = get_train_val_test(X_logmel_aug.squeeze(3),Y_aug)
X_train = X_train.reshape(X_train.shape[0],-1)
X_test = X_test.reshape(X_test.shape[0],-1)


SVC_model = SVC(kernel = 'rbf', gamma = 'auto', probability = True, verbose=True)


SVC_history = SVC_model.fit(X_train, np.argmax(y_train,axis=-1))
SVC_model.score(X_test,np.argmax(y_test,axis=-1))

[LibSVM]

0.726

### Training mfccs

In [16]:
# extraction of the mfccs from the datasets - not aug
X_mfccs_k = np.array(not_aug_df.iloc[:, 4:30])
X_mfccs_k = np.array(X_mfccs_k.tolist())
# X_mfccs_k=scaler.fit_transform(X_mfccs_k.reshape(-1, X_mfccs_k.shape[-1])).reshape(X_mfccs_k.shape)
Y_mfccs_k = not_aug_df['label']

#reshape the data from 3D to 2D - not aug
X_mfccs_k=X_mfccs_k.reshape(X_mfccs_k.shape[0],X_mfccs_k.shape[1]*X_mfccs_k.shape[2])

In [17]:
#extraction of labels_id from datasets
Y_not_aug=not_aug_df['label_id']
Y_semi_aug=semi_aug_df['label_id']
Y_aug=aug_df['label_id']


X_mfccs = np.array(not_aug_df.iloc[:, 4:30]) 
X_mfccs = np.array(X_mfccs.tolist())
X_mfccs_semi_aug = np.array(semi_aug_df.iloc[:, 4:30])
X_mfccs_semi_aug=np.array(X_mfccs_semi_aug.tolist())
X_mfccs_aug = np.array(aug_df.iloc[:, 4:30])
X_mfccs_aug=np.array(X_mfccs_aug.tolist())


X_mfccs = np.reshape(X_mfccs, (X_mfccs.shape[0],X_mfccs.shape[1],X_mfccs.shape[2],1))
X_mfccs_semi_aug= np.reshape(X_mfccs_semi_aug, (X_mfccs_semi_aug.shape[0],X_mfccs_semi_aug.shape[1],X_mfccs_semi_aug.shape[2],1))
X_mfccs_aug= np.reshape(X_mfccs_aug, (X_mfccs_aug.shape[0],X_mfccs_aug.shape[1],X_mfccs_aug.shape[2],1))

In [24]:
from datetime import datetime  
name = datetime.now().strftime("models/augmented_vs_clean/SER_RAVDESS_mfccs_clean_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=10,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]

X_train, X_val, X_test, y_train,  y_val, y_test = get_train_val_test(X_mfccs,Y_not_aug)

model = get_cnn((X_train.shape[1:]))
# model.summary()
history = model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m28/29[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 229ms/step - accuracy: 0.1675 - loss: 2.8416
Epoch 1: val_loss improved from inf to 1.93378, saving model to models/augmented_vs_clean/SER_RAVDESS_mfccs_clean_03_10_2024_17_38_30.keras
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 250ms/step - accuracy: 0.1690 - loss: 2.8055 - val_accuracy: 0.1733 - val_loss: 1.9338
Epoch 2/1000
[1m28/29[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 241ms/step - accuracy: 0.2219 - loss: 1.9147
Epoch 2: val_loss improved from 1.93378 to 1.90474, saving model to models/augmented_vs_clean/SER_RAVDESS_mfccs_clean_03_10_2024_17_38_30.keras
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 257ms/step - accuracy: 0.2220 - loss: 1.9145 - val_accuracy: 0.1956 - val_loss: 1.9047
Epoch 3/1000
[1m28/29[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 251ms/step - accuracy: 0.2722 - loss: 1.8759
Epoch 3: val_loss improved from 1.90474 to 1.848

In [25]:
from datetime import datetime  
name = datetime.now().strftime("models/augmented_vs_clean/SER_RAVDESS_mfccs_clean_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=10,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]

X_train, X_val, X_test, y_train,  y_val, y_test = get_train_val_test(X_mfccs_semi_aug,Y_semi_aug)

model = get_cnn((X_train.shape[1:]))
# model.summary()
history = model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 228ms/step - accuracy: 0.1699 - loss: 2.2345
Epoch 1: val_loss improved from inf to 1.90343, saving model to models/augmented_vs_clean/SER_RAVDESS_mfccs_clean_03_10_2024_17_49_08.keras
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 250ms/step - accuracy: 0.1701 - loss: 2.2320 - val_accuracy: 0.2062 - val_loss: 1.9034
Epoch 2/1000
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 285ms/step - accuracy: 0.2067 - loss: 1.8898
Epoch 2: val_loss improved from 1.90343 to 1.80600, saving model to models/augmented_vs_clean/SER_RAVDESS_mfccs_clean_03_10_2024_17_49_08.keras
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 314ms/step - accuracy: 0.2067 - loss: 1.8898 - val_accuracy: 0.2567 - val_loss: 1.8060
Epoch 3/1000
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 328ms/step - accuracy: 0.2409 - loss: 1.8268
Epoch 3: val_loss improved from 1.80600 to 1.7

In [26]:
from datetime import datetime  
name = datetime.now().strftime("models/augmented_vs_clean/SER_RAVDESS_mfccs_aug_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=10,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]

X_train, X_val, X_test, y_train,  y_val, y_test = get_train_val_test(X_mfccs_aug,Y_aug)

model = get_cnn((X_train.shape[1:]))
# model.summary()
history = model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 354ms/step - accuracy: 0.1564 - loss: 2.4315
Epoch 1: val_loss improved from inf to 1.90303, saving model to models/augmented_vs_clean/SER_RAVDESS_mfccs_aug_03_10_2024_18_13_49.keras
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 385ms/step - accuracy: 0.1566 - loss: 2.4282 - val_accuracy: 0.2269 - val_loss: 1.9030
Epoch 2/1000
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 351ms/step - accuracy: 0.2225 - loss: 1.8718
Epoch 2: val_loss improved from 1.90303 to 1.75626, saving model to models/augmented_vs_clean/SER_RAVDESS_mfccs_aug_03_10_2024_18_13_49.keras
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 380ms/step - accuracy: 0.2227 - loss: 1.8714 - val_accuracy: 0.3059 - val_loss: 1.7563
Epoch 3/1000
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 349ms/step - accuracy: 0.2694 - loss: 1.7743
Epoch 3: val_loss improved from 1.75626 