In [28]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import numpy, scipy as sklearn, librosa, urllib
import librosa.display
from IPython.display import Audio
import json 
import seaborn as sns
from sklearn.cluster import KMeans
import csv
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix, recall_score, precision_score, f1_score
import keras
from sklearn.decomposition import PCA

from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler

from itertools import cycle
from sklearn.manifold import TSNE
from sklearn.metrics import roc_curve, auc, silhouette_score,roc_auc_score, precision_recall_fscore_support


In [54]:
with open("RAVDESS/data.csv", 'r+') as f:
    f.truncate(0)

In [None]:
def setLabel(name):
    subname= name[:3]
    if subname=="dis":
        return "disgust";
    elif subname=="sor":
        return "surprise";
    elif subname=="pau":
        return "fear";
    elif subname=="rab":
        return "anger";
    elif subname=="gio":
        return "joy";
    elif subname=="tri":
        return "sadness";
    else:
        return "neutrality";
    

In [3]:
def setLabel(i): 
    emotions = ["neutral", "calm", "happy", "sad", "angry", "fearful", "disgust", "surprised"] #calm è extra
    return emotions[i]

In [53]:
def setLabel(i): 
    emotions = ["neutrality", "calm", "joy", "sadness", "anger", "fear", "disgust", "surprise"] #calm è extra
    return emotions[i]

In [55]:
main_dir = "RAVDESS"
with open('RAVDESS/data.csv', 'w', newline='') as csvfile:
    fieldnames = ['file_name', 'label','actor']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for index,dir in enumerate(os.listdir(main_dir)):
        actual = os.path.join(main_dir,dir)
        if dir == "data.csv":
            continue
        if int(dir.split("_")[-1]) % 2 == 0:
            actor = "f" + str((index//2) +1)    
        else:
            actor = "m" + str((index//2) +1)
        for audio in os.listdir(actual):
            emo = int(audio.split("-")[2]) 
            writer.writerow({'file_name':os.path.join(dir,audio),'label':setLabel(emo-1),'actor':actor})



In [60]:
data_df = pd.read_csv("RAVDESS/data.csv")

In [62]:
data_df.drop(data_df[data_df['label'] == "calm"].index, inplace = True)

In [64]:
def get_max_min(files):
    min_, max_ = 100, 0
    for file in files:
        sound_file, samplerate = librosa.load(file)
        t = sound_file.shape[0] / samplerate
        if t < min_:
            min_ = t
        if t > max_:
            max_ = t

    return max_, min_

In [65]:
def extract_new(file,pad):
    X, sample_rate = librosa.load(file)
    max_ = X.shape[0] / sample_rate
    if pad:
        length = (max_ * sample_rate) - X.shape[0]
        X = np.pad(X, (0, int(length)), 'constant')
    
    stft = np.abs(librosa.stft(X))
    # result = np.array([])
    result = []

    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=50).T, axis=0)
    # result = np.hstack((result, mfccs))
    result.append((mfccs))

    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # result = np.hstack((result, chroma))
    result.append(chroma)

    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0) 
    # result = np.hstack((result, mel))
    result.append(mel)
    
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    # result = np.hstack((result, contrast))
    result.append(contrast)

    # tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
    # result = np.hstack((result, tonnetz))
    return result

In [68]:
max, min = get_max_min('RAVDESS/'+data_df.file_name)
train_data = pd.DataFrame(columns=['filename', 'features', 'label'])

features = []
for index, file in zip(data_df.index, data_df.file_name):
    train_data.loc[index] = [file, extract_new('RAVDESS/'+file, max), data_df.label[index]]

In [69]:
X1 = np.empty((0, 50))
X2 = np.empty((0, 12))
X3 = np.empty((0, 128))
X4 = np.empty((0, 7))


for data in train_data["features"]:
    X1 = np.vstack((X1, data[0]))
    X2 = np.vstack((X2, data[1]))
    X3 = np.vstack((X3, data[2]))
    X4 = np.vstack((X4, data[3]))

In [70]:
data_classes = (list((train_data["label"].unique())))
Y = keras.utils.to_categorical(list((train_data["label"].apply(data_classes.index))))
# X = np.stack(train_data["features"])
X = np.hstack([X1,X2,X3,X4])

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=22)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=22)


In [73]:
model = keras.Sequential()
kernel_sizes = [5, 5]
model.add(keras.layers.Input(shape=(X_train.shape[1],1)))
for size in kernel_sizes:
    model.add(keras.layers.Conv1D(
        filters = 32,
        kernel_size = size,
        padding = 'same'
    ))  # 卷积层
    model.add(keras.layers.BatchNormalization(axis=-1))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Dropout(0.5))

model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(32))
model.add(keras.layers.BatchNormalization(axis = -1))
model.add(keras.layers.Activation('relu'))
model.add(keras.layers.Dropout(0.5))

model.add(keras.layers.Dense(7, activation='softmax'))  # 分类层
optimzer = keras.optimizers.Adam(learning_rate= 0.001)
model.compile(loss='categorical_crossentropy', optimizer=optimzer, metrics=['accuracy'])

In [74]:
from datetime import datetime  
name = datetime.now().strftime("SER_RAVDESS_1d_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=20,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]



history = model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=256,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m3/4[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 29ms/step - accuracy: 0.1491 - loss: 2.5217
Epoch 1: val_loss improved from inf to 2.64083, saving model to SER_RAVDESS_1d_30_09_2024_17_06_27.keras
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 89ms/step - accuracy: 0.1496 - loss: 2.5179 - val_accuracy: 0.1511 - val_loss: 2.6408
Epoch 2/1000
[1m3/4[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 26ms/step - accuracy: 0.1562 - loss: 2.4253
Epoch 2: val_loss improved from 2.64083 to 2.46668, saving model to SER_RAVDESS_1d_30_09_2024_17_06_27.keras
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.1557 - loss: 2.4284 - val_accuracy: 0.0800 - val_loss: 2.4667
Epoch 3/1000
[1m3/4[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 32ms/step - accuracy: 0.1684 - loss: 2.4380
Epoch 3: val_loss improved from 2.46668 to 2.40112, saving model to SER_RAVDESS_1d_30_09_2024_17_06_27.keras
[1m4/4[0m [32m━━━━━━━

In [75]:
data_df = pd.read_csv("EMOVO_dataset/data.csv")

train_data = pd.DataFrame(columns=['filename', 'features', 'label'])

features = []
for index, file in zip(data_df.index, data_df.file_name):
    train_data.loc[index] = [file, extract_new('EMOVO_dataset/'+file, max), data_df.label[index]]


X1 = np.empty((0, 50))
X2 = np.empty((0, 12))
X3 = np.empty((0, 128))
X4 = np.empty((0, 7))


for data in train_data["features"]:
    X1 = np.vstack((X1, data[0]))
    X2 = np.vstack((X2, data[1]))
    X3 = np.vstack((X3, data[2]))
    X4 = np.vstack((X4, data[3]))




data_classes = (list((train_data["label"].unique())))
Y = keras.utils.to_categorical(list((train_data["label"].apply(data_classes.index))))
# X = np.stack(train_data["features"])
X = np.hstack([X1,X2,X3,X4])

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=22)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=22)


In [77]:
print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.1425 - loss: 2.8680 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1425 - loss: 2.8680 
Loss : 2.8005177974700928, Accuracy : 0.1355932205915451


In [33]:
def obtain_df(X):
    X_df = pd.DataFrame(columns=['filename', 'features', 'label'])

    for index, file in zip(data_df.index, data_df.file_name):
        X_df.loc[index] = [file, pd.Series(X[index]), data_df.label[index]]
    X = pd.DataFrame(X_df["features"])
    return X

from sktime.transformations.panel.rocket import Rocket

def get_rocket(X):
    trf = Rocket(num_kernels=512) 
    trf.fit(X) 
    X_ = trf.transform(X)
    return X_


In [34]:
X1_ = get_rocket(obtain_df(X1)).to_numpy()
X2_ = get_rocket(obtain_df(X2)).to_numpy()
X3_ = get_rocket(obtain_df(X3)).to_numpy()
X4_ = get_rocket(obtain_df(X4)).to_numpy()


In [35]:
data_classes = (list((train_data["label"].unique())))
Y = keras.utils.to_categorical(list((train_data["label"].apply(data_classes.index))))
# X = np.stack(train_data["features"])
X_ = np.hstack([X1_,X2_,X3_,X4_])

X_train_, X_test_, y_train_, y_test_ = train_test_split(X_, Y, test_size=0.1, random_state=22)
X_train_, X_val_, y_train_, y_val_ = train_test_split(X_train_, y_train_, test_size=0.2, random_state=22)


In [43]:
import keras
model = keras.Sequential()
kernel_sizes = [5, 5]
model.add(keras.layers.Input(shape=(X_train_.shape[1],1)))
for size in kernel_sizes:
    model.add(keras.layers.Conv1D(
        filters = 32,
        kernel_size = size,
        padding = 'same'
    ))  # 卷积层
    model.add(keras.layers.BatchNormalization(axis=-1))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Dropout(0.5))

model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(32))
model.add(keras.layers.BatchNormalization(axis = -1))
model.add(keras.layers.Activation('relu'))
model.add(keras.layers.Dropout(0.5))

model.add(keras.layers.Dense(8, activation='softmax')) 
optimzer = keras.optimizers.Adam(learning_rate= 0.001)
model.compile(loss='categorical_crossentropy', optimizer=optimzer, metrics=['accuracy'])

In [44]:
from datetime import datetime  
name = datetime.now().strftime("ser_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=40,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]



history = model.fit(X_train_, y_train_, 
                       validation_data=(X_val_,y_val_), 
                       batch_size=256,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test_,y_test_)[0]}, Accuracy : {model.evaluate(X_test_,y_test_)[1]}")

Epoch 1/1000
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 342ms/step - accuracy: 0.1438 - loss: 2.5767 - val_accuracy: 0.1269 - val_loss: 6.4232
Epoch 2/1000
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 297ms/step - accuracy: 0.2023 - loss: 2.3074 - val_accuracy: 0.1192 - val_loss: 7.9273
Epoch 3/1000
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 300ms/step - accuracy: 0.2240 - loss: 2.1809 - val_accuracy: 0.1192 - val_loss: 6.7272
Epoch 4/1000
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 305ms/step - accuracy: 0.2563 - loss: 2.0891 - val_accuracy: 0.1192 - val_loss: 6.2217
Epoch 5/1000
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 305ms/step - accuracy: 0.2471 - loss: 2.0664 - val_accuracy: 0.1192 - val_loss: 6.0132
Epoch 6/1000
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 306ms/step - accuracy: 0.2263 - loss: 2.0727 - val_accuracy: 0.1231 - val_loss: 5.4377
Epoch 7/1000
[1m5/5[0m [3

In [49]:
from keras import layers, models
def get_model(X_train):
    inputs = layers.Input(shape=(X_train.shape[1],1))
    encoder = layers.LSTM(128)(inputs)
    drop = layers.Dropout(0.3)(encoder)
    hidden = layers.Dense(32, activation='relu')(drop)
    outputs = layers.Dense(8, activation='softmax')(hidden)
    
    model = models.Model(inputs, outputs)
    optimizer = keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

    return model

In [50]:
LSTM_model = get_model(X_train)
LSTM_model.summary()


from datetime import datetime  
name = datetime.now().strftime("ser_lstm_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=20,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]


LSTM_history = LSTM_model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       verbose=1,
                       callbacks=callbacks)


print(f"Loss : {LSTM_model.evaluate(X_test,y_test)[0]}, Accuracy : {LSTM_model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 51ms/step - accuracy: 0.1101 - loss: 2.1408 - val_accuracy: 0.1154 - val_loss: 2.0746
Epoch 2/1000
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 47ms/step - accuracy: 0.1573 - loss: 2.0885 - val_accuracy: 0.1192 - val_loss: 2.0928
Epoch 3/1000
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 47ms/step - accuracy: 0.1527 - loss: 2.0576 - val_accuracy: 0.1346 - val_loss: 2.0657
Epoch 4/1000
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 46ms/step - accuracy: 0.1369 - loss: 2.0647 - val_accuracy: 0.1115 - val_loss: 2.0717
Epoch 5/1000
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 46ms/step - accuracy: 0.1680 - loss: 2.0638 - val_accuracy: 0.1500 - val_loss: 2.0798
Epoch 6/1000
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 47ms/step - accuracy: 0.1561 - loss: 2.0601 - val_accuracy: 0.1192 - val_loss: 2.0715
Epoch 7/1000
[1m33/33

In [51]:
LSTM_model = get_model(X_train_)
LSTM_model.summary()


from datetime import datetime  
name = datetime.now().strftime("ser_lstm_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=20,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]


LSTM_history = LSTM_model.fit(X_train_, y_train_, 
                       validation_data=(X_val_,y_val_), 
                       batch_size=32,
                       epochs=1000,
                       verbose=1,
                       callbacks=callbacks)


print(f"Loss : {LSTM_model.evaluate(X_test_,y_test_)[0]}, Accuracy : {LSTM_model.evaluate(X_test_,y_test_)[1]}")

Epoch 1/1000
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 1s/step - accuracy: 0.1467 - loss: 2.0815 - val_accuracy: 0.1115 - val_loss: 2.0795
Epoch 2/1000
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 1s/step - accuracy: 0.1322 - loss: 2.0666 - val_accuracy: 0.1077 - val_loss: 2.0730
Epoch 3/1000
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 1s/step - accuracy: 0.1350 - loss: 2.0676 - val_accuracy: 0.1192 - val_loss: 2.0797
Epoch 4/1000
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 1s/step - accuracy: 0.1420 - loss: 2.0600 - val_accuracy: 0.1231 - val_loss: 2.0687
Epoch 5/1000
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 1s/step - accuracy: 0.1446 - loss: 2.0593 - val_accuracy: 0.1308 - val_loss: 2.0678
Epoch 6/1000
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 1s/step - accuracy: 0.1392 - loss: 2.0516 - val_accuracy: 0.1038 - val_loss: 2.0715
Epoch 7/1000
[1m33/33[0m 

In [52]:
keras.models.save_model(LSTM_model, "LSTM_rocket.keras")