In [23]:
import pandas as pd
import opensmile
from tqdm import tqdm
import keras
import numpy as np
from sklearn.model_selection import train_test_split
import librosa

In [24]:
def return_features(file,pad):
    X, sample_rate = librosa.load(file)
    max_ = X.shape[0] / sample_rate
    if max_ < pad:
        length = (pad * sample_rate) -  (X.shape[0] / sample_rate)
        X = librosa.util.pad_center(X, size = length, mode = 'constant')
    smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
    )
    y = smile.process_signal(X,sample_rate)
    #  smile.process_file(file, end=3)
    return y
def get_max_min(files):
    min_, max_ = 100, 0
    for file in files:
        sound_file, samplerate = librosa.load(file)
        t = sound_file.shape[0] / samplerate
        if t < min_:
            min_ = t
        if t >= max_:
            max_ = t

    return np.round(max_,2) + 0.01, min_

In [25]:
data_df = pd.read_csv("EMOVO_dataset/data.csv")

In [26]:
smile = opensmile.Smile(
feature_set=opensmile.FeatureSet.eGeMAPSv02,
feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
)


In [27]:
max, min = get_max_min('EMOVO_dataset/'+data_df.file_name)
u = return_features('EMOVO_dataset/'+data_df.file_name[0],max)

In [28]:
train_data = pd.DataFrame(columns=['filename', 'features', 'label'])
max, min = get_max_min('EMOVO_dataset/'+data_df.file_name)
features = []
for index, file in tqdm(zip(data_df.index, data_df.file_name)):
    train_data.loc[index] = [file, return_features('EMOVO_dataset/'+file,max), data_df.label[index]]

588it [02:23,  4.11it/s]


In [29]:
data_classes = (list((train_data["label"].unique())))
Y = keras.utils.to_categorical(list((train_data["label"].apply(data_classes.index))))
# X = np.concatenate([X1,X2,X3,X4], axis=2)
X = np.expand_dims(np.stack(train_data["features"]),3)


X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=22)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=22)

In [32]:
def get_cnn(input_shape):
    model = keras.Sequential()

    model.add(keras.layers.Input(shape=input_shape))

    model.add(keras.layers.Conv2D(256, 3, activation='relu' ))
    model.add(keras.layers.MaxPooling2D(padding='same'))
    model.add(keras.layers.Dropout(rate=0.3))

    model.add(keras.layers.Conv2D(128, 3, activation='relu'))
    model.add(keras.layers.MaxPooling2D(padding='same'))
    model.add(keras.layers.Dropout(rate=0.3))


    model.add(keras.layers.Conv2D(64, 3, activation='relu'))
    model.add(keras.layers.MaxPooling2D(padding='same'))
    model.add(keras.layers.Dropout(rate=0.3))

    model.add(keras.layers.GlobalAveragePooling2D())
    model.add(keras.layers.Dense(1024, activation='relu'))
    
    model.add(keras.layers.Dense(256, activation='relu'))
    model.add(keras.layers.Dense(64, activation='relu'))

    model.add(keras.layers.Dense(7, activation='softmax'))

    optimzer = keras.optimizers.Adam()
    model.compile(loss='categorical_crossentropy', optimizer=optimzer, metrics=['accuracy'])

    return model

In [33]:
from datetime import datetime  
name = datetime.now().strftime("model/egemaps_feat/SER_EMOVO_Clean%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=10,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]

model = get_cnn((X_train.shape[1:]))
# model.summary()

In [34]:
history = model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1169 - loss: 8.5663
Epoch 1: val_loss improved from inf to 2.05729, saving model to model/egemaps_feat/SER_EMOVO_Clean14_10_2024_11_20_50.keras
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1s/step - accuracy: 0.1175 - loss: 8.4354 - val_accuracy: 0.1509 - val_loss: 2.0573
Epoch 2/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1448 - loss: 2.0661
Epoch 2: val_loss improved from 2.05729 to 1.95136, saving model to model/egemaps_feat/SER_EMOVO_Clean14_10_2024_11_20_50.keras
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1s/step - accuracy: 0.1454 - loss: 2.0634 - val_accuracy: 0.1509 - val_loss: 1.9514
Epoch 3/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1321 - loss: 1.9555
Epoch 3: val_loss improved from 1.95136 to 1.95018, saving model to model/egemaps_feat/SER_EM

In [35]:
import keras
from keras import layers, models

def get_model(input_shape):
    inputs = layers.Input(shape=input_shape)
    encoder = layers.LSTM(128)(inputs)
    drop = layers.Dropout(0.3)(encoder)
    hidden = layers.Dense(32, activation='relu')(drop)
    outputs = layers.Dense(7, activation='softmax')(hidden)
    
    model = models.Model(inputs, outputs)
    optimizer = keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

    return model

In [38]:
X_train, X_test, y_train, y_test = train_test_split(X.squeeze(3), Y, test_size=0.1, random_state=22)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=22)
LSTM_model = get_model(X_train.shape[1:])
# LSTM_model.summary()



In [39]:

from datetime import datetime  
name = datetime.now().strftime("models/egemaps_feat/EMOVO_LSTM_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=10,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]


LSTM_history = LSTM_model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       verbose=1,
                       callbacks=callbacks)


Epoch 1/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 326ms/step - accuracy: 0.1428 - loss: 2.0416
Epoch 1: val_loss improved from inf to 1.97266, saving model to models/egemaps_feat/EMOVO_LSTM_14_10_2024_12_12_27.keras
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 376ms/step - accuracy: 0.1440 - loss: 2.0381 - val_accuracy: 0.1038 - val_loss: 1.9727
Epoch 2/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 333ms/step - accuracy: 0.1514 - loss: 1.9932
Epoch 2: val_loss did not improve from 1.97266
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 370ms/step - accuracy: 0.1514 - loss: 1.9929 - val_accuracy: 0.0755 - val_loss: 1.9784
Epoch 3/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 334ms/step - accuracy: 0.1757 - loss: 1.9437
Epoch 3: val_loss improved from 1.97266 to 1.97100, saving model to models/egemaps_feat/EMOVO_LSTM_14_10_2024_12_12_27.keras
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━

In [40]:
print(f"Loss : {LSTM_model.evaluate(X_test,y_test)[0]}, Accuracy : {LSTM_model.evaluate(X_test,y_test)[1]}")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step - accuracy: 0.1312 - loss: 1.9429
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step - accuracy: 0.1312 - loss: 1.9429
Loss : 1.9463363885879517, Accuracy : 0.11864406615495682


In [None]:
from sklearn.svm import SVC

X_train, X_test, y_train, y_test = train_test_split(X.squeeze(3), Y, test_size=0.1, random_state=22)

X_train = X_train.reshape(X_train.shape[0],-1)
X_test = X_test.reshape(X_test.shape[0],-1)

SVC_model = SVC(kernel = 'rbf', gamma = 'auto', probability = True, verbose=True)

SVC_history = SVC_model.fit(X_train, np.argmax(y_train,axis=-1))
SVC_model.score(X_test,np.argmax(y_test,axis=-1))

[LibSVM]

In [4]:
data_df = pd.read_csv("RAVDESS/data.csv")
data_df.drop(data_df[data_df['label'] == "calm"].index, inplace = True)
data_df = data_df.reset_index()

In [5]:
train_data = pd.DataFrame(columns=['filename', 'features', 'label'])
max, min = get_max_min('RAVDESS/'+data_df.file_name)
features = []
for index, file in tqdm(zip(data_df.index, data_df.file_name)):
    train_data.loc[index] = [file, return_features('RAVDESS/'+file,max), data_df.label[index]]

1248it [01:06, 18.84it/s]


In [6]:
data_classes = (list((train_data["label"].unique())))
Y = keras.utils.to_categorical(list((train_data["label"].apply(data_classes.index))))
# X = np.concatenate([X1,X2,X3,X4], axis=2)
X = np.expand_dims(np.stack(train_data["features"]),3)


X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=22)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=22)

In [10]:
from datetime import datetime  
name = datetime.now().strftime("model/emobase_feat/CNN_RAVDESS_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=10,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]

model = get_cnn((X_train.shape[1:]))
# model.summary()

In [11]:
history = model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m28/29[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 392ms/step - accuracy: 0.1456 - loss: 2.0663
Epoch 1: val_loss improved from inf to 1.91414, saving model to model/emobase_feat/CNN_RAVDESS_08_10_2024_11_56_27.keras
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 415ms/step - accuracy: 0.1453 - loss: 2.0610 - val_accuracy: 0.1778 - val_loss: 1.9141
Epoch 2/1000
[1m28/29[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 466ms/step - accuracy: 0.1497 - loss: 1.9405
Epoch 2: val_loss improved from 1.91414 to 1.91393, saving model to model/emobase_feat/CNN_RAVDESS_08_10_2024_11_56_27.keras
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 485ms/step - accuracy: 0.1499 - loss: 1.9394 - val_accuracy: 0.1511 - val_loss: 1.9139
Epoch 3/1000
[1m28/29[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 509ms/step - accuracy: 0.1954 - loss: 1.9153
Epoch 3: val_loss did not improve from 1.91393
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━

In [106]:
model.evaluate(X,Y)

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 252ms/step - accuracy: 0.1483 - loss: 1.9782


[1.9752070903778076, 0.1428571492433548]

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X.squeeze(3), Y, test_size=0.1, random_state=22)
# X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=22)
LSTM_model = get_model(X_train.shape[1:])
# LSTM_model.summary()


from datetime import datetime  
name = datetime.now().strftime("models/RAVDESS_emobase_LSTM_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=10,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]


LSTM_history = LSTM_model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       verbose=1,
                       callbacks=callbacks)


Epoch 1/1000
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - accuracy: 0.1648 - loss: 1.9710
Epoch 1: val_loss improved from inf to 1.94001, saving model to models/RAVDESS_emobase_LSTM_08_10_2024_12_26_03.keras
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 145ms/step - accuracy: 0.1647 - loss: 1.9707 - val_accuracy: 0.1733 - val_loss: 1.9400
Epoch 2/1000
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step - accuracy: 0.1683 - loss: 1.9337
Epoch 2: val_loss improved from 1.94001 to 1.91303, saving model to models/RAVDESS_emobase_LSTM_08_10_2024_12_26_03.keras
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 168ms/step - accuracy: 0.1684 - loss: 1.9336 - val_accuracy: 0.1911 - val_loss: 1.9130
Epoch 3/1000
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step - accuracy: 0.1732 - loss: 1.9284
Epoch 3: val_loss did not improve from 1.91303
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [15]:
LSTM_model = keras.models.load_model(name)

print(f"Loss : {LSTM_model.evaluate(X_test,y_test)[0]}, Accuracy : {LSTM_model.evaluate(X_test,y_test)[1]}")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 89ms/step - accuracy: 0.4497 - loss: 3.0904
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 115ms/step - accuracy: 0.4497 - loss: 3.0904
Loss : 3.1126747131347656, Accuracy : 0.40799999237060547


In [None]:
from sklearn.svm import SVC

X_train, X_test, y_train, y_test = train_test_split(X.squeeze(3), Y, test_size=0.1, random_state=22)

X_train = X_train.reshape(X_train.shape[0],-1)
X_test = X_test.reshape(X_test.shape[0],-1)

SVC_model = SVC(kernel = 'rbf', gamma = 'auto', probability = True, verbose=True)

SVC_history = SVC_model.fit(X_train, np.argmax(y_train,axis=-1))
SVC_model.score(X_test,np.argmax(y_test,axis=-1))