In [10]:
import os
import re
import sys
import librosa
from random import shuffle
import numpy as np
from typing import Tuple, Union
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib
import keras

from keras import models, layers
from tqdm import tqdm 

In [11]:
data_df = pd.read_csv("EMOVO_dataset/data.csv")

In [12]:
def get_max_min(files):
    min_, max_ = 100, 0
    for file in files:
        sound_file, samplerate = librosa.load(file)
        t = sound_file.shape[0] / samplerate
        if t < min_:
            min_ = t
        if t > max_:
            max_ = t

    return max_, min_

In [13]:
def extract_new(file,pad):
    X, sample_rate = librosa.load(file)
    max_ = X.shape[0] / sample_rate
    if max_ < pad:
        length = (pad * sample_rate) -  (X.shape[0] / sample_rate)
        X = librosa.util.pad_center(X, size = length, mode = 'constant')   
    stft = np.abs(librosa.stft(X, n_fft=662, hop_length=221))
    # result = np.array([])
    result = []

    # mfccs = np.mean(.T, axis=0)

    mfccs = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=13, n_fft=662, hop_length=221)

    # result = np.hstack((result, mfccs))
    result.append((mfccs))

    # chroma = np.mean(.T, axis=0)
    chroma = librosa.feature.chroma_stft(S=stft, sr=sample_rate, n_fft=662, hop_length=221)
    # result = np.hstack((result, chroma))
    result.append(chroma)

    # mel = np.mean(.T, axis=0) 
    mel = librosa.feature.melspectrogram(y=X, sr=sample_rate, n_fft=662, hop_length=221)
    # result = np.hstack((result, mel))
    result.append(mel)
    
    # contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    contrast = librosa.feature.spectral_contrast(S=stft, sr=sample_rate, n_fft=662, hop_length=221)
    # result = np.hstack((result, contrast))
    result.append(contrast)

    # tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
    # result = np.hstack((result, tonnetz))
    return result

In [14]:
max, min = get_max_min('EMOVO_dataset/'+data_df.file_name)
train_data = pd.DataFrame(columns=['filename', 'features', 'label'])
output_length = int(np.ceil(max * (22050) / (221)))
features = []
for index, file in zip(data_df.index, data_df.file_name):
    train_data.loc[index] = [file, extract_new('EMOVO_dataset/'+file, max), data_df.label[index]]

In [15]:
X1 = np.empty((0, output_length))
X2 = np.empty((0, output_length))
X3 = np.empty((0, output_length))
X4 = np.empty((0, output_length))

for data in tqdm(train_data["features"]):
    X1 = np.vstack((X1, data[0]))
    X2 = np.vstack((X2, data[1]))
    X3 = np.vstack((X3, data[2]))
    X4 = np.vstack((X4, data[3]))

X1 = X1.reshape(len(data_df),output_length,-1)
X2 = X2.reshape(len(data_df),output_length,-1)
X3 = X3.reshape(len(data_df),output_length,-1)
X4 = X4.reshape(len(data_df),output_length,-1)

  0%|          | 0/588 [00:00<?, ?it/s]

100%|██████████| 588/588 [03:22<00:00,  2.90it/s]


In [8]:
data_classes = (list((train_data["label"].unique())))
Y = keras.utils.to_categorical(list((train_data["label"].apply(data_classes.index))))
# X = np.stack(train_data["features"])
X = np.expand_dims(np.concatenate([X1,X2,X3,X4], axis=2),3)

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=22)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=22)


In [24]:
def get_cnn(input_shape):
    model = keras.Sequential()

    model.add(keras.layers.Input(shape=input_shape))

    model.add(keras.layers.Conv2D(256, 3, activation='relu' ))
    model.add(keras.layers.MaxPooling2D(padding='same'))
    model.add(keras.layers.Dropout(rate=0.3))

    model.add(keras.layers.Conv2D(128, 3, activation='relu'))
    model.add(keras.layers.MaxPooling2D(padding='same'))
    model.add(keras.layers.Dropout(rate=0.3))


    model.add(keras.layers.Conv2D(64, 3, activation='relu'))
    model.add(keras.layers.MaxPooling2D(padding='same'))
    model.add(keras.layers.Dropout(rate=0.3))

    model.add(keras.layers.GlobalAveragePooling2D())
    model.add(keras.layers.Dense(1024, activation='relu'))
    
    model.add(keras.layers.Dense(256, activation='relu'))
    model.add(keras.layers.Dense(64, activation='relu'))

    model.add(keras.layers.Dense(7, activation='softmax'))

    optimzer = keras.optimizers.Adam()
    model.compile(loss='categorical_crossentropy', optimizer=optimzer, metrics=['accuracy'])

    return model

In [21]:
from datetime import datetime  
name = datetime.now().strftime("models/EMOVO_4feat_CNN_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=5,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]

model = get_cnn((X_train.shape[1:]))


In [22]:
history = model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.1524 - loss: 1.9767 
Epoch 1: val_loss improved from inf to 1.94252, saving model to models/EMOVO_4feat_CNN_08_10_2024_10_25_41.keras
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m193s[0m 13s/step - accuracy: 0.1501 - loss: 1.9761 - val_accuracy: 0.1792 - val_loss: 1.9425
Epoch 2/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.1578 - loss: 1.9429 
Epoch 2: val_loss did not improve from 1.94252
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m185s[0m 13s/step - accuracy: 0.1572 - loss: 1.9432 - val_accuracy: 0.1132 - val_loss: 1.9463
Epoch 3/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.1801 - loss: 1.9464 
Epoch 3: val_loss did not improve from 1.94252
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 13s/step - accuracy: 0.1783 - loss: 1.9464 - val_accuracy: 0.1

In [24]:
import keras
from keras import layers, models

def get_model(input_shape):
    inputs = layers.Input(shape=input_shape)
    encoder = layers.LSTM(128)(inputs)
    drop = layers.Dropout(0.3)(encoder)
    hidden = layers.Dense(32, activation='relu')(drop)
    outputs = layers.Dense(7, activation='softmax')(hidden)
    
    model = models.Model(inputs, outputs)
    optimizer = keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

    return model

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X.squeeze(3), Y, test_size=0.1, random_state=22)
# X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=22)
LSTM_model = get_model(X_train.shape[1:])
# LSTM_model.summary()


from datetime import datetime  
name = datetime.now().strftime("models/EMOVO_4feat_LSTM_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=10,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]


LSTM_history = LSTM_model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       verbose=1,
                       callbacks=callbacks)


Epoch 1/1000
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 398ms/step - accuracy: 0.1173 - loss: 2.0442
Epoch 1: val_loss improved from inf to 1.89372, saving model to models/EMOVO_4feat_LSTM_08_10_2024_10_45_21.keras
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 458ms/step - accuracy: 0.1187 - loss: 2.0412 - val_accuracy: 0.2358 - val_loss: 1.8937
Epoch 2/1000
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 406ms/step - accuracy: 0.2027 - loss: 1.9175
Epoch 2: val_loss improved from 1.89372 to 1.82492, saving model to models/EMOVO_4feat_LSTM_08_10_2024_10_45_21.keras
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 443ms/step - accuracy: 0.2015 - loss: 1.9177 - val_accuracy: 0.2264 - val_loss: 1.8249
Epoch 3/1000
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 389ms/step - accuracy: 0.2127 - loss: 1.8922
Epoch 3: val_loss improved from 1.82492 to 1.79454, saving model to models/EMOVO_4feat_LSTM_08_10_2

In [29]:

LSTM_model = keras.models.load_model(name)
print(f"Loss : {LSTM_model.evaluate(X_test,y_test)[0]}, Accuracy : {LSTM_model.evaluate(X_test,y_test)[1]}")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 242ms/step - accuracy: 0.2850 - loss: 5.3449
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 219ms/step - accuracy: 0.2850 - loss: 5.3449
Loss : 5.528655052185059, Accuracy : 0.2711864411830902


In [None]:
from sklearn.svm import SVC

X_train, X_test, y_train, y_test = train_test_split(X.squeeze(3), Y, test_size=0.1, random_state=22)

X_train = X_train.reshape(X_train.shape[0],-1)
X_test = X_test.reshape(X_test.shape[0],-1)

SVC_model = SVC(kernel = 'rbf', gamma = 'auto', probability = True, verbose=True)

SVC_history = SVC_model.fit(X_train, np.argmax(y_train,axis=-1))
SVC_model.score(X_test,np.argmax(y_test,axis=-1))

[LibSVM]

In [34]:
X[0].shape

(1397, 160, 1)

In [17]:
def obtain_df(X):
    X_df = pd.DataFrame(columns=['filename', 'features', 'label'])

    for index, file in tqdm(zip(data_df.index, data_df.file_name)):
        X_df.loc[index] = [file, pd.Series(X[index].flatten()), data_df.label[index]]
    X = pd.DataFrame(X_df["features"])
    return X

from sktime.transformations.panel.rocket import Rocket

def get_rocket(X):
    trf = Rocket(num_kernels=512) 
    trf.fit(X) 
    X_ = trf.transform(X)
    return X_


In [18]:
X1_ = get_rocket(obtain_df(X1)).to_numpy()


588it [00:02, 281.29it/s]


In [21]:
X2_ = get_rocket(obtain_df(X2)).to_numpy()
X3_ = get_rocket(obtain_df(X3)).to_numpy()
X4_ = get_rocket(obtain_df(X4)).to_numpy()


588it [00:01, 315.98it/s]
588it [00:08, 71.45it/s]
588it [00:00, 1635.87it/s]


In [22]:
import tensorflow as tf
from keras.utils import to_categorical

data_classes = (list((train_data["label"].unique())))
Y = to_categorical(list((train_data["label"].apply(data_classes.index))))
# X = np.stack(train_data["features"])
X_ = np.hstack([X1_,X2_,X3_,X4_])

X_train_, X_test_, y_train_, y_test_ = train_test_split(X_, Y, test_size=0.1, random_state=22)
X_train_, X_val_, y_train_, y_val_ = train_test_split(X_train_, y_train_, test_size=0.2, random_state=22)


In [23]:
X_train.shape

(423, 1397, 160, 1)

In [27]:
from datetime import datetime  
name = datetime.now().strftime("models/EMOVO_rocket_CNN_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=5,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]

model = get_cnn((X_train.shape[1:]))

In [28]:
history = model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16s/step - accuracy: 0.1156 - loss: 2.0481 
Epoch 1: val_loss improved from inf to 1.95082, saving model to models/EMOVO_rocket_CNN_08_10_2024_12_30_32.keras
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m267s[0m 17s/step - accuracy: 0.1159 - loss: 2.0451 - val_accuracy: 0.1132 - val_loss: 1.9508
Epoch 2/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15s/step - accuracy: 0.1393 - loss: 1.9446 
Epoch 2: val_loss improved from 1.95082 to 1.94794, saving model to models/EMOVO_rocket_CNN_08_10_2024_12_30_32.keras
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m215s[0m 15s/step - accuracy: 0.1398 - loss: 1.9447 - val_accuracy: 0.1226 - val_loss: 1.9479
Epoch 3/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14s/step - accuracy: 0.1324 - loss: 1.9466 
Epoch 3: val_loss improved from 1.94794 to 1.94787, saving model to models/EMOVO_rocket_CNN_08_10_2024_

KeyboardInterrupt: 

# LSTM

In [27]:
from keras import layers, models
def get_model(X_train):
    inputs = layers.Input(shape=(X_train.shape[1],1))
    encoder = layers.LSTM(128)(inputs)
    drop = layers.Dropout(0.3)(encoder)
    hidden = layers.Dense(32, activation='relu')(drop)
    outputs = layers.Dense(7, activation='softmax')(hidden)
    
    model = models.Model(inputs, outputs)
    optimizer = keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

    return model

In [28]:
LSTM_model = get_model(X_train)
LSTM_model.summary()


from datetime import datetime  
name = datetime.now().strftime("ser_lstm_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=20,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]


LSTM_history = LSTM_model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       verbose=1,
                       callbacks=callbacks)


print(f"Loss : {LSTM_model.evaluate(X_test,y_test)[0]}, Accuracy : {LSTM_model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 61ms/step - accuracy: 0.1444 - loss: 2.0246 - val_accuracy: 0.1226 - val_loss: 1.9662
Epoch 2/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - accuracy: 0.1358 - loss: 1.9650 - val_accuracy: 0.1132 - val_loss: 1.9588
Epoch 3/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - accuracy: 0.1250 - loss: 1.9794 - val_accuracy: 0.0755 - val_loss: 1.9632
Epoch 4/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 50ms/step - accuracy: 0.1612 - loss: 1.9545 - val_accuracy: 0.0755 - val_loss: 1.9591
Epoch 5/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step - accuracy: 0.1323 - loss: 1.9667 - val_accuracy: 0.1132 - val_loss: 1.9597
Epoch 6/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step - accuracy: 0.1600 - loss: 1.9467 - val_accuracy: 0.1132 - val_loss: 1.9565
Epoch 7/1000
[1m14/14

In [29]:
LSTM_model = get_model(X_train_)
LSTM_model.summary()


from datetime import datetime  
name = datetime.now().strftime("ser_lstm_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=20,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]


LSTM_history = LSTM_model.fit(X_train_, y_train_, 
                       validation_data=(X_val_,y_val_), 
                       batch_size=32,
                       epochs=1000,
                       verbose=1,
                       callbacks=callbacks)


print(f"Loss : {LSTM_model.evaluate(X_test_,y_test_)[0]}, Accuracy : {LSTM_model.evaluate(X_test_,y_test_)[1]}")

Epoch 1/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1s/step - accuracy: 0.1555 - loss: 1.9471 - val_accuracy: 0.1226 - val_loss: 1.9549
Epoch 2/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.1704 - loss: 1.9410 - val_accuracy: 0.1132 - val_loss: 1.9488
Epoch 3/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.1875 - loss: 1.9412 - val_accuracy: 0.1321 - val_loss: 1.9456
Epoch 4/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.1806 - loss: 1.9379 - val_accuracy: 0.1038 - val_loss: 1.9393
Epoch 5/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.2036 - loss: 1.9309 - val_accuracy: 0.1321 - val_loss: 1.9336
Epoch 6/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.1958 - loss: 1.9277 - val_accuracy: 0.1226 - val_loss: 1.9385
Epoch 7/1000
[1m14/14[0m 