In [1]:
import os
import re
import sys
import librosa
from random import shuffle
import numpy as np
from typing import Tuple, Union
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib
import keras

from tqdm import tqdm 

In [2]:
data_df = pd.read_csv("EMOVO_dataset/data.csv")

In [3]:
def get_max_min(files):
    min_, max_ = 100, 0
    for file in files:
        sound_file, samplerate = librosa.load(file)
        t = sound_file.shape[0] / samplerate
        if t < min_:
            min_ = t
        if t > max_:
            max_ = t

    return max_, min_

In [4]:
def extract_new(file,pad):
    X, sample_rate = librosa.load(file)
    max_ = X.shape[0] / sample_rate
    if max_ < pad:
        length = (pad * sample_rate) - X.shape[0]
        X = np.pad(X, (0, int(length)), 'constant')
    
    stft = np.abs(librosa.stft(X))
    # result = np.array([])
    result = []

    # mfccs = np.mean(.T, axis=0)

    mfccs = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=50)

    # result = np.hstack((result, mfccs))
    result.append((mfccs))

    # chroma = np.mean(.T, axis=0)
    chroma = librosa.feature.chroma_stft(S=stft, sr=sample_rate)
    # result = np.hstack((result, chroma))
    result.append(chroma)

    # mel = np.mean(.T, axis=0) 
    mel = librosa.feature.melspectrogram(y=X, sr=sample_rate)
    # result = np.hstack((result, mel))
    result.append(mel)
    
    # contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    contrast = librosa.feature.spectral_contrast(S=stft, sr=sample_rate)
    # result = np.hstack((result, contrast))
    result.append(contrast)

    # tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
    # result = np.hstack((result, tonnetz))
    return result

In [5]:
max, min = get_max_min('EMOVO_dataset/'+data_df.file_name)
u = extract_new('EMOVO_dataset/'+data_df.file_name[0], max)

In [6]:
train_data = pd.DataFrame(columns=['filename', 'features', 'label'])

features = []
for index, file in zip(data_df.index, data_df.file_name):
    train_data.loc[index] = [file, extract_new('EMOVO_dataset/'+file, max), data_df.label[index]]

In [7]:
output_length = int(np.ceil(max * (22050) / (512)))

X1 = np.empty((0, output_length))
X2 = np.empty((0, output_length))
X3 = np.empty((0, output_length))
X4 = np.empty((0, output_length))

for data in tqdm(train_data["features"]):
    X1 = np.vstack((X1, data[0]))
    X2 = np.vstack((X2, data[1]))
    X3 = np.vstack((X3, data[2]))
    X4 = np.vstack((X4, data[3]))

X1 = X1.reshape(len(data_df),output_length,-1)
X2 = X2.reshape(len(data_df),output_length,-1)
X3 = X3.reshape(len(data_df),output_length,-1)
X4 = X4.reshape(len(data_df),output_length,-1)

In [18]:
data_classes = (list((train_data["label"].unique())))
Y = keras.utils.to_categorical(list((train_data["label"].apply(data_classes.index))))
# X = np.stack(train_data["features"])
X = np.expand_dims(np.concatenate([X1,X2,X3,X4], axis=2),3)

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=22)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=22)


In [9]:
import keras
model = keras.Sequential()
kernel_sizes = [5, 5]
model.add(keras.layers.Input(shape=(X_train.shape[1],1)))
for size in kernel_sizes:
    model.add(keras.layers.Conv1D(
        filters = 32,
        kernel_size = size,
        padding = 'same'
    ))  # 卷积层
    model.add(keras.layers.BatchNormalization(axis=-1))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Dropout(0.5))

model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(32))
model.add(keras.layers.BatchNormalization(axis = -1))
model.add(keras.layers.Activation('relu'))
model.add(keras.layers.Dropout(0.5))

model.add(keras.layers.Dense(7, activation='softmax'))  # 分类层
optimzer = keras.optimizers.Adam(learning_rate= 0.001)
model.compile(loss='categorical_crossentropy', optimizer=optimzer, metrics=['accuracy'])

In [10]:
from datetime import datetime  
name = datetime.now().strftime("ser_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=20,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]



history = model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=256,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 179ms/step - accuracy: 0.1210 - loss: 2.4292 - val_accuracy: 0.1226 - val_loss: 4.0383
Epoch 2/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.1137 - loss: 2.4334 - val_accuracy: 0.1226 - val_loss: 3.6698
Epoch 3/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - accuracy: 0.1661 - loss: 2.3374 - val_accuracy: 0.1509 - val_loss: 3.2965
Epoch 4/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - accuracy: 0.1663 - loss: 2.2875 - val_accuracy: 0.1509 - val_loss: 2.9396
Epoch 5/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.1789 - loss: 2.2737 - val_accuracy: 0.1509 - val_loss: 2.6035
Epoch 6/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.2035 - loss: 2.2103 - val_accuracy: 0.1509 - val_loss: 2.3651
Epoch 7/1000
[1m2/2[0m [32m━━━

In [11]:
def obtain_df(X):
    X_df = pd.DataFrame(columns=['filename', 'features', 'label'])

    for index, file in zip(data_df.index, data_df.file_name):
        X_df.loc[index] = [file, pd.Series(X[index]), data_df.label[index]]
    X = pd.DataFrame(X_df["features"])
    return X

from sktime.transformations.panel.rocket import Rocket

def get_rocket(X):
    trf = Rocket(num_kernels=512) 
    trf.fit(X) 
    X_ = trf.transform(X)
    return X_


In [21]:
X1_ = get_rocket(obtain_df(X1)).to_numpy()
X2_ = get_rocket(obtain_df(X2)).to_numpy()
X3_ = get_rocket(obtain_df(X3)).to_numpy()
X4_ = get_rocket(obtain_df(X4)).to_numpy()


In [22]:

import tensorflow as tf
from keras.utils import to_categorical

data_classes = (list((train_data["label"].unique())))
Y = to_categorical(list((train_data["label"].apply(data_classes.index))))
# X = np.stack(train_data["features"])
X_ = np.hstack([X1_,X2_,X3_,X4_])

X_train_, X_test_, y_train_, y_test_ = train_test_split(X_, Y, test_size=0.1, random_state=22)
X_train_, X_val_, y_train_, y_val_ = train_test_split(X_train_, y_train_, test_size=0.2, random_state=22)


In [16]:
import keras
model = keras.Sequential()
kernel_sizes = [5, 5]
model.add(keras.layers.Input(shape=(X_train_.shape[1],1)))
for size in kernel_sizes:
    model.add(keras.layers.Conv1D(
        filters = 32,
        kernel_size = size,
        padding = 'same'
    ))  # 卷积层
    model.add(keras.layers.BatchNormalization(axis=-1))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Dropout(0.5))

model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(32))
model.add(keras.layers.BatchNormalization(axis = -1))
model.add(keras.layers.Activation('relu'))
model.add(keras.layers.Dropout(0.5))

model.add(keras.layers.Dense(7, activation='softmax')) 
optimzer = keras.optimizers.Adam(learning_rate= 0.001)
model.compile(loss='categorical_crossentropy', optimizer=optimzer, metrics=['accuracy'])

In [17]:
from datetime import datetime  
name = datetime.now().strftime("ser_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=40,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]



history = model.fit(X_train_, y_train_, 
                       validation_data=(X_val_,y_val_), 
                       batch_size=256,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test_,y_test_)[0]}, Accuracy : {model.evaluate(X_test_,y_test_)[1]}")

Epoch 1/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 439ms/step - accuracy: 0.1339 - loss: 2.5274 - val_accuracy: 0.1792 - val_loss: 8.9938
Epoch 2/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 315ms/step - accuracy: 0.2483 - loss: 2.0211 - val_accuracy: 0.1792 - val_loss: 15.2443
Epoch 3/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 307ms/step - accuracy: 0.2677 - loss: 1.9988 - val_accuracy: 0.1792 - val_loss: 15.7616
Epoch 4/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 322ms/step - accuracy: 0.3321 - loss: 1.7911 - val_accuracy: 0.1792 - val_loss: 13.8166
Epoch 5/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 307ms/step - accuracy: 0.3609 - loss: 1.7095 - val_accuracy: 0.1887 - val_loss: 11.2275
Epoch 6/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 319ms/step - accuracy: 0.3316 - loss: 1.7135 - val_accuracy: 0.1887 - val_loss: 9.3695
Epoch 7/1000
[1m2/2[0m

# LSTM

In [27]:
from keras import layers, models
def get_model(X_train):
    inputs = layers.Input(shape=(X_train.shape[1],1))
    encoder = layers.LSTM(128)(inputs)
    drop = layers.Dropout(0.3)(encoder)
    hidden = layers.Dense(32, activation='relu')(drop)
    outputs = layers.Dense(7, activation='softmax')(hidden)
    
    model = models.Model(inputs, outputs)
    optimizer = keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

    return model

In [28]:
LSTM_model = get_model(X_train)
LSTM_model.summary()


from datetime import datetime  
name = datetime.now().strftime("ser_lstm_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=20,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]


LSTM_history = LSTM_model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=32,
                       epochs=1000,
                       verbose=1,
                       callbacks=callbacks)


print(f"Loss : {LSTM_model.evaluate(X_test,y_test)[0]}, Accuracy : {LSTM_model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 61ms/step - accuracy: 0.1444 - loss: 2.0246 - val_accuracy: 0.1226 - val_loss: 1.9662
Epoch 2/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - accuracy: 0.1358 - loss: 1.9650 - val_accuracy: 0.1132 - val_loss: 1.9588
Epoch 3/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - accuracy: 0.1250 - loss: 1.9794 - val_accuracy: 0.0755 - val_loss: 1.9632
Epoch 4/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 50ms/step - accuracy: 0.1612 - loss: 1.9545 - val_accuracy: 0.0755 - val_loss: 1.9591
Epoch 5/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step - accuracy: 0.1323 - loss: 1.9667 - val_accuracy: 0.1132 - val_loss: 1.9597
Epoch 6/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step - accuracy: 0.1600 - loss: 1.9467 - val_accuracy: 0.1132 - val_loss: 1.9565
Epoch 7/1000
[1m14/14

In [29]:
LSTM_model = get_model(X_train_)
LSTM_model.summary()


from datetime import datetime  
name = datetime.now().strftime("ser_lstm_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.001,
        patience=20,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]


LSTM_history = LSTM_model.fit(X_train_, y_train_, 
                       validation_data=(X_val_,y_val_), 
                       batch_size=32,
                       epochs=1000,
                       verbose=1,
                       callbacks=callbacks)


print(f"Loss : {LSTM_model.evaluate(X_test_,y_test_)[0]}, Accuracy : {LSTM_model.evaluate(X_test_,y_test_)[1]}")

Epoch 1/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1s/step - accuracy: 0.1555 - loss: 1.9471 - val_accuracy: 0.1226 - val_loss: 1.9549
Epoch 2/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.1704 - loss: 1.9410 - val_accuracy: 0.1132 - val_loss: 1.9488
Epoch 3/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.1875 - loss: 1.9412 - val_accuracy: 0.1321 - val_loss: 1.9456
Epoch 4/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.1806 - loss: 1.9379 - val_accuracy: 0.1038 - val_loss: 1.9393
Epoch 5/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.2036 - loss: 1.9309 - val_accuracy: 0.1321 - val_loss: 1.9336
Epoch 6/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.1958 - loss: 1.9277 - val_accuracy: 0.1226 - val_loss: 1.9385
Epoch 7/1000
[1m14/14[0m 