In [1]:
import os
import re
import sys
import librosa
from random import shuffle
import numpy as np
from typing import Tuple, Union
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib

In [2]:
data_df = pd.read_csv("EMOVO_dataset/data.csv")

In [3]:
def get_max_min(files):
    min_, max_ = 100, 0
    for file in files:
        sound_file, samplerate = librosa.load(file)
        t = sound_file.shape[0] / samplerate
        if t < min_:
            min_ = t
        if t > max_:
            max_ = t

    return max_, min_

In [4]:
def extract(file,pad):
    X, sample_rate = librosa.load(file)
    max_ = X.shape[0] / sample_rate
    if pad:
        length = (max_ * sample_rate) - X.shape[0]
        X = np.pad(X, (0, int(length)), 'constant')
    
    stft = np.abs(librosa.stft(X))
    result = np.array([])

    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=50).T, axis=0)
    result = np.hstack((result, mfccs))

    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    result = np.hstack((result, chroma))
    
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0) 
    result = np.hstack((result, mel))
    
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    result = np.hstack((result, contrast))
    
    # tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
    # result = np.hstack((result, tonnetz))
    return pd.DataFrame(result)

In [5]:
max, min = get_max_min('EMOVO_dataset/'+data_df.file_name)

In [55]:
u = extract('EMOVO_dataset/'+data_df.file_name[0], max)

In [56]:
u

Unnamed: 0,0
0,-407.066681
1,43.688465
2,0.330512
3,8.860258
4,9.041548
...,...
192,17.163601
193,16.742468
194,16.851534
195,16.808489


In [7]:
train_data = pd.DataFrame(columns=['filename', 'features', 'label'])

features = []
for index, file in zip(data_df.index, data_df.file_name):
    train_data.loc[index] = [file, extract('EMOVO_dataset/'+file, max), data_df.label[index]]

In [8]:
train_data

Unnamed: 0,filename,features,label
0,f1/dis-f1-b1.wav,0 0 -407.066681 1 43.68846...,disgust
1,f1/dis-f1-b2.wav,0 0 -406.009399 1 35.83183...,disgust
2,f1/dis-f1-b3.wav,0 0 -393.554535 1 58.40808...,disgust
3,f1/dis-f1-d1.wav,0 0 -395.404083 1 67.96301...,disgust
4,f1/dis-f1-d2.wav,0 0 -450.395935 1 69.13523...,disgust
...,...,...,...
583,m3/tri-m3-n1.wav,0 0 -442.870667 1 106.22269...,sadness
584,m3/tri-m3-n2.wav,0 0 -464.318817 1 81.76235...,sadness
585,m3/tri-m3-n3.wav,0 0 -493.679565 1 84.39754...,sadness
586,m3/tri-m3-n4.wav,0 0 -528.805054 1 75.34912...,sadness


In [22]:
import tensorflow as tf
from keras.utils import to_categorical

data_classes = (list((train_data["label"].unique())))
Y = to_categorical(list((train_data["label"].apply(data_classes.index))))
# X = pd.DataFrame(train_data["features"])

X = np.array(train_data["features"])

X = np.stack(train_data["features"])

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=22)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=22)

input_shape = (X_train.shape[1],1)

In [24]:
X.shape

(588, 197, 1)

In [25]:
import keras
model = keras.Sequential()
kernel_sizes = [5, 5]
model.add(keras.layers.Input(shape=input_shape))
for size in kernel_sizes:
    model.add(keras.layers.Conv1D(
        filters = 32,
        kernel_size = size,
        padding = 'same'
    )) 
    model.add(keras.layers.BatchNormalization(axis=-1))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Dropout(0.5))

model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(32))
model.add(keras.layers.BatchNormalization(axis = -1))
model.add(keras.layers.Activation('relu'))
model.add(keras.layers.Dropout(0.5))

model.add(keras.layers.Dense(7, activation='softmax'))  # 分类层
optimzer = keras.optimizers.Adam(learning_rate= 0.001)
model.compile(loss='categorical_crossentropy', optimizer=optimzer, metrics=['accuracy'])

In [26]:
from datetime import datetime  
name = datetime.now().strftime("models/ser_1d_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.01,
        patience=10,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]


history = model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=256,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000


[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m1s[0m 1s/step - accuracy: 0.1602 - loss: 2.5228
Epoch 1: val_loss improved from inf to 3.71491, saving model to models/ser_1d_26_09_2024_15_17_47.keras
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 222ms/step - accuracy: 0.1401 - loss: 2.5812 - val_accuracy: 0.0849 - val_loss: 3.7149
Epoch 2/1000
[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 36ms/step - accuracy: 0.1328 - loss: 2.3597
Epoch 2: val_loss improved from 3.71491 to 3.03836, saving model to models/ser_1d_26_09_2024_15_17_47.keras
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - accuracy: 0.1341 - loss: 2.3719 - val_accuracy: 0.0943 - val_loss: 3.0384
Epoch 3/1000
[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 36ms/step - accuracy: 0.1562 - loss: 2.5953
Epoch 3: val_loss improved from 3.03836 to 2.72056, saving model to models/ser_1d_26_09_2024_15_17_47.keras
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [14]:
from sktime.transformations.panel.rocket import Rocket

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=22)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=22)



In [15]:
print(X_train.shape)
trf = Rocket() 
trf.fit(X_train) 
X_train = trf.transform(X_train) 
print(X_train.shape)
X_val = trf.transform(X_val)
X_test = trf.transform(X_test) 

(423, 197, 1)
(423, 20000)


In [16]:
X_train = np.expand_dims(X_train,axis = 2)
X_val = np.expand_dims(X_val,axis = 2)
X_test = np.expand_dims(X_test,axis = 2)

In [17]:
import keras
model = keras.Sequential()
kernel_sizes = [5, 5]
model.add(keras.layers.Input(shape=(X_train.shape[1],1)))
for size in kernel_sizes:
    model.add(keras.layers.Conv1D(
        filters = 32,
        kernel_size = size,
        padding = 'same'
    ))  # 卷积层
    model.add(keras.layers.BatchNormalization(axis=-1))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Dropout(0.5))

model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(32))
model.add(keras.layers.BatchNormalization(axis = -1))
model.add(keras.layers.Activation('relu'))
model.add(keras.layers.Dropout(0.5))

model.add(keras.layers.Dense(7, activation='softmax'))  # 分类层
optimzer = keras.optimizers.Adam(learning_rate= 0.001)
model.compile(loss='categorical_crossentropy', optimizer=optimzer, metrics=['accuracy'])

In [18]:
from datetime import datetime  
name = datetime.now().strftime("models/ser_rocket_%d_%m_%Y_%H_%M_%S.keras")  

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = name,
        save_best_only=True,
        verbose=1,
        monitor="val_loss"),

    keras.callbacks.EarlyStopping(  
        monitor="val_loss",
        min_delta=0.01,
        patience=10,
        verbose=1,
        mode="auto",
        restore_best_weights=True
    )
]


history = model.fit(X_train, y_train, 
                       validation_data=(X_val,y_val), 
                       batch_size=256,
                       epochs=1000,
                       callbacks=callbacks)


print(f"Loss : {model.evaluate(X_test,y_test)[0]}, Accuracy : {model.evaluate(X_test,y_test)[1]}")

Epoch 1/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.1396 - loss: 2.4651
Epoch 1: val_loss improved from inf to 3.83423, saving model to models/ser_rocket_26_09_2024_15_09_59.keras
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step - accuracy: 0.1380 - loss: 2.4583 - val_accuracy: 0.1792 - val_loss: 3.8342
Epoch 2/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1130 - loss: 2.5742
Epoch 2: val_loss did not improve from 3.83423
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step - accuracy: 0.1116 - loss: 2.5806 - val_accuracy: 0.1792 - val_loss: 4.9840
Epoch 3/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1557 - loss: 2.5171
Epoch 3: val_loss did not improve from 3.83423
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step - accuracy: 0.1543 - loss: 2.5176 - val_accuracy: 0.1792 - val_loss: 4.9219
Epoch 4/1

In [49]:
data = X_test[0].reshape(1,-1,1)

In [53]:
import gc
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import cv2
import tensorflow as tf
import seaborn
import matplotlib.pyplot as plt
import matplotlib.collections as mcoll
import matplotlib as mpl
import numpy as np
import itertools
import logging
# Set random seed
np.random.seed(123)

def multicolored_lines(x,y,heatmap,title_name):
    fig, ax = plt.subplots()
    lc = colorline(x, y, heatmap,cmap='rainbow')
    plt.colorbar(lc)
    lc.set_linewidth(2)
    lc.set_alpha(0.8)
    plt.xlim(x.min(), x.max())
    plt.ylim(y.min(), y.max())
    plt.title(title_name)
    plt.grid(False)
    plt.show()

def colorline(x, y, heatmap,cmap='rainbow'):
    z = np.array(heatmap)
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    lc = mcoll.LineCollection(segments, array=z, cmap=cmap)
    ax = plt.gca()
    ax.add_collection(lc)
    return lc
def compute_cam_1d_output (model, data , layer_name , N):
        """
        model: The Deep Learning model
        data : A input data. Data shape has to be (n,1,1)
        layer_name : The target layer for explanation
        N: signal length in seconds
        """
        # input layer, model output layer and target layer
        grad_model = tf.keras.models.Model(inputs=[model.inputs],
                                           outputs=[model.get_layer(layer_name).output,model.output])     
        
        # Getting gradients of input layer, model output layer (predictions) and target layer
        with tf.GradientTape() as tape:
            inputs = np.expand_dims(data,axis=0)
            conv_outs, predictions = grad_model(inputs) 
            class_idx = tf.argmax(predictions[0])
            y_c = predictions[:, class_idx]

        batch_grads = tape.gradient(y_c, conv_outs) 
        grads = batch_grads[0]
        
        # First, second and third derivative of output gradient
        first = tf.exp(y_c) * grads
        second = tf.exp(y_c) * tf.pow(grads, 2)
        third = tf.exp(y_c) * tf.pow(grads, 3)
        
        # Compute salienty maps for the class_idx prediction
        global_sum = tf.reduce_sum(tf.reshape(conv_outs[0], shape=(-1, first.shape[1])), axis=0)
        alpha_num = second
        alpha_denom = second * 2.0 + third * tf.reshape(global_sum, shape=(1,1,first.shape[1]))
        alpha_denom = tf.where(alpha_denom != 0.0, alpha_denom, tf.ones(shape=alpha_denom.shape))
        alphas = alpha_num / alpha_denom
        weights = tf.maximum(first, 0.0)
        alpha_normalization_constant = tf.reduce_sum(tf.reduce_sum(alphas, axis=0), axis=0)
        alphas /= tf.reshape(alpha_normalization_constant, shape=(1,1,first.shape[1]))
        alphas_thresholding = np.where(weights, alphas, 0.0)

        alpha_normalization_constant = tf.reduce_sum(tf.reduce_sum(alphas_thresholding, axis=0),axis=0)
        alpha_normalization_constant_processed = tf.where(alpha_normalization_constant != 0.0, alpha_normalization_constant,
                                                          tf.ones(alpha_normalization_constant.shape))

        alphas /= tf.reshape(alpha_normalization_constant_processed, shape=(1,1,first.shape[1]))
        deep_linearization_weights = tf.reduce_sum(tf.reshape((weights*alphas), shape=(-1,first.shape[1])), axis=0)
        grad_CAM_map = tf.reduce_sum(deep_linearization_weights * conv_outs[0], axis=-1)
        
        # Normalization
        cam = np.maximum(grad_CAM_map, 0)
        cam = cam / np.max(cam)  
        
        # Turn result into a heatmap
        heatmap=[]
        heatmap.append(cam.tolist())
        big_heatmap = cv2.resize(np.array(heatmap), dsize=(data.shape[0], 500),interpolation=cv2.INTER_CUBIC)
        x = np.linspace(0, N, data.shape[0])
        plt.style.use("seaborn-whitegrid")
        multicolored_lines(x,np.array([i[0] for i in data]),big_heatmap[0],f"GradCAM ++ Visualization")

In [54]:
compute_cam_1d_output (model, data , "dense_6" , 3)

ValueError: The layer sequential_3 has never been called and thus has no defined output.