Imports

In [None]:
import sys
from tempfile import TemporaryFile

import joblib
import pandas as pd
import numpy as np
import os
import numpy
import tensorflow
import keras
import seaborn as sns
import matplotlib.pyplot as plt
import librosa
import librosa.display
import warnings
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Conv1D, Flatten, Activation, MaxPooling1D
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Suppress every warning raised through the `warnings` module from here on.
warnings.filterwarnings('ignore')

# Seed NumPy's global random generator and TensorFlow's global random
# generator with the same fixed value (1).
from numpy.random import seed

seed(1)
tensorflow.random.set_seed(1)

Functions

In [None]:
def load_dataset(train_dir='/content/drive/My Drive/Colab Notebooks/data',
                 test_dir='D:/Uni/19.Master/Daten/Stimuli_Intensitätsmorphs'):
    """Collect audio file paths and emotion labels for training and testing.

    Both directories are walked recursively.

    Training files: the label is the text between the last ``_`` and the
    first following ``.`` of the file name, lower-cased. Files labelled
    ``neutral`` are left out.

    Test files: the emotion code is the second ``_``-separated field of the
    file name. Known three-letter codes are expanded (see ``emotion_names``
    below), unknown codes are kept as they are, and files with the code
    ``ple`` are left out. Files whose name has no ``_`` cannot carry a code
    and are skipped instead of raising ``IndexError``.

    Args:
        train_dir: Root directory of the training recordings.
        test_dir: Root directory of the test recordings.

    Returns:
        A tuple ``(paths, labels, testpaths, testlabels)`` of four lists;
        ``paths[i]`` belongs to ``labels[i]`` and ``testpaths[i]`` to
        ``testlabels[i]``.
    """
    # Test-set emotion codes -> label spelling used for the training set.
    # 'sur' maps to 'ps', presumably the training files' tag for
    # "pleasant surprise" -- TODO confirm against the data.
    emotion_names = {
        'ang': 'angry',
        'dis': 'disgust',
        'fea': 'fear',
        'hap': 'happy',
        'sad': 'sad',
        'sur': 'ps',
    }

    print(sys.executable)

    paths = []
    labels = []
    for dirname, _, filenames in os.walk(train_dir):
        for filename in filenames:
            # Lower-case before filtering so 'Neutral' is excluded as well.
            label = filename.split('_')[-1].split('.')[0].lower()
            if label != 'neutral':
                labels.append(label)
                paths.append(os.path.join(dirname, filename))

    testpaths = []
    testlabels = []
    for dirname, _, filenames in os.walk(test_dir):
        for filename in filenames:
            fields = filename.split('_')
            if len(fields) < 2:
                # No emotion field in the name (e.g. a stray helper file).
                continue
            emot = fields[1]
            if emot == 'ple':
                continue
            testpaths.append(os.path.join(dirname, filename))
            testlabels.append(emotion_names.get(emot, emot).lower())

    print(testlabels)
    print(testpaths)
    print('Dataset is loaded')
    return paths, labels, testpaths, testlabels


def waveplot(data, sr, emotion):
    """Display the waveform of an audio signal.

    Opens a new 10x4 figure titled with ``emotion``, draws ``data`` with
    ``librosa.display.waveshow`` using sampling rate ``sr`` and shows it.
    """
    figure_size = (10, 4)
    plt.figure(figsize=figure_size)
    plt.title(emotion, size=20)
    librosa.display.waveshow(data, sr=sr)
    plt.show()


def spectrogramm(data, sr, emotion):
    """Draw a dB-scaled spectrogram of an audio signal.

    Takes the short-time Fourier transform of ``data``, converts its
    magnitude to decibels and renders it in a new 10x4 figure titled with
    ``emotion`` (time on the x axis, Hz on the y axis) plus a colour bar.

    NOTE(review): unlike ``waveplot`` this function does not call
    ``plt.show()`` -- confirm whether that is intended.
    """
    magnitude = abs(librosa.stft(data))
    magnitude_db = librosa.amplitude_to_db(magnitude)

    plt.figure(figsize=(10, 4))
    plt.title(emotion, size=20)
    librosa.display.specshow(magnitude_db, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar()


def extract_mfcc(filename, layer):
    """Return the averaged MFCC feature vector of an audio file.

    Loads at most 3 seconds of ``filename`` starting 0.5 seconds into the
    recording, computes ``layer`` MFCCs and averages the transposed
    coefficient matrix along its first axis.
    """
    signal, sample_rate = librosa.load(filename, duration=3, offset=0.5)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=layer)
    return np.mean(coefficients.T, axis=0)


def create_new_model(layer):
    """Build, compile and summarise the LSTM classifier.

    Architecture: LSTM(123) on inputs of shape ``(layer, 1)``, followed by
    Dense(64, relu), Dropout(0.2), Dense(32, relu), Dropout(0.2) and a
    6-unit softmax output. Compiled with categorical cross-entropy, the
    adam optimizer and the accuracy metric. The model summary is printed
    before the model is returned.
    """
    model = Sequential()
    model.add(LSTM(123, return_sequences=False, input_shape=(layer, 1)))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(6, activation='softmax'))

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    model.summary()
    return model


def create_new_cnnmodelsea(layer):
    """Build, compile and summarise the deeper 1-D CNN classifier.

    Inputs have shape ``(layer, 1)``. Every convolution uses kernel size 5
    with 'same' padding and is followed by a separate ReLU activation
    layer. The output is a 6-unit dense layer with a softmax activation.
    Compiled with categorical cross-entropy, the "Adam" optimizer and the
    accuracy metric. The model summary is printed before the model is
    returned.
    """
    layers = [
        Conv1D(256, 5, padding='same', input_shape=(layer, 1)),
        Activation('relu'),
        Conv1D(128, 5, padding='same'),
        Activation('relu'),
        Dropout(0.1),
        MaxPooling1D(pool_size=8),
        Conv1D(128, 5, padding='same'),
        Activation('relu'),
        # Two further Conv1D(128, 5) + ReLU pairs followed by Dropout(0.2)
        # used to sit here; they are currently disabled.
        Conv1D(128, 5, padding='same'),
        Activation('relu'),
        Flatten(),
        Dense(6),
        Activation('softmax'),
    ]
    model = Sequential(layers)
    model.compile(loss='categorical_crossentropy', optimizer="Adam", metrics=['accuracy'])
    model.summary()
    return model


def create_new_cnnmodel(layer):
    """Build, compile and summarise the compact 1-D CNN classifier.

    Inputs have shape ``(layer, 1)``. Three ReLU convolutions (16, 32 and
    128 filters, kernel size 5, 'same' padding) are followed by
    Dropout(0.1), a flatten step, Dense(128, relu), Dropout(0.1),
    Dense(64, relu) and a 6-unit softmax output. Compiled with categorical
    cross-entropy, the adam optimizer and the accuracy metric. The model
    summary is printed before the model is returned.
    """
    CNN_model = Sequential([
        # Convolutional feature extractor
        Conv1D(16, 5, padding='same', input_shape=(layer, 1), activation='relu'),
        Conv1D(32, 5, padding='same', activation='relu'),
        # A Conv1D(64, 5, padding='same', activation='relu') stage is
        # currently disabled at this position.
        Conv1D(128, 5, padding='same', activation='relu'),
        Dropout(0.1),
        # Dense classification head
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.1),
        Dense(64, activation='relu'),
        Dense(6, activation='softmax'),
    ])
    CNN_model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    CNN_model.summary()
    return CNN_model


def extract_mfcc_verbose(layer, filename, offset, duration):
    """Return the averaged MFCC feature vector of a slice of an audio file.

    Loads ``duration`` seconds of ``filename`` starting at ``offset``
    seconds, computes ``layer`` MFCCs and averages the transposed
    coefficient matrix along its first axis.

    Note the argument order: ``layer`` comes first, before ``filename``.
    """
    signal, sample_rate = librosa.load(filename, duration=duration, offset=offset)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=layer)
    return np.mean(coefficients.T, axis=0)


def plot_accuracy(history, len):
    """Plot training and validation accuracy per epoch and show the figure.

    ``history`` must expose a ``history`` mapping with the keys
    'accuracy' and 'val_accuracy'; ``len`` is the number of epochs to put
    on the x axis. The parameter name shadows the builtin ``len`` inside
    this function.
    """
    record = history.history
    x_axis = [*range(len)]
    curves = (
        (record['accuracy'], 'train accuracy'),
        (record['val_accuracy'], 'val accuracy'),
    )

    for values, legend in curves:
        plt.plot(x_axis, values, label=legend)
    plt.xlabel('epochs')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()


def plot_loss(history, len):
    """Plot training and validation loss per epoch and show the figure.

    ``history`` must expose a ``history`` mapping with the keys 'loss' and
    'val_loss'; ``len`` is the number of epochs to put on the x axis. The
    parameter name shadows the builtin ``len`` inside this function.
    """
    record = history.history
    x_axis = [*range(len)]
    curves = (
        (record['loss'], 'train loss'),
        (record['val_loss'], 'val loss'),
    )

    for values, legend in curves:
        plt.plot(x_axis, values, label=legend)
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.legend()
    plt.show()


def normalize_data(datatrain, datatest):
    """Standardise both data sets with statistics taken from the training set.

    A ``StandardScaler`` is fitted on ``datatrain`` only and then applied
    to ``datatrain`` and ``datatest``.

    Returns:
        ``(scaled_train, scaled_test)``.
    """
    fitted = StandardScaler().fit(datatrain)
    return fitted.transform(datatrain), fitted.transform(datatest)

Load data

In [None]:
# Number of MFCC coefficients per sample; also the model's input length.
layer = 40
trainpaths, trainlabels, testpaths, testlabels = load_dataset()

### one frame per data set, pairing every audio path with its label ###
trainDF = pd.DataFrame({"speech": trainpaths, "label": trainlabels})
testDF = pd.DataFrame({"speech": testpaths, "label": testlabels})

### MFCC features ###
# The cached .npy files loaded below were written by the following steps,
# which are kept here (disabled) for reference.
# NOTE(review): extract_mfcc_verbose takes (layer, filename, offset, duration);
# the calls below pass only three arguments -- confirm before re-enabling.
#
# train_mfccs = trainDF["speech"].apply(lambda x: extract_mfcc_verbose(x, 0, 0.8))
# test_mfccs = testDF["speech"].apply(lambda x: extract_mfcc_verbose(x, 0, 0.8))
# train_mfccs.to_csv("train_mfccs.csv")
# test_mfccs.to_csv("test_mfccs.csv")
# train_shaped = np.array([x for x in train_mfccs])
# test_shaped = np.array([x for x in test_mfccs])
# np.save("train_shaped_mfccs.npy", train_shaped)
# np.save("test_shaped_mfccs.npy", test_shaped)

### model ###
model = create_new_cnnmodel(layer=layer)

### cached features ###
train_shaped = np.load("train_shaped_mfccs.npy")
test_shaped = np.load("test_shaped_mfccs.npy")

### normalisation: statistics come from the training features only ###
scaler = StandardScaler().fit(train_shaped)
data_scaled_train = scaler.transform(train_shaped)
data_scaled_test = scaler.transform(test_shaped)

# Append a trailing axis of length 1 to match the model's (layer, 1) input.
train_shaped = data_scaled_train[..., np.newaxis]
test_shaped = data_scaled_test[..., np.newaxis]

from sklearn.preprocessing import OneHotEncoder, StandardScaler

### one-hot encode the labels ###
# The encoder is fitted on the training labels only, so the column layout of
# the test targets is the same as that of the training targets. Re-fitting on
# the test labels would silently change which column stands for which
# emotion whenever the two label sets differ.
# handle_unknown='ignore' encodes a test label that never occurs in the
# training data as an all-zero row instead of raising.
enc = OneHotEncoder(handle_unknown='ignore')
y = enc.fit_transform(trainDF[['label']]).toarray()

y_test2 = enc.transform(testDF[['label']]).toarray()

### split the training features into a fitting part and a held-out part ###
X_train, X_test, y_train, y_test = train_test_split(train_shaped, y, test_size=0.2, random_state=1)

### training model ###
# Only validation_data is passed: Keras lets validation_data override
# validation_split, so the former validation_split=0.2 had no effect.
epochs = 25
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=50,
    shuffle=True,
)

### save the model ###
# filename = "Completed Cnn model.joblib"
# joblib.dump(model, filename)

### plotting accuracy and loss ###
plot_accuracy(history, epochs)
plot_loss(history, epochs)

### predict the emotions of the test recordings and count the hits ###
predicted_test = model.predict(test_shaped)

# Output column k of the model corresponds to column k of the one-hot
# training targets. OneHotEncoder orders its categories by sorted label
# value, so the sorted distinct training labels give the class name of every
# output column (neutral is not among them because it is excluded when the
# data set is loaded).
class_names = sorted(trainDF["label"].unique())
if len(class_names) != predicted_test.shape[1]:
    raise ValueError(
        f"model has {predicted_test.shape[1]} outputs but the training data "
        f"contains {len(class_names)} distinct labels: {class_names}"
    )

testres = pd.DataFrame()
pred_emotions_for_test = []
pred_right = []
pred_wrong = []
# strict=True: a mismatch between predictions and labels must not be hidden.
for probabilities, true_label in zip(predicted_test, testlabels, strict=True):
    # The predicted class is the one with the HIGHEST probability.
    best = int(np.argmax(probabilities))
    predicted_label = class_names[best]
    pred_emotions_for_test.append(
        f'{predicted_label} {probabilities[best]} original: {true_label}'
    )
    # Compare the labels themselves rather than the first letters of the
    # formatted text.
    if predicted_label == true_label:
        pred_right.append(1)
    else:
        pred_wrong.append(1)

print(f"right+{len(pred_right)}")
print(f"wrong+{len(pred_wrong)}")