## Emotion recognition

Building an emotion recognition model based on the CREMA-D dataset, and providing our own recordings to test its ability to generalise.

In [None]:
import os
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import models, layers, optimizers
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [None]:
def load_and_process_data(dataset_path):
    """Index a CREMA-D directory, encode the emotion labels and split the data.

    The emotion code is read from the third underscore-separated field of
    each file name (``<a>_<b>_<CODE>_<rest>``). Recognised codes are mapped
    to readable names; any other code is labelled ``'Unknown'``.

    Args:
        dataset_path: Directory containing the audio files. A trailing path
            separator is optional.

    Returns:
        A tuple ``(train_df, test_df, label_encoder)``. Both DataFrames have
        the columns ``Emotions`` (readable name), ``Path`` (file path) and
        ``Emotion_Label`` (integer class id). ``label_encoder`` is the
        fitted ``LabelEncoder`` that maps between names and ids.
    """
    emotion_by_code = {
        'SAD': 'sad',
        'ANG': 'angry',
        'DIS': 'disgust',
        'FEA': 'fear',
        'HAP': 'happy',
        'NEU': 'neutral',
    }

    file_emotion = []
    file_path = []

    # os.listdir returns entries in arbitrary order; sorting makes the
    # seeded train/test split below reproducible across machines.
    for file_name in sorted(os.listdir(dataset_path)):
        part = file_name.split('_')
        if len(part) < 3:
            # No emotion field in the name (e.g. hidden/system files):
            # skip it instead of failing with an IndexError.
            continue
        # os.path.join works whether or not dataset_path ends with a separator
        file_path.append(os.path.join(dataset_path, file_name))
        file_emotion.append(emotion_by_code.get(part[2], 'Unknown'))

    # One row per file: readable emotion name and the path to the audio file
    crema_df = pd.DataFrame({'Emotions': file_emotion, 'Path': file_path})

    # Convert the emotions to numerical format
    label_encoder = LabelEncoder()
    crema_df['Emotion_Label'] = label_encoder.fit_transform(crema_df['Emotions'])

    # Split the dataset into training and testing sets (80% / 20%, fixed seed)
    train_df, test_df = train_test_split(crema_df, test_size=0.2, random_state=42)

    return train_df, test_df, label_encoder


# Directory (relative to the working directory) that holds the CREMA-D audio
crema_path = "AudioWAV/"

# Build the train/test tables and the fitted label encoder from that directory
train_df, test_df, label_encoder = load_and_process_data(dataset_path=crema_path)



In [None]:
def extract_features(file_path, n_mfcc=13):
    """Return the time-averaged MFCC vector of an audio file.

    The file is loaded with librosa (``res_type='kaiser_fast'``), its MFCC
    matrix is computed and then averaged along axis 1, giving one value per
    coefficient.

    Args:
        file_path: Path of the audio file to read.
        n_mfcc: Number of MFCC coefficients to compute. Defaults to 13.

    Returns:
        A NumPy array of averaged MFCCs, or ``None`` when the file could not
        be processed. Extraction is deliberately best-effort: the error is
        printed and callers must filter out ``None`` results themselves.
    """
    try:
        audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
        mfcc_matrix = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
        return np.mean(mfcc_matrix, axis=1)
    except Exception as e:
        # Best-effort: report the failing file and let the caller decide
        print(f"Error encountered while processing {file_path}: {e}")
        return None

def _build_feature_matrix(df):
    """Extract features for every row of *df*, keeping features and labels aligned.

    extract_features returns None for files it could not process. Such rows
    are dropped from both the feature matrix and the label vector, so the
    result is a regular 2-D array instead of a ragged/object array.

    Args:
        df: DataFrame with the columns ``Path`` and ``Emotion_Label``.

    Returns:
        A tuple ``(X, y)`` with one row of ``X`` per successfully processed
        file and the matching entries of ``Emotion_Label`` in ``y``.
    """
    features = [extract_features(file_path) for file_path in df['Path'].tolist()]
    keep = np.array([feature is not None for feature in features], dtype=bool)

    dropped = int((~keep).sum())
    if dropped:
        print(f"Skipping {dropped} file(s) whose features could not be extracted")

    X = np.array([feature for feature in features if feature is not None])
    y = df['Emotion_Label'].values[keep]
    return X, y


# Apply the feature extraction function to all audio files in the training set
X_train, y_train = _build_feature_matrix(train_df)

# Apply the feature extraction function to all audio files in the testing set
X_test, y_test = _build_feature_matrix(test_df)

# Fully-connected classifier: two hidden layers (256 and 128 units), each
# followed by a ReLU activation and 50% dropout, then a softmax layer with
# one output per class known to the label encoder.
model = models.Sequential([
    layers.Dense(256, input_shape=(X_train.shape[1],)),
    layers.Activation('relu'),
    layers.Dropout(0.5),
    layers.Dense(128),
    layers.Activation('relu'),
    layers.Dropout(0.5),
    layers.Dense(len(label_encoder.classes_), activation='softmax'),
])

# Labels are integer class ids, hence the sparse categorical cross-entropy
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Fit for 50 epochs in mini-batches of 32, with 10% of the training data
# set aside for validation
history = model.fit(X_train, y_train, validation_split=0.1, batch_size=32, epochs=50)

# Measure loss and accuracy on the held-out test set and report the accuracy
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")