## Emotion recognition

Building emotion recognition model based on CREMA-D dataset and providing our own recordings to test its ability to generalise. 

In [18]:
import os
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import models, layers, optimizers
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers


### Convert wav to npy

In [19]:
def wav_to_npy(input_wav_path, output_npy_path):
    # Create the output directory if it doesn't exist
    os.makedirs(os.path.dirname(output_npy_path), exist_ok=True)

    # Load the audio file
    audio_data, _ = librosa.load(input_wav_path, sr=None)

    # Save the audio data as a NumPy array
    np.save(output_npy_path, audio_data)

# Example usage
input_wav_directory = 'AudioWAV/'
output_npy_directory = 'AudioNPY/'

# Iterate over all WAV files in the input directory
for filename in os.listdir(input_wav_directory):
    if filename.endswith(".wav"):
        input_wav_path = os.path.join(input_wav_directory, filename)
        output_npy_path = os.path.join(output_npy_directory, os.path.splitext(filename)[0] + '.npy')
        wav_to_npy(input_wav_path, output_npy_path)

In [20]:
def load_and_process_data(dataset_path):
    # Loading the CREMA-D dataset
    crema_directory_list = os.listdir(dataset_path)

    file_emotion = []
    file_path = []

    for file in crema_directory_list:
        # storing file paths
        file_path.append(dataset_path + file)
        # storing file emotions
        part = file.split('_')
        if part[2] == 'SAD':
            file_emotion.append('sad')
        elif part[2] == 'ANG':
            file_emotion.append('angry')
        elif part[2] == 'DIS':
            file_emotion.append('disgust')
        elif part[2] == 'FEA':
            file_emotion.append('fear')
        elif part[2] == 'HAP':
            file_emotion.append('happy')
        elif part[2] == 'NEU':
            file_emotion.append('neutral')
        else:
            file_emotion.append('Unknown')

    # Create a DataFrame for emotion of files
    emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

    # Create a DataFrame for the path of files
    path_df = pd.DataFrame(file_path, columns=['Path'])

    return path_df, emotion_df


# Set the path to the CREMA-D dataset
crema_path = "AudioWAV/"

# Load and process data
recordings_df, labels = load_and_process_data(crema_path)



### Loading .npy to dataframe

In [21]:

# Initialize empty lists to store .npy file data
npy_data_list = []

# Iterate through the directory to read each .npy file
for root, dirs, files in os.walk(output_npy_directory):
    for file in files:
        if file.endswith('.npy'):
            file_path = os.path.join(root, file)
            # Load .npy file
            npy_array = np.load(file_path)
            # Append data to the list
            npy_data_list.append(npy_array)

# Create a DataFrame with the .npy file data
data = {'Numpy_Data': npy_data_list}
npy_df = pd.DataFrame(data)

# Display the DataFrame
print(npy_df)

                                             Numpy_Data
0     [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
1     [0.00091552734, 0.0013122559, 0.0015869141, 0....
2     [0.008026123, 0.008453369, 0.008361816, 0.0074...
3     [-0.004333496, -0.0032653809, -0.0030212402, -...
4     [0.0024108887, 0.001373291, 0.0013122559, 0.00...
...                                                 ...
7437  [0.00091552734, 0.0014038086, 0.001373291, 0.0...
7438  [-0.006958008, -0.0067749023, -0.0072021484, -...
7439  [-0.0022583008, -0.0018615723, -0.002319336, -...
7440  [-0.0016174316, -0.0010681152, -0.00076293945,...
7441  [0.0008544922, 0.0005187988, 0.0010986328, 0.0...

[7442 rows x 1 columns]


In [23]:
display(npy_df.shape)
display(labels.shape)

train_recordings, train_labels, test_recordings, test_labels = train_test_split(npy_df, labels, test_size=0.2, random_state=50)

# Define and compile the CNN model
model = keras.Sequential()

# Convolutional layers
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(62, 65, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# Flatten the output for the fully connected layers
model.add(layers.Flatten())

# Dense layers
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))  # 10 is an example, adjust for your task

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(train_recordings, test_recordings, validation_data=(train_labels, test_labels), epochs=5, batch_size=32)

(7442, 1)

(7442, 1)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).