## Emotion recognition

Building emotion recognition model based on CREMA-D dataset and providing our own recordings to test its ability to generalise. 

In [33]:
import os
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import models, layers, optimizers
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from keras.utils import to_categorical



### Convert wav to npy

In [12]:
import os
import librosa
import numpy as np

def wav_to_npy(input_wav_path):
    # Load audio data
    audio_data, _ = librosa.load(input_wav_path, sr=None)

    return audio_data

def save_all_to_single_npy(directory, output_npy_path, target_length=None):
    X = []

    for filename in os.listdir(directory):
        if filename.endswith(".wav"):
            input_wav_path = os.path.join(directory, filename)
            loaded_data = wav_to_npy(input_wav_path)

            # Ensure that the array has the target length by padding or truncating
            if target_length is not None and len(loaded_data) != target_length:
                if len(loaded_data) < target_length:
                    # Pad with zeros if too short
                    loaded_data = np.pad(loaded_data, (0, target_length - len(loaded_data)))
                else:
                    # Truncate if too long
                    loaded_data = loaded_data[:target_length]

            X.append(loaded_data)

    # Save the entire list as a single NumPy file
    np.save(output_npy_path, np.array(X))

# Example usage
input_wav_directory = 'AudioWAV/'
output_single_npy_path = 'all_data.npy'
target_length = 10000  # Replace with your desired length

save_all_to_single_npy(input_wav_directory, output_single_npy_path, target_length=target_length)

# Load the single NumPy file
X = np.load(output_single_npy_path)

# Now X contains the feature matrix of all WAV files
print(X.shape)


(7442, 10000)


### Load CREMA-D

In [14]:
def load_and_process_data(dataset_path):
    # Loading the CREMA-D dataset
    crema_directory_list = os.listdir(dataset_path)

    file_emotion = []
    file_path = []

    for file in crema_directory_list:
        # storing file paths
        file_path.append(dataset_path + file)
        # storing file emotions
        part = file.split('_')
        if part[2] == 'SAD':
            file_emotion.append('sad')
        elif part[2] == 'ANG':
            file_emotion.append('angry')
        elif part[2] == 'DIS':
            file_emotion.append('disgust')
        elif part[2] == 'FEA':
            file_emotion.append('fear')
        elif part[2] == 'HAP':
            file_emotion.append('happy')
        elif part[2] == 'NEU':
            file_emotion.append('neutral')
        else:
            file_emotion.append('Unknown')

    # Create a DataFrame for emotion of files
    emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

    # Create a DataFrame for the path of files
    path_df = pd.DataFrame(file_path, columns=['Path'])

    return path_df, emotion_df


# Set the path to the CREMA-D dataset
crema_path = "AudioWAV/"

# Load and process data
recordings_df, labels_df = load_and_process_data(crema_path)



### Model Development

In [17]:
def load_npy_files(directory):
    X = []
    for filename in os.listdir(directory):
        if filename.endswith(".npy"):
            npy_path = os.path.join(directory, filename)
            loaded_data = np.load(npy_path)
            X.append(loaded_data)
    return np.array(X)

In [20]:
def encode_labels(emotion_labels):
    unique_labels = np.unique(emotion_labels)
    label_to_index = {label: i for i, label in enumerate(unique_labels)}
    encoded_labels = [label_to_index[label] for label in emotion_labels]
    return np.array(encoded_labels)

# Load data
#X = load_npy_files('AudioNPY/')
# Encode emotion labels
Y = encode_labels(labels['Emotions'])

display(X.shape)
display(Y.shape)

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=50)

# Define and compile the CNN model
model = keras.Sequential()

# Convolutional layers
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(10000)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# Flatten the output for the fully connected layers
model.add(layers.Flatten())

# Dense layers
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))  # 10 is an example, adjust for your task

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=5, batch_size=32)



(7442, 10000)

(7442,)

TypeError: 'int' object is not iterable

In [35]:

# Assuming your labels are already encoded as integers
Y = encode_labels(labels['Emotions'])

# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=50)

# Define the transformer model using the functional API
inputs = layers.Input(shape=(10000,))  # Adjust the input shape based on your audio sequence length

# Reshape to add a time axis
query = layers.Reshape((1, -1))(inputs)
key = layers.Reshape((1, -1))(inputs)
value = layers.Reshape((1, -1))(inputs)

# Apply Permute to swap dimensions
query = layers.Permute((2, 1))(query)
key = layers.Permute((2, 1))(key)
value = layers.Permute((2, 1))(value)

# Apply MultiHeadAttention layer
attention_output = layers.MultiHeadAttention(num_heads=8, key_dim=64, dropout=0.1)(
    query, key, value
)
attention_output = layers.Dropout(0.1)(attention_output)
attention_output = layers.LayerNormalization(epsilon=1e-6)(attention_output)

# Flatten the output
attention_output = layers.Flatten()(attention_output)

# MLP (Multi-Layer Perceptron) block
mlp_output = layers.Dense(128, activation="relu")(attention_output)
mlp_output = layers.Dropout(0.1)(mlp_output)
mlp_output = layers.LayerNormalization(epsilon=1e-6)(mlp_output)
mlp_output = layers.Dense(32, activation="relu")(mlp_output)
mlp_output = layers.Dropout(0.1)(mlp_output)
mlp_output = layers.LayerNormalization(epsilon=1e-6)(mlp_output)

# Output layer
outputs = layers.Dense(10, activation="softmax")(mlp_output)  # Adjust for the number of classes

# Create the model
model = keras.Model(inputs=inputs, outputs=outputs)

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Fit the model
history = model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=5, batch_size=32)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
print(f"\nTest Accuracy: {test_accuracy * 100:.2f}%")

# Plot training history
import matplotlib.pyplot as plt

def plot_history(history):
    plt.figure(figsize=(12, 4))

    # Plot training & validation accuracy values
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(['Train', 'Validation'], loc='upper left')

    # Plot training & validation loss values
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(['Train', 'Validation'], loc='upper left')

    plt.tight_layout()
    plt.show()

# Plot the training history
plot_history(history)

Epoch 1/5


ResourceExhaustedError: Graph execution error:

Detected at node model_2/multi_head_attention_9/einsum/Einsum defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel_launcher.py", line 17, in <module>

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\traitlets\config\application.py", line 1043, in launch_instance

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelapp.py", line 736, in start

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\tornado\platform\asyncio.py", line 195, in start

  File "c:\Python311\Lib\asyncio\base_events.py", line 607, in run_forever

  File "c:\Python311\Lib\asyncio\base_events.py", line 1922, in _run_once

  File "c:\Python311\Lib\asyncio\events.py", line 80, in _run

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 516, in dispatch_queue

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 505, in process_one

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 412, in dispatch_shell

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 740, in execute_request

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel\ipkernel.py", line 422, in do_execute

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel\zmqshell.py", line 546, in run_cell

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3009, in run_cell

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3064, in _run_cell

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3269, in run_cell_async

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3448, in run_ast_nodes

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code

  File "C:\Users\jodyc\AppData\Local\Temp\ipykernel_17528\2298895296.py", line 48, in <module>

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1783, in fit

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1377, in train_function

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1360, in step_function

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1349, in run_step

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1126, in train_step

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 589, in __call__

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\base_layer.py", line 1149, in __call__

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py", line 96, in error_handler

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\functional.py", line 515, in call

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\functional.py", line 672, in _run_internal_graph

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\base_layer.py", line 1149, in __call__

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py", line 96, in error_handler

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\layers\attention\multi_head_attention.py", line 600, in call

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\layers\attention\multi_head_attention.py", line 532, in _compute_attention

OOM when allocating tensor with shape[32,8,10000,10000] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu
	 [[{{node model_2/multi_head_attention_9/einsum/Einsum}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_15323]

In [34]:
# Assuming your data has shape (number_of_samples, sequence_length)
input_shape = X.shape[1:]

# One-hot encode labels
y_train_encoded = to_categorical(y_train)
y_test_encoded = to_categorical(y_test)

print("Shape of x_train:", x_train.shape)
print("Shape of x_test:", x_test.shape)


# Define the model
model = keras.Sequential()

# Embedding layer
model.add(layers.Embedding(input_dim=10000, output_dim=32, input_length=input_shape[0]))

# Convolutional layers
model.add(layers.Conv1D(32, 3, activation='relu'))
model.add(layers.MaxPooling1D(2))
model.add(layers.Conv1D(64, 3, activation='relu'))
model.add(layers.MaxPooling1D(2))

# Flatten layer
model.add(layers.Flatten())

# Dense layers
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))  # Adjust for your task

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()

# Print unique values in x_train and x_test
print("Unique values in x_train:", np.unique(x_train))
print("Unique values in x_test:", np.unique(x_test))

# Fit the model
history = model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=5, batch_size=32)


# Evaluate the model
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
print(f"\nTest Accuracy: {test_accuracy * 100:.2f}%")

Shape of x_train: (5953, 10000)
Shape of x_test: (1489, 10000)
Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 10000, 32)         320000    
                                                                 
 conv1d_4 (Conv1D)           (None, 9998, 32)          3104      
                                                                 
 max_pooling1d_4 (MaxPoolin  (None, 4999, 32)          0         
 g1D)                                                            
                                                                 
 conv1d_5 (Conv1D)           (None, 4997, 64)          6208      
                                                                 
 max_pooling1d_5 (MaxPoolin  (None, 2498, 64)          0         
 g1D)                                                            
                                                         

InvalidArgumentError: Graph execution error:

Detected at node sequential_5/embedding_2/embedding_lookup defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel_launcher.py", line 17, in <module>

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\traitlets\config\application.py", line 1043, in launch_instance

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelapp.py", line 736, in start

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\tornado\platform\asyncio.py", line 195, in start

  File "c:\Python311\Lib\asyncio\base_events.py", line 607, in run_forever

  File "c:\Python311\Lib\asyncio\base_events.py", line 1922, in _run_once

  File "c:\Python311\Lib\asyncio\events.py", line 80, in _run

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 516, in dispatch_queue

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 505, in process_one

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 412, in dispatch_shell

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 740, in execute_request

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel\ipkernel.py", line 422, in do_execute

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\ipykernel\zmqshell.py", line 546, in run_cell

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3009, in run_cell

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3064, in _run_cell

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3269, in run_cell_async

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3448, in run_ast_nodes

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code

  File "C:\Users\jodyc\AppData\Local\Temp\ipykernel_17528\3188202023.py", line 42, in <module>

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1783, in fit

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1377, in train_function

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1360, in step_function

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1349, in run_step

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1126, in train_step

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 589, in __call__

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\base_layer.py", line 1149, in __call__

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py", line 96, in error_handler

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\sequential.py", line 398, in call

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\functional.py", line 515, in call

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\functional.py", line 672, in _run_internal_graph

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\base_layer.py", line 1149, in __call__

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py", line 96, in error_handler

  File "C:\Users\jodyc\AppData\Roaming\Python\Python311\site-packages\keras\src\layers\core\embedding.py", line 272, in call

indices[13,4999] = -1 is not in [0, 10000)
	 [[{{node sequential_5/embedding_2/embedding_lookup}}]] [Op:__inference_train_function_11849]