Dataset Preprocessing

In [8]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Path to the RAVDESS dataset
dataset_path = 'RAVDESS'

# Emotions in RAVDESS
emotions = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised'
}

def extract_features(file_path):
    audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
    mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13)
    mfcc_scaled = np.mean(mfcc.T, axis=0)
    return mfcc_scaled

def load_data(dataset_path):
    x, y = [], []
    for root, dirs, files in os.walk(dataset_path):
        for file in files:
            if file.lower().endswith('.wav'):
                file_path = os.path.join(root, file)
                parts = file.split("-")
                if len(parts) > 2:
                    emotion_key = parts[2]
                    if emotion_key in emotions:
                        emotion = emotions[emotion_key]
                        feature = extract_features(file_path)
                        x.append(feature)
                        y.append(emotion)
    return np.array(x), np.array(y)

# Load dataset
X, y = load_data(dataset_path)

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.25, random_state=42)


Model Training

In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM

# Build model
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.5),
    LSTM(64),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(len(np.unique(y_encoded)), activation='softmax')
])

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Reshape for LSTM layer
X_train_reshaped = np.expand_dims(X_train, axis=2)
X_test_reshaped = np.expand_dims(X_test, axis=2)

# Train
model.fit(X_train_reshaped, y_train, epochs=30, batch_size=64, validation_data=(X_test_reshaped, y_test))


  super().__init__(**kwargs)


Epoch 1/30
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 36ms/step - accuracy: 0.1589 - loss: 2.0505 - val_accuracy: 0.1944 - val_loss: 2.0111
Epoch 2/30
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.2030 - loss: 1.9997 - val_accuracy: 0.2014 - val_loss: 1.9521
Epoch 3/30
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.2175 - loss: 1.9486 - val_accuracy: 0.2042 - val_loss: 1.8891
Epoch 4/30
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.2405 - loss: 1.9042 - val_accuracy: 0.2264 - val_loss: 1.8778
Epoch 5/30
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - accuracy: 0.2495 - loss: 1.8686 - val_accuracy: 0.2306 - val_loss: 1.8578
Epoch 6/30
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.2666 - loss: 1.8620 - val_accuracy: 0.2528 - val_loss: 1.8369
Epoch 7/30
[1m34/34[0m [32m━━━━

<keras.src.callbacks.history.History at 0x16ffdb1a110>

Evaluation

In [10]:
test_loss, test_accuracy = model.evaluate(X_test_reshaped, y_test, verbose=1)
print(f"Test Accuracy: {test_accuracy}")

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.3961 - loss: 1.5539
Test Accuracy: 0.38749998807907104


Save the Model

In [11]:
# Save the model
model.save('emotion_recognition_model.h5')

# Print confirmation
print("Model saved successfully.")




Model saved successfully.
