In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import joblib  # For saving and loading the label encoder
import random

# Set seeds for reproducibility
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
random.seed(SEED)

# Function to load and preprocess audio data
def load_data(dataset_path):
    features = []  # List to store feature vectors
    labels = []    # List to store corresponding labels
    for emotion in os.listdir(dataset_path):
        emotion_path = os.path.join(dataset_path, emotion)
        if os.path.isdir(emotion_path):
            for file in os.listdir(emotion_path):
                if file.endswith('.wav'):
                    file_path = os.path.join(emotion_path, file)
                    audio, sr = librosa.load(file_path, sr=None)  # Load audio file
                    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=30)  # Extract MFCC features
                    mfcc_scaled = np.mean(mfcc.T, axis=0)  # Average MFCC coefficients
                    features.append(mfcc_scaled)
                    labels.append(emotion)
    return np.array(features), np.array(labels)

# Define the path to your dataset
dataset_path = r'/content/drive/My Drive/FINAL_CODE_EMO_R_A ORIGINAL - Copy/DATASET/Tess'

# Load the dataset
X, y = load_data(dataset_path)

# Encode the labels into numerical format
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=SEED
)

# Feature Standardization
X_train = (X_train - np.mean(X_train, axis=0)) / np.std(X_train, axis=0)
X_test = (X_test - np.mean(X_test, axis=0)) / np.std(X_test, axis=0)

# Reshape the data for CNN and RNN
X_train_rnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)  # For RNN
X_test_rnn = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)      # For RNN
X_train_cnn = X_train_rnn  # Same reshaping for CNN
X_test_cnn = X_test_rnn

# -------------------------- CNN Model -------------------------- #
cnn_model = tf.keras.Sequential([
    tf.keras.layers.Conv1D(filters=16, kernel_size=3, activation='relu',
                           kernel_regularizer=tf.keras.regularizers.l2(0.01), input_shape=(X_train_cnn.shape[1], 1)),
    tf.keras.layers.MaxPooling1D(pool_size=2),
    tf.keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu',
                           kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.MaxPooling1D(pool_size=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dropout(0.5),  # Moderate dropout
    tf.keras.layers.Dense(len(label_encoder.classes_), activation='softmax')
])

# Compile CNN Model
cnn_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

# Train CNN Model
cnn_model.fit(X_train_cnn, y_train, epochs=25, batch_size=32, validation_data=(X_test_cnn, y_test))

# Save CNN Model
cnn_model.save('emotion_detection_cnn_93.93.h5')
print('CNN model saved to disk.')

# -------------------------- RNN Model -------------------------- #
rnn_model = tf.keras.Sequential([
    tf.keras.layers.LSTM(128, return_sequences=True, input_shape=(X_train_rnn.shape[1], 1)),
    tf.keras.layers.LSTM(128, return_sequences=False),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(len(label_encoder.classes_), activation='softmax')
])

# Compile RNN Model
rnn_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

# Train RNN Model
rnn_model.fit(X_train_rnn, y_train, epochs=35, batch_size=64, validation_data=(X_test_rnn, y_test))

# Save RNN Model
rnn_model.save('emotion_detection_rnn_96.96.h5')
print('RNN model saved to disk.')

# -------------------------- Evaluate Models -------------------------- #

# Evaluate CNN
y_pred_cnn = np.argmax(cnn_model.predict(X_test_cnn), axis=1)
cnn_accuracy = accuracy_score(y_test, y_pred_cnn)
print(f'CNN Accuracy: {cnn_accuracy * 100:.2f}%')
print("\nCNN Classification Report:")
print(classification_report(y_test, y_pred_cnn, target_names=label_encoder.classes_))

# Evaluate RNN
y_pred_rnn = np.argmax(rnn_model.predict(X_test_rnn), axis=1)
rnn_accuracy = accuracy_score(y_test, y_pred_rnn)
print(f'RNN Accuracy: {rnn_accuracy * 100:.2f}%')
print("\nRNN Classification Report:")
print(classification_report(y_test, y_pred_rnn, target_names=label_encoder.classes_))


Epoch 1/25


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - accuracy: 0.3432 - loss: 2.5229 - val_accuracy: 0.6387 - val_loss: 2.4217
Epoch 2/25
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4169 - loss: 2.4390 - val_accuracy: 0.8487 - val_loss: 2.3405
Epoch 3/25
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5720 - loss: 2.3443 - val_accuracy: 0.9076 - val_loss: 2.2627
Epoch 4/25
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6714 - loss: 2.2739 - val_accuracy: 0.9328 - val_loss: 2.1879
Epoch 5/25
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6929 - loss: 2.1968 - val_accuracy: 0.9412 - val_loss: 2.1127
Epoch 6/25
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7404 - loss: 2.1342 - val_accuracy: 0.9496 - val_loss: 2.0372
Epoch 7/25
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━



CNN model saved to disk.
Epoch 1/35


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 268ms/step - accuracy: 0.4552 - loss: 1.3690 - val_accuracy: 0.7479 - val_loss: 1.3403
Epoch 2/35
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 198ms/step - accuracy: 0.7414 - loss: 1.3371 - val_accuracy: 0.8235 - val_loss: 1.3022
Epoch 3/35
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 112ms/step - accuracy: 0.7743 - loss: 1.2992 - val_accuracy: 0.8067 - val_loss: 1.2568
Epoch 4/35
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 104ms/step - accuracy: 0.7969 - loss: 1.2564 - val_accuracy: 0.8151 - val_loss: 1.1996
Epoch 5/35
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 102ms/step - accuracy: 0.8006 - loss: 1.1969 - val_accuracy: 0.8067 - val_loss: 1.1251
Epoch 6/35
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 102ms/step - accuracy: 0.7752 - loss: 1.1243 - val_accuracy: 0.7899 - val_loss: 1.0307
Epoch 7/35
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0



RNN model saved to disk.
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
CNN Accuracy: 94.96%

CNN Classification Report:
              precision    recall  f1-score   support

    OAF_Fear       1.00      1.00      1.00        45
   OAF_angry       0.97      1.00      0.99        36
 OAF_disgust       0.00      0.00      0.00         5
     YAF_sad       0.86      0.97      0.91        33

    accuracy                           0.95       119
   macro avg       0.71      0.74      0.73       119
weighted avg       0.91      0.95      0.93       119



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 171ms/step
RNN Accuracy: 96.64%

RNN Classification Report:
              precision    recall  f1-score   support

    OAF_Fear       1.00      0.98      0.99        45
   OAF_angry       0.95      1.00      0.97        36
 OAF_disgust       1.00      0.60      0.75         5
     YAF_sad       0.94      0.97      0.96        33

    accuracy                           0.97       119
   macro avg       0.97      0.89      0.92       119
weighted avg       0.97      0.97      0.96       119

