In [1]:
import os
import glob
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import soundfile as sf
from tqdm import tqdm
from collections import Counter

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score

import joblib

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Input, LSTM, GRU
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping


In [2]:
def extract_feature(file_name, mfcc=True, chroma=False, mel=False, delta=False, n_mfcc=40):
    """Build one fixed-length feature vector for an audio file.

    The file is loaded with ``librosa.load(..., res_type='kaiser_fast')`` and
    every enabled feature matrix is averaged over its frames, then the
    per-feature means are concatenated in the order MFCC, (delta-MFCC),
    chroma, mel.

    Parameters
    ----------
    file_name : str or path-like
        Audio file to load.
    mfcc : bool, default True
        Include the mean of ``n_mfcc`` MFCC coefficients.
    chroma : bool, default False
        Include the mean chromagram computed from the magnitude STFT.
    mel : bool, default False
        Include the mean mel spectrogram.
    delta : bool, default False
        Also include the mean first-order MFCC deltas (only used when
        ``mfcc`` is true). The previous implementation computed the deltas
        on every call but never added them to the result; they are now
        computed only on request, so the default output is unchanged.
    n_mfcc : int, default 40
        Number of MFCC coefficients.

    Returns
    -------
    numpy.ndarray
        1-D float64 vector; empty when every feature flag is false.
    """
    signal, sample_rate = librosa.load(file_name, res_type='kaiser_fast')

    # Seeding with an empty float64 array keeps the output dtype float64
    # and makes the "no feature selected" case return an empty vector.
    pieces = [np.array([])]

    if mfcc:
        mfccs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
        pieces.append(np.mean(mfccs.T, axis=0))
        if delta:
            pieces.append(np.mean(librosa.feature.delta(mfccs).T, axis=0))

    if chroma:
        # The chromagram is derived from the magnitude spectrogram.
        stft = np.abs(librosa.stft(signal))
        pieces.append(np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0))

    if mel:
        pieces.append(np.mean(librosa.feature.melspectrogram(y=signal, sr=sample_rate).T, axis=0))

    return np.hstack(pieces)


In [3]:
# Two-digit emotion code -> emotion name. load_data() reads the code from the
# third dash-separated field of each audio file name.
emotions = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised',
}

# Emotions kept by load_data(); clips with any other emotion are skipped.
observed_emotions = [
    'neutral', 'calm', 'happy', 'sad',
    'angry', 'fearful', 'disgust', 'surprised',
]

In [4]:
def load_data(test_size=0.2, pattern='audio/*/Actor_*/**/*.wav'):
    """Extract features for every matching audio file and split train/test.

    The emotion of a clip is looked up in the module-level ``emotions`` dict
    using the third dash-separated field of its file name; clips whose
    emotion is not in ``observed_emotions`` are skipped.

    Parameters
    ----------
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``. The training set is the
        complement, so any valid ``test_size`` works (the previous
        hard-coded ``train_size=0.80`` made values above 0.2 fail).
    pattern : str, default 'audio/*/Actor_*/**/*.wav'
        Recursive glob pattern used to find the audio files.

    Returns
    -------
    list
        ``[x_train, x_test, y_train, y_test]`` as returned by
        ``train_test_split`` (features as arrays, labels as lists of
        emotion names).

    Raises
    ------
    ValueError
        If no usable audio file matches ``pattern``.
    KeyError, IndexError
        If a matching file name does not carry a known emotion code.
    """
    features, labels = [], []
    # glob order depends on the filesystem; sorting keeps the seeded split
    # identical across machines and runs.
    for path in sorted(glob.glob(pattern, recursive=True)):
        emotion = emotions[os.path.basename(path).split("-")[2]]
        if emotion not in observed_emotions:
            continue
        features.append(extract_feature(path, mfcc=True, chroma=True, mel=True))
        labels.append(emotion)

    if not features:
        raise ValueError(
            "No audio files with an observed emotion matched pattern {!r}".format(pattern)
        )

    return train_test_split(np.array(features), labels, test_size=test_size, random_state=9)


In [None]:
# Extract features for every clip and hold out 20% of them as the test set.
(x_train, x_test,
 y_train, y_test) = load_data(test_size=0.2)

In [None]:
# (number of training clips, number of test clips)
print((len(x_train), len(x_test)))

In [None]:
# Length of the feature vector extracted for each clip.
print(np.shape(x_train)[1])

In [None]:
# Map emotion names to integer codes; the encoder is fitted on the training
# labels only and then applied to both splits.
le = LabelEncoder()
le.fit(y_train)
y_train_encoded = le.transform(y_train)
y_test_encoded = le.transform(y_test)

In [None]:
# Baseline: single-hidden-layer MLP trained on the unscaled features.
# random_state is fixed so weight initialisation and batch shuffling are
# repeatable, matching the seeded models used later in this notebook.
model = MLPClassifier(
    alpha=0.01,
    batch_size=265,  # NOTE(review): 265 looks like a typo for 256 — confirm
    epsilon=1e-08,
    hidden_layer_sizes=(300,),
    learning_rate='adaptive',  # per scikit-learn docs this only matters for solver='sgd'
    max_iter=500,
    random_state=42,
)

In [None]:
# Train the baseline MLP on the integer-encoded training labels.
model.fit(X=x_train, y=y_train_encoded)

In [None]:
# Encoded emotion predicted by the baseline model for each test clip.
y_pred = model.predict(X=x_test)

In [None]:
# Fraction of test clips whose predicted code equals the true code.
accuracy = accuracy_score(y_test_encoded, y_pred)
print("Accuracy: {:.2f}%".format(100 * accuracy))

In [None]:
# Per-class precision / recall / F1 for the baseline model.
# classification_report is already imported in the notebook's import cell,
# so the duplicate mid-notebook import was dropped.
print(classification_report(y_test_encoded, y_pred))


In [None]:
# Confusion matrix of the baseline model: rows are true classes, columns are
# predicted classes, both in encoded-label order.
matrix = confusion_matrix(y_true=y_test_encoded, y_pred=y_pred)
print(matrix)

In [None]:
# Standardise the features, then train a two-hidden-layer MLP that stops
# early based on a 10% validation split carved out of the training data.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    (
        'mlp',
        MLPClassifier(
            hidden_layer_sizes=(256, 128),
            alpha=1e-5,
            learning_rate='adaptive',
            max_iter=700,
            early_stopping=True,
            validation_fraction=0.1,
            random_state=42,
        ),
    ),
])
pipeline.fit(x_train, y_train_encoded)

# Pipeline.score reports mean accuracy on the held-out test split.
print("Accuracy:", pipeline.score(x_test, y_test_encoded))


In [None]:
# Replace the baseline predictions with those of the scaled pipeline.
y_pred = pipeline.predict(x_test)

In [None]:
# Test accuracy of the current predictions, shown as a percentage.
accuracy = accuracy_score(y_test_encoded, y_pred)
print("Accuracy: {:.2f}%".format(100 * accuracy))

In [None]:
# Per-class precision / recall / F1 for the current predictions.
print(classification_report(y_true=y_test_encoded, y_pred=y_pred))

In [None]:
# Wider network (512 -> 256) with a smaller initial learning rate and a more
# patient early-stopping rule than the first pipeline.
pipeline2 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    (
        'mlp',
        MLPClassifier(
            hidden_layer_sizes=(512, 256),
            alpha=1e-5,
            learning_rate='adaptive',
            learning_rate_init=0.0005,
            max_iter=1000,
            early_stopping=True,
            validation_fraction=0.1,
            n_iter_no_change=15,
            random_state=42,
        ),
    ),
])
pipeline2.fit(x_train, y_train_encoded)

# Evaluate on the held-out test split.
y_pred = pipeline2.predict(x_test)
print("Accuracy:", accuracy_score(y_test_encoded, y_pred))
print("\nClassification Report:\n", classification_report(y_test_encoded, y_pred))

# Heatmap of the confusion matrix (rows: actual, columns: predicted).
cm = confusion_matrix(y_test_encoded, y_pred)
sns.heatmap(cm, cmap='Blues', annot=True, fmt='d')
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()


In [None]:
# Hyper-parameter search over the MLP step of a scaler + MLP pipeline:
# a 2 x 2 x 2 grid, scored by accuracy with 3-fold cross-validation.
# NOTE(review): this rebinds `pipeline`, replacing the fitted pipeline from the
# earlier cell with an unfitted template — consider a distinct name.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('mlp', MLPClassifier(max_iter=1000,
                          early_stopping=True,
                          validation_fraction=0.1,
                          n_iter_no_change=15,
                          random_state=42))
])

param_grid = {
    'mlp__hidden_layer_sizes': [(256, 128), (512, 256)],
    'mlp__alpha': [1e-4, 1e-5],
    'mlp__learning_rate_init': [0.001, 0.0005]
}

grid = GridSearchCV(pipeline, param_grid, cv=3, scoring='accuracy', verbose=2, n_jobs=-1)
grid.fit(x_train, y_train_encoded)

print("Best Parameters:", grid.best_params_)
# best_score_ is the mean cross-validated score of the best candidate,
# not an accuracy measured on the training set.
print("Best CV Accuracy:", grid.best_score_)

# grid.predict uses the best estimator found by the search.
y_pred = grid.predict(x_test)
print("\nClassification Report:\n", classification_report(y_test_encoded, y_pred))

cm = confusion_matrix(y_test_encoded, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Greens')
plt.title("Confusion Matrix (Best MLP)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()


In [None]:
# Re-train after removing one low-performing emotion from both splits.
train_df = pd.DataFrame(x_train)
train_df['label'] = y_train_encoded
test_df = pd.DataFrame(x_test)
test_df['label'] = y_test_encoded

# Drop low-performing emotion.
# 3 is a LabelEncoder code; LabelEncoder orders classes alphabetically, so
# this is presumably 'fearful' when all eight emotions are present —
# check with le.inverse_transform([3]).
emotion_to_drop = 3
train_df = train_df[train_df['label'] != emotion_to_drop]
test_df = test_df[test_df['label'] != emotion_to_drop]

# Recreate filtered splits
x_train_filtered = train_df.drop('label', axis=1).values
y_train_filtered = train_df['label'].values
x_test_filtered = test_df.drop('label', axis=1).values
y_test_filtered = test_df['label'].values

filtered_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('mlp', MLPClassifier(
        hidden_layer_sizes=(512, 256, 128),
        max_iter=1600,
        alpha=5e-6,
        learning_rate='adaptive',
        learning_rate_init=0.0007,
        early_stopping=True,
        validation_fraction=0.2,
        n_iter_no_change=30,
        random_state=42
    ))
])

filtered_pipeline.fit(x_train_filtered, y_train_filtered)

# Predict & Evaluate
y_pred_filtered = filtered_pipeline.predict(x_test_filtered)
acc = accuracy_score(y_test_filtered, y_pred_filtered)
f1 = f1_score(y_test_filtered, y_pred_filtered, average='weighted')  # or use 'macro'/'micro' as needed

# The remaining codes are no longer contiguous once a class is dropped, so
# resolve them to emotion names and label the report and the plot explicitly;
# otherwise the heatmap ticks are positional indices that do not match the codes.
kept_codes = np.unique(np.concatenate((y_test_filtered, y_pred_filtered)))
kept_names = list(le.inverse_transform(kept_codes))

print("\nFinal Accuracy after improvements:", acc)
print("\nFinal F1 Score:", f1)
print("\nFinal Classification Report:")
print(classification_report(y_test_filtered, y_pred_filtered,
                            labels=kept_codes, target_names=kept_names))

cm = confusion_matrix(y_test_filtered, y_pred_filtered, labels=kept_codes)
sns.heatmap(cm, annot=True, fmt='d', cmap='Greens',
            xticklabels=kept_names, yticklabels=kept_names)
plt.title("Confusion Matrix (Filtered MLP)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

In [None]:
# Save the trained model
joblib.dump(filtered_pipeline, 'final_emotion_model.pkl')
print("Model saved as 'final_emotion_model.pkl'")

# The pipeline predicts integer codes; persist the fitted LabelEncoder as
# well so a consumer of the model can map predictions back to emotion names.
joblib.dump(le, 'final_emotion_label_encoder.pkl')
print("Label encoder saved as 'final_emotion_label_encoder.pkl'")
