In [1]:
import os
import glob
import time
import numpy as np
import pandas as pd
import librosa
import matplotlib.pyplot as plt
import seaborn as sn

from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.feature_selection import SelectKBest, f_classif


In [2]:
# Two-character emotion code (as it appears in a recording's file name) -> label.
emotions = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fear",
    "07": "disgust",
    "08": "surprised",
}

# Labels kept when loading data; trim this list to work on a subset of emotions.
observed_emotions = [
    "neutral", "calm", "happy", "sad",
    "angry", "fear", "disgust", "surprised",
]

In [3]:
def extract_feature(file_name, mfcc, chroma, mel):
    """Build one flat feature vector for a single audio file.

    The file is loaded with ``librosa.load(..., res_type='kaiser_fast')``.
    Each enabled feature matrix is averaged down to one value per feature row,
    and the enabled groups are concatenated in the fixed order MFCC, chroma, mel.

    Parameters
    ----------
    file_name : path of the audio file to load.
    mfcc : truthy to include the means of 40 MFCC coefficients.
    chroma : truthy to include the means of the chroma bins, computed from the
        magnitude of the short-time Fourier transform.
    mel : truthy to include the means of the mel-spectrogram bands.

    Returns
    -------
    numpy.ndarray
        1-D array with the selected groups back to back; empty when every
        flag is falsy.
    """
    def _row_means(matrix):
        # Same reduction for every feature family: mean of the transposed
        # matrix along its first axis.
        return np.mean(matrix.T, axis=0)

    audio_path = os.path.join(file_name)
    signal, rate = librosa.load(audio_path, res_type='kaiser_fast')

    # Seed with an empty float array so the stacked result is always an
    # ndarray (and is promoted to the same dtype as before).
    pieces = [np.array([])]

    if mfcc:
        coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
        pieces.append(_row_means(coefficients))

    if chroma:
        # The magnitude spectrogram is only needed by the chroma features.
        magnitude = np.abs(librosa.stft(signal))
        pieces.append(_row_means(librosa.feature.chroma_stft(S=magnitude, sr=rate)))

    if mel:
        pieces.append(_row_means(librosa.feature.melspectrogram(y=signal, sr=rate)))

    return np.hstack(pieces)

In [4]:
def load_data(test_size=0.2):
    """Extract features and emotion labels for every matching audio file.

    Files are discovered with recursive glob patterns under ``audio/songs``
    and ``audio/speech`` (relative to the current working directory). The
    emotion code is the third dash-separated field of the file name and is
    translated through the module-level ``emotions`` mapping; files whose
    label is not in ``observed_emotions`` are skipped.

    Parameters
    ----------
    test_size : float, optional
        Accepted for backward compatibility only; this function does not split
        the data, so the value is not used.

    Returns
    -------
    dict
        ``{"X": [feature_vector, ...], "y": [emotion_label, ...]}`` with the
        two lists aligned index by index.
    """
    features, labels = [], []

    patterns = [
        "audio/songs/Actor_*/**/*.wav",
        "audio/speech/Actor_*/**/*.wav",
    ]

    for pattern in patterns:
        # glob returns matches in arbitrary (filesystem) order; sorting makes
        # the row order identical from run to run and machine to machine.
        for wav_path in sorted(glob.glob(pattern, recursive=True)):
            fields = os.path.basename(wav_path).split("-")
            if len(fields) < 3:
                # Not a "<..>-<..>-<emotion>-..." name: there is no emotion
                # field to read, so skip it instead of raising IndexError.
                continue

            # Unknown codes map to None, which is never an observed emotion.
            emotion = emotions.get(fields[2])
            if emotion not in observed_emotions:
                continue

            features.append(extract_feature(wav_path, mfcc=True, chroma=True, mel=True))
            labels.append(emotion)

    return {"X": features, "y": labels}

In [None]:
# Run the full feature extraction once and report the wall-clock time it took.
start_time = time.time()
Trial_dict = load_data(test_size=0.2)
print(f"--- Data loaded. Loading time: {time.time() - start_time} seconds ---")


In [None]:
# One row per recording, one integer-labelled column per feature value.
X = pd.DataFrame(Trial_dict["X"])
# Give the label column an explicit name. An unnamed single-column frame gets
# the column label 0, which duplicates the first feature column's label as
# soon as the two frames are placed side by side.
y = pd.DataFrame({"emotion": Trial_dict["y"]})

In [None]:
# Put features and labels side by side (labels end up in the last column).
data = pd.concat([X, y], axis=1)
# Shuffle the rows with a fixed seed so the row order, and any file written
# from this frame, is the same on every run. The original index labels are
# kept, just in shuffled order.
data = data.sample(frac=1, random_state=42)

In [None]:
# Save to CSV.
# The row index carries no information here, so leave it out of the file;
# otherwise it comes back as a stray 'Unnamed: 0' column when the CSV is read.
data.to_csv("RAVDESS_MFCC_Observed.csv", index=False)

In [None]:
# Reload the cached feature table from disk and report how long the read took.
start_time = time.time()
data = pd.read_csv('RAVDESS_MFCC_Observed.csv')
print(f"Data loaded in {time.time() - start_time:.2f} seconds")

In [None]:
# Discard the saved-index column when the CSV has one; errors='ignore' makes
# this a no-op for files written without it.
data = data.drop(columns='Unnamed: 0', errors='ignore')

In [None]:
# Quick look at the first five rows of the loaded table.
data.head(n=5)

In [None]:
# Show the column labels as read back from the CSV (features first, label last).
print(data.columns)


In [None]:
# The label is stored in the last column; give it a stable name.
data = data.rename(columns={data.columns[-1]: 'emotion'})

# Plain NumPy arrays for scikit-learn: every non-label column is a feature.
X = data.drop(columns='emotion').to_numpy()
y = data['emotion'].to_numpy()


In [None]:
# Sanity check after the rename: first rows, then the full list of column names.
print(data.head(), data.columns.tolist(), sep="\n")


In [None]:
# Confirm the feature matrix dimensions and which classes are present.
print(f"X shape: {X.shape}")
print(f"Unique labels: {np.unique(y)}")

In [None]:
# Hold out 20% of the rows for testing. Stratifying on the labels keeps each
# emotion's share the same in both parts, so no class ends up under-represented
# in the test set by chance; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
# Baseline: linear SVM fitted directly on the raw (unscaled) features, timed.
print("\nTraining SVM without scaling...")
svm_unscaled = SVC(kernel='linear')
start = time.time()
svm_unscaled.fit(X_train, y_train)
print(f"Trained in {time.time() - start:.2f} seconds")

In [None]:
# Evaluate the unscaled baseline on the held-out set.
y_pred_unscaled = svm_unscaled.predict(X_test)
print("\nClassification Report (Unscaled):\n", classification_report(y_test, y_pred_unscaled))
print("Accuracy (Unscaled): {:.2f}%".format(accuracy_score(y_test, y_pred_unscaled) * 100))


# Fix the class order explicitly (sorted labels seen in truth or predictions)
# so the heatmap ticks can show emotion names instead of bare integers.
labels_unscaled = np.unique(np.concatenate([y_test, y_pred_unscaled]))
cm_unscaled = confusion_matrix(y_test, y_pred_unscaled, labels=labels_unscaled)
sn.heatmap(
    pd.DataFrame(cm_unscaled, index=labels_unscaled, columns=labels_unscaled),
    annot=True,
    fmt='d',
)
# confusion_matrix puts true classes on rows and predicted classes on columns.
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.title("Confusion Matrix (Unscaled)")
plt.show()

In [None]:
# Same linear SVM, but with standardisation learned inside the pipeline so the
# scaler is fitted on the training rows only.
print("\nTraining SVM with scaling (Pipeline)...")
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svm', SVC(kernel='linear')),
])
pipeline.fit(X_train, y_train)

In [None]:
# Held-out predictions of the scaled pipeline, with per-class and overall scores.
y_pred_scaled = pipeline.predict(X_test)
print(f"\nClassification Report (Scaled):\n {classification_report(y_test, y_pred_scaled)}")
print(f"Accuracy (Scaled): {accuracy_score(y_test, y_pred_scaled) * 100:.2f}%")



In [None]:
# Fix the class order explicitly (sorted labels seen in truth or predictions)
# so the heatmap ticks can show emotion names instead of bare integers.
labels_scaled = np.unique(np.concatenate([y_test, y_pred_scaled]))
cm_scaled = confusion_matrix(y_test, y_pred_scaled, labels=labels_scaled)
sn.heatmap(
    pd.DataFrame(cm_scaled, index=labels_scaled, columns=labels_scaled),
    annot=True,
    fmt='d',
)
# confusion_matrix puts true classes on rows and predicted classes on columns.
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.title("Confusion Matrix (Scaled)")
plt.show()

In [None]:
# 5-fold cross-validation of the scaled pipeline over the full data set; the
# pipeline is re-fitted (scaler included) inside every fold.
print("\nCross-validation accuracy scores (5-fold):")
cv_scores = cross_val_score(pipeline, X, y, cv=5)
print(cv_scores)
print(f"Mean CV accuracy: {np.mean(cv_scores) * 100:.2f}%")

In [None]:
# Compare accuracy on seen vs. unseen rows; a large gap points to overfitting.
train_acc = pipeline.score(X_train, y_train) * 100
test_acc = pipeline.score(X_test, y_test) * 100
print(f"Train Accuracy (Scaled): {train_acc:.2f}%")
print(f"Test Accuracy (Scaled): {test_acc:.2f}%")

In [None]:
# Hyper-parameter search over kernel, C and gamma for the scaled SVM.
pipeline = Pipeline([
    # ('select', SelectKBest(score_func=f_classif, k=50)),
    ('scaler', StandardScaler()),
    ('svm', SVC())
])

# gamma has no effect on the linear kernel, so it is searched only for 'rbf'.
# Crossing it with 'linear' would just refit identical models.
param_grid = [
    {
        'svm__kernel': ['linear'],
        'svm__C': [0.1, 1, 10],
    },
    {
        'svm__kernel': ['rbf'],
        'svm__C': [0.1, 1, 10],
        'svm__gamma': ['scale', 'auto'],
    },
]

# The search only sees the training rows; the test set stays untouched for the
# final evaluation.
grid = GridSearchCV(pipeline, param_grid, cv=5, verbose=1, n_jobs=-1)
grid.fit(X_train, y_train)

print("Best Parameters:", grid.best_params_)
print("Best Cross-Validation Accuracy: {:.2f}%".format(grid.best_score_ * 100))


In [None]:
# Final check: score the best estimator found by the grid search on the test set.
y_pred = grid.predict(X_test)

print(classification_report(y_test, y_pred))
print(f"Test Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
