### Installation

pip install numpy pandas scikit-learn joblib opensmile soundfile librosa tqdm jupyter

!pip install kagglehub

!pip install xgboost

### Imports

In [2]:
import kagglehub
import os, re, joblib, numpy as np, pandas as pd, soundfile as sf, librosa
from glob import glob
from tqdm import tqdm
import opensmile
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.model_selection import GroupShuffleSplit
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

### Data

In [3]:
RAVDESS_DIR = kagglehub.dataset_download("uwrfkaggler/ravdess-emotional-speech-audio")
print("Path to dataset files:", RAVDESS_DIR)

ROOT = kagglehub.dataset_download("ejlok1/cremad")
CREMAD_DIR = os.path.join(ROOT, "AudioWAV")
print("Path to dataset files:", CREMAD_DIR)

SAVEE_DIR = kagglehub.dataset_download("ejlok1/surrey-audiovisual-expressed-emotion-savee")
SAVEE_ALL_DIR = os.path.join(SAVEE_DIR, "ALL")
print("SAVEE:", SAVEE_ALL_DIR)


Path to dataset files: /home/129c/.cache/kagglehub/datasets/uwrfkaggler/ravdess-emotional-speech-audio/versions/1
Path to dataset files: /home/129c/.cache/kagglehub/datasets/ejlok1/cremad/versions/1/AudioWAV
SAVEE: /home/129c/.cache/kagglehub/datasets/ejlok1/surrey-audiovisual-expressed-emotion-savee/versions/1/ALL


In [4]:
SR = 16000
FEATURE_SET_NAME = "ComParE_2016"  
FEATURE_LEVEL_NAME = "Functionals" 
EMO7 = ["angry","disgust","fear","happy","sad","surprise","neutral"]
label_to_id = {e:i for i,e in enumerate(EMO7)}

np.random.seed(42)

feature_set = getattr(opensmile.FeatureSet, FEATURE_SET_NAME)
feature_level = getattr(opensmile.FeatureLevel, FEATURE_LEVEL_NAME)
smile = opensmile.Smile(feature_set=feature_set, feature_level=feature_level)


### Dataset mappers

In [4]:
def map_ravdess_label(path: str):
    # Filename pattern: MM-VO-EE-II-SS-XX-*.wav; 3rd number = emotion
    # 01 neutral, 02 calm -> neutral, 03 happy, 04 sad, 05 angry, 06 fearful, 07 disgust, 08 surprised
    bn = os.path.basename(path)
    m = re.match(r"(\d+)-(\d+)-(\d+)-", bn)
    if not m: return None
    emo_id = int(m.group(3))
    mapping = {
        1:"neutral", 2:"happy", 3:"happy", 4:"sad",
        5:"angry", 6:"fear", 7:"disgust", 8:"surprise"
    }
    return mapping.get(emo_id)

def actor_id_ravdess(path: str):
    # Directory name contains Actor_## (01..24)
    for part in path.replace("\\","/").split("/"):
        if part.startswith("Actor_"):
            return part.split("_")[-1]  # "01".."24"
    return None

def map_cremad_label(path: str):
    # e.g., 1001_DFA_ANG_XX.wav; third token is emotion code
    bn = os.path.basename(path)
    parts = bn.split('_')
    if len(parts) < 3: return None
    code = parts[2].upper()
    mapping = {
        "ANG":"angry",
        "DIS":"disgust",
        "FEA":"fear",
        "HAP":"happy",
        "NEU":"neutral",
        "SAD":"sad",
        # CREMA-D has no 'surprise'
    }
    return mapping.get(code)

def speaker_id_cremad(path: str):
    # first 4 chars are speaker ID, e.g., "1001"
    return os.path.basename(path)[:4]

def map_savee_label(path: str):
    # Filenames like: DC_a01.wav, JE_sa12.wav, JK_su03.wav ...
    bn = os.path.basename(path).lower()
    stem = bn.split(".")[0]        # e.g. "dc_a01"
    if "_" not in stem:
        return None
    code_part = stem.split("_")[1] # "a01", "sa12", "su03", "n05"

    # strip trailing digits to get emotion code
    emo_code = "".join(ch for ch in code_part if not ch.isdigit())

    mapping = {
        "a":  "angry",
        "d":  "disgust",
        "f":  "fear",
        "h":  "happy",
        "n":  "neutral",
        "sa": "sad",
        "su": "surprise",
    }
    return mapping.get(emo_code)

def speaker_id_savee(path: str):
    # DC_a01.wav -> "DC"; JE_n05.wav -> "JE"
    bn = os.path.basename(path)
    stem = bn.split(".")[0]
    return stem.split("_")[0].upper()  # "DC", "JE", "JK", "KL"


In [5]:
rows = []

# RAVDESS
rav_wavs = sorted(glob(os.path.join(RAVDESS_DIR, "**/*.wav"), recursive=True))
for p in rav_wavs:
    lab = map_ravdess_label(p)
    if lab in label_to_id:
        rows.append({
            "path": p,
            "label": lab,
            "label_id": label_to_id[lab],
            "dataset": "ravdess",
            "speaker": actor_id_ravdess(p),
        })

# CREMA-D 
cre_wavs = sorted(glob(os.path.join(CREMAD_DIR, "*.wav"), recursive=True))
for p in cre_wavs:
    lab = map_cremad_label(p)
    if lab in label_to_id:
        rows.append({
            "path": p,
            "label": lab,
            "label_id": label_to_id[lab],
            "dataset": "cremad",
            "speaker": speaker_id_cremad(p),
        })    



# SAVEE
savee_wavs = sorted(glob(os.path.join(SAVEE_ALL_DIR, "*.wav")))
for p in savee_wavs:
    lab = map_savee_label(p)
    if lab in label_to_id:
        rows.append({
            "path": p,
            "label": lab,
            "label_id": label_to_id[lab],
            "dataset": "savee",
            "speaker": speaker_id_savee(p),
        })

df = pd.DataFrame(rows)
print("Total files:", len(df))
print("\nBy dataset:\n", df["dataset"].value_counts())
print("\nBy label:\n", df["label"].value_counts())
df.tail()


Total files: 10802

By dataset:
 dataset
cremad     7442
ravdess    2880
savee       480
Name: count, dtype: int64

By label:
 label
happy       2099
sad         1715
angry       1715
fear        1715
disgust     1715
neutral     1399
surprise     444
Name: count, dtype: int64


Unnamed: 0,path,label,label_id,dataset,speaker
10797,/home/129c/.cache/kagglehub/datasets/ejlok1/su...,surprise,5,savee,KL
10798,/home/129c/.cache/kagglehub/datasets/ejlok1/su...,surprise,5,savee,KL
10799,/home/129c/.cache/kagglehub/datasets/ejlok1/su...,surprise,5,savee,KL
10800,/home/129c/.cache/kagglehub/datasets/ejlok1/su...,surprise,5,savee,KL
10801,/home/129c/.cache/kagglehub/datasets/ejlok1/su...,surprise,5,savee,KL


### Dataset Split

In [6]:
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, val_idx = next(gss.split(df, df["label_id"].values, groups=df["speaker"].values))
df_tr = df.iloc[train_idx].reset_index(drop=True)
df_va = df.iloc[val_idx].reset_index(drop=True)

print("Train n:", len(df_tr), "Val n:", len(df_va))
print("Train speakers:", df_tr.speaker.nunique(), "Val speakers:", df_va.speaker.nunique())
print("Overlap speakers:", set(df_tr.speaker) & set(df_va.speaker))
print("\nVal distribution:\n", df_va["label"].value_counts())


Train n: 8651 Val n: 2151
Train speakers: 95 Val speakers: 24
Overlap speakers: set()

Val distribution:
 label
happy       408
sad         344
angry       344
fear        344
disgust     344
neutral     288
surprise     79
Name: count, dtype: int64


In [7]:
def load_resample(path, sr=SR):
    sig, sr0 = sf.read(path, dtype="float32", always_2d=False)
    if sig.ndim == 2: sig = sig.mean(axis=1)
    if sr0 != sr:
        sig = librosa.resample(sig, orig_sr=sr0, target_sr=sr)
    return np.nan_to_num(sig).astype(np.float32)

def extract_features(df_split, desc="extract"):
    X, y = [], []
    for _, r in tqdm(df_split.iterrows(), total=len(df_split), desc=desc):
        try:
            sig = load_resample(r["path"], SR)
            feats = smile.process_signal(sig, SR).to_numpy().ravel()
            X.append(feats); y.append(r["label_id"])
        except Exception as e:
            # print("skip", r["path"], e)  # uncomment for debugging
            pass
    return np.asarray(X, np.float32), np.asarray(y, np.int64)

X_tr, y_tr = extract_features(df_tr, desc="extract-train")
X_va, y_va = extract_features(df_va, desc="extract-val")

print("X_tr:", X_tr.shape, "  X_va:", X_va.shape)


extract-train: 100%|████████████████████████████████████████████████████████████████| 8651/8651 [06:53<00:00, 20.94it/s]
extract-val: 100%|██████████████████████████████████████████████████████████████████| 2151/2151 [01:39<00:00, 21.66it/s]

X_tr: (8651, 6373)   X_va: (2151, 6373)





In [8]:
from sklearn.svm import SVC

scaler = StandardScaler(with_mean=True, with_std=True)
X_tr_s = scaler.fit_transform(X_tr)
X_va_s = scaler.transform(X_va)

clf = SVC(
    kernel="linear",
    C=1.0,
    gamma="scale",
    class_weight="balanced",
    probability=True,
    random_state=42,
)

clf.fit(X_tr_s, y_tr)
yp = clf.predict(X_va_s)
print("Linear SVM report:\n", classification_report(y_va, yp, target_names=EMO7, zero_division=0))

best = clf
best_name = "SVC_Linear"
print("Selected model:", best_name)


Linear SVM report:
               precision    recall  f1-score   support

       angry       0.54      0.67      0.60       344
     disgust       0.51      0.55      0.53       344
        fear       0.46      0.40      0.43       344
       happy       0.52      0.57      0.54       408
         sad       0.46      0.44      0.45       344
    surprise       0.78      0.48      0.59        79
     neutral       0.59      0.46      0.52       288

    accuracy                           0.52      2151
   macro avg       0.55      0.51      0.52      2151
weighted avg       0.52      0.52      0.51      2151

Selected model: SVC_Linear


In [9]:
import joblib, os
bundle = {"scaler": scaler, "clf": clf, "emolabels": EMO7}
os.makedirs("backend/models", exist_ok=True)
joblib.dump(bundle, "backend/models/ser_clf.pkl")


['backend/models/ser_clf.pkl']