# Training Baseline Notebook

This notebook implements **Mental State Detection from Speech**.

It includes:
- Data loading & augmentation
- Feature extraction (MFCC, spectral, ZCR, RMS; pitch features are not yet extracted)
- Baseline models (Random Forest, MLP)
- Evaluation (Confusion Matrix, ROC, F1-score)

---

## 1. Setup & Imports

In [None]:
import os, random, warnings
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
from tqdm import tqdm
import librosa, soundfile as sf

from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

# Seed both RNGs used in this notebook (Python's `random` and NumPy's global
# generator) so that runs are repeatable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Suppress all warnings for the rest of the session and pick the plot theme.
warnings.filterwarnings('ignore')
sns.set_style('whitegrid')

## 2. Data Augmentation

In [None]:
def augment_audio(y, sr):
    """Apply one randomly chosen augmentation to an audio signal.

    Exactly one of four transforms is picked uniformly at random:

    - ``'pitch'``: pitch-shift by a random number of steps in [-3, 3].
    - ``'stretch'``: time-stretch by a random rate in [0.85, 1.15]
      (this changes the number of samples).
    - ``'noise'``: add Gaussian noise whose amplitude is at most 0.5% of
      the signal's peak absolute value.
    - ``'shift'``: circularly roll the samples by up to +/- 0.1 * ``sr``
      positions (samples wrap around rather than being zero-padded).

    Args:
        y: Audio samples as a NumPy array.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The augmented signal as a new NumPy array; ``y`` is not modified.
    """
    fn = random.choice(['pitch', 'stretch', 'noise', 'shift'])
    if fn == 'pitch':
        n_steps = random.uniform(-3, 3)
        # `sr` and `n_steps` must be passed by keyword: they are keyword-only
        # in librosa >= 0.10, and keywords also work on older releases.
        return librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
    if fn == 'stretch':
        rate = random.uniform(0.85, 1.15)
        # `rate` is keyword-only in librosa >= 0.10 as well.
        return librosa.effects.time_stretch(y, rate=rate)
    if fn == 'noise':
        noise_amp = 0.005 * np.random.uniform() * np.max(np.abs(y))
        # Match the full shape of `y` so the noise lines up sample-for-sample
        # (identical to the previous behaviour for 1-D input).
        return y + noise_amp * np.random.normal(size=y.shape)
    # Only 'shift' remains.
    shift = int(np.random.uniform(-0.1, 0.1) * sr)
    return np.roll(y, shift)

## 3. Feature Extraction

In [None]:
def extract_features(y, sr=16000, n_mfcc=13):
    """Summarise an audio signal as a flat feature vector.

    The vector is laid out as: per-coefficient MFCC means, per-coefficient
    MFCC standard deviations, then the means of spectral centroid, spectral
    bandwidth, spectral rolloff, zero-crossing rate and RMS energy.

    Args:
        y: Audio samples passed straight to the librosa feature functions.
        sr: Sampling rate in Hz used for the MFCC and spectral features.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        A NumPy array holding the concatenated summary statistics.
    """
    coeffs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

    # Spectral descriptors that share the same (y, sr) call signature,
    # listed in the order they appear in the output vector.
    spectral_fns = (
        librosa.feature.spectral_centroid,
        librosa.feature.spectral_bandwidth,
        librosa.feature.spectral_rolloff,
    )

    summary = [
        *coeffs.mean(axis=1),
        *coeffs.std(axis=1),
        *(np.mean(spectral_fn(y=y, sr=sr)) for spectral_fn in spectral_fns),
        np.mean(librosa.feature.zero_crossing_rate(y)),
        np.mean(librosa.feature.rms(y=y)),
    ]
    return np.array(summary)

## 4. Load Dataset & Preprocess

In [None]:
DATA_PATH = "data/processed"  # Adjust to your dataset

# Walk DATA_PATH and turn every .wav file into one feature row. The label of a
# file is the name of the directory that directly contains it.
X, y = [], []
for root, dirs, files in os.walk(DATA_PATH):
    # Sort in place so os.walk descends in a stable order; together with the
    # sorted file list this makes the sample order (and therefore the seeded
    # train/test split) independent of the filesystem.
    dirs.sort()
    label = os.path.basename(root)
    for f in sorted(files):
        # Case-insensitive so files named *.WAV are not silently ignored.
        if not f.lower().endswith(".wav"):
            continue
        wav_path = os.path.join(root, f)
        try:
            y_audio, sr = librosa.load(wav_path, sr=16000)
            feats = extract_features(y_audio, sr)
        except Exception as exc:
            # Still best-effort, but report what was dropped, and do not
            # swallow KeyboardInterrupt/SystemExit as a bare `except:` would.
            print(f"Skipping {wav_path}: {exc}")
            continue
        X.append(feats)
        y.append(label)

X = np.array(X)
# Keep the fitted encoder so integer predictions can be mapped back to class
# names via label_encoder.classes_ / label_encoder.inverse_transform.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
print("Dataset shape:", X.shape)

## 5. Train/Test Split

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# partition the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## 6. Baseline Models

In [None]:
# Random Forest
# Tree ensembles do not depend on feature scale, so the raw features are used.
rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# MLP
# The features mix very different magnitudes (e.g. zero-crossing rate vs.
# spectral rolloff in Hz), and MLP training is sensitive to that. Standardise
# inside a pipeline so the scaler is fitted on the training split only and is
# applied automatically at predict time.
mlp = make_pipeline(
    StandardScaler(),
    MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=500, random_state=42),
)
mlp.fit(X_train, y_train)
y_pred_mlp = mlp.predict(X_test)

## 7. Evaluation

In [None]:
def evaluate(y_true, y_pred, model_name):
    """Print a classification report and plot the confusion matrix.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model, aligned with ``y_true``.
        model_name: Name shown in the printed header and the plot title.

    Returns:
        None. The report is printed and the heatmap is shown via
        ``plt.show()``.
    """
    print(f"\nModel: {model_name}\n")
    print(classification_report(y_true, y_pred))
    cm = confusion_matrix(y_true, y_pred)

    # Draw on a fresh figure: on backends where plt.show() does not close the
    # current figure, reusing the implicit axes would stack heatmaps and
    # colorbars from successive calls on top of each other.
    _, ax = plt.subplots()
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
    # confusion_matrix puts true labels on rows and predictions on columns.
    ax.set_xlabel("Predicted label")
    ax.set_ylabel("True label")
    ax.set_title(f"Confusion Matrix - {model_name}")
    plt.show()

# Report every baseline against the same held-out labels, in a fixed order.
for model_name, predictions in (
    ("Random Forest", y_pred_rf),
    ("MLP", y_pred_mlp),
):
    evaluate(y_test, predictions, model_name)