# Real-Time Speech Assessment - SVM Training

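This notebook extracts per-clip audio features (the means of 13 MFCCs, RMS energy, zero-crossing rate, and a pitch slot that is currently a constant placeholder) from the RAVDESS dataset and trains a Support Vector Machine (SVM) classifier for binary stress detection (calm vs. stress).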

In [None]:
!pip install librosa scikit-learn tqdm numpy

In [None]:
import os
import glob
import numpy as np
import pickle
import librosa
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report
from tqdm.notebook import tqdm

In [None]:
# --- CONFIGURATION ---
# File name the trained model bundle is pickled to.
MODEL_FILENAME = "svm_stress.pkl"

# Candidate dataset roots; every directory in this list that exists is scanned.
# On Kaggle the dataset is usually mounted at /kaggle/input/ravdess-emotional-speech-audio,
# while a local run usually keeps it under 'data'.
DATA_DIRS = [
    "/kaggle/input/ravdess-emotional-speech-audio",
    "../input/ravdess-emotional-speech-audio",
    "data",
    "../data",
]

In [None]:
# Feature Extraction Logic (Same as src/features.py)
def extract_features(y, sr=16000):
    """Summarise one audio clip as a fixed-length feature vector.

    Parameters
    ----------
    y : array-like
        Audio samples of a single clip (anything ``np.asarray`` accepts).
    sr : int, optional
        Sampling rate of ``y`` in Hz; forwarded to the MFCC computation.

    Returns
    -------
    list of float
        16 values in this order: the 13 MFCCs averaged along axis 1,
        RMS energy, zero-crossing rate, and the pitch placeholder.

    Raises
    ------
    ValueError
        If ``y`` contains no samples.
    """
    y = np.asarray(y)
    # An empty clip would produce NaN for RMS (mean of nothing) and ZCR (0/0);
    # reject it up front so NaNs never reach the training matrix.
    if y.size == 0:
        raise ValueError("extract_features() received an empty audio signal")

    # 1. RMS Energy (Loudness)
    rms = np.sqrt(np.mean(y**2))

    # 2. Zero Crossing Rate (Roughness)
    # A crossing is counted where two neighbouring samples have opposite signs.
    zcr = ((y[:-1] * y[1:]) < 0).sum() / len(y)

    # 3. MFCCs (Timbre)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfcc_mean = np.mean(mfccs, axis=1)

    # 4. Pitch (F0) - real pitch tracking is skipped for bulk-processing speed,
    # so this slot is a constant placeholder.
    # NOTE(review): the header says this mirrors src/features.py. Confirm the
    # inference-side extractor also emits 0.0 here; otherwise the fitted scaler
    # will see pitch values it was never trained on.
    f0 = 0.0

    return mfcc_mean.tolist() + [float(rms), float(zcr), float(f0)]

In [None]:
def _stress_label(path):
    """Map a RAVDESS-style file name to a binary label, or None if it should be skipped.

    The third dash-separated field of the base name is read as the emotion code:
    01=Neutral, 02=Calm, 03=Happy -> CALM (0)
    04=Sad, 05=Angry, 06=Fear, 07=Disgust -> STRESS (1)
    Any other code, or a name that does not follow the pattern, yields None.
    """
    parts = os.path.basename(path).split("-")
    if len(parts) < 3:
        return None
    try:
        emotion = int(parts[2])
    except ValueError:
        return None
    if emotion in (1, 2, 3):
        return 0
    if emotion in (4, 5, 6, 7):
        return 1
    return None


def load_data():
    """Scan DATA_DIRS for audio files and build the feature matrix and label vector.

    Every existing directory in ``DATA_DIRS`` is searched recursively for
    ``.wav`` and ``.mp4`` files. Each file with a recognised emotion code is
    loaded at 16 kHz and passed through ``extract_features``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray) or (None, None)
        ``X`` with one row of features per usable file and ``y`` with the
        matching 0/1 labels. ``(None, None)`` is returned when no files are
        found or when no file yields usable features.
    """
    valid_files = []
    seen_real_paths = set()
    for d in DATA_DIRS:
        if not os.path.exists(d):
            continue
        print(f"Scanning {d}...")
        for pattern in ("**/*.wav", "**/*.mp4"):
            for path in glob.glob(os.path.join(d, pattern), recursive=True):
                # Several DATA_DIRS entries can resolve to the same directory
                # (e.g. the absolute and the relative Kaggle path). Keep each
                # physical file once so duplicates cannot land on both sides
                # of the train/test split.
                real_path = os.path.realpath(path)
                if real_path in seen_real_paths:
                    continue
                seen_real_paths.add(real_path)
                valid_files.append(path)

    if not valid_files:
        print("No files found! Check DATA_DIRS.")
        return None, None

    # glob order depends on the filesystem; sort so the seeded split downstream
    # is reproducible across machines.
    valid_files.sort()

    print(f"Found {len(valid_files)} files. Processing...")
    X, y = [], []
    failures = []

    for f in tqdm(valid_files):
        label = _stress_label(f)
        if label is None:
            continue

        try:
            # Load Audio
            y_audio, sr = librosa.load(f, sr=16000)
            feats = extract_features(y_audio, sr)
            if not np.all(np.isfinite(feats)):
                raise ValueError("non-finite feature values")
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole scan,
            # but remember it so the loss is visible afterwards.
            failures.append((f, e))
            continue

        X.append(feats)
        y.append(label)

    if failures:
        print(f"Skipped {len(failures)} files that could not be processed. First few:")
        for path, err in failures[:5]:
            print(f"  {path}: {type(err).__name__}: {err}")

    if not X:
        print("No usable samples could be extracted.")
        return None, None

    return np.array(X), np.array(y)

In [None]:
X, y = load_data()
if X is not None:
    # Fail fast on a malformed dataset instead of deep inside scikit-learn.
    assert X.ndim == 2, f"expected a 2-D feature matrix, got shape {X.shape}"
    assert len(X) == len(y), "features and labels are misaligned"
    assert np.isfinite(X).all(), "feature matrix contains NaN/inf values"
    print(f"Data Shape: {X.shape}")
    # The class balance is needed to interpret the accuracy reported after training.
    calm_count, stress_count = np.bincount(y, minlength=2)[:2]
    print(f"Class balance: calm={calm_count}, stress={stress_count}")

In [None]:
if X is not None:
    # Split
    # Stratify so both splits keep the calm/stress ratio; fall back to a plain
    # random split when a class has fewer than two samples.
    class_counts = np.unique(y, return_counts=True)[1]
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
        stratify=y if class_counts.min() >= 2 else None,
    )
    # NOTE(review): this is a per-clip random split. If the dataset has several
    # clips per speaker, the same voice can appear in train and test; consider
    # a grouped split - confirm against the dataset layout.

    # Train SVM
    print("Training SVM...")
    # random_state makes the probability calibration done for probability=True
    # reproducible between runs.
    model = make_pipeline(
        StandardScaler(),
        SVC(probability=True, kernel='rbf', random_state=42),
    )
    model.fit(X_train, y_train)

    # Evaluate
    print("Evaluation:")
    print(model.score(X_test, y_test))
    # Accuracy alone hides per-class behaviour, so also show precision/recall.
    print(
        classification_report(
            y_test,
            model.predict(X_test),
            labels=[0, 1],
            target_names=["calm", "stress"],
            zero_division=0,
        )
    )

    # Save
    # The classifier and the scaler are stored as separate entries under the
    # keys 'model' and 'scaler'.
    with open(MODEL_FILENAME, 'wb') as f:
        pickle.dump({'model': model['svc'], 'scaler': model['standardscaler']}, f)
    print(f"Model saved to {MODEL_FILENAME}")