# In [None]:
# ==========================================
#   TASK 2: EMOTION RECOGNITION FROM SPEECH
#   Single Script Version (MFCC + CNN)
# ==========================================

import os

import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import (
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    Input,
    MaxPooling2D,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

# ----------- PATH TO DATASET -------------
# Defaults to "RAVDESS/"; set the RAVDESS_PATH environment variable to point
# the script at a dataset stored elsewhere without editing this file.
DATA_PATH = os.environ.get("RAVDESS_PATH") or "RAVDESS/"

# ----------- HELPER: EXTRACT MFCC --------
def extract_mfcc(file_path, max_len=216, n_mfcc=40, duration=3, offset=0.5):
    """Load an audio clip and return a fixed-width MFCC matrix.

    Args:
        file_path: Path of the audio file to read.
        max_len: Number of time frames in the returned matrix. Shorter
            clips are zero-padded on the right, longer ones are truncated.
        n_mfcc: Number of MFCC coefficients (rows) to compute.
        duration: Seconds of audio to load from the file.
        offset: Seconds to skip at the start of the file before loading.

    Returns:
        A 2-D NumPy array with ``n_mfcc`` rows and ``max_len`` columns.
    """
    signal, sample_rate = librosa.load(file_path, duration=duration, offset=offset)
    mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)

    # Pad/trim along the time axis so every sample has a uniform size.
    n_frames = mfcc.shape[1]
    if n_frames < max_len:
        mfcc = np.pad(mfcc, ((0, 0), (0, max_len - n_frames)), mode='constant')
    else:
        mfcc = mfcc[:, :max_len]

    return mfcc

# ----------- LOAD DATA + LABELS ----------
features = []
emotions = []

# RAVDESS emotion codes (third dash-separated field of each file name).
emotion_map = {
    "01": "neutral", "02": "calm", "03": "happy", "04": "sad",
    "05": "angry", "06": "fear", "07": "disgust", "08": "surprise"
}

for root, dirs, files in os.walk(DATA_PATH):
    # Sort in place so the traversal order (and therefore the seeded
    # train/test split) does not depend on the filesystem's listing order.
    dirs.sort()
    for file in sorted(files):
        if not file.lower().endswith(".wav"):
            continue

        path = os.path.join(root, file)

        # RAVDESS naming: the emotion code is the third field of the name.
        parts = file.split("-")
        emotion = emotion_map.get(parts[2]) if len(parts) > 2 else None
        if emotion is None:
            # Not a RAVDESS-style name, or an unknown code: skip the file
            # instead of crashing or adding a None label to the dataset.
            print("Skipping unrecognised file:", path)
            continue

        features.append(extract_mfcc(path))
        emotions.append(emotion)

print("Total Samples:", len(features))

# Fail here with a clear message rather than later inside label encoding.
if not features:
    raise FileNotFoundError(
        f"No usable RAVDESS .wav files found under {DATA_PATH!r}"
    )

# ----------- PREPARE DATA ----------------
# Stack the per-file MFCC matrices and add a trailing channel axis.
X = np.reshape(np.array(features), (-1, 40, 216, 1))
y = np.array(emotions)

# Turn emotion names into integer ids, then one-hot encode them.
le = LabelEncoder()
class_ids = le.fit_transform(y)
y = to_categorical(class_ids)

# Stratified hold-out split (20% test) with a fixed seed.
split = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split

# ----------- CNN MODEL -------------------
# Two Conv -> MaxPool -> Dropout stages followed by a dense classifier head.
# Both the input shape and the number of output classes are read from the
# prepared arrays instead of being hard-coded.
model = Sequential([
    # Explicit Input layer: passing `input_shape` to the first layer is
    # deprecated in current Keras releases.
    Input(shape=X.shape[1:]),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(y.shape[1], activation='softmax')
])

model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()

# ----------- TRAIN -----------------------
# The held-out split is passed as validation data for the fit call.
fit_options = dict(epochs=30, batch_size=32, validation_data=(X_test, y_test))
history = model.fit(X_train, y_train, **fit_options)

# ----------- EVALUATE --------------------
# evaluate() result is unpacked as the loss followed by the accuracy metric.
test_scores = model.evaluate(X_test, y_test)
loss, acc = test_scores
print("Test Accuracy:", acc)

# ----------- SAVE MODEL ------------------
# Write the trained model to disk under a fixed file name.
model.save("emotion_model.h5")
print("Model Saved: emotion_model.h5")
