# Speech Emotion Recognition
## LSTM MFCC Sequence
Goal: Train a Bidirectional LSTM + Conv1D model on MFCC sequence features to classify emotions from speech audio.

In [1]:
# 📁 File: notebooks/03_lstm_mfcc_sequence_v2.py

import os
import numpy as np
import librosa
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, Conv1D, MaxPooling1D, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
import joblib

# --------------------------
# Parameters
# --------------------------
DATA_DIR = "data"
MAX_LEN = 130
N_MFCC = 40

# --------------------------
# Label Map
# --------------------------
emotion_map = {
    '01': 'neutral', '02': 'calm', '03': 'happy', '04': 'sad',
    '05': 'angry', '06': 'fearful', '07': 'disgust', '08': 'surprised'
}

# --------------------------
# Helper Functions
# --------------------------
def get_emotion(filename):
    try:
        return emotion_map[filename.split('-')[2]]
    except IndexError:
        return None

# --------------------------
# Feature Extraction
# --------------------------
X, y = [], []

for actor in tqdm(os.listdir(DATA_DIR)):
    actor_path = os.path.join(DATA_DIR, actor)
    if not os.path.isdir(actor_path):
        continue

    for file in os.listdir(actor_path):
        if file.endswith(".wav"):
            emotion = get_emotion(file)
            if emotion is None:
                continue

            try:
                path = os.path.join(actor_path, file)
                y_audio, sr = librosa.load(path, duration=3, offset=0.5)
                if len(y_audio) < sr * 1:
                    continue

                # Extract MFCC + delta + delta-delta
                mfcc = librosa.feature.mfcc(y=y_audio, sr=sr, n_mfcc=N_MFCC)
                delta = librosa.feature.delta(mfcc)
                delta2 = librosa.feature.delta(mfcc, order=2)
                combined = np.vstack([mfcc, delta, delta2])  # shape: 120 x time

                if combined.shape[1] < MAX_LEN:
                    pad_width = MAX_LEN - combined.shape[1]
                    combined = np.pad(combined, pad_width=((0, 0), (0, pad_width)), mode='constant')
                else:
                    combined = combined[:, :MAX_LEN]

                X.append(combined.T)
                y.append(emotion)
            except Exception as e:
                print(f"⚠️ Skipped {file}: {str(e)}")
                continue

X = np.array(X)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_cat = to_categorical(y_encoded, num_classes=8)

# Save label encoder
joblib.dump(label_encoder, "streamlit_app/lstm_label_encoder.pkl")

# --------------------------
# Oversampling
# --------------------------
X_flat = X.reshape(X.shape[0], -1)
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X_flat, y_encoded)
X_resampled = X_resampled.reshape(-1, MAX_LEN, N_MFCC * 3)  # 120 features now
y_resampled_cat = to_categorical(y_resampled, num_classes=8)

# --------------------------
# Train-Test Split
# --------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled_cat, test_size=0.2, stratify=y_resampled, random_state=42
)

# --------------------------
# Build Model (BiLSTM + Conv1D)
# --------------------------
model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(MAX_LEN, 120)),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    Bidirectional(LSTM(64, return_sequences=True)),
    Dropout(0.3),
    LSTM(32),
    Dropout(0.3),

    Dense(64, activation='relu'),
    Dense(8, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# --------------------------
# Train Model
# --------------------------
early_stop = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop]
)

# --------------------------
# Save Model
# --------------------------
model.save("streamlit_app/emotion_lstm_model.h5")
print("✅ Model saved as 'emotion_lstm_model.h5'")

100%|██████████████████████████████████████████████████████████████████████████████████| 24/24 [00:25<00:00,  1.05s/it]
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 100ms/step - accuracy: 0.1593 - loss: 2.0295 - val_accuracy: 0.1396 - val_loss: 2.2449
Epoch 2/50
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 96ms/step - accuracy: 0.3093 - loss: 1.7909 - val_accuracy: 0.1916 - val_loss: 2.2168
Epoch 3/50
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 98ms/step - accuracy: 0.3760 - loss: 1.6662 - val_accuracy: 0.2695 - val_loss: 1.9409
Epoch 4/50
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 148ms/step - accuracy: 0.4276 - loss: 1.4973 - val_accuracy: 0.3799 - val_loss: 1.5987
Epoch 5/50
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 138ms/step - accuracy: 0.4200 - loss: 1.5037 - val_accuracy: 0.4091 - val_loss: 1.4977
Epoch 6/50
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 140ms/step - accuracy: 0.4967 - loss: 1.4161 - val_accuracy: 0.4545 - val_loss: 1.4696
Epoch 7/50
[1m39/39[0m [32m



✅ Model saved as 'emotion_lstm_model.h5'


## ✅ Summary

| Stage           | Status                 |
|------------------|------------------------|
| **Feature Shape** | `(samples, 130, 120)` |
| **Model Type**    | `Conv1D + BiLSTM`     |
| **Final Train Acc** | ~87%              |
| **Final Val Acc**   | ~70%              |
| **Saved Model**     | `emotion_lstm_model.h5` |
| **Label Encoder**   | `lstm_label_encoder.pkl` |
