In [3]:
import os
import numpy as np
import librosa
import optuna
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping

# --------- Load Audio Files ---------
def load_audio_files_ravdess(dataset_path):
    audio_files = []
    labels = []
    for root, _, files in os.walk(dataset_path):
        for file in files:
            if file.endswith(".wav"):
                emotion_code = int(file.split("-")[2])
                audio_files.append(os.path.join(root, file))
                labels.append(emotion_code)
    return audio_files, labels

# --------- Map Emotion Labels ---------
def map_emotions_ravdess(labels):
    emotion_dict = {
        1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad',
        5: 'angry', 6: 'fearful', 7: 'disgust', 8: 'surprised'
    }
    return [emotion_dict[label] for label in labels]

# --------- Extract MFCC + ZCR + RMS ---------
def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    zcr = librosa.feature.zero_crossing_rate(y)
    rms = librosa.feature.rms(y=y)

    # Average pooling
    mfccs_mean = np.mean(mfccs, axis=1)
    zcr_mean = np.mean(zcr)
    rms_mean = np.mean(rms)

    return np.concatenate((mfccs_mean, [zcr_mean, rms_mean]))

# --------- Load Dataset and Extract Features ---------
dataset_path = "C:/Users/samhi/OneDrive/문서/College/s6/Speech Processing/Endsem/Final codes/ravdees"

audio_files, labels = load_audio_files_ravdess(dataset_path)
labels_mapped = map_emotions_ravdess(labels)

X = []
y_clean = []
failed = []

for file, label in zip(audio_files, labels_mapped):
    try:
        features = extract_features(file)
        X.append(features)
        y_clean.append(label)
    except Exception as e:
        failed.append((file, str(e)))

print(f"Extracted features from {len(X)} files.")
print(f"Skipped {len(failed)} files.")

# --------- Preprocess ---------
X = np.array(X)
y = LabelEncoder().fit_transform(y_clean)

X = StandardScaler().fit_transform(X)
X = X.reshape((X.shape[0], X.shape[1], 1))  # Reshape for LSTM

# --------- Split ---------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --------- Optuna Objective ---------
def objective(trial):
    lstm_units = trial.suggest_int("lstm_units", 64, 256)
    dropout_rate = trial.suggest_float("dropout_rate", 0.2, 0.5)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])

    model = Sequential([
        LSTM(units=lstm_units, input_shape=(X_train.shape[1], 1), return_sequences=False),
        Dropout(dropout_rate),
        Dense(128, activation='relu'),
        Dropout(dropout_rate),
        Dense(len(np.unique(y)), activation='softmax')
    ])

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    model.fit(X_train, y_train,
              validation_split=0.2, epochs=20, batch_size=batch_size,
              callbacks=[early_stop], verbose=0)

    _, accuracy = model.evaluate(X_test, y_test, verbose=0)
    return accuracy

# --------- Run Optuna ---------
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

best_params = study.best_params
print("Best Parameters:", best_params)

# --------- Final Model ---------
final_model = Sequential([
    LSTM(units=best_params['lstm_units'], input_shape=(X_train.shape[1], 1), return_sequences=False),
    Dropout(best_params['dropout_rate']),
    Dense(128, activation='relu'),
    Dropout(best_params['dropout_rate']),
    Dense(len(np.unique(y)), activation='softmax')
])

final_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

final_model.fit(X_train, y_train,
                validation_data=(X_test, y_test),
                epochs=50, batch_size=best_params['batch_size'],
                callbacks=[early_stop])

# --------- Evaluation ---------
eval_result = final_model.evaluate(X_test, y_test)
y_pred = np.argmax(final_model.predict(X_test), axis=1)
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Final Test Loss: {eval_result[0]:.4f}")
print(f"Final Test Accuracy: {eval_result[1]*100:.2f}%")
print(f"Final Weighted F1 Score: {f1:.4f}")
print(classification_report(y_test, y_pred))





[I 2025-04-05 23:40:56,810] A new study created in memory with name: no-name-7380421a-b2a1-4b20-8542-a65de40f5a7b


Extracted features from 2880 files.
Skipped 0 files.






[I 2025-04-05 23:42:23,156] Trial 0 finished with value: 0.4270833432674408 and parameters: {'lstm_units': 254, 'dropout_rate': 0.45495634370246135, 'batch_size': 32}. Best is trial 0 with value: 0.4270833432674408.
[I 2025-04-05 23:43:09,977] Trial 1 finished with value: 0.4357638955116272 and parameters: {'lstm_units': 156, 'dropout_rate': 0.20971218622363244, 'batch_size': 64}. Best is trial 1 with value: 0.4357638955116272.
[I 2025-04-05 23:43:53,742] Trial 2 finished with value: 0.3958333432674408 and parameters: {'lstm_units': 116, 'dropout_rate': 0.49489214744624166, 'batch_size': 64}. Best is trial 1 with value: 0.4357638955116272.
[I 2025-04-05 23:45:44,643] Trial 3 finished with value: 0.4947916567325592 and parameters: {'lstm_units': 219, 'dropout_rate': 0.3545965221023448, 'batch_size': 16}. Best is trial 3 with value: 0.4947916567325592.
[I 2025-04-05 23:47:09,111] Trial 4 finished with value: 0.5208333134651184 and parameters: {'lstm_units': 201, 'dropout_rate': 0.2336306

Best Parameters: {'lstm_units': 194, 'dropout_rate': 0.2065420143531594, 'batch_size': 16}
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Final Test Loss: 0.5964
Final Test Accuracy: 85.76%
Final Weighted F1 Score: 0.8570
              precision    recall  f1-score   support

           0       0.96      0.85      0.90        79
           1       0.84      0.97      0.90        69
           2       0.85      0.86      0.85        84
           3       0.86      0.79      0.82        80
           4       0.79      0.85  