In [13]:
import os
import numpy as np
import pandas as pd
import librosa
import opensmile
import xgboost as xgb
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import optuna

In [17]:
# Load Data for RAVDESS (recursive)
def load_audio_files_ravdess(dataset_path):
    audio_files = []
    labels = []
    for root, _, files in os.walk(dataset_path):
        for file in files:
            if file.endswith(".wav"):
                emotion_code = int(file.split("-")[2])  # Extract emotion from filename
                labels.append(emotion_code)
                audio_files.append(os.path.join(root, file))
    print(f"Found {len(audio_files)} audio files in total.")
    return audio_files, labels

In [18]:
# Emotion Mapping for RAVDESS
def map_emotions_ravdess(labels):
    emotion_dict = {
        1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad',
        5: 'angry', 6: 'fearful', 7: 'disgust', 8: 'surprised'
    }
    return [emotion_dict[label] for label in labels]

# Feature Extraction using OpenSMILE
def extract_features(file_path):
    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.ComParE_2016,
        feature_level=opensmile.FeatureLevel.Functionals
    )
    features = smile.process_file(file_path)
    return features.values.flatten()

# Feature Selection using XGBoost
def feature_selection(X, y):
    xgb_model = xgb.XGBClassifier(n_estimators=100, use_label_encoder=False, eval_metric='logloss')
    xgb_model.fit(X, y)
    feature_importances = xgb_model.feature_importances_
    top_features = np.argsort(feature_importances)[-50:]
    return X[:, top_features]

# --------- Load and Process RAVDESS Dataset ---------
dataset_path = "C:/Users/samhi/OneDrive/문서/College/s6/Speech Processing/Endsem/Final codes/ravdees"  # Update if needed

audio_files, labels = load_audio_files_ravdess(dataset_path)
labels_mapped = map_emotions_ravdess(labels)

Found 2880 audio files in total.


In [4]:
# Robust Feature Extraction
X = []
y_clean = []
failed_files = []

for file, label in tqdm(zip(audio_files, labels_mapped), total=len(audio_files)):
    try:
        features = extract_features(file)
        if features.size == 0:
            raise ValueError("Empty feature vector")
        X.append(features)
        y_clean.append(label)
    except Exception as e:
        failed_files.append((file, str(e)))

print(f"Extracted features from {len(X)} files.")
print(f"Skipped {len(failed_files)} files due to errors.")

100%|██████████| 2880/2880 [14:20<00:00,  3.35it/s]

Extracted features from 2880 files.
Skipped 0 files due to errors.





In [20]:
# --------- Preprocess Features ---------
X = np.array(X)
if X.ndim == 3:
    X = X.squeeze(axis=1)

y = LabelEncoder().fit_transform(y_clean)
X = StandardScaler().fit_transform(X)
X_selected = feature_selection(X, y)

# --------- Train-Test Split ---------
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

# --------- Optuna Objective for LSTM ---------
def objective(trial):
    lstm_units = trial.suggest_int('lstm_units', 64, 256)
    dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.5)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])

    model = Sequential([
        LSTM(units=lstm_units, input_shape=(X_train.shape[1], 1), return_sequences=False),
        Dropout(dropout_rate),
        Dense(128, activation='relu'),
        Dropout(dropout_rate),
        Dense(len(np.unique(y)), activation='softmax')
    ])

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    model.fit(X_train.reshape(-1, X_train.shape[1], 1), y_train,
              validation_split=0.2, epochs=20, batch_size=batch_size,
              callbacks=[early_stop], verbose=0)

    _, accuracy = model.evaluate(X_test.reshape(-1, X_test.shape[1], 1), y_test, verbose=0)
    return accuracy

# --------- Run Optuna Study ---------
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

# --------- Train Final Model ---------
best_params = study.best_params
print("Best Parameters:", best_params)

final_model = Sequential([
    LSTM(units=best_params['lstm_units'], input_shape=(X_train.shape[1], 1), return_sequences=False),
    Dropout(best_params['dropout_rate']),
    Dense(128, activation='relu'),
    Dropout(best_params['dropout_rate']),
    Dense(len(np.unique(y)), activation='softmax')
])

final_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

final_model.fit(X_train.reshape(-1, X_train.shape[1], 1), y_train,
                validation_data=(X_test.reshape(-1, X_test.shape[1], 1), y_test),
                epochs=50, batch_size=best_params['batch_size'],
                callbacks=[early_stop])

# --------- Evaluation ---------
eval_result = final_model.evaluate(X_test.reshape(-1, X_test.shape[1], 1), y_test)
y_pred = np.argmax(final_model.predict(X_test.reshape(-1, X_test.shape[1], 1)), axis=1)
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Final Test Loss: {eval_result[0]:.4f}")
print(f"Final Test Accuracy: {eval_result[1]*100:.2f}%")
print(f"Final Weighted F1 Score: {f1:.4f}")
print(classification_report(y_test, y_pred))

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
[I 2025-04-05 23:22:26,088] A new study created in memory with name: no-name-4e8a0cce-f4b5-4c2f-b9ea-fbaa63425ec0
[I 2025-04-05 23:23:02,167] Trial 0 finished with value: 0.390625 and parameters: {'lstm_units': 74, 'dropout_rate': 0.37880759152782395, 'batch_size': 32}. Best is trial 0 with value: 0.390625.
[I 2025-04-05 23:24:22,407] Trial 1 finished with value: 0.4583333432674408 and parameters: {'lstm_units': 155, 'dropout_rate': 0.23073275779485444, 'batch_size': 16}. Best is trial 1 with value: 0.4583333432674408.
[I 2025-04-05 23:25:21,310] Trial 2 finished with value: 0.3993055522441864 and parameters: {'lstm_units': 243, 'dropout_rate': 0.2118050107164003, 'batch_size': 64}. Best is trial 1 with value: 0.4583333432674408.
[I 2025-04-05 23:25:58,187] Trial 3 finished with value: 0.4166666567325592 and parameters: {'lstm_units': 165, 'dropout_rate': 0.3325164209765772, 'batch_size': 64}

Best Parameters: {'lstm_units': 253, 'dropout_rate': 0.3925523579584986, 'batch_size': 16}
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Final Test Loss: 0.7058
Final Test Accuracy: 80.73%
Final Weighted F1 Score: 0.8086
              precision    recall  f1-score   support

           0       0.81      0.81      0.81        79
           1       0.83      0.78      0.81        69
           2       0.88      0.81      0.84        84
           3       0.82      0.88      0.