In [3]:
import os
import numpy as np
import opensmile
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, accuracy_score, f1_score

# ------------------- Load SAVEE -------------------
def load_savee_audio(dataset_path):
    audio_files = []
    labels = []
    label_map = {
        'a': 'angry', 'd': 'disgust', 'f': 'fear', 'h': 'happy',
        'n': 'neutral', 'sa': 'sad', 'su': 'surprise'
    }

    for file in os.listdir(dataset_path):
        if file.endswith(".wav"):
            parts = file.split('_')
            emotion_code = parts[1][:2] if parts[1][:2] in label_map else parts[1][0]
            if emotion_code in label_map:
                emotion = label_map[emotion_code]
                audio_files.append(os.path.join(dataset_path, file))
                labels.append(emotion)

    print(f"Loaded {len(audio_files)} audio files from SAVEE.")
    return audio_files, labels

# OpenSMILE Feature Extraction
def extract_opensmile_features(file_path):
    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.ComParE_2016,
        feature_level=opensmile.FeatureLevel.Functionals
    )
    features = smile.process_file(file_path)
    return features.values.flatten()

# ------------------- Load & Extract Features -------------------
dataset_path = "C:/Users/samhi/OneDrive/문서/College/s6/Speech Processing/Endsem/archive/ALL"
audio_files, labels = load_savee_audio(dataset_path)

X, y_clean = [], []
for file, label in tqdm(zip(audio_files, labels), total=len(audio_files)):
    try:
        features = extract_opensmile_features(file)
        if features.size > 0:
            X.append(features)
            y_clean.append(label)
    except Exception as e:
        print(f"Error processing {file}: {e}")

X = np.array(X)
y = LabelEncoder().fit_transform(y_clean)
X = StandardScaler().fit_transform(X)

# ------------------- Cross-validation Setup -------------------
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

accuracies = []
f1_scores = []

for fold, (train_idx, test_idx) in enumerate(kf.split(X, y), 1):
    print(f"\n--- Fold {fold} ---")
    
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # Reshape for LSTM: (samples, timesteps, features)
    X_train_lstm = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
    X_test_lstm = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

    model = Sequential([
        LSTM(64, input_shape=(1, X.shape[1]), return_sequences=False),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(len(np.unique(y)), activation='softmax')
    ])

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    model.fit(X_train_lstm, y_train, epochs=40, batch_size=16,
              validation_split=0.2, callbacks=[es], verbose=0)

    y_pred = np.argmax(model.predict(X_test_lstm), axis=1)

    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')

    print(f"Fold Accuracy: {acc:.4f}, Macro F1: {f1:.4f}")
    print(classification_report(y_test, y_pred))

    accuracies.append(acc)
    f1_scores.append(f1)

# ------------------- Final Summary -------------------
print("\n=== Cross-Validation Summary ===")
print(f"Mean Accuracy: {np.mean(accuracies)*100:.2f}%")
print(f"Mean Macro F1 Score: {np.mean(f1_scores):.4f}")


Loaded 480 audio files from SAVEE.


  0%|          | 0/480 [00:00<?, ?it/s]

100%|██████████| 480/480 [01:39<00:00,  4.80it/s]



--- Fold 1 ---
Fold Accuracy: 0.4167, Macro F1: 0.2996
              precision    recall  f1-score   support

           0       0.27      0.58      0.37        12
           1       0.33      0.17      0.22        12
           2       0.00      0.00      0.00        12
           3       0.38      0.25      0.30        12
           4       0.51      0.83      0.63        24
           5       0.00      0.00      0.00        12
           6       0.50      0.67      0.57        12

    accuracy                           0.42        96
   macro avg       0.28      0.36      0.30        96
weighted avg       0.31      0.42      0.34        96


--- Fold 2 ---


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold Accuracy: 0.3958, Macro F1: 0.3231
              precision    recall  f1-score   support

           0       0.21      0.50      0.30        12
           1       0.20      0.17      0.18        12
           2       0.50      0.25      0.33        12
           3       0.20      0.08      0.12        12
           4       0.65      0.83      0.73        24
           5       0.43      0.25      0.32        12
           6       0.33      0.25      0.29        12

    accuracy                           0.40        96
   macro avg       0.36      0.33      0.32        96
weighted avg       0.40      0.40      0.37        96


--- Fold 3 ---
Fold Accuracy: 0.3750, Macro F1: 0.2627
              precision    recall  f1-score   support

           0       0.24      0.42      0.30        12
           1       0.00      0.00      0.00        12
           2       0.20      0.25      0.22        12
           3       0.25      0.25      0.25        12
           4       0.56      0.92   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold Accuracy: 0.4375, Macro F1: 0.4134
              precision    recall  f1-score   support

           0       0.36      0.42      0.38        12
           1       0.21      0.33      0.26        12
           2       0.75      0.25      0.38        12
           3       0.50      0.33      0.40        12
           4       0.60      0.62      0.61        24
           5       0.40      0.33      0.36        12
           6       0.44      0.58      0.50        12

    accuracy                           0.44        96
   macro avg       0.47      0.41      0.41        96
weighted avg       0.48      0.44      0.44        96


=== Cross-Validation Summary ===
Mean Accuracy: 40.83%
Mean Macro F1 Score: 0.3238
