In [2]:
import os
import numpy as np
import opensmile
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report
import optuna

# ------------------- Load SAVEE -------------------
def load_savee_audio(dataset_path):
    audio_files = []
    labels = []

    label_map = {
        'a': 'angry', 'd': 'disgust', 'f': 'fear', 'h': 'happy',
        'n': 'neutral', 'sa': 'sad', 'su': 'surprise'
    }

    for file in os.listdir(dataset_path):
        if file.endswith(".wav"):
            parts = file.split('_')
            emotion_code = parts[1][:2] if parts[1][:2] in label_map else parts[1][0]
            if emotion_code in label_map:
                emotion = label_map[emotion_code]
                audio_files.append(os.path.join(dataset_path, file))
                labels.append(emotion)

    print(f"Loaded {len(audio_files)} audio files from SAVEE.")
    return audio_files, labels

# OpenSMILE Feature Extraction
def extract_opensmile_features(file_path):
    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.ComParE_2016,
        feature_level=opensmile.FeatureLevel.Functionals
    )
    features = smile.process_file(file_path)
    return features.values.flatten()

# Dataset path
dataset_path = "C:/Users/samhi/OneDrive/문서/College/s6/Speech Processing/Endsem/archive/ALL"

audio_files, labels = load_savee_audio(dataset_path)

X, y_clean = [], []
for file, label in tqdm(zip(audio_files, labels), total=len(audio_files)):
    try:
        features = extract_opensmile_features(file)
        if features.size > 0:
            X.append(features)
            y_clean.append(label)
    except Exception as e:
        print(f"Error processing {file}: {e}")

X = np.array(X)
y = LabelEncoder().fit_transform(y_clean)
X = StandardScaler().fit_transform(X)

# Train/Test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ------------------- DCNN with Optuna -------------------
def objective(trial):
    model = Sequential([
        Conv1D(trial.suggest_int('filters1', 32, 128), kernel_size=trial.suggest_int('kernel1', 3, 7),
               activation='relu', input_shape=(X_train.shape[1], 1)),
        MaxPooling1D(pool_size=2),
        Dropout(trial.suggest_float('dropout', 0.2, 0.5)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(trial.suggest_float('dropout', 0.2, 0.5)),
        Dense(len(set(y)), activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train.reshape(-1, X_train.shape[1], 1), y_train, epochs=20, batch_size=trial.suggest_int('batch', 16, 64), verbose=0,
              validation_data=(X_test.reshape(-1, X_test.shape[1], 1), y_test), callbacks=[EarlyStopping(patience=3)])
    _, acc = model.evaluate(X_test.reshape(-1, X_test.shape[1], 1), y_test, verbose=0)
    return acc

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

# Train final model
params = study.best_params
model = Sequential([
    Conv1D(params['filters1'], kernel_size=params['kernel1'], activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(params['dropout']),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(params['dropout']),
    Dense(len(set(y)), activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train.reshape(-1, X_train.shape[1], 1), y_train, epochs=50, batch_size=params['batch'], validation_data=(X_test.reshape(-1, X_test.shape[1], 1), y_test), callbacks=[EarlyStopping(patience=5)])

# Evaluation
y_pred = model.predict(X_test.reshape(-1, X_test.shape[1], 1))
y_pred = np.argmax(y_pred, axis=1)

print(classification_report(y_test, y_pred))


Loaded 480 audio files from SAVEE.


100%|██████████| 480/480 [02:29<00:00,  3.20it/s]
[I 2025-04-06 12:30:51,685] A new study created in memory with name: no-name-701314a1-c214-4592-b0d0-86e595c58713









[I 2025-04-06 12:31:54,537] Trial 0 finished with value: 0.6145833134651184 and parameters: {'filters1': 82, 'kernel1': 3, 'dropout': 0.344226840746249, 'batch': 55}. Best is trial 0 with value: 0.6145833134651184.
[I 2025-04-06 12:32:27,829] Trial 1 finished with value: 0.5625 and parameters: {'filters1': 82, 'kernel1': 5, 'dropout': 0.43349568355571366, 'batch': 56}. Best is trial 0 with value: 0.6145833134651184.
[I 2025-04-06 12:33:48,732] Trial 2 finished with value: 0.65625 and parameters: {'filters1': 100, 'kernel1': 3, 'dropout': 0.32632555742281666, 'batch': 30}. Best is trial 2 with value: 0.65625.
[I 2025-04-06 12:34:33,899] Trial 3 finished with value: 0.6979166865348816 and parameters: {'filters1': 44, 'kernel1': 3, 'dropout': 0.29444693026146634, 'batch': 60}. Best is trial 3 with value: 0.6979166865348816.
[I 2025-04-06 12:36:57,577] Trial 4 finished with value: 0.6458333134651184 and parameters: {'filters1': 117, 'kernel1': 7, 'dropout': 0.3897025241134324, 'batch': 50}

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
              precision    recall  f1-score   support

           0       0.57      0.80      0.67         5
           1       0.50      0.50      0.50        12
           2       0.43      0.25      0.32        12
           3       0.50      0.50      0.50        10
           4       0.92      0.72      0.81        32
           5       0.50      0.79      0.61        14
           6       0.46      0.55      0.50        11

    accuracy                           0.60        96
   macro avg       0.55      0.59      0.56        96
weighted avg       0.63      0.60      0.60        96

