In [13]:
import os
import numpy as np
import librosa
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import optuna

# --------- Load Audio Files (RAVDESS) ---------
def load_audio_files_ravdess(dataset_path):
    audio_files, labels = [], []
    for root, _, files in os.walk(dataset_path):
        for file in files:
            if file.endswith(".wav"):
                emotion_code = int(file.split("-")[2])  # Extract emotion from filename
                labels.append(emotion_code)
                audio_files.append(os.path.join(root, file))
    print(f"Found {len(audio_files)} audio files.")
    return audio_files, labels

# --------- Map Emotions to Names ---------
def map_emotions_ravdess(labels):
    emotion_dict = {
        1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad',
        5: 'angry', 6: 'fearful', 7: 'disgust', 8: 'surprised'
    }
    return [emotion_dict[label] for label in labels]

# --------- Feature Extraction (MFCC 40 + ZCR + RMS) ---------
def extract_features(file_path, sr=22050):
    y, _ = librosa.load(file_path, sr=sr)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    zcr = librosa.feature.zero_crossing_rate(y)
    rms = librosa.feature.rms(y=y)
    combined = np.concatenate((mfccs, zcr, rms), axis=0)
    return np.mean(combined, axis=1)  # shape: (43,)

# --------- Load and Extract Features ---------
dataset_path = "C:/Users/samhi/OneDrive/문서/College/s6/Speech Processing/Endsem/Final codes/ravdees"
audio_files, labels = load_audio_files_ravdess(dataset_path)
labels_mapped = map_emotions_ravdess(labels)

X, y_clean = [], []
for file, label in tqdm(zip(audio_files, labels_mapped), total=len(audio_files)):
    try:
        features = extract_features(file)
        X.append(features)
        y_clean.append(label)
    except Exception as e:
        print(f"Error processing {file}: {e}")

X = np.array(X)
y = LabelEncoder().fit_transform(y_clean)
X = StandardScaler().fit_transform(X)

# --------- Train-Test Split ---------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --------- Optuna for Hyperparameter Tuning ---------
def objective(trial):
    filters_1 = trial.suggest_int('filters_1', 32, 128)
    filters_2 = trial.suggest_int('filters_2', 64, 256)
    kernel_size = trial.suggest_int('kernel_size', 3, 7)
    dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.5)
    batch_size = trial.suggest_int('batch_size', 16, 64)

    model = Sequential([
        Conv1D(filters_1, kernel_size=kernel_size, activation='relu', input_shape=(X_train.shape[1], 1)),
        MaxPooling1D(pool_size=2),
        Dropout(dropout_rate),
        Conv1D(filters_2, kernel_size=kernel_size, activation='relu'),
        MaxPooling1D(pool_size=2),
        Dropout(dropout_rate),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(dropout_rate),
        Dense(len(set(y)), activation='softmax')
    ])

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    model.fit(X_train.reshape(-1, X_train.shape[1], 1), y_train,
              epochs=20, batch_size=batch_size,
              validation_data=(X_test.reshape(-1, X_test.shape[1], 1), y_test),
              callbacks=[early_stopping], verbose=0)

    _, accuracy = model.evaluate(X_test.reshape(-1, X_test.shape[1], 1), y_test, verbose=0)
    return accuracy

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# --------- Train Final DCNN ---------
model = Sequential([
    Conv1D(best_params['filters_1'], kernel_size=best_params['kernel_size'], activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(best_params['dropout_rate']),
    Conv1D(best_params['filters_2'], kernel_size=best_params['kernel_size'], activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(best_params['dropout_rate']),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(best_params['dropout_rate']),
    Dense(len(set(y)), activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

model.fit(X_train.reshape(-1, X_train.shape[1], 1), y_train,
          epochs=50, batch_size=best_params['batch_size'],
          validation_data=(X_test.reshape(-1, X_test.shape[1], 1), y_test),
          callbacks=[early_stopping])

# --------- Final Evaluation ---------
loss, acc = model.evaluate(X_test.reshape(-1, X_test.shape[1], 1), y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {acc * 100:.2f}%")


  from .autonotebook import tqdm as notebook_tqdm


Found 2880 audio files.


100%|██████████| 2880/2880 [01:00<00:00, 47.69it/s]
[I 2025-04-06 02:00:11,451] A new study created in memory with name: no-name-61ac36c5-8afa-4ca7-97d9-93c7798c01ec
[I 2025-04-06 02:00:26,079] Trial 0 finished with value: 0.9045138955116272 and parameters: {'filters_1': 64, 'filters_2': 236, 'kernel_size': 3, 'dropout_rate': 0.24344554932293935, 'batch_size': 41}. Best is trial 0 with value: 0.9045138955116272.
[I 2025-04-06 02:00:40,295] Trial 1 finished with value: 0.7621527910232544 and parameters: {'filters_1': 113, 'filters_2': 145, 'kernel_size': 5, 'dropout_rate': 0.48308167976119665, 'batch_size': 34}. Best is trial 0 with value: 0.9045138955116272.
[I 2025-04-06 02:00:51,043] Trial 2 finished with value: 0.8559027910232544 and parameters: {'filters_1': 62, 'filters_2': 141, 'kernel_size': 4, 'dropout_rate': 0.24967155828216328, 'batch_size': 50}. Best is trial 0 with value: 0.9045138955116272.
[I 2025-04-06 02:01:06,882] Trial 3 finished with value: 0.9114583134651184 and par

Best Hyperparameters: {'filters_1': 88, 'filters_2': 220, 'kernel_size': 6, 'dropout_rate': 0.26353660437925563, 'batch_size': 33}
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Test Loss: 0.2606, Test Accuracy: 94.44%


In [19]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Predictions
y_pred = model.predict(X_test.reshape(-1, X_test.shape[1], 1))
y_pred_classes = np.argmax(y_pred, axis=1)

# Evaluation Metrics
acc = accuracy_score(y_test, y_pred_classes)
precision = precision_score(y_test, y_pred_classes, average='macro')
recall = recall_score(y_test, y_pred_classes, average='macro')
f1 = f1_score(y_test, y_pred_classes, average='macro')

print(f"Test Accuracy: {acc * 100:.2f}%")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1 Score:  {f1:.4f}")


Test Accuracy: 94.44%
Precision: 0.9455
Recall:    0.9340
F1 Score:  0.9380
