In [1]:
import sys
import os
import pandas as pd
import tensorflow as tf

# Add path to Helpers to import model_utils
sys.path.append(os.path.abspath('../Helpers'))
import model_utils as mu

In [2]:
# --- 1. Load Data ---
BASE_PATH = '../data_recordings'
print(f"Loading manifest from: {BASE_PATH}")
df = mu.load_manifest(BASE_PATH)

# Optional: Sample data for quick debugging (uncomment next line)
# df = df.head(50)

Loading manifest from: ../data_recordings


In [3]:
# Load and process audio into spectrograms
X, y, speakers = mu.load_and_process_data(df, BASE_PATH)

Loading 11225 audio files...
Successfully loaded: 11225, Failed: 0
Generating Mel-Spectrograms...


In [4]:
# --- 2. Split Data ---
X_train, X_val, X_test, y_train, y_val, y_test, df_train, df_val, df_test = mu.split_dataset(X, y, speakers, df)

print(f"Train shape: {X_train.shape}")
print(f"Val shape:   {X_val.shape}")
print(f"Test shape:  {X_test.shape}")

Train shape: (7859, 128, 130, 1)
Val shape:   (1681, 128, 130, 1)
Test shape:  (1685, 128, 130, 1)


In [5]:
# --- 3. Define Experiments ---
# List of tuples: (Experiment Name, Optimizer, Dropout, Learning Rate)
experiments = [
    ("Baseline_Adam", "adam", 0.0, 0.001),
    ("Baseline_SGD",  "sgd",  0.0, 0.01),
    ("Dropout_0.2",   "adam", 0.2, 0.001),
    ("Low_LR_Adam",   "adam", 0.0, 0.0001),
]

results_summary = []
# Create a directory for results if it doesn't exist
os.makedirs("results", exist_ok=True)

In [6]:
# --- 4. Run Experiments Loop ---
for exp_name, opt, drop, lr in experiments:
    print(f"\n" + "="*40)
    print(f"Running Experiment: {exp_name}")
    print(f"Params: Opt={opt}, Dropout={drop}, LR={lr}")
    print("="*40)

    # Build model using helper function
    model = mu.build_model(
        input_shape=X_train.shape[1:],
        optimizer_name=opt,
        dropout_rate=drop,
        learning_rate=lr
    )

    # Callbacks: Early Stopping and CSV Logger
    log_path = f"results/logs_{exp_name}.csv"
    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1),
        tf.keras.callbacks.CSVLogger(log_path)
    ]

    # Train model
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=20,
        batch_size=32,
        callbacks=callbacks,
        verbose=1
    )

    # Evaluate on Test Set
    loss, acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Accuracy: {acc:.4f}")

    # Record results
    results_summary.append({
        "Experiment": exp_name,
        "Test_Accuracy": acc,
        "Test_Loss": loss,
        "Optimizer": opt,
        "Dropout": drop,
        "Learning_Rate": lr
    })

    # Save Model
    model_save_path = f"results/model_{exp_name}.h5"
    model.save(model_save_path)
    print(f"Model saved to {model_save_path}")

    # --- IMPORTANT FOR EDA ---
    # Save predictions with metadata for Error Analysis
    print("Generating predictions for analysis...")
    preds_prob = model.predict(X_test, verbose=0).flatten()

    # Create DataFrame with true labels, predictions, AND metadata (filenames)
    # We use df_test which we got from the split function
    pred_df = df_test.copy()
    pred_df["true_label"] = y_test
    pred_df["pred_prob"] = preds_prob
    pred_df["pred_label"] = (preds_prob > 0.5).astype(int)

    # Save to CSV
    pred_df.to_csv(f"results/predictions_{exp_name}.csv", index=False)


Running Experiment: Baseline_Adam
Params: Opt=adam, Dropout=0.0, LR=0.001
Epoch 1/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 152ms/step - accuracy: 0.6136 - loss: 1.6358 - val_accuracy: 0.6764 - val_loss: 0.6147
Epoch 2/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 145ms/step - accuracy: 0.7118 - loss: 0.5537 - val_accuracy: 0.7852 - val_loss: 0.4482
Epoch 3/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 130ms/step - accuracy: 0.7908 - loss: 0.4283 - val_accuracy: 0.8519 - val_loss: 0.3411
Epoch 4/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 126ms/step - accuracy: 0.8384 - loss: 0.3566 - val_accuracy: 0.8572 - val_loss: 0.3215
Epoch 5/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 125ms/step - accuracy: 0.8495 - loss: 0.3277 - val_accuracy: 0.8763 - val_loss: 0.2942
Epoch 6/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 126ms/step - accuracy:



Test Accuracy: 0.8926
Model saved to results/model_Baseline_Adam.h5
Generating predictions for analysis...

Running Experiment: Baseline_SGD
Params: Opt=sgd, Dropout=0.0, LR=0.01
Epoch 1/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 116ms/step - accuracy: 0.5329 - loss: nan - val_accuracy: 0.5348 - val_loss: 0.6908
Epoch 2/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 116ms/step - accuracy: 0.5349 - loss: 0.6909 - val_accuracy: 0.5348 - val_loss: 0.6907
Epoch 3/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 113ms/step - accuracy: 0.5349 - loss: 0.6910 - val_accuracy: 0.5348 - val_loss: 0.6910
Epoch 4/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 114ms/step - accuracy: 0.5349 - loss: 0.6910 - val_accuracy: 0.5348 - val_loss: 0.6907
Epoch 5/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 114ms/step - accuracy: 0.5349 - loss: 0.6909 - val_accuracy: 0.5348 - val_loss: 0.6909



Test Accuracy: 0.5353
Model saved to results/model_Baseline_SGD.h5
Generating predictions for analysis...

Running Experiment: Dropout_0.2
Params: Opt=adam, Dropout=0.2, LR=0.001
Epoch 1/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 140ms/step - accuracy: 0.5681 - loss: 2.5391 - val_accuracy: 0.6669 - val_loss: 0.6300
Epoch 2/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 138ms/step - accuracy: 0.6855 - loss: 0.6087 - val_accuracy: 0.7002 - val_loss: 0.5762
Epoch 3/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 138ms/step - accuracy: 0.7068 - loss: 0.5670 - val_accuracy: 0.7329 - val_loss: 0.5197
Epoch 4/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 137ms/step - accuracy: 0.7202 - loss: 0.5395 - val_accuracy: 0.7329 - val_loss: 0.5061
Epoch 5/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 138ms/step - accuracy: 0.7224 - loss: 0.5271 - val_accuracy: 0.7246 - val_loss: 0.5



Test Accuracy: 0.9573
Model saved to results/model_Dropout_0.2.h5
Generating predictions for analysis...

Running Experiment: Low_LR_Adam
Params: Opt=adam, Dropout=0.0, LR=0.0001
Epoch 1/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 135ms/step - accuracy: 0.5922 - loss: 0.8267 - val_accuracy: 0.6627 - val_loss: 0.6215
Epoch 2/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 129ms/step - accuracy: 0.6935 - loss: 0.5792 - val_accuracy: 0.7394 - val_loss: 0.5599
Epoch 3/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 128ms/step - accuracy: 0.7624 - loss: 0.4992 - val_accuracy: 0.8001 - val_loss: 0.4665
Epoch 4/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 126ms/step - accuracy: 0.8089 - loss: 0.4268 - val_accuracy: 0.8560 - val_loss: 0.3794
Epoch 5/20
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 130ms/step - accuracy: 0.8748 - loss: 0.3161 - val_accuracy: 0.8590 - val_loss: 0.3



Test Accuracy: 0.9276
Model saved to results/model_Low_LR_Adam.h5
Generating predictions for analysis...


In [7]:
# --- 5. Summary ---
summary_df = pd.DataFrame(results_summary)
print("\n=== Final Experiments Summary ===")
print(summary_df)

summary_path = "results/experiments_summary.csv"

# Check if file exists to determine whether to write the header
write_header = not os.path.exists(summary_path)

# Save to CSV with mode='a' (append)
summary_df.to_csv(summary_path, mode='a', header=write_header, index=False)

print("\nAll experiments completed.")


=== Final Experiments Summary ===
      Experiment  Test_Accuracy  Test_Loss Optimizer  Dropout  Learning_Rate
0  Baseline_Adam       0.892582   0.250945      adam      0.0         0.0010
1   Baseline_SGD       0.535312   0.690652       sgd      0.0         0.0100
2    Dropout_0.2       0.957270   0.116995      adam      0.2         0.0010
3    Low_LR_Adam       0.927596   0.176640      adam      0.0         0.0001

All experiments completed.
