
# Project Echo — Task 1: Alternate Model **vs Baseline** (Suite‑Style Notebook)

This notebook mirrors Dean’s benchmarking suite structure (configs + utils + orchestration) but stays **self‑contained** for Task 1.  
It runs **one alternate model** (default: *MobileNetV2*) and optionally a **baseline** (default: *ResNet50*) so you can compare.

**Mapping to the suite:**
- `configs/*.py` → Config dataclasses below (`SystemConfig`, `MelSpecConfig`, `TrainConfig`, `ExperimentConfig`).
- `utils/*.py` → Helper functions (dataset discovery, mel pipeline, DataSets, train/eval).
- `Benchmarking_Framework.ipynb` → Orchestration cells at the end.


In [1]:

# Optional installs (uncomment if needed)
# !pip install librosa==0.10.1 soundfile==0.12.1 tensorflow==2.12.0 scikit-learn==1.4.2 matplotlib==3.8.4


In [2]:

import os, random, json
from dataclasses import dataclass, asdict
from pathlib import Path
import numpy as np
import tensorflow as tf
import librosa, soundfile as sf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, f1_score
import matplotlib.pyplot as plt


# Set seed early so everything else can use it
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)



## Configs (suite-style)

In [3]:
from dataclasses import dataclass

@dataclass
class SystemConfig:
    data_root: str = "/Users/mankirat/Desktop/Deakin/DEAKIN25/Sem2/ProjectEcho/dataset/Bucket_1"
    output_dir: str = "./outputs_task1"
    seed: int = SEED
    device: str = "GPU"  # or "CPU"

system_cfg = SystemConfig()

@dataclass
class MelSpecConfig:
    sample_rate: int = 48000
    clip_duration_s: int = 5
    n_fft: int = 2048
    hop_length: int = 200
    n_mels: int = 260
    fmin: int = 20
    fmax: int = 13000
    top_db: int = 80
    image_size: tuple = (260, 260)  # (H, W)
    channels: int = 3

melspec_cfg = MelSpecConfig()

@dataclass
class TrainConfig:
    batch_size: int = 16
    max_epochs: int = 50
    learning_rate: float = 1e-4
    early_stop_patience: int = 10
    lr_plateau_patience: int = 6

train_cfg = TrainConfig()

@dataclass
class ExperimentConfig:
    # Run keys: choose any from model factory below
    alt_model_key: str = "mobilenet_v2_alt"
    baseline_model_key: str = "resnet50_baseline"
    run_baseline: bool = True
    val_size: float = 0.2
    test_size: float = 0.1
    stratify: bool = True

exp_cfg = ExperimentConfig()

Path(system_cfg.output_dir).mkdir(parents=True, exist_ok=True)
print("Output dir:", system_cfg.output_dir)


Output dir: ./outputs_task1


## Utils — Dataset discovery & splits

In [4]:

def discover_dataset(root: str, exts=('.wav','.mp3','.flac','.ogg')):
    root = Path(root)
    classes = sorted([d.name for d in root.iterdir() if d.is_dir()])
    files, labels = [], []
    for idx, cls in enumerate(classes):
        for p in (root/cls).rglob("*"):
            if p.suffix.lower() in exts:
                files.append(str(p)); labels.append(idx)
    return classes, np.array(files), np.array(labels)

def make_splits(files, labels, val_size=0.2, test_size=0.1, stratify=True, seed=SEED):
    strat = labels if stratify else None
    f_trainval, f_test, y_trainval, y_test = train_test_split(files, labels, test_size=test_size,
                                                             random_state=seed, stratify=strat)
    strat2 = y_trainval if stratify else None
    relative_val = val_size / (1.0 - test_size)
    f_train, f_val, y_train, y_val = train_test_split(f_trainval, y_trainval, test_size=relative_val,
                                                      random_state=seed, stratify=strat2)
    return (f_train, y_train), (f_val, y_val), (f_test, y_test)


In [5]:
import numpy as np

def filter_min_count(files, labels, min_per_class=3):
    files = np.array(files); labels = np.array(labels)
    keep = []
    for cls in np.unique(labels):
        idx = np.where(labels == cls)[0]
        if len(idx) >= min_per_class:
            keep.extend(idx.tolist())
    keep = np.array(sorted(keep))
    return files[keep], labels[keep]

## Utils — Audio→Mel→Image (pipeline)

In [6]:

# 1) (Optional but recommended) set MobileNet-friendly size
melspec_cfg.image_size = (224, 224)  # (H, W)

# 2) Replace your load_audio_to_mel with this PURE NumPy/Python version
from PIL import Image

def load_audio_to_mel(path, cfg: MelSpecConfig):
    # --- read path from tf.numpy_function (bytes) ---
    if isinstance(path, (bytes, bytearray)):
        path = path.decode()

    # --- load audio, pad/trim to fixed duration ---
    y, sr = librosa.load(path, sr=cfg.sample_rate, mono=True)
    target_len = int(cfg.clip_duration_s * cfg.sample_rate)
    if len(y) < target_len:
        y = np.pad(y, (0, target_len - len(y)))
    elif len(y) > target_len:
        y = y[:target_len]

    # --- mel spectrogram in dB, normalized to [0,1] ---
    S = librosa.feature.melspectrogram(
        y=y, sr=cfg.sample_rate, n_fft=cfg.n_fft, hop_length=cfg.hop_length,
        n_mels=cfg.n_mels, fmin=cfg.fmin, fmax=cfg.fmax
    )
    S_db = librosa.power_to_db(S, top_db=cfg.top_db, ref=np.max)  # shape: (n_mels, time)
    img = S_db.T  # (time, mels)

    # min-max normalize (avoid div by zero)
    mn, mx = img.min(), img.max()
    if mx > mn:
        img = (img - mn) / (mx - mn)
    else:
        img = np.zeros_like(img, dtype=np.float32)

    # --- resize to (H,W) using PIL (no TF ops here) ---
    # PIL expects (W,H), so reverse when passing size
    pil_img = Image.fromarray((img * 255).astype(np.uint8))
    pil_img = pil_img.resize((cfg.image_size[1], cfg.image_size[0]), resample=Image.BICUBIC)

    arr = np.asarray(pil_img).astype(np.float32) / 255.0  # (H, W)
    if arr.ndim == 2:
        arr = np.repeat(arr[..., None], cfg.channels, axis=-1)  # (H, W, C)

    return arr.astype(np.float32)  # pure numpy array, no tf ops


# Keep your existing load_audio_to_mel(path, melspec_cfg) as-is
# 1) Discover dataset
CLASS_NAMES, FILES, LABELS = discover_dataset(system_cfg.data_root)
CLASS_NAMES = np.array(CLASS_NAMES)
NUM_CLASSES = int(len(CLASS_NAMES))  # set once after you set CLASS_NAMES

def _load_with_cfg(path):
    # path comes in as a numpy bytes/str; we use the global melspec_cfg
    return load_audio_to_mel(path, melspec_cfg)

def tf_load_mel_map(path, label):
    # Only pass Tensors to numpy_function; capture config via closure
    img = tf.numpy_function(_load_with_cfg, [path], tf.float32)
    img.set_shape((melspec_cfg.image_size[0], melspec_cfg.image_size[1], melspec_cfg.channels))
    one_hot = tf.one_hot(label, depth=NUM_CLASSES)   # use a Python int, not tf.shape(...)
    one_hot.set_shape((NUM_CLASSES,))
    return img, one_hot

AUTOTUNE = tf.data.AUTOTUNE

def build_dataset(file_paths, labels, batch_size, shuffle=False):
    ds = tf.data.Dataset.from_tensor_slices((file_paths, labels))
    if shuffle: ds = ds.shuffle(len(file_paths), seed=SEED)
    ds = ds.map(tf_load_mel_map, num_parallel_calls=AUTOTUNE)
    ds = ds.batch(batch_size).prefetch(AUTOTUNE)
    return ds


## Model factory (alternate + baseline)

In [7]:

def build_model(model_key: str, num_classes: int, input_shape=(260,260,3), lr=1e-4):
    inputs = tf.keras.Input(shape=input_shape)

    if model_key == "mobilenet_v2_alt":
        base = tf.keras.applications.MobileNetV2(include_top=False, weights="imagenet",
                                                 input_shape=input_shape, pooling="avg")
    elif model_key == "resnet50_baseline":
        base = tf.keras.applications.ResNet50(include_top=False, weights="imagenet",
                                              input_shape=input_shape, pooling="avg")
    else:
        raise ValueError(f"Unknown model_key: {model_key}")

    x = base(inputs, training=True)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dense(num_classes * 8, activation="relu")(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dense(num_classes * 4, activation="relu")(x)
    x = tf.keras.layers.Dropout(0.5)(x)
    logits = tf.keras.layers.Dense(num_classes, activation=None)(x)

    model = tf.keras.Model(inputs, logits)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                  metrics=["accuracy"])
    return model


## Train / Evaluate helpers

In [8]:

def train_model(model, train_ds, val_ds, out_dir: str):
    os.makedirs(out_dir, exist_ok=True)
    ckpt_path = os.path.join(out_dir, "best.h5")
    cbs = [
        tf.keras.callbacks.ModelCheckpoint(ckpt_path, monitor="val_loss", mode="min",
                                           save_best_only=True, save_weights_only=True),
        tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.75, patience=6),
        tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
    ]
    hist = model.fit(train_ds, validation_data=val_ds, epochs=train_cfg.max_epochs, callbacks=cbs, verbose=1)
    return hist, ckpt_path

def evaluate_model(model, test_ds, class_names):
    y_true, y_pred = [], []
    for xb, yb in test_ds:
        logits = model.predict(xb, verbose=0)
        y_pred.extend(np.argmax(logits, axis=1).tolist())
        y_true.extend(np.argmax(yb.numpy(), axis=1).tolist())
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    macro_f1 = f1_score(y_true, y_pred, average="macro")
    print("\nClassification report:")
    print(classification_report(y_true, y_pred, target_names=class_names, digits=3))
    cm = confusion_matrix(y_true, y_pred)
    return macro_f1, cm

def plot_confusion_matrix(cm, class_names, title="Confusion Matrix"):
    fig, ax = plt.subplots(figsize=(8,6))
    im = ax.imshow(cm, interpolation='nearest')
    ax.figure.colorbar(im, ax=ax)
    ax.set(xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]),
           xticklabels=class_names, yticklabels=class_names,
           ylabel='True label', xlabel='Predicted label', title=title)
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
    fig.tight_layout(); plt.show()


## Orchestration — Build data once (shared splits)

In [9]:
# === ALL-IN-ONE: discover -> filter -> remap -> safe split -> build datasets ===
from pathlib import Path
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split

assert Path(system_cfg.data_root).exists(), "Update system_cfg.data_root to your dataset path."

# 1) Discover dataset (requires discover_dataset() to be defined already)
CLASS_NAMES, FILES, LABELS = discover_dataset(system_cfg.data_root)
CLASS_NAMES = np.array(CLASS_NAMES)

# 2) Filter classes with < 2 samples (prevents stratified split errors)
counts = Counter(LABELS.tolist())
valid_old_labels = sorted([c for c, n in counts.items() if n >= 2])

# Keep only those samples
keep_idx = [i for i, l in enumerate(LABELS) if l in valid_old_labels]
FILES2  = np.array(FILES)[keep_idx]
LABELS2 = np.array(LABELS)[keep_idx]

# 3) Remap labels to 0..K-1 based on kept classes
kept_classes = [CLASS_NAMES[i] for i in valid_old_labels]  # names of kept classes in old order
old_to_new = {old:i for i, old in enumerate(valid_old_labels)}
LABELS2 = np.array([old_to_new[l] for l in LABELS2], dtype=int)
CLASS_NAMES = np.array(kept_classes)

print(f"Classes total: {len(CLASS_NAMES)} | Min per-class count (after filtering): "
      f"{min(Counter(LABELS2.tolist()).values() or [0])}")

# 4) Safe stratified split with bump + fallback
def safe_stratified_splits(files, labels, val_size, test_size, seed):
    files = np.array(files); labels = np.array(labels)
    n = len(labels)
    n_classes = len(np.unique(labels))

    def _do_split(vs, ts, stratify=True):
        strat = labels if stratify else None
        f_trainval, f_test, y_trainval, y_test = train_test_split(
            files, labels, test_size=ts, random_state=seed, stratify=strat
        )
        rel_val = vs / (1.0 - ts)
        strat2 = y_trainval if stratify else None
        f_train, f_val, y_train, y_val = train_test_split(
            f_trainval, y_trainval, test_size=rel_val, random_state=seed, stratify=strat2
        )
        return (f_train, y_train), (f_val, y_val), (f_test, y_test)

    # try as-is
    try:
        return _do_split(val_size, test_size, stratify=True)
    except ValueError as e:
        print("⚠️ Stratified split failed with current sizes:", e)

    # bump sizes to ensure ≥1 sample/class in each split
    min_frac = n_classes / max(n, 1)
    bumped_test = max(test_size, min_frac + 1e-6)
    bumped_val  = max(val_size,  min_frac + 1e-6)

    # ensure train size also ≥ n_classes
    max_val_allowed = 1.0 - bumped_test - min_frac - 1e-6
    if bumped_val > max_val_allowed:
        bumped_val = max_val_allowed

    if bumped_test + bumped_val >= 1.0 - 1e-6:
        print("⚠️ Not enough samples to keep stratification with all classes.")
    else:
        try:
            print(f"➡️ Retrying with bumped sizes: test_size={bumped_test:.3f}, val_size={bumped_val:.3f}")
            return _do_split(bumped_val, bumped_test, stratify=True)
        except ValueError as e2:
            print("⚠️ Bumped stratified split still failed:", e2)

    print("➡️ Falling back to NON-stratified split so training can proceed.")
    return _do_split(val_size, test_size, stratify=False)

# 5) Make splits (uses exp_cfg/train_cfg/system_cfg already defined)
(train_f, train_y), (val_f, val_y), (test_f, test_y) = safe_stratified_splits(
    FILES2, LABELS2, val_size=exp_cfg.val_size, test_size=exp_cfg.test_size, seed=system_cfg.seed
)

print(f"Train: {len(train_f)}, Val: {len(val_f)}, Test: {len(test_f)} | Classes: {len(CLASS_NAMES)}")

# 6) Build tf.data datasets (requires build_dataset() to be defined already)
train_ds = build_dataset(train_f, train_y, batch_size=train_cfg.batch_size, shuffle=True)
val_ds   = build_dataset(val_f,   val_y,   batch_size=train_cfg.batch_size, shuffle=False)
test_ds  = build_dataset(test_f,  test_y,  batch_size=train_cfg.batch_size, shuffle=False)

# quick sanity check
for xb, yb in train_ds.take(1):
    print("Sample batch shapes:", xb.shape, yb.shape)


Classes total: 118 | Min per-class count (after filtering): 2
⚠️ Stratified split failed with current sizes: The test_size = 36 should be greater or equal to the number of classes = 118
➡️ Retrying with bumped sizes: test_size=0.334, val_size=0.331
⚠️ Bumped stratified split still failed: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.
➡️ Falling back to NON-stratified split so training can proceed.
Train: 246, Val: 71, Test: 36 | Classes: 118
Metal device set to: Apple M2


2025-09-08 20:28:31.239184: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-09-08 20:28:31.239516: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2025-09-08 20:28:31.408948: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Sample batch shapes: (16, 224, 224, 3) (16, 118)


## Run Alternate Model

In [10]:
# Build tf.data datasets from the split file lists
train_ds = build_dataset(train_f, train_y, batch_size=train_cfg.batch_size, shuffle=True)
val_ds   = build_dataset(val_f,   val_y,   batch_size=train_cfg.batch_size, shuffle=False)
test_ds  = build_dataset(test_f,  test_y,  batch_size=train_cfg.batch_size, shuffle=False)

# Quick sanity check
for xb, yb in train_ds.take(1):
    print("Batch X:", xb.shape, "| Batch y:", yb.shape)


Batch X: (16, 224, 224, 3) | Batch y: (16, 118)


In [11]:

alt_name = f"{exp_cfg.alt_model_key}_seed{system_cfg.seed}"
alt_out = str(Path(system_cfg.output_dir)/alt_name)

alt_model = build_model(exp_cfg.alt_model_key, num_classes=len(CLASS_NAMES),
                        input_shape=(melspec_cfg.image_size[0], melspec_cfg.image_size[1], melspec_cfg.channels),
                        lr=train_cfg.learning_rate)
alt_model.summary()
alt_hist, alt_ckpt = train_model(alt_model, train_ds, val_ds, alt_out)
alt_f1, alt_cm = evaluate_model(alt_model, test_ds, CLASS_NAMES.tolist())
plot_confusion_matrix(alt_cm, CLASS_NAMES.tolist(), title=f"{exp_cfg.alt_model_key} — Confusion Matrix")

with open(Path(alt_out)/"results.json","w") as f:
    json.dump({"model_key": exp_cfg.alt_model_key, "macro_f1": float(alt_f1),
               "classes": CLASS_NAMES.tolist(),
               "config": {"system": asdict(system_cfg), "mel": asdict(melspec_cfg),
                          "train": asdict(train_cfg), "exp": asdict(exp_cfg)}}, f, indent=2)
print("Saved:", Path(alt_out)/"results.json")


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 mobilenetv2_1.00_224 (Funct  (None, 1280)             2257984   
 ional)                                                          
                                                                 
 batch_normalization (BatchN  (None, 1280)             5120      
 ormalization)                                                   
                                                                 
 dense (Dense)               (None, 944)               1209264   
                                                                 
 batch_normalization_1 (Batc  (None, 944)              3776      
 hNormalization)                                                 
                                                             

2025-09-08 20:28:39.173338: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2025-09-08 20:29:10.504941: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50


2025-09-08 20:43:02.028289: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.



Classification report:


ValueError: Number of classes, 53, does not match size of target_names, 118. Try specifying the labels parameter

## Run Baseline (optional)

In [None]:

baseline_f1 = None
if exp_cfg.run_baseline:
    base_name = f"{exp_cfg.baseline_model_key}_seed{system_cfg.seed}"
    base_out = str(Path(system_cfg.output_dir)/base_name)

    base_model = build_model(exp_cfg.baseline_model_key, num_classes=len(CLASS_NAMES),
                             input_shape=(melspec_cfg.image_size[0], melspec_cfg.image_size[1], melspec_cfg.channels),
                             lr=train_cfg.learning_rate)
    base_model.summary()
    base_hist, base_ckpt = train_model(base_model, train_ds, val_ds, base_out)
    baseline_f1, base_cm = evaluate_model(base_model, test_ds, CLASS_NAMES.tolist())
    plot_confusion_matrix(base_cm, CLASS_NAMES.tolist(), title=f"{exp_cfg.baseline_model_key} — Confusion Matrix")

    with open(Path(base_out)/"results.json","w") as f:
        json.dump({"model_key": exp_cfg.baseline_model_key, "macro_f1": float(baseline_f1),
                   "classes": CLASS_NAMES.tolist(),
                   "config": {"system": asdict(system_cfg), "mel": asdict(melspec_cfg),
                              "train": asdict(train_cfg), "exp": asdict(exp_cfg)}}, f, indent=2)
    print("Saved:", Path(base_out)/"results.json")


## Save a quick summary CSV

In [None]:

import csv
summary_path = Path(system_cfg.output_dir)/"summary.csv"
rows = [["model_key","macro_f1"],
        [exp_cfg.alt_model_key, alt_f1]]
if baseline_f1 is not None:
    rows.append([exp_cfg.baseline_model_key, baseline_f1])

with open(summary_path, "w", newline="") as f:
    csv.writer(f).writerows(rows)

print("Summary saved:", summary_path)



### Porting Tips for Task 2
- Move dataclasses into `configs/system_config.py`, `configs/model_configs.py`, etc.
- Move dataset/mel functions into `utils/create_dataset.py` and `utils/data_pipeline.py`.
- Keep the `model_key` names identical; register them in `configs/model_configs.py`.
- In `experiment_configs.py`, create entries for (baseline, alt) with shared splits and epochs.
