In [1]:
# %% [markdown]
# # MLPC 2025 – Task 2 · Simple SED system  (v3 – autosufficiente con filtraggio e controllo)

# %% ---------------------------------------------------------------------------
# 1 · Setup generale
import os, math, joblib, numpy as np, pandas as pd
from pathlib import Path
from typing import Dict
from scipy.signal import medfilt

from compute_cost import (
    CLASSES, COST_MATRIX, aggregate_targets,
    total_cost, check_dataframe
)

# Input side: root of the test dataset and its pre-computed audio features.
DATASET_PATH = Path("../MLPC2025_test")
AF_PATH = DATASET_PATH.joinpath("audio_features")

# Trained per-class classifiers and the cache for frame-level outputs.
MODEL_DIR = Path("./models")
FRAME_OUT_DIR = Path("./frame_out")
FRAME_OUT_DIR.mkdir(exist_ok=True)

# Name of the segment-level prediction file written later in the notebook.
OUT_CSV = "predictions.csv"

# Start-up banner: target classes and the absolute locations in use.
for banner_label, banner_value in (
    ("Target classes:", CLASSES),
    ("Audio‑features :", AF_PATH.resolve()),
    ("Modelli        :", MODEL_DIR.resolve()),
):
    print(banner_label, banner_value)

# %% ---------------------------------------------------------------------------
# 2 · Carica i modelli per classe
# One classifier per target class, stored as classifier_<index>_<class>.pkl
# inside MODEL_DIR. Loading stops at the first missing file.
models = {}
for class_idx, class_name in enumerate(CLASSES):
    model_file = MODEL_DIR.joinpath(f"classifier_{class_idx}_{class_name}.pkl")
    if model_file.exists():
        models[class_name] = joblib.load(model_file)
        continue
    raise FileNotFoundError(f"Modello mancante: {model_file}")
print(f"Caricati {len(models)} modelli.")

# %% ---------------------------------------------------------------------------
# 3 · Genera (se manca) frame_out/<ID>.npz  con probs (T×10)
def ensure_frame_predictions():
    """Compute and cache frame-level class scores for clips that lack them.

    Every ``*.npz`` file in ``AF_PATH`` that has no file of the same name in
    ``FRAME_OUT_DIR`` is processed: its ``"embeddings"`` array is fed to each
    model in ``models`` and the resulting matrix (one row per input row, one
    column per entry of ``CLASSES``) is written to
    ``FRAME_OUT_DIR/<clip id>.npz`` under the key ``"probs"``.
    Clips that already have an output file are skipped.
    """
    # Local import: only the decision_function fallback needs it.
    from scipy.special import expit

    # Sorted so the processing order does not depend on the filesystem.
    todo = sorted(
        feat_file.stem
        for feat_file in AF_PATH.glob("*.npz")
        if not (FRAME_OUT_DIR / f"{feat_file.stem}.npz").exists()
    )

    print(f"Clip da processare: {len(todo)}")

    for fid in todo:
        # np.load on an .npz keeps the archive open; close it as soon as the
        # array has been read instead of leaking one handle per clip.
        with np.load(AF_PATH / f"{fid}.npz") as feats:
            X = feats["embeddings"]  # presumably T×D (frames × features) — confirm
        probs = np.zeros((X.shape[0], len(CLASSES)), dtype=np.float32)
        for j, cls in enumerate(CLASSES):
            mdl = models[cls]
            if hasattr(mdl, "predict_proba"):
                # NOTE(review): column 1 is taken as the positive class; this
                # assumes binary models whose second class is "present".
                probs[:, j] = mdl.predict_proba(X)[:, 1]
            else:
                # Squash raw scores with a sigmoid; expit stays finite and
                # warning-free for large |z|, unlike 1 / (1 + exp(-z)).
                probs[:, j] = expit(mdl.decision_function(X))
        np.savez_compressed(FRAME_OUT_DIR / f"{fid}.npz", probs=probs)


ensure_frame_predictions()

# %% ---------------------------------------------------------------------------
# 4 · Carica le predizioni frame-level filtrando sui file del ground-truth
# Ground-truth table; onsets are rounded to one decimal so they can be
# matched against the onsets generated for the predictions.
gt_df = pd.read_csv(DATASET_PATH.joinpath("ground_truth.csv"))
gt_df["onset"] = gt_df["onset"].round(1)
# File names that appear in the ground truth; used to filter predictions.
valid_files = set(gt_df["filename"].unique().tolist())

def load_frame_level_predictions_filtered(pred_dir: Path, allowed_files: set) -> Dict[str, Dict[str, np.ndarray]]:
    """Load cached frame-level scores for the files listed in ``allowed_files``.

    Args:
        pred_dir: Directory containing ``<clip id>.npz`` files, each with a
            ``"probs"`` array that has one column per entry of ``CLASSES``.
        allowed_files: File names of the form ``<clip id>.mp3``; cached
            predictions for any other clip are ignored.

    Returns:
        A dict mapping ``<clip id>.mp3`` to a dict ``{class name: 1-D array}``
        holding that class's column of the ``"probs"`` matrix.

    Raises:
        ValueError: If a ``"probs"`` array is not 2-D or its number of columns
            differs from ``len(CLASSES)``.
    """
    mapping = {}
    n_classes = len(CLASSES)
    # Sorted so the resulting dict (and everything derived from it) has a
    # reproducible order regardless of the filesystem.
    for npz_path in sorted(pred_dir.glob("*.npz")):
        fname = f"{npz_path.stem}.mp3"
        if fname not in allowed_files:
            continue
        # Close the archive right after reading instead of leaking the handle.
        with np.load(npz_path) as data:
            arr = data["probs"]
        # Explicit check rather than `assert`, which disappears under `-O`
        # and would fail with an IndexError on non-2-D input.
        if arr.ndim != 2 or arr.shape[1] != n_classes:
            raise ValueError(
                f"Forma inattesa in {npz_path}: {arr.shape}, "
                f"attese {n_classes} colonne."
            )
        mapping[fname] = {c: arr[:, i] for i, c in enumerate(CLASSES)}
    print(f"Caricate predizioni per {len(mapping)} file presenti nel ground-truth.")
    return mapping


frame_preds = load_frame_level_predictions_filtered(FRAME_OUT_DIR, valid_files)

# %% ---------------------------------------------------------------------------
# 5 · Aggregazione 10 frame → 1.2s + thresholding 0.17 + filtro mediano
def frame2segment(preds: Dict[str, Dict[str, np.ndarray]],
                  thr: float = 0.17, use_median: bool = True,
                  frames_per_segment: int = 10,
                  segment_seconds: float = 1.2) -> pd.DataFrame:
    """Turn frame-level scores into binary segment-level predictions.

    For every file the per-class score vectors are stacked into a matrix,
    aggregated with ``aggregate_targets`` over groups of
    ``frames_per_segment`` frames, thresholded with ``> thr`` and optionally
    smoothed along the segment axis with a length-3 median filter.

    Args:
        preds: Mapping ``file name -> {class name: 1-D score array}``; every
            class in ``CLASSES`` must be present for each file.
        thr: Segment scores strictly above this value become 1, others 0.
        use_median: Apply the median filter when a file has at least three
            segments.
        frames_per_segment: Number of frames merged into one segment
            (passed to ``aggregate_targets`` as ``f``).
        segment_seconds: Segment duration used to compute the ``onset``
            column as ``index * segment_seconds`` rounded to one decimal.

    Returns:
        DataFrame with columns ``filename``, ``onset`` and one 0/1 column per
        class, one row per (file, segment).

    Raises:
        ValueError: If ``frames_per_segment`` is smaller than 1.
    """
    if frames_per_segment < 1:
        raise ValueError("frames_per_segment must be >= 1")

    rows = []
    for fname, per_class in preds.items():
        frame_mat = np.stack([per_class[c] for c in CLASSES], axis=1)  # frames × classes
        # presumably segments × classes — depends on aggregate_targets
        seg_mat = aggregate_targets(frame_mat, f=frames_per_segment)
        bin_mat = (seg_mat > thr).astype(int)
        if use_median and bin_mat.shape[0] >= 3:
            # Kernel (3, 1): smooth over neighbouring segments only, never
            # across classes. medfilt zero-pads, so the first and last
            # segment stay active only if their single neighbour is active.
            # Cast back so labels are integers whatever dtype medfilt returns.
            bin_mat = medfilt(bin_mat, kernel_size=(3, 1)).astype(int)
        onsets = np.round(np.arange(bin_mat.shape[0]) * segment_seconds, 1)
        rows.extend(
            [fname, onset, *vec]
            for onset, vec in zip(onsets.tolist(), bin_mat.tolist())
        )
    return pd.DataFrame(rows, columns=["filename", "onset"] + CLASSES)


seg_df = frame2segment(frame_preds)
print("Segment-level rows:", len(seg_df))

# %% ---------------------------------------------------------------------------
# 6 · Salvataggio + validazione formato
# Validate first, write second: a table that fails the format check must not
# be left on disk under the submission file name.
# NOTE(review): relies on check_dataframe raising on invalid input and not
# modifying seg_df — confirm in compute_cost.
check_dataframe(seg_df, dataset_path=str(DATASET_PATH))
seg_df.to_csv(OUT_CSV, index=False)
print(f"✓ {OUT_CSV} creato e validato.")

# %% ---------------------------------------------------------------------------
# 7 · Valutazione costo (vs baseline 0)
# Cost of the system's segment-level predictions against the ground truth.
cost_sys, _ = total_cost(seg_df, gt_df)
print(f"Costo sistema: {cost_sys:.2f}")

# Reference point: same rows as the ground truth, every class predicted 0.
zero_pred = gt_df.copy()
zero_pred[CLASSES] = 0
cost_zero, _ = total_cost(zero_pred, gt_df)
print(f"Costo baseline (all-zero): {cost_zero:.2f}")
print(f"Δ miglioramento: {cost_zero - cost_sys:.2f}")

# If the ground truth has no positive label at all, the all-zero baseline is
# trivially perfect and the comparison above says nothing about the system.
# `not (> 0)` also catches a NaN sum.
gt_positives = gt_df[CLASSES].to_numpy().sum()
if not gt_positives > 0:
    print("ATTENZIONE: il ground-truth non contiene etichette positive; "
          "il confronto dei costi non è significativo.")

# %% ---------------------------------------------------------------------------
# 8 · Controllo allineamento predizioni e GT
# Inner join on (filename, onset): only rows present in both tables survive,
# with prediction columns suffixed "_p" and ground-truth columns "_t".
merged = seg_df.merge(gt_df, on=["filename", "onset"], suffixes=("_p", "_t"))
print("Righe dopo merge:", len(merged))
# The inner join drops unmatched rows silently, so compare the row counts
# explicitly to expose any misalignment between predictions and ground truth.
if len(merged) != len(seg_df) or len(merged) != len(gt_df):
    print(f"ATTENZIONE: allineamento incompleto "
          f"(predizioni={len(seg_df)}, ground-truth={len(gt_df)}, merge={len(merged)}).")
print("Somma etichette ground-truth:", merged[[c + "_t" for c in CLASSES]].values.sum())


Target classes: ['Speech', 'Shout', 'Chainsaw', 'Jackhammer', 'Lawn Mower', 'Power Drill', 'Dog Bark', 'Rooster Crow', 'Horn Honk', 'Siren']
Audio‑features : /Users/Q540900/Desktop/Sparkling---Pattern-Classification-Project/MLPC2025_test/audio_features
Modelli        : /Users/Q540900/Desktop/Sparkling---Pattern-Classification-Project/04 - Model Training/models
Caricati 10 modelli.
Clip da processare: 0
Caricate predizioni per 2742 file presenti nel ground-truth.
Segment-level rows: 52191
✓ predictions.csv creato e validato.
Costo sistema: 22.28
Costo baseline (all-zero): 0.00
Δ miglioramento: -22.28
Righe dopo merge: 52191
Somma etichette ground-truth: 0
