# Audio Weld Defect Classification — Script-Orchestrated Notebook
This notebook intentionally delegates training/evaluation to the project scripts.
Any change in `audio/*.py` is automatically picked up here.


In [38]:
import json
import os
import shlex
import subprocess
import sys
from pathlib import Path

# Interpreter running this kernel; the command cells below launch scripts with it.
PYTHON_BIN = sys.executable
# Working directory at the time this cell runs; run_live uses it as the default cwd.
PROJECT_ROOT = Path.cwd()
# Base configuration file (relative path). ACTIVE_CONFIG_PATH starts out as the very
# same object and is the path the command cells pass via --config.
CONFIG_PATH = ACTIVE_CONFIG_PATH = Path("configs") / "master_config.json"

def run_live(cmd, cwd=None, extra_env=None):
    """Run a command and stream its combined stdout/stderr to the cell output.

    Args:
        cmd: Sequence of command parts; every item is converted with ``str``.
        cwd: Working directory for the child. Falls back to ``PROJECT_ROOT``
            when omitted or falsy.
        extra_env: Optional mapping (or iterable of pairs) of environment
            overrides layered on top of the current environment. Keys and
            values are converted with ``str`` so numbers and paths are accepted.

    Returns:
        None. The exit code is printed as ``[exit_code=N]``.

    Raises:
        subprocess.CalledProcessError: If the child exits with a non-zero code.
    """
    cmd = [str(c) for c in cmd]
    print("$ " + " ".join(shlex.quote(c) for c in cmd))

    env = os.environ.copy()
    # Ask Python children to flush output immediately so lines appear live.
    env["PYTHONUNBUFFERED"] = "1"
    if extra_env:
        # Popen rejects non-string environment values; coerce like `cmd` above.
        env.update({str(k): str(v) for k, v in dict(extra_env).items()})

    # The context manager closes the pipe and reaps the child on every exit path.
    with subprocess.Popen(
        cmd,
        cwd=str(cwd or PROJECT_ROOT),
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave stderr with stdout
        text=True,
        bufsize=1,  # line-buffered in text mode
    ) as proc:
        assert proc.stdout is not None
        try:
            for line in proc.stdout:
                print(line, end="")
        except BaseException:
            # Interrupting the cell (or any streaming error) must not leave the
            # child running in the background: ask it to stop, then force it.
            proc.terminate()
            try:
                proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                proc.kill()
            raise
        rc = proc.wait()

    print(f"\n[exit_code={rc}]")
    if rc != 0:
        raise subprocess.CalledProcessError(rc, cmd)

# Show which config files and interpreter this session is wired to.
print(f"Base config:   {CONFIG_PATH.resolve()}")
print(f"Active config: {ACTIVE_CONFIG_PATH.resolve()}")
print(f"Notebook python: {PYTHON_BIN}")

# Parse the active config and keep a handle on its audio training section.
cfg = json.loads(ACTIVE_CONFIG_PATH.read_text())
train_cfg = cfg["audio"]["training"]

# One-line overview of the settings read from the training section.
_summary_parts = [
    f"task={train_cfg.get('task')}",
    f"mil={train_cfg.get('sequence_mil', {}).get('enabled', False)}",
    f"epochs={train_cfg.get('num_epochs')}",
    f"lr={train_cfg.get('lr')}",
    f"checkpoint_dir={train_cfg.get('checkpoint_dir')}",
]
print("Run summary | " + " | ".join(_summary_parts))


Base config:   /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/configs/master_config.json
Active config: /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/configs/master_config.json
Notebook python: /home/alolli/miniconda3/envs/therness_env/bin/python
Run summary | task=binary | mil=True | epochs=45 | lr=0.0003 | checkpoint_dir=checkpoints/audio


## Auto-Promote Best Grid Run (Confirmatory)
This cell selects the best `grid_search_mil` run by `val_f1`, writes `configs/master_config_promoted.json`, and switches subsequent cells to that active config.
If no valid grid run is found, the base config remains active.


In [39]:
import copy
import glob
import re

import torch

# Start from the pristine base config so this cell is idempotent. A previous
# run may have rebound `cfg` / `ACTIVE_CONFIG_PATH` to the promoted config, whose
# `checkpoint_dir` differs; reusing it would move the grid root and the confirm
# directory on every re-run, and "Using base config." would not be true.
ACTIVE_CONFIG_PATH = CONFIG_PATH
with open(CONFIG_PATH) as f:
    cfg = json.load(f)
train_cfg = cfg["audio"]["training"]

base_train_cfg = cfg["audio"]["training"]
grid_root = Path(base_train_cfg["checkpoint_dir"]) / "grid_search_mil"
run_best_ckpts = sorted(glob.glob(str(grid_root / "run_*" / "best_model.pt")))

best_run = None
best_val_f1 = float("-inf")

# Pick the run whose best checkpoint stores the highest `val_f1`.
for ckpt_path_str in run_best_ckpts:
    ckpt_path = Path(ckpt_path_str)
    try:
        try:
            ckpt = torch.load(ckpt_path, map_location="cpu", weights_only=True)
        except TypeError:
            # Presumably for torch versions that do not accept `weights_only`.
            ckpt = torch.load(ckpt_path, map_location="cpu")
        val_f1 = ckpt.get("val_f1")
    except Exception as exc:
        # One corrupt or incompatible checkpoint should not abort the selection.
        print(f"Skipping unreadable checkpoint {ckpt_path}: {exc}")
        continue

    if val_f1 is not None and val_f1 > best_val_f1:
        best_val_f1 = val_f1
        best_run = ckpt_path.parent.name

if best_run is None:
    print("No valid grid-search checkpoint found. Using base config.")
else:
    print(f"Best grid run detected: {best_run} | val_f1={best_val_f1:.4f}")

    # Hyperparameters are recovered from the run directory name.
    m = re.match(
        r"run_\d+_topk(?P<topk>[\d.]+)_gw(?P<gw>[\d.]+)_lr(?P<lr>[\deE\-+.]+)",
        best_run,
    )

    if m is None:
        print("Could not parse run hyperparameters from run name. Using base config.")
    else:
        promoted_cfg = copy.deepcopy(cfg)
        promoted_train = promoted_cfg["audio"]["training"]
        promoted_train.setdefault("sequence_mil", {})

        promoted_train["task"] = "binary"
        promoted_train["lr"] = float(m.group("lr"))
        promoted_train["sequence_mil"]["enabled"] = True
        promoted_train["sequence_mil"]["topk_ratio_pos"] = float(m.group("topk"))
        promoted_train["sequence_mil"]["eval_pool_ratio"] = float(m.group("topk"))
        promoted_train["sequence_mil"]["good_window_weight"] = float(m.group("gw"))

        # The confirmatory run gets its own directory next to the base checkpoint dir.
        confirm_dir = Path(base_train_cfg["checkpoint_dir"]).parent / f"audio_confirm_{best_run}"
        promoted_train["checkpoint_dir"] = str(confirm_dir)

        promoted_path = Path("configs/master_config_promoted.json")
        with open(promoted_path, "w") as f:
            json.dump(promoted_cfg, f, indent=2)

        # Switch the notebook to the promoted config and reload it from disk.
        ACTIVE_CONFIG_PATH = promoted_path
        with open(ACTIVE_CONFIG_PATH) as f:
            cfg = json.load(f)
        train_cfg = cfg["audio"]["training"]

        print(f"Promoted config saved: {ACTIVE_CONFIG_PATH.resolve()}")
        print(
            f"Promoted summary | topk={train_cfg['sequence_mil']['topk_ratio_pos']} | "
            f"gw={train_cfg['sequence_mil']['good_window_weight']} | lr={train_cfg['lr']} | "
            f"checkpoint_dir={train_cfg['checkpoint_dir']}"
        )


No valid grid-search checkpoint found. Using base config.


## 1. Train (Uses `audio.run_audio`)
This runs the exact same code path as terminal training.


In [37]:
# Launch training via the `audio.run_audio` module with the active config.
train_cmd = [PYTHON_BIN, "-u", "-m", "audio.run_audio"]
train_cmd += ["--config", str(ACTIVE_CONFIG_PATH)]
run_live(train_cmd)


$ /home/alolli/miniconda3/envs/therness_env/bin/python -u -m audio.run_audio --config configs/master_config.json
Device: cuda

Total weld files: 1163
File label distribution (binary): {'defect': 617, 'good_weld': 546}
Split strategy: stratified
Train welds: 930 | Val welds: 233
File split stats | train={ defect: 493 (53.0%), good_weld: 437 (47.0%) } | val={ defect: 124 (53.2%), good_weld: 109 (46.8%) }
Train files: 930 | Val files: 233
Classes (2): {'defect': 0, 'good_weld': 1}
DataLoader stats | train_batches=117 | val_batches=30 | batch_size=8
MIL mode enabled | topk_ratio_pos=0.05 | topk_ratio_neg=0.2 | eval_pool_ratio=0.05 | auto_threshold=True | threshold=0.5 | good_window_weight=0.25
Model parameters: 185,698
LR schedule: base_lr=0.0003 | warmup_steps=526/5265 (10.0%) | plateau_factor=0.5 | plateau_patience=3
Config saved to checkpoints/audio/config.json

Epoch 1/45
----------------------------------------

Training (MIL):   0%|          | 0/117 [00:00<?, ?it/s]
Training (MIL):  

## 2. Evaluate Best Checkpoint (Optional)
Runs the script's `--test_only` evaluation path on the best checkpoint.


In [30]:
# Best checkpoint location according to the currently loaded config.
BEST_CKPT = Path(cfg["audio"]["training"]["checkpoint_dir"], "best_model.pt")
print(f"Active config: {ACTIVE_CONFIG_PATH.resolve()}")
print(f"Best checkpoint: {BEST_CKPT.resolve()}")

if not BEST_CKPT.exists():
    print("Checkpoint not found yet. Run training first.")
else:
    # Same module as training, switched to evaluation with --test_only.
    test_cmd = [PYTHON_BIN, "-u", "-m", "audio.run_audio"]
    test_cmd += ["--config", str(ACTIVE_CONFIG_PATH)]
    test_cmd += ["--test_only"]
    test_cmd += ["--checkpoint", str(BEST_CKPT)]
    run_live(test_cmd)


Active config: /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/configs/master_config_promoted.json
Best checkpoint: /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_confirm_run_07_topk0.05_gw0.25_lr0.0001/best_model.pt
Running: python -m audio.run_audio --config configs/master_config_promoted.json --test_only --checkpoint checkpoints/audio_confirm_run_07_topk0.05_gw0.25_lr0.0001/best_model.pt
Device: cuda

Total weld files: 775
File label distribution (binary): {'defect': 417, 'good_weld': 358}
Split strategy: stratified
Train welds: 620 | Val welds: 155
File split stats | train={ defect: 334 (53.9%), good_weld: 286 (46.1%) } | val={ defect: 83 (53.5%), good_weld: 72 (46.5%) }
Train files: 620 | Val files: 155
Classes (2): {'defect': 0, 'good_weld': 1}
DataLoader stats | train_batches=78 | val_batches=20 | batch_size=8
MIL mode enabled | topk_ratio_pos=0.05 | topk_ratio_neg=0.2 | eval_pool_ratio=0.05 | auto_threshold=True | threshold=0.5 |

                                                                             

Val loss: 0.3065
Val macro F1: 0.9483
Val accuracy: 0.9484
Val AUC: 0.9866
Threshold used: 0.25


## 3. Inspect Chunk Timeline (Optional)
Useful to verify onset behavior on defect vs good files.


In [31]:
# Best checkpoint location according to the currently loaded config.
BEST_CKPT = Path(cfg["audio"]["training"]["checkpoint_dir"], "best_model.pt")

if not BEST_CKPT.exists():
    print("Checkpoint not found yet. Run training first.")
else:
    # Run the timeline inspector on the validation split, one file per class
    # (as requested by the flags below).
    timeline_cmd = [PYTHON_BIN, "-u", "-m", "audio.inspect_timeline"]
    timeline_cmd += ["--config", str(ACTIVE_CONFIG_PATH)]
    timeline_cmd += ["--checkpoint", str(BEST_CKPT)]
    timeline_cmd += ["--split", "val", "--num_per_class", "1"]
    run_live(timeline_cmd)


Running: python -m audio.inspect_timeline --config configs/master_config_promoted.json --checkpoint checkpoints/audio_confirm_run_07_topk0.05_gw0.25_lr0.0001/best_model.pt --split val --num_per_class 1
Device: cuda
Inspecting 2 file(s)

File: /data1/malto/therness/data/Hackathon/defect-weld/lack_of_fusion_7_11-21-22_butt/11-21-22-0101-07/11-21-22-0101-07.flac
True label: defect
Summary | windows=38 | pred_file=defect | max_p=0.905 | mean_p=0.314 | high_ratio=26.3%
Live trigger: first ON at t=21.0s (window 21)

Window timeline:
t=   0.0s | p_defect=0.083 | cls=G | live=0
t=   1.0s | p_defect=0.083 | cls=G | live=0
t=   2.0s | p_defect=0.072 | cls=G | live=0
t=   3.0s | p_defect=0.097 | cls=G | live=0
t=   4.0s | p_defect=0.160 | cls=G | live=0
t=   5.0s | p_defect=0.096 | cls=G | live=0
t=   6.0s | p_defect=0.036 | cls=G | live=0
t=   7.0s | p_defect=0.077 | cls=G | live=0
t=   8.0s | p_defect=0.151 | cls=G | live=0
t=   9.0s | p_defect=0.113 | cls=G | live=0
t=  10.0s | p_defect=0.299 

## 4. Custom Commands
Edit and run this cell for ad-hoc script invocations.


In [32]:
# Ad-hoc invocation: start from the standard entry point with the active config.
custom_cmd = [PYTHON_BIN, "-u", "-m", "audio.run_audio", "--config", str(ACTIVE_CONFIG_PATH)]
# Uncomment to append extra flags, e.g. evaluation of a specific checkpoint:
# custom_cmd += ["--test_only"]
# custom_cmd += ["--checkpoint", "checkpoints/audio/best_model.pt"]
run_live(custom_cmd)


Running: python -m audio.run_audio --config configs/master_config_promoted.json
Device: cuda

Total weld files: 775
File label distribution (binary): {'defect': 417, 'good_weld': 358}
Split strategy: stratified
Train welds: 620 | Val welds: 155
File split stats | train={ defect: 334 (53.9%), good_weld: 286 (46.1%) } | val={ defect: 83 (53.5%), good_weld: 72 (46.5%) }
Train files: 620 | Val files: 155
Classes (2): {'defect': 0, 'good_weld': 1}
DataLoader stats | train_batches=78 | val_batches=20 | batch_size=8
MIL mode enabled | topk_ratio_pos=0.05 | topk_ratio_neg=0.2 | eval_pool_ratio=0.05 | auto_threshold=True | threshold=0.5 | good_window_weight=0.25
Model parameters: 185,698
LR schedule: base_lr=0.0001 | warmup_steps=234/2340 (10.0%) | plateau_factor=0.5 | plateau_patience=2
Config saved to checkpoints/audio_confirm_run_07_topk0.05_gw0.25_lr0.0001/config.json

Epoch 1/30
----------------------------------------


                                                                             

Train loss: 1.2032 | Val loss: 0.7576 | Val Macro F1: 0.5225 | Val AUC: 0.6334 | Thr: 0.75 | LR: 3.33333e-05
New best model saved (val_f1=0.5225)
Best snapshot dir: checkpoints/audio_confirm_run_07_topk0.05_gw0.25_lr0.0001/best_models/20260228_022712_ep001_f1_0.5225_vl_0.7576

Epoch 2/30
----------------------------------------


                                                                             

Train loss: 0.9620 | Val loss: 0.6369 | Val Macro F1: 0.5813 | Val AUC: 0.6714 | Thr: 0.70 | LR: 6.66667e-05
New best model saved (val_f1=0.5813)
Best snapshot dir: checkpoints/audio_confirm_run_07_topk0.05_gw0.25_lr0.0001/best_models/20260228_022712_ep002_f1_0.5813_vl_0.6369

Epoch 3/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.7198 | Val loss: 0.5321 | Val Macro F1: 0.6463 | Val AUC: 0.6864 | Thr: 0.75 | LR: 0.0001
New best model saved (val_f1=0.6463)
Best snapshot dir: checkpoints/audio_confirm_run_07_topk0.05_gw0.25_lr0.0001/best_models/20260228_022712_ep003_f1_0.6463_vl_0.5321

Epoch 4/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5692 | Val loss: 0.4669 | Val Macro F1: 0.6701 | Val AUC: 0.6735 | Thr: 0.85 | LR: 0.0001
New best model saved (val_f1=0.6701)
Best snapshot dir: checkpoints/audio_confirm_run_07_topk0.05_gw0.25_lr0.0001/best_models/20260228_022712_ep004_f1_0.6701_vl_0.4669

Epoch 5/30
----------------------------------------


                                                                             

Train loss: 0.5429 | Val loss: 0.4328 | Val Macro F1: 0.6504 | Val AUC: 0.6954 | Thr: 0.70 | LR: 0.0001
No improvement for 1 epoch(s)

Epoch 6/30
----------------------------------------


                                                                             

Train loss: 0.5176 | Val loss: 0.4067 | Val Macro F1: 0.6642 | Val AUC: 0.6911 | Thr: 0.70 | LR: 0.0001
No improvement for 2 epoch(s)

Epoch 7/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4929 | Val loss: 0.4127 | Val Macro F1: 0.6320 | Val AUC: 0.6824 | Thr: 0.75 | LR: 0.0001
No improvement for 3 epoch(s)

Epoch 8/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4936 | Val loss: 0.6441 | Val Macro F1: 0.6390 | Val AUC: 0.6941 | Thr: 0.25 | LR: 0.0001
No improvement for 4 epoch(s)

Epoch 9/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4937 | Val loss: 0.3895 | Val Macro F1: 0.6486 | Val AUC: 0.6975 | Thr: 0.55 | LR: 0.0001
No improvement for 5 epoch(s)

Epoch 10/30
----------------------------------------


                                                                             

Train loss: 0.4897 | Val loss: 0.4385 | Val Macro F1: 0.6162 | Val AUC: 0.6464 | Thr: 0.60 | LR: 0.0001
No improvement for 6 epoch(s)

Epoch 11/30
----------------------------------------


                                                                             

Train loss: 0.4913 | Val loss: 0.4598 | Val Macro F1: 0.6257 | Val AUC: 0.6560 | Thr: 0.65 | LR: 0.0001
No improvement for 7 epoch(s)

Early stopping at epoch 11

Training complete. Best epoch: 4 (Best Val Macro F1=0.6701)

Best epoch: 4
Train losses: ['1.2032', '0.9620', '0.7198', '0.5692', '0.5429', '0.5176', '0.4929', '0.4936', '0.4937', '0.4897', '0.4913']
Val losses:   ['0.7576', '0.6369', '0.5321', '0.4669', '0.4328', '0.4067', '0.4127', '0.6441', '0.3895', '0.4385', '0.4598']


CompletedProcess(args=['python', '-m', 'audio.run_audio', '--config', 'configs/master_config_promoted.json'], returncode=0)

## 5. MIL Mini Grid Search (Script-Based)
Runs multiple trainings by writing temporary configs and calling `audio.run_audio`.
Change the grid lists before running.


In [33]:
import copy
import itertools
import time
from pathlib import Path

import torch

# ---- Edit this grid ----
grid_topk_pos = [0.03, 0.05]
grid_good_weight = [0.1, 0.25]
grid_lr = [1e-4, 3e-4]
# ------------------------

base_cfg = copy.deepcopy(cfg)

# Force MIL mode for this sweep
base_cfg["audio"]["training"]["task"] = "binary"
base_cfg["audio"]["training"].setdefault("sequence_mil", {})
base_cfg["audio"]["training"]["sequence_mil"]["enabled"] = True

# Optional: shorten runs for quick sweeps
# base_cfg["audio"]["training"]["num_epochs"] = 12
# base_cfg["audio"]["training"]["patience"] = 4

# Root the sweep under the *base* config's checkpoint_dir, which is where the
# auto-promote cell looks for `grid_search_mil/run_*`. The base file is read
# directly because `cfg` may currently be the promoted config, whose
# checkpoint_dir points at the confirmatory run instead.
with open(CONFIG_PATH) as f:
    grid_parent_dir = json.load(f)["audio"]["training"]["checkpoint_dir"]
run_root = Path(grid_parent_dir) / "grid_search_mil"
cfg_root = run_root / "configs"
run_root.mkdir(parents=True, exist_ok=True)
cfg_root.mkdir(parents=True, exist_ok=True)

results = []
combos = list(itertools.product(grid_topk_pos, grid_good_weight, grid_lr))
print(f"Total runs: {len(combos)}")

for i, (topk_pos, good_weight, lr) in enumerate(combos, 1):
    # The run name encodes the hyperparameters; the auto-promote cell parses it back.
    run_name = f"run_{i:02d}_topk{topk_pos}_gw{good_weight}_lr{lr}"
    checkpoint_dir = run_root / run_name

    run_cfg = copy.deepcopy(base_cfg)
    run_cfg["audio"]["training"]["lr"] = lr
    run_cfg["audio"]["training"]["checkpoint_dir"] = str(checkpoint_dir)
    run_cfg["audio"]["training"]["sequence_mil"]["topk_ratio_pos"] = float(topk_pos)
    # The evaluation pooling ratio is tied to the training top-k ratio.
    run_cfg["audio"]["training"]["sequence_mil"]["eval_pool_ratio"] = float(topk_pos)
    run_cfg["audio"]["training"]["sequence_mil"]["good_window_weight"] = float(good_weight)

    cfg_path = cfg_root / f"{run_name}.json"
    with open(cfg_path, "w") as f:
        json.dump(run_cfg, f, indent=2)

    cmd = [
        PYTHON_BIN, "-u", "-m", "audio.run_audio",
        "--config", str(cfg_path),
    ]

    print()
    print("=" * 100)
    print(f"[{i}/{len(combos)}] Starting run: {run_name}")
    t0 = time.time()
    try:
        run_live(cmd)
        status = "ok"
        err_msg = ""
    except subprocess.CalledProcessError as e:
        # A failed run is recorded and the sweep continues with the next combo.
        status = "failed"
        err_msg = str(e)

    best_ckpt = checkpoint_dir / "best_model.pt"
    best_epoch = None
    val_f1 = None
    val_loss = None

    if best_ckpt.exists():
        try:
            try:
                ckpt = torch.load(best_ckpt, map_location="cpu", weights_only=True)
            except TypeError:
                # Presumably for torch versions that do not accept `weights_only`.
                ckpt = torch.load(best_ckpt, map_location="cpu")
            best_epoch = ckpt.get("epoch")
            val_f1 = ckpt.get("val_f1")
            val_loss = ckpt.get("val_loss")
        except Exception as exc:
            # An unreadable checkpoint must not abort the remaining runs;
            # record the problem in this run's row instead.
            ckpt_err = f"could not read {best_ckpt}: {exc}"
            err_msg = f"{err_msg}; {ckpt_err}" if err_msg else ckpt_err
            print(f"[warn] {ckpt_err}")

    elapsed = time.time() - t0
    row = {
        "run": run_name,
        "status": status,
        "topk_ratio_pos": float(topk_pos),
        "good_window_weight": float(good_weight),
        "lr": float(lr),
        "best_epoch": best_epoch,
        "val_f1": val_f1,
        "val_loss": val_loss,
        "seconds": round(elapsed, 2),
        "checkpoint_dir": str(checkpoint_dir),
        "error": err_msg,
    }
    results.append(row)
    print(
        f"[{status}] {run_name} | epoch={best_epoch} | "
        f"val_f1={val_f1} | val_loss={val_loss} | t={elapsed:.1f}s"
    )

print()
print("Grid search completed.")
results


Total runs: 8

[1/8] Running: python -m audio.run_audio --config checkpoints/audio/grid_search_mil/configs/run_01_topk0.03_gw0.1_lr0.0001.json
Device: cuda

Total weld files: 775
File label distribution (binary): {'defect': 417, 'good_weld': 358}
Split strategy: stratified
Train welds: 620 | Val welds: 155
File split stats | train={ defect: 334 (53.9%), good_weld: 286 (46.1%) } | val={ defect: 83 (53.5%), good_weld: 72 (46.5%) }
Train files: 620 | Val files: 155
Classes (2): {'defect': 0, 'good_weld': 1}
DataLoader stats | train_batches=78 | val_batches=20 | batch_size=8
MIL mode enabled | topk_ratio_pos=0.03 | topk_ratio_neg=0.2 | eval_pool_ratio=0.03 | auto_threshold=True | threshold=0.5 | good_window_weight=0.1
Model parameters: 185,698
LR schedule: base_lr=0.0001 | warmup_steps=234/2340 (10.0%) | plateau_factor=0.5 | plateau_patience=2
Config saved to checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/config.json

Epoch 1/30
----------------------------------------


                                                                             

Train loss: 0.9101 | Val loss: 0.7308 | Val Macro F1: 0.4910 | Val AUC: 0.5211 | Thr: 0.70 | LR: 3.33333e-05
New best model saved (val_f1=0.4910)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/best_models/20260228_022812_ep001_f1_0.4910_vl_0.7308

Epoch 2/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.8267 | Val loss: 0.6883 | Val Macro F1: 0.6188 | Val AUC: 0.6908 | Thr: 0.65 | LR: 6.66667e-05
New best model saved (val_f1=0.6188)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/best_models/20260228_022812_ep002_f1_0.6188_vl_0.6883

Epoch 3/30
----------------------------------------


                                                                             

Train loss: 0.7364 | Val loss: 0.6397 | Val Macro F1: 0.6925 | Val AUC: 0.7234 | Thr: 0.55 | LR: 0.0001
New best model saved (val_f1=0.6925)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/best_models/20260228_022812_ep003_f1_0.6925_vl_0.6397

Epoch 4/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.6656 | Val loss: 0.6113 | Val Macro F1: 0.6710 | Val AUC: 0.7410 | Thr: 0.55 | LR: 0.0001
No improvement for 1 epoch(s)

Epoch 5/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.6446 | Val loss: 0.5747 | Val Macro F1: 0.7265 | Val AUC: 0.7825 | Thr: 0.50 | LR: 0.0001
New best model saved (val_f1=0.7265)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/best_models/20260228_022812_ep005_f1_0.7265_vl_0.5747

Epoch 6/30
----------------------------------------


                                                                             

Train loss: 0.6140 | Val loss: 0.5499 | Val Macro F1: 0.7277 | Val AUC: 0.7701 | Thr: 0.70 | LR: 0.0001
New best model saved (val_f1=0.7277)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/best_models/20260228_022812_ep006_f1_0.7277_vl_0.5499

Epoch 7/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5718 | Val loss: 0.5166 | Val Macro F1: 0.7380 | Val AUC: 0.7861 | Thr: 0.55 | LR: 0.0001
New best model saved (val_f1=0.7380)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/best_models/20260228_022812_ep007_f1_0.7380_vl_0.5166

Epoch 8/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5416 | Val loss: 0.7466 | Val Macro F1: 0.6926 | Val AUC: 0.7318 | Thr: 0.25 | LR: 0.0001
No improvement for 1 epoch(s)

Epoch 9/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4961 | Val loss: 0.7142 | Val Macro F1: 0.6987 | Val AUC: 0.7495 | Thr: 0.25 | LR: 0.0001
No improvement for 2 epoch(s)

Epoch 10/30
----------------------------------------


                                                                             

Train loss: 0.4839 | Val loss: 0.5335 | Val Macro F1: 0.7519 | Val AUC: 0.8215 | Thr: 0.40 | LR: 0.0001
New best model saved (val_f1=0.7519)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/best_models/20260228_022812_ep010_f1_0.7519_vl_0.5335

Epoch 11/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4115 | Val loss: 0.4318 | Val Macro F1: 0.7406 | Val AUC: 0.8261 | Thr: 0.45 | LR: 5e-05
No improvement for 1 epoch(s)

Epoch 12/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.3911 | Val loss: 0.3477 | Val Macro F1: 0.8581 | Val AUC: 0.9108 | Thr: 0.70 | LR: 5e-05
New best model saved (val_f1=0.8581)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/best_models/20260228_022812_ep012_f1_0.8581_vl_0.3477

Epoch 13/30
----------------------------------------


                                                                             

Train loss: 0.3784 | Val loss: 0.4333 | Val Macro F1: 0.7673 | Val AUC: 0.8472 | Thr: 0.60 | LR: 5e-05
No improvement for 1 epoch(s)

Epoch 14/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.3783 | Val loss: 0.3308 | Val Macro F1: 0.8828 | Val AUC: 0.9362 | Thr: 0.55 | LR: 5e-05
New best model saved (val_f1=0.8828)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/best_models/20260228_022812_ep014_f1_0.8828_vl_0.3308

Epoch 15/30
----------------------------------------


                                                                             

Train loss: 0.3667 | Val loss: 0.3319 | Val Macro F1: 0.8694 | Val AUC: 0.9396 | Thr: 0.50 | LR: 5e-05
No improvement for 1 epoch(s)

Epoch 16/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.3876 | Val loss: 0.5430 | Val Macro F1: 0.8437 | Val AUC: 0.9045 | Thr: 0.85 | LR: 5e-05
No improvement for 2 epoch(s)

Epoch 17/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.3465 | Val loss: 0.4298 | Val Macro F1: 0.8387 | Val AUC: 0.9249 | Thr: 0.45 | LR: 5e-05
No improvement for 3 epoch(s)

Epoch 18/30
----------------------------------------


                                                                             

Train loss: 0.2881 | Val loss: 0.2804 | Val Macro F1: 0.8773 | Val AUC: 0.9505 | Thr: 0.80 | LR: 2.5e-05
No improvement for 4 epoch(s)

Epoch 19/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.2658 | Val loss: 0.3698 | Val Macro F1: 0.7999 | Val AUC: 0.8845 | Thr: 0.80 | LR: 2.5e-05
No improvement for 5 epoch(s)

Epoch 20/30
----------------------------------------


                                                                             

Train loss: 0.2340 | Val loss: 0.3279 | Val Macro F1: 0.8579 | Val AUC: 0.9326 | Thr: 0.85 | LR: 2.5e-05
No improvement for 6 epoch(s)

Epoch 21/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.2490 | Val loss: 0.3165 | Val Macro F1: 0.9032 | Val AUC: 0.9640 | Thr: 0.40 | LR: 2.5e-05
New best model saved (val_f1=0.9032)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/best_models/20260228_022812_ep021_f1_0.9032_vl_0.3165

Epoch 22/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.2018 | Val loss: 0.2658 | Val Macro F1: 0.9154 | Val AUC: 0.9690 | Thr: 0.30 | LR: 1.25e-05
New best model saved (val_f1=0.9154)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/best_models/20260228_022812_ep022_f1_0.9154_vl_0.2658

Epoch 23/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.1728 | Val loss: 0.1801 | Val Macro F1: 0.9288 | Val AUC: 0.9843 | Thr: 0.65 | LR: 1.25e-05
New best model saved (val_f1=0.9288)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/best_models/20260228_022812_ep023_f1_0.9288_vl_0.1801

Epoch 24/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.1677 | Val loss: 0.2268 | Val Macro F1: 0.9287 | Val AUC: 0.9739 | Thr: 0.80 | LR: 1.25e-05
No improvement for 1 epoch(s)

Epoch 25/30
----------------------------------------


                                                                             

Train loss: 0.1635 | Val loss: 0.1805 | Val Macro F1: 0.9290 | Val AUC: 0.9804 | Thr: 0.75 | LR: 1.25e-05
New best model saved (val_f1=0.9290)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/best_models/20260228_022812_ep025_f1_0.9290_vl_0.1805

Epoch 26/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.1570 | Val loss: 0.1863 | Val Macro F1: 0.9154 | Val AUC: 0.9717 | Thr: 0.60 | LR: 1.25e-05
No improvement for 1 epoch(s)

Epoch 27/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.1384 | Val loss: 0.1576 | Val Macro F1: 0.9419 | Val AUC: 0.9900 | Thr: 0.60 | LR: 6.25e-06
New best model saved (val_f1=0.9419)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/best_models/20260228_022812_ep027_f1_0.9419_vl_0.1576

Epoch 28/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.1304 | Val loss: 0.1600 | Val Macro F1: 0.9352 | Val AUC: 0.9838 | Thr: 0.45 | LR: 6.25e-06
No improvement for 1 epoch(s)

Epoch 29/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.1367 | Val loss: 0.1807 | Val Macro F1: 0.9612 | Val AUC: 0.9876 | Thr: 0.45 | LR: 6.25e-06
New best model saved (val_f1=0.9612)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_01_topk0.03_gw0.1_lr0.0001/best_models/20260228_022812_ep029_f1_0.9612_vl_0.1807

Epoch 30/30
----------------------------------------


                                                                             

Train loss: 0.1240 | Val loss: 0.1753 | Val Macro F1: 0.9353 | Val AUC: 0.9818 | Thr: 0.50 | LR: 6.25e-06
No improvement for 1 epoch(s)

Training complete. Best epoch: 29 (Best Val Macro F1=0.9612)

Best epoch: 29
Train losses: ['0.9101', '0.8267', '0.7364', '0.6656', '0.6446', '0.6140', '0.5718', '0.5416', '0.4961', '0.4839', '0.4115', '0.3911', '0.3784', '0.3783', '0.3667', '0.3876', '0.3465', '0.2881', '0.2658', '0.2340', '0.2490', '0.2018', '0.1728', '0.1677', '0.1635', '0.1570', '0.1384', '0.1304', '0.1367', '0.1240']
Val losses:   ['0.7308', '0.6883', '0.6397', '0.6113', '0.5747', '0.5499', '0.5166', '0.7466', '0.7142', '0.5335', '0.4318', '0.3477', '0.4333', '0.3308', '0.3319', '0.5430', '0.4298', '0.2804', '0.3698', '0.3279', '0.3165', '0.2658', '0.1801', '0.2268', '0.1805', '0.1863', '0.1576', '0.1600', '0.1807', '0.1753']
Completed run_01_topk0.03_gw0.1_lr0.0001 | status=ok | best_epoch=29 | val_f1=0.9611593718676912 | val_loss=0.1807474959281183 | minutes=2.58

[2/8] Running

                                                                             

Train loss: 0.7371 | Val loss: 0.6626 | Val Macro F1: 0.6576 | Val AUC: 0.7058 | Thr: 0.55 | LR: 0.0001
New best model saved (val_f1=0.6576)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_02_topk0.03_gw0.1_lr0.0003/best_models/20260228_023047_ep001_f1_0.6576_vl_0.6626

Epoch 2/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.6820 | Val loss: 0.6467 | Val Macro F1: 0.6996 | Val AUC: 0.7738 | Thr: 0.60 | LR: 0.0002
New best model saved (val_f1=0.6996)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_02_topk0.03_gw0.1_lr0.0003/best_models/20260228_023047_ep002_f1_0.6996_vl_0.6467

Epoch 3/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.6271 | Val loss: 0.5786 | Val Macro F1: 0.7127 | Val AUC: 0.7970 | Thr: 0.45 | LR: 0.0003
New best model saved (val_f1=0.7127)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_02_topk0.03_gw0.1_lr0.0003/best_models/20260228_023047_ep003_f1_0.7127_vl_0.5786

Epoch 4/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5833 | Val loss: 0.5488 | Val Macro F1: 0.7418 | Val AUC: 0.8064 | Thr: 0.60 | LR: 0.0003
New best model saved (val_f1=0.7418)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_02_topk0.03_gw0.1_lr0.0003/best_models/20260228_023047_ep004_f1_0.7418_vl_0.5488

Epoch 5/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5952 | Val loss: 0.5855 | Val Macro F1: 0.6839 | Val AUC: 0.7306 | Thr: 0.65 | LR: 0.0003
No improvement for 1 epoch(s)

Epoch 6/30
----------------------------------------


                                                                             

Train loss: 0.5658 | Val loss: 0.9014 | Val Macro F1: 0.6306 | Val AUC: 0.6949 | Thr: 0.90 | LR: 0.0003
No improvement for 2 epoch(s)

Epoch 7/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                      

Train loss: 0.5711 | Val loss: 3.9145 | Val Macro F1: 0.3487 | Val AUC: 0.6529 | Thr: 0.05 | LR: 0.0003
No improvement for 3 epoch(s)

Epoch 8/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5757 | Val loss: 0.5105 | Val Macro F1: 0.6102 | Val AUC: 0.6083 | Thr: 0.65 | LR: 0.00015
No improvement for 4 epoch(s)

Epoch 9/30
----------------------------------------


                                                                             

Train loss: 0.5210 | Val loss: 0.6753 | Val Macro F1: 0.6474 | Val AUC: 0.6248 | Thr: 0.30 | LR: 0.00015
No improvement for 5 epoch(s)

Epoch 10/30
----------------------------------------


                                                                            

Train loss: 0.5149 | Val loss: 1.5612 | Val Macro F1: 0.3725 | Val AUC: 0.6379 | Thr: 0.95 | LR: 0.00015
No improvement for 6 epoch(s)

Epoch 11/30
----------------------------------------


                                                                             

Train loss: 0.5350 | Val loss: 0.6942 | Val Macro F1: 0.6613 | Val AUC: 0.6931 | Thr: 0.35 | LR: 0.00015
No improvement for 7 epoch(s)

Early stopping at epoch 11

Training complete. Best epoch: 4 (Best Val Macro F1=0.7418)

Best epoch: 4
Train losses: ['0.7371', '0.6820', '0.6271', '0.5833', '0.5952', '0.5658', '0.5711', '0.5757', '0.5210', '0.5149', '0.5350']
Val losses:   ['0.6626', '0.6467', '0.5786', '0.5488', '0.5855', '0.9014', '3.9145', '0.5105', '0.6753', '1.5612', '0.6942']
Completed run_02_topk0.03_gw0.1_lr0.0003 | status=ok | best_epoch=4 | val_f1=0.7418387741505663 | val_loss=0.548829700196943 | minutes=1.03

[3/8] Running: python -m audio.run_audio --config checkpoints/audio/grid_search_mil/configs/run_03_topk0.03_gw0.25_lr0.0001.json
Device: cuda

Total weld files: 775
File label distribution (binary): {'defect': 417, 'good_weld': 358}
Split strategy: stratified
Train welds: 620 | Val welds: 155
File split stats | train={ defect: 334 (53.9%), good_weld: 286 (46.1%) } | v

Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.8528 | Val loss: 0.6713 | Val Macro F1: 0.5042 | Val AUC: 0.4930 | Thr: 0.60 | LR: 3.33333e-05
New best model saved (val_f1=0.5042)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_03_topk0.03_gw0.25_lr0.0001/best_models/20260228_023149_ep001_f1_0.5042_vl_0.6713

Epoch 2/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.7741 | Val loss: 0.6553 | Val Macro F1: 0.5730 | Val AUC: 0.6103 | Thr: 0.55 | LR: 6.66667e-05
New best model saved (val_f1=0.5730)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_03_topk0.03_gw0.25_lr0.0001/best_models/20260228_023149_ep002_f1_0.5730_vl_0.6553

Epoch 3/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.7106 | Val loss: 0.6062 | Val Macro F1: 0.5972 | Val AUC: 0.6386 | Thr: 0.55 | LR: 0.0001
New best model saved (val_f1=0.5972)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_03_topk0.03_gw0.25_lr0.0001/best_models/20260228_023149_ep003_f1_0.5972_vl_0.6062

Epoch 4/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.6183 | Val loss: 0.5382 | Val Macro F1: 0.6057 | Val AUC: 0.6444 | Thr: 0.65 | LR: 0.0001
New best model saved (val_f1=0.6057)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_03_topk0.03_gw0.25_lr0.0001/best_models/20260228_023149_ep004_f1_0.6057_vl_0.5382

Epoch 5/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5778 | Val loss: 0.5264 | Val Macro F1: 0.5852 | Val AUC: 0.6309 | Thr: 0.60 | LR: 0.0001
No improvement for 1 epoch(s)

Epoch 6/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5767 | Val loss: 0.5034 | Val Macro F1: 0.6082 | Val AUC: 0.6444 | Thr: 0.65 | LR: 0.0001
New best model saved (val_f1=0.6082)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_03_topk0.03_gw0.25_lr0.0001/best_models/20260228_023149_ep006_f1_0.6082_vl_0.5034

Epoch 7/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5555 | Val loss: 0.4751 | Val Macro F1: 0.6644 | Val AUC: 0.6760 | Thr: 0.80 | LR: 0.0001
New best model saved (val_f1=0.6644)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_03_topk0.03_gw0.25_lr0.0001/best_models/20260228_023149_ep007_f1_0.6644_vl_0.4751

Epoch 8/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4922 | Val loss: 0.4514 | Val Macro F1: 0.6213 | Val AUC: 0.6675 | Thr: 0.55 | LR: 0.0001
No improvement for 1 epoch(s)

Epoch 9/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4823 | Val loss: 0.4193 | Val Macro F1: 0.6418 | Val AUC: 0.6871 | Thr: 0.70 | LR: 0.0001
No improvement for 2 epoch(s)

Epoch 10/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4765 | Val loss: 0.4152 | Val Macro F1: 0.6195 | Val AUC: 0.6739 | Thr: 0.70 | LR: 0.0001
No improvement for 3 epoch(s)

Epoch 11/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4775 | Val loss: 0.6286 | Val Macro F1: 0.6805 | Val AUC: 0.7110 | Thr: 0.30 | LR: 0.0001
New best model saved (val_f1=0.6805)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_03_topk0.03_gw0.25_lr0.0001/best_models/20260228_023149_ep011_f1_0.6805_vl_0.6286

Epoch 12/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5193 | Val loss: 0.4283 | Val Macro F1: 0.6413 | Val AUC: 0.6682 | Thr: 0.80 | LR: 0.0001
No improvement for 1 epoch(s)

Epoch 13/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4845 | Val loss: 0.4579 | Val Macro F1: 0.6317 | Val AUC: 0.6720 | Thr: 0.80 | LR: 0.0001
No improvement for 2 epoch(s)

Epoch 14/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4401 | Val loss: 0.4659 | Val Macro F1: 0.6486 | Val AUC: 0.7055 | Thr: 0.70 | LR: 5e-05
No improvement for 3 epoch(s)

Epoch 15/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4433 | Val loss: 0.3915 | Val Macro F1: 0.6629 | Val AUC: 0.7043 | Thr: 0.50 | LR: 5e-05
No improvement for 4 epoch(s)

Epoch 16/30
----------------------------------------


                                                                             

Train loss: 0.4213 | Val loss: 0.3189 | Val Macro F1: 0.6893 | Val AUC: 0.7423 | Thr: 0.70 | LR: 5e-05
New best model saved (val_f1=0.6893)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_03_topk0.03_gw0.25_lr0.0001/best_models/20260228_023149_ep016_f1_0.6893_vl_0.3189

Epoch 17/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4285 | Val loss: 0.3582 | Val Macro F1: 0.6628 | Val AUC: 0.7078 | Thr: 0.65 | LR: 5e-05
No improvement for 1 epoch(s)

Epoch 18/30
----------------------------------------


                                                                             

Train loss: 0.4319 | Val loss: 0.3671 | Val Macro F1: 0.6628 | Val AUC: 0.7088 | Thr: 0.75 | LR: 5e-05
No improvement for 2 epoch(s)

Epoch 19/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4700 | Val loss: 0.5182 | Val Macro F1: 0.6272 | Val AUC: 0.6911 | Thr: 0.40 | LR: 5e-05
No improvement for 3 epoch(s)

Epoch 20/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4079 | Val loss: 0.3685 | Val Macro F1: 0.6621 | Val AUC: 0.7308 | Thr: 0.55 | LR: 2.5e-05
No improvement for 4 epoch(s)

Epoch 21/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.3704 | Val loss: 0.3290 | Val Macro F1: 0.7350 | Val AUC: 0.7721 | Thr: 0.55 | LR: 2.5e-05
New best model saved (val_f1=0.7350)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_03_topk0.03_gw0.25_lr0.0001/best_models/20260228_023149_ep021_f1_0.7350_vl_0.3290

Epoch 22/30
----------------------------------------


                                                                             

Train loss: 0.3450 | Val loss: 0.3472 | Val Macro F1: 0.7507 | Val AUC: 0.7679 | Thr: 0.45 | LR: 2.5e-05
New best model saved (val_f1=0.7507)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_03_topk0.03_gw0.25_lr0.0001/best_models/20260228_023149_ep022_f1_0.7507_vl_0.3472

Epoch 23/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.3084 | Val loss: 0.3135 | Val Macro F1: 0.7613 | Val AUC: 0.8128 | Thr: 0.40 | LR: 1.25e-05
New best model saved (val_f1=0.7613)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_03_topk0.03_gw0.25_lr0.0001/best_models/20260228_023149_ep023_f1_0.7613_vl_0.3135

Epoch 24/30
----------------------------------------


                                                                             

Train loss: 0.3052 | Val loss: 0.3143 | Val Macro F1: 0.7443 | Val AUC: 0.8203 | Thr: 0.40 | LR: 1.25e-05
No improvement for 1 epoch(s)

Epoch 25/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.2913 | Val loss: 0.2694 | Val Macro F1: 0.7942 | Val AUC: 0.8509 | Thr: 0.60 | LR: 1.25e-05
New best model saved (val_f1=0.7942)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_03_topk0.03_gw0.25_lr0.0001/best_models/20260228_023149_ep025_f1_0.7942_vl_0.2694

Epoch 26/30
----------------------------------------


                                                                             

Train loss: 0.2872 | Val loss: 0.2547 | Val Macro F1: 0.8103 | Val AUC: 0.8578 | Thr: 0.75 | LR: 1.25e-05
New best model saved (val_f1=0.8103)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_03_topk0.03_gw0.25_lr0.0001/best_models/20260228_023149_ep026_f1_0.8103_vl_0.2547

Epoch 27/30
----------------------------------------


                                                                             

Train loss: 0.2725 | Val loss: 0.2941 | Val Macro F1: 0.8891 | Val AUC: 0.9230 | Thr: 0.75 | LR: 1.25e-05
New best model saved (val_f1=0.8891)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_03_topk0.03_gw0.25_lr0.0001/best_models/20260228_023149_ep027_f1_0.8891_vl_0.2941

Epoch 28/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.2681 | Val loss: 0.2495 | Val Macro F1: 0.8118 | Val AUC: 0.8747 | Thr: 0.75 | LR: 1.25e-05
No improvement for 1 epoch(s)

Epoch 29/30
----------------------------------------


                                                                             

Train loss: 0.2717 | Val loss: 0.2427 | Val Macro F1: 0.7984 | Val AUC: 0.8750 | Thr: 0.60 | LR: 1.25e-05
No improvement for 2 epoch(s)

Epoch 30/30
----------------------------------------


                                                                             

Train loss: 0.2558 | Val loss: 0.2599 | Val Macro F1: 0.9090 | Val AUC: 0.9511 | Thr: 0.65 | LR: 1.25e-05
New best model saved (val_f1=0.9090)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_03_topk0.03_gw0.25_lr0.0001/best_models/20260228_023149_ep030_f1_0.9090_vl_0.2599

Training complete. Best epoch: 30 (Best Val Macro F1=0.9090)

Best epoch: 30
Train losses: ['0.8528', '0.7741', '0.7106', '0.6183', '0.5778', '0.5767', '0.5555', '0.4922', '0.4823', '0.4765', '0.4775', '0.5193', '0.4845', '0.4401', '0.4433', '0.4213', '0.4285', '0.4319', '0.4700', '0.4079', '0.3704', '0.3450', '0.3084', '0.3052', '0.2913', '0.2872', '0.2725', '0.2681', '0.2717', '0.2558']
Val losses:   ['0.6713', '0.6553', '0.6062', '0.5382', '0.5264', '0.5034', '0.4751', '0.4514', '0.4193', '0.4152', '0.6286', '0.4283', '0.4579', '0.4659', '0.3915', '0.3189', '0.3582', '0.3671', '0.5182', '0.3685', '0.3290', '0.3472', '0.3135', '0.3143', '0.2694', '0.2547', '0.2941', '0.2495', '0.2427', '0.2599']
Completed 

                                                                             

Train loss: 1.0731 | Val loss: 0.6677 | Val Macro F1: 0.5453 | Val AUC: 0.7336 | Thr: 0.65 | LR: 0.0001
New best model saved (val_f1=0.5453)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_04_topk0.03_gw0.25_lr0.0003/best_models/20260228_023428_ep001_f1_0.5453_vl_0.6677

Epoch 2/30
----------------------------------------


                                                                             

Train loss: 0.7097 | Val loss: 0.6408 | Val Macro F1: 0.5972 | Val AUC: 0.6698 | Thr: 0.80 | LR: 0.0002
New best model saved (val_f1=0.5972)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_04_topk0.03_gw0.25_lr0.0003/best_models/20260228_023428_ep002_f1_0.5972_vl_0.6408

Epoch 3/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5456 | Val loss: 0.5808 | Val Macro F1: 0.6774 | Val AUC: 0.7287 | Thr: 0.80 | LR: 0.0003
New best model saved (val_f1=0.6774)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_04_topk0.03_gw0.25_lr0.0003/best_models/20260228_023428_ep003_f1_0.6774_vl_0.5808

Epoch 4/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5136 | Val loss: 0.7850 | Val Macro F1: 0.6580 | Val AUC: 0.6996 | Thr: 0.95 | LR: 0.0003
No improvement for 1 epoch(s)

Epoch 5/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5380 | Val loss: 0.7218 | Val Macro F1: 0.5850 | Val AUC: 0.6573 | Thr: 0.90 | LR: 0.0003
No improvement for 2 epoch(s)

Epoch 6/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.6444 | Val loss: 0.5582 | Val Macro F1: 0.6199 | Val AUC: 0.6186 | Thr: 0.75 | LR: 0.0003
No improvement for 3 epoch(s)

Epoch 7/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5696 | Val loss: 0.4217 | Val Macro F1: 0.5813 | Val AUC: 0.6196 | Thr: 0.65 | LR: 0.0003
No improvement for 4 epoch(s)

Epoch 8/30
----------------------------------------


                                                                             

Train loss: 0.5238 | Val loss: 0.4079 | Val Macro F1: 0.6254 | Val AUC: 0.6834 | Thr: 0.75 | LR: 0.0003
No improvement for 5 epoch(s)

Epoch 9/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5436 | Val loss: 0.4998 | Val Macro F1: 0.6294 | Val AUC: 0.6772 | Thr: 0.65 | LR: 0.0003
No improvement for 6 epoch(s)

Epoch 10/30
----------------------------------------


                                                                             

Train loss: 0.5767 | Val loss: 0.9129 | Val Macro F1: 0.5504 | Val AUC: 0.6575 | Thr: 0.90 | LR: 0.0003
No improvement for 7 epoch(s)

Early stopping at epoch 10

Training complete. Best epoch: 3 (Best Val Macro F1=0.6774)

Best epoch: 3
Train losses: ['1.0731', '0.7097', '0.5456', '0.5136', '0.5380', '0.6444', '0.5696', '0.5238', '0.5436', '0.5767']
Val losses:   ['0.6677', '0.6408', '0.5808', '0.7850', '0.7218', '0.5582', '0.4217', '0.4079', '0.4998', '0.9129']
Completed run_04_topk0.03_gw0.25_lr0.0003 | status=ok | best_epoch=3 | val_f1=0.6774059274059274 | val_loss=0.5808358306846311 | minutes=0.94

[5/8] Running: python -m audio.run_audio --config checkpoints/audio/grid_search_mil/configs/run_05_topk0.05_gw0.1_lr0.0001.json
Device: cuda

Total weld files: 775
File label distribution (binary): {'defect': 417, 'good_weld': 358}
Split strategy: stratified
Train welds: 620 | Val welds: 155
File split stats | train={ defect: 334 (53.9%), good_weld: 286 (46.1%) } | val={ defect: 83 (53.

Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.7068 | Val loss: 0.6846 | Val Macro F1: 0.4122 | Val AUC: 0.4543 | Thr: 0.50 | LR: 3.33333e-05
New best model saved (val_f1=0.4122)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_05_topk0.05_gw0.1_lr0.0001/best_models/20260228_023525_ep001_f1_0.4122_vl_0.6846

Epoch 2/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.6920 | Val loss: 0.6669 | Val Macro F1: 0.5728 | Val AUC: 0.6504 | Thr: 0.50 | LR: 6.66667e-05
New best model saved (val_f1=0.5728)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_05_topk0.05_gw0.1_lr0.0001/best_models/20260228_023525_ep002_f1_0.5728_vl_0.6669

Epoch 3/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.6755 | Val loss: 0.6564 | Val Macro F1: 0.5620 | Val AUC: 0.6240 | Thr: 0.55 | LR: 0.0001
No improvement for 1 epoch(s)

Epoch 4/30
----------------------------------------


                                                                             

Train loss: 0.6470 | Val loss: 0.6299 | Val Macro F1: 0.6032 | Val AUC: 0.6178 | Thr: 0.55 | LR: 0.0001
New best model saved (val_f1=0.6032)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_05_topk0.05_gw0.1_lr0.0001/best_models/20260228_023525_ep004_f1_0.6032_vl_0.6299

Epoch 5/30
----------------------------------------


                                                                             

Train loss: 0.6298 | Val loss: 0.6117 | Val Macro F1: 0.6193 | Val AUC: 0.6464 | Thr: 0.60 | LR: 0.0001
New best model saved (val_f1=0.6193)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_05_topk0.05_gw0.1_lr0.0001/best_models/20260228_023525_ep005_f1_0.6193_vl_0.6117

Epoch 6/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5959 | Val loss: 0.5954 | Val Macro F1: 0.6423 | Val AUC: 0.6881 | Thr: 0.65 | LR: 0.0001
New best model saved (val_f1=0.6423)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_05_topk0.05_gw0.1_lr0.0001/best_models/20260228_023525_ep006_f1_0.6423_vl_0.5954

Epoch 7/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5813 | Val loss: 0.6023 | Val Macro F1: 0.6553 | Val AUC: 0.6849 | Thr: 0.70 | LR: 0.0001
New best model saved (val_f1=0.6553)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_05_topk0.05_gw0.1_lr0.0001/best_models/20260228_023525_ep007_f1_0.6553_vl_0.6023

Epoch 8/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5656 | Val loss: 0.7994 | Val Macro F1: 0.6247 | Val AUC: 0.6777 | Thr: 0.90 | LR: 0.0001
No improvement for 1 epoch(s)

Epoch 9/30
----------------------------------------


                                                                             

Train loss: 0.5523 | Val loss: 0.4830 | Val Macro F1: 0.6100 | Val AUC: 0.6764 | Thr: 0.65 | LR: 0.0001
No improvement for 2 epoch(s)

Epoch 10/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5248 | Val loss: 0.4937 | Val Macro F1: 0.6162 | Val AUC: 0.6705 | Thr: 0.65 | LR: 0.0001
No improvement for 3 epoch(s)

Epoch 11/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4738 | Val loss: 0.7666 | Val Macro F1: 0.5546 | Val AUC: 0.5910 | Thr: 0.30 | LR: 0.0001
No improvement for 4 epoch(s)

Epoch 12/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4767 | Val loss: 0.5208 | Val Macro F1: 0.6873 | Val AUC: 0.7311 | Thr: 0.50 | LR: 0.0001
New best model saved (val_f1=0.6873)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_05_topk0.05_gw0.1_lr0.0001/best_models/20260228_023525_ep012_f1_0.6873_vl_0.5208

Epoch 13/30
----------------------------------------


                                                                             

Train loss: 0.4520 | Val loss: 0.3990 | Val Macro F1: 0.6709 | Val AUC: 0.7279 | Thr: 0.65 | LR: 5e-05
No improvement for 1 epoch(s)

Epoch 14/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4462 | Val loss: 0.4124 | Val Macro F1: 0.6553 | Val AUC: 0.6941 | Thr: 0.80 | LR: 5e-05
No improvement for 2 epoch(s)

Epoch 15/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4510 | Val loss: 0.3763 | Val Macro F1: 0.6543 | Val AUC: 0.6968 | Thr: 0.65 | LR: 5e-05
No improvement for 3 epoch(s)

Epoch 16/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4253 | Val loss: 0.3647 | Val Macro F1: 0.6320 | Val AUC: 0.6921 | Thr: 0.65 | LR: 5e-05
No improvement for 4 epoch(s)

Epoch 17/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4242 | Val loss: 0.3769 | Val Macro F1: 0.6696 | Val AUC: 0.7189 | Thr: 0.80 | LR: 5e-05
No improvement for 5 epoch(s)

Epoch 18/30
----------------------------------------


                                                                             

Train loss: 0.4214 | Val loss: 0.4249 | Val Macro F1: 0.6474 | Val AUC: 0.7028 | Thr: 0.85 | LR: 5e-05
No improvement for 6 epoch(s)

Epoch 19/30
----------------------------------------


                                                                             

Train loss: 0.4226 | Val loss: 0.5987 | Val Macro F1: 0.6867 | Val AUC: 0.7108 | Thr: 0.25 | LR: 5e-05
No improvement for 7 epoch(s)

Early stopping at epoch 19

Training complete. Best epoch: 12 (Best Val Macro F1=0.6873)

Best epoch: 12
Train losses: ['0.7068', '0.6920', '0.6755', '0.6470', '0.6298', '0.5959', '0.5813', '0.5656', '0.5523', '0.5248', '0.4738', '0.4767', '0.4520', '0.4462', '0.4510', '0.4253', '0.4242', '0.4214', '0.4226']
Val losses:   ['0.6846', '0.6669', '0.6564', '0.6299', '0.6117', '0.5954', '0.6023', '0.7994', '0.4830', '0.4937', '0.7666', '0.5208', '0.3990', '0.4124', '0.3763', '0.3647', '0.3769', '0.4249', '0.5987']
Completed run_05_topk0.05_gw0.1_lr0.0001 | status=ok | best_epoch=12 | val_f1=0.687280701754386 | val_loss=0.5208077324974921 | minutes=1.70

[6/8] Running: python -m audio.run_audio --config checkpoints/audio/grid_search_mil/configs/run_06_topk0.05_gw0.1_lr0.0003.json
Device: cuda

Total weld files: 775
File label distribution (binary): {'defect': 

                                                                             

Train loss: 0.8069 | Val loss: 0.8379 | Val Macro F1: 0.3487 | Val AUC: 0.3059 | Thr: 0.05 | LR: 0.0001
New best model saved (val_f1=0.3487)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_06_topk0.05_gw0.1_lr0.0003/best_models/20260228_023707_ep001_f1_0.3487_vl_0.8379

Epoch 2/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.6749 | Val loss: 0.6503 | Val Macro F1: 0.5589 | Val AUC: 0.5929 | Thr: 0.50 | LR: 0.0002
New best model saved (val_f1=0.5589)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_06_topk0.05_gw0.1_lr0.0003/best_models/20260228_023707_ep002_f1_0.5589_vl_0.6503

Epoch 3/30
----------------------------------------


                                                                             

Train loss: 0.5622 | Val loss: 0.5629 | Val Macro F1: 0.6306 | Val AUC: 0.6909 | Thr: 0.80 | LR: 0.0003
New best model saved (val_f1=0.6306)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_06_topk0.05_gw0.1_lr0.0003/best_models/20260228_023707_ep003_f1_0.6306_vl_0.5629

Epoch 4/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4973 | Val loss: 0.4958 | Val Macro F1: 0.6645 | Val AUC: 0.6844 | Thr: 0.75 | LR: 0.0003
New best model saved (val_f1=0.6645)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_06_topk0.05_gw0.1_lr0.0003/best_models/20260228_023707_ep004_f1_0.6645_vl_0.4958

Epoch 5/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5173 | Val loss: 0.5371 | Val Macro F1: 0.6062 | Val AUC: 0.6563 | Thr: 0.80 | LR: 0.0003
No improvement for 1 epoch(s)

Epoch 6/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5718 | Val loss: 0.5356 | Val Macro F1: 0.6054 | Val AUC: 0.6812 | Thr: 0.45 | LR: 0.0003
No improvement for 2 epoch(s)

Epoch 7/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5700 | Val loss: 0.5449 | Val Macro F1: 0.6523 | Val AUC: 0.6856 | Thr: 0.40 | LR: 0.0003
No improvement for 3 epoch(s)

Epoch 8/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5235 | Val loss: 0.5016 | Val Macro F1: 0.6065 | Val AUC: 0.6670 | Thr: 0.60 | LR: 0.00015
No improvement for 4 epoch(s)

Epoch 9/30
----------------------------------------


                                                                            

Train loss: 0.5666 | Val loss: 1.2500 | Val Macro F1: 0.5208 | Val AUC: 0.6483 | Thr: 0.95 | LR: 0.00015
No improvement for 5 epoch(s)

Epoch 10/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5526 | Val loss: 0.5269 | Val Macro F1: 0.6086 | Val AUC: 0.6539 | Thr: 0.60 | LR: 0.00015
No improvement for 6 epoch(s)

Epoch 11/30
----------------------------------------


                                                                             

Train loss: 0.5154 | Val loss: 0.6460 | Val Macro F1: 0.6122 | Val AUC: 0.6829 | Thr: 0.35 | LR: 7.5e-05
No improvement for 7 epoch(s)

Early stopping at epoch 11

Training complete. Best epoch: 4 (Best Val Macro F1=0.6645)

Best epoch: 4
Train losses: ['0.8069', '0.6749', '0.5622', '0.4973', '0.5173', '0.5718', '0.5700', '0.5235', '0.5666', '0.5526', '0.5154']
Val losses:   ['0.8379', '0.6503', '0.5629', '0.4958', '0.5371', '0.5356', '0.5449', '0.5016', '1.2500', '0.5269', '0.6460']
Completed run_06_topk0.05_gw0.1_lr0.0003 | status=ok | best_epoch=4 | val_f1=0.6645021645021645 | val_loss=0.4958052360242413 | minutes=1.02

[7/8] Running: python -m audio.run_audio --config checkpoints/audio/grid_search_mil/configs/run_07_topk0.05_gw0.25_lr0.0001.json
Device: cuda

Total weld files: 775
File label distribution (binary): {'defect': 417, 'good_weld': 358}
Split strategy: stratified
Train welds: 620 | Val welds: 155
File split stats | train={ defect: 334 (53.9%), good_weld: 286 (46.1%) } | 

Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.7527 | Val loss: 0.7027 | Val Macro F1: 0.4982 | Val AUC: 0.5295 | Thr: 0.40 | LR: 3.33333e-05
New best model saved (val_f1=0.4982)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_07_topk0.05_gw0.25_lr0.0001/best_models/20260228_023808_ep001_f1_0.4982_vl_0.7027

Epoch 2/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.7432 | Val loss: 0.6759 | Val Macro F1: 0.6613 | Val AUC: 0.6914 | Thr: 0.45 | LR: 6.66667e-05
New best model saved (val_f1=0.6613)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_07_topk0.05_gw0.25_lr0.0001/best_models/20260228_023808_ep002_f1_0.6613_vl_0.6759

Epoch 3/30
----------------------------------------


                                                                             

Train loss: 0.7150 | Val loss: 0.6232 | Val Macro F1: 0.7158 | Val AUC: 0.7826 | Thr: 0.45 | LR: 0.0001
New best model saved (val_f1=0.7158)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_07_topk0.05_gw0.25_lr0.0001/best_models/20260228_023808_ep003_f1_0.7158_vl_0.6232

Epoch 4/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.6511 | Val loss: 0.5504 | Val Macro F1: 0.7742 | Val AUC: 0.8290 | Thr: 0.50 | LR: 0.0001
New best model saved (val_f1=0.7742)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_07_topk0.05_gw0.25_lr0.0001/best_models/20260228_023808_ep004_f1_0.7742_vl_0.5504

Epoch 5/30
----------------------------------------


                                                                             

Train loss: 0.6161 | Val loss: 0.5537 | Val Macro F1: 0.7482 | Val AUC: 0.8226 | Thr: 0.50 | LR: 0.0001
No improvement for 1 epoch(s)

Epoch 6/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5933 | Val loss: 0.4839 | Val Macro F1: 0.7319 | Val AUC: 0.7861 | Thr: 0.45 | LR: 0.0001
No improvement for 2 epoch(s)

Epoch 7/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5438 | Val loss: 0.4260 | Val Macro F1: 0.6967 | Val AUC: 0.7738 | Thr: 0.60 | LR: 0.0001
No improvement for 3 epoch(s)

Epoch 8/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4816 | Val loss: 0.4066 | Val Macro F1: 0.7160 | Val AUC: 0.7882 | Thr: 0.60 | LR: 0.0001
No improvement for 4 epoch(s)

Epoch 9/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.4715 | Val loss: 0.8464 | Val Macro F1: 0.6594 | Val AUC: 0.7117 | Thr: 0.15 | LR: 0.0001
No improvement for 5 epoch(s)

Epoch 10/30
----------------------------------------


                                                                             

Train loss: 0.4567 | Val loss: 0.4328 | Val Macro F1: 0.6744 | Val AUC: 0.7318 | Thr: 0.50 | LR: 0.0001
No improvement for 6 epoch(s)

Epoch 11/30
----------------------------------------


                                                                             

Train loss: 0.4319 | Val loss: 0.4667 | Val Macro F1: 0.7411 | Val AUC: 0.7990 | Thr: 0.80 | LR: 0.0001
No improvement for 7 epoch(s)

Early stopping at epoch 11

Training complete. Best epoch: 4 (Best Val Macro F1=0.7742)

Best epoch: 4
Train losses: ['0.7527', '0.7432', '0.7150', '0.6511', '0.6161', '0.5933', '0.5438', '0.4816', '0.4715', '0.4567', '0.4319']
Val losses:   ['0.7027', '0.6759', '0.6232', '0.5504', '0.5537', '0.4839', '0.4260', '0.4066', '0.8464', '0.4328', '0.4667']
Completed run_07_topk0.05_gw0.25_lr0.0001 | status=ok | best_epoch=4 | val_f1=0.7741935483870968 | val_loss=0.5503993603491014 | minutes=1.02

[8/8] Running: python -m audio.run_audio --config checkpoints/audio/grid_search_mil/configs/run_08_topk0.05_gw0.25_lr0.0003.json
Device: cuda

Total weld files: 775
File label distribution (binary): {'defect': 417, 'good_weld': 358}
Split strategy: stratified
Train welds: 620 | Val welds: 155
File split stats | train={ defect: 334 (53.9%), good_weld: 286 (46.1%) } | 

                                                                             

Train loss: 0.9367 | Val loss: 0.6627 | Val Macro F1: 0.6178 | Val AUC: 0.6988 | Thr: 0.65 | LR: 0.0001
New best model saved (val_f1=0.6178)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_08_topk0.05_gw0.25_lr0.0003/best_models/20260228_023909_ep001_f1_0.6178_vl_0.6627

Epoch 2/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.7579 | Val loss: 0.6286 | Val Macro F1: 0.6449 | Val AUC: 0.7078 | Thr: 0.70 | LR: 0.0002
New best model saved (val_f1=0.6449)
Best snapshot dir: checkpoints/audio/grid_search_mil/run_08_topk0.05_gw0.25_lr0.0003/best_models/20260228_023909_ep002_f1_0.6449_vl_0.6286

Epoch 3/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.6473 | Val loss: 0.7588 | Val Macro F1: 0.6171 | Val AUC: 0.6627 | Thr: 0.85 | LR: 0.0003
No improvement for 1 epoch(s)

Epoch 4/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.5803 | Val loss: 0.6549 | Val Macro F1: 0.5911 | Val AUC: 0.6503 | Thr: 0.80 | LR: 0.0003
No improvement for 2 epoch(s)

Epoch 5/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.6194 | Val loss: 0.5245 | Val Macro F1: 0.6320 | Val AUC: 0.6750 | Thr: 0.75 | LR: 0.0003
No improvement for 3 epoch(s)

Epoch 6/30
----------------------------------------


                                                                             

Train loss: 0.6136 | Val loss: 0.5778 | Val Macro F1: 0.5730 | Val AUC: 0.5858 | Thr: 0.45 | LR: 0.0003
No improvement for 4 epoch(s)

Epoch 7/30
----------------------------------------


Training (MIL):   0%|          | 0/78 [00:00<?, ?it/s]                       

Train loss: 0.6639 | Val loss: 0.5939 | Val Macro F1: 0.5980 | Val AUC: 0.6478 | Thr: 0.45 | LR: 0.0003
No improvement for 5 epoch(s)

Epoch 8/30
----------------------------------------


                                                                             

Train loss: 0.6481 | Val loss: 0.9134 | Val Macro F1: 0.6001 | Val AUC: 0.6692 | Thr: 0.15 | LR: 0.0003
No improvement for 6 epoch(s)

Epoch 9/30
----------------------------------------


                                                                             

Train loss: 0.6315 | Val loss: 0.5131 | Val Macro F1: 0.6403 | Val AUC: 0.6545 | Thr: 0.60 | LR: 0.00015
No improvement for 7 epoch(s)

Early stopping at epoch 9

Training complete. Best epoch: 2 (Best Val Macro F1=0.6449)

Best epoch: 2
Train losses: ['0.9367', '0.7579', '0.6473', '0.5803', '0.6194', '0.6136', '0.6639', '0.6481', '0.6315']
Val losses:   ['0.6627', '0.6286', '0.7588', '0.6549', '0.5245', '0.5778', '0.5939', '0.9134', '0.5131']
Completed run_08_topk0.05_gw0.25_lr0.0003 | status=ok | best_epoch=2 | val_f1=0.6449248198592195 | val_loss=0.62856113718402 | minutes=0.84

####################################################################################################
MIL Grid Search Summary
run_01_topk0.03_gw0.1_lr0.0001 | status=ok | val_f1=0.9611593718676912 | val_loss=0.1807474959281183 | epoch=29 | topk_pos=0.03 | gw=0.1 | lr=0.0001
run_03_topk0.03_gw0.25_lr0.0001 | status=ok | val_f1=0.909037558685446 | val_loss=0.25987665057182313 | epoch=30 | topk_pos=0.03 | gw=0.2

## 6. Compact Run Summary
Reads checkpoint artifacts and prints a concise view of current and grid runs.


In [34]:
from pathlib import Path
import glob
import os

import torch


def load_ckpt(path):
    """Load a checkpoint onto the CPU, or return ``None`` when the file is absent.

    The load is first attempted with ``weights_only=True``. If that call raises
    ``TypeError`` (presumably an older PyTorch whose ``torch.load`` lacks the
    keyword), it is retried without the keyword.

    Args:
        path: Checkpoint location (``str`` or ``Path``).

    Returns:
        Whatever ``torch.load`` returns for the file, or ``None`` if it does not exist.
    """
    ckpt_file = Path(path)
    if ckpt_file.exists():
        load_kwargs = {'map_location': 'cpu'}
        try:
            return torch.load(path, weights_only=True, **load_kwargs)
        except TypeError:
            return torch.load(path, **load_kwargs)
    return None


def ckpt_row(name, ckpt):
    """Build one summary-table row from a loaded checkpoint.

    Args:
        name: Label shown in the ``name`` column.
        ckpt: Mapping returned by ``load_ckpt``, or ``None`` when no checkpoint was found.

    Returns:
        Dict with keys ``name``, ``epoch``, ``val_f1``, ``val_auc``, ``val_loss`` and
        ``threshold``. Metrics that are missing (or all of them when ``ckpt`` is ``None``)
        are ``None``.
    """
    metric_keys = ('epoch', 'val_f1', 'val_auc', 'val_loss', 'threshold')
    row = {'name': name}
    for key in metric_keys:
        row[key] = None if ckpt is None else ckpt.get(key)
    return row


def fmt(v, n=4):
    """Render one table cell as text.

    Args:
        v: Value to render.
        n: Number of decimals used when ``v`` is a ``float``.

    Returns:
        ``'-'`` for ``None``, a fixed-point string with ``n`` decimals for floats,
        and ``str(v)`` for everything else.
    """
    if v is None:
        return '-'
    return format(v, f".{n}f") if isinstance(v, float) else str(v)


def print_table(rows, title):
    """Print a fixed-width checkpoint metrics table to stdout.

    Args:
        rows: Iterable of dicts with keys ``name``, ``epoch``, ``val_f1``, ``val_auc``,
            ``val_loss`` and ``threshold`` (the shape produced by ``ckpt_row``).
        title: Heading printed between the two rule lines.
    """
    header = f"{'name':45s} {'epoch':>6s} {'val_f1':>10s} {'val_auc':>10s} {'val_loss':>10s} {'thr':>6s}"

    # Blank line, then the banner: rule, title, rule, column header.
    print()
    for banner_line in ('=' * 100, title, '-' * 100, header):
        print(banner_line)

    for entry in rows:
        # Names are truncated to the 45-character column width.
        cells = [
            f"{entry['name'][:45]:45s}",
            f"{fmt(entry['epoch'],0):>6s}",
            f"{fmt(entry['val_f1']):>10s}",
            f"{fmt(entry['val_auc']):>10s}",
            f"{fmt(entry['val_loss']):>10s}",
            f"{fmt(entry['threshold'],2):>6s}",
        ]
        print(' '.join(cells))


# Checkpoint directory of the configured run; `cfg` is loaded in the first notebook cell.
ckpt_dir = Path(cfg['audio']['training']['checkpoint_dir'])
best_path = ckpt_dir / 'best_model.pt'
last_path = ckpt_dir / 'last_model.pt'

# A missing file yields a row of '-' placeholders rather than an error.
main_rows = [
    ckpt_row('best_model.pt', load_ckpt(best_path)),
    ckpt_row('last_model.pt', load_ckpt(last_path)),
]
print_table(main_rows, f"Main Checkpoints @ {ckpt_dir}")

# Recent best snapshots (timestamped folders), newest first by file mtime, capped at 8.
snapshot_files = sorted(
    glob.glob(str(ckpt_dir / 'best_models' / '*' / 'model.pt')),
    key=os.path.getmtime,
    reverse=True,
)[:8]

snap_rows = []
for p in snapshot_files:
    name = Path(p).parent.name
    snap_rows.append(ckpt_row(name, load_ckpt(p)))

if snap_rows:
    print_table(snap_rows, 'Recent Best Snapshots (latest 8)')
else:
    print()
    print('No timestamped best snapshots found yet.')

# Grid search summary: one row per `run_*` directory under grid_search_mil.
grid_root = ckpt_dir / 'grid_search_mil'
run_dirs = sorted(glob.glob(str(grid_root / 'run_*')))
grid_rows = []
for rd in run_dirs:
    run_name = Path(rd).name
    ck = load_ckpt(Path(rd) / 'best_model.pt')
    row = ckpt_row(run_name, ck)
    grid_rows.append(row)

# Rank by val_f1, best first. Rows without a val_f1 (no checkpoint, or the metric is
# missing) must sort LAST; otherwise they crowd real results out of the top-12 table.
grid_rows = sorted(
    grid_rows,
    key=lambda r: (r['val_f1'] is None, -(r['val_f1'] or 0.0)),
)

if grid_rows:
    print_table(grid_rows[:12], f'Grid Search Runs @ {grid_root} (top 12 by val_f1)')
    best_valid = [r for r in grid_rows if r['val_f1'] is not None]
    if best_valid:
        b = best_valid[0]
        print()
        print('Best grid run:')
        print(
            f"{b['name']} | epoch={fmt(b['epoch'],0)} | "
            f"val_f1={fmt(b['val_f1'])} | val_auc={fmt(b['val_auc'])} | val_loss={fmt(b['val_loss'])}"
        )
else:
    print()
    print('No grid-search runs found.')



Main Checkpoints @ checkpoints/audio_confirm_run_07_topk0.05_gw0.25_lr0.0001
----------------------------------------------------------------------------------------------------
name                                           epoch     val_f1    val_auc   val_loss    thr
best_model.pt                                      4     0.6701     0.6735     0.4669   0.85
last_model.pt                                     11     0.6257     0.6560     0.4598   0.65

Recent Best Snapshots (latest 8)
----------------------------------------------------------------------------------------------------
name                                           epoch     val_f1    val_auc   val_loss    thr
20260228_022712_ep004_f1_0.6701_vl_0.4669          4     0.6701     0.6735     0.4669   0.85
20260228_022712_ep003_f1_0.6463_vl_0.5321          3     0.6463     0.6864     0.5321   0.75
20260228_022712_ep002_f1_0.5813_vl_0.6369          2     0.5813     0.6714     0.6369   0.70
20260228_022712_ep001_f1_0.5225_vl_

## 7. Single `.flac` -> Single Label (Checkpoint-Only, In-Notebook)
Set `CHECKPOINT_PATH` and `AUDIO_FILE_PATH`, then run the next cell.
This reproduces MIL-style file-level inference by chunking the full audio, pooling window probabilities, and returning one final label.


## Note
Use the Section 6 summary cell (Compact Run Summary) after training or grid search to avoid scrolling through verbose logs.


In [35]:
import json
import math
from pathlib import Path

import torch
import torchaudio

from audio_model import AudioCNN
from audio.audio_processing import DEFAULT_AUDIO_CFG, WeldModel


def load_json(path: Path):
    """Read the file at ``path`` and return its parsed JSON content."""
    with open(path) as handle:
        raw_text = handle.read()
    return json.loads(raw_text)


def load_runtime_cfg(checkpoint_path: Path):
    """Assemble inference settings for a checkpoint from its optional sidecar config.

    The sidecar is ``config.json`` in the checkpoint's directory. When it is absent,
    built-in defaults are returned unchanged. When present, its ``audio`` section
    overrides them.

    Args:
        checkpoint_path: Path of the checkpoint file.

    Returns:
        Dict with keys ``audio_cfg``, ``dropout``, ``task``, ``eval_pool_ratio`` and
        ``threshold``.
    """
    runtime = {
        "audio_cfg": dict(DEFAULT_AUDIO_CFG),
        "dropout": 0.15,
        "task": "binary",
        "eval_pool_ratio": 0.05,
        "threshold": 0.5,
    }

    sidecar_path = checkpoint_path.parent / "config.json"
    if sidecar_path.exists():
        audio_section = load_json(sidecar_path).get("audio", {})

        # Sidecar feature parameters are layered over the package defaults.
        runtime["audio_cfg"] = {**DEFAULT_AUDIO_CFG, **audio_section.get("feature_params", {})}
        runtime["dropout"] = float(audio_section.get("model", {}).get("dropout", 0.15))

        train_section = audio_section.get("training", {})
        mil_section = train_section.get("sequence_mil", {})
        runtime["task"] = train_section.get("task", "binary")

        # Pooling ratio precedence: eval_pool_ratio, then topk_ratio_pos, then 0.05.
        pool_fallback = mil_section.get("topk_ratio_pos", 0.05)
        runtime["eval_pool_ratio"] = float(mil_section.get("eval_pool_ratio", pool_fallback))
        runtime["threshold"] = float(mil_section.get("threshold", 0.5))

    return runtime


def load_label_mapping(checkpoint_path: Path, task: str):
    """Recover the label -> class-index mapping for a checkpoint.

    A non-empty ``label_to_idx`` entry in the ``config.json`` sidecar next to the
    checkpoint takes precedence. Without it, only the binary task has a built-in
    fallback.

    Args:
        checkpoint_path: Path of the checkpoint file.
        task: Task name; ``"binary"`` enables the fallback mapping.

    Returns:
        Dict mapping label name to integer index.

    Raises:
        ValueError: No mapping is stored and ``task`` is not ``"binary"``.
    """
    sidecar_path = checkpoint_path.parent / "config.json"
    stored = load_json(sidecar_path).get("label_to_idx") if sidecar_path.exists() else None
    if stored:
        return {label: int(index) for label, index in stored.items()}

    if task != "binary":
        raise ValueError("Could not recover label mapping from checkpoint sidecar config")
    return {"defect": 0, "good_weld": 1}


def infer_num_classes_from_state_dict(state_dict):
    """Read the number of output classes from the ``backbone.head.3.weight`` entry.

    Args:
        state_dict: Mapping of parameter names to tensors.

    Returns:
        The first dimension of the ``backbone.head.3.weight`` entry, as an ``int``.

    Raises:
        ValueError: The expected key is not in ``state_dict``.
    """
    head_weight_key = "backbone.head.3.weight"
    if head_weight_key not in state_dict:
        raise ValueError("Could not infer num_classes from checkpoint")
    out_features = state_dict[head_weight_key].shape[0]
    return int(out_features)


def aggregate_topk(prob_seq: torch.Tensor, ratio: float):
    """Average the largest values of ``prob_seq``.

    The number of values kept is ``ceil(ratio * N)``, clamped to at least 1 and at
    most ``N``, where ``N`` is the element count of ``prob_seq``.

    Args:
        prob_seq: Tensor of per-window probabilities.
        ratio: Fraction of windows to keep.

    Returns:
        Scalar tensor holding the mean of the kept values.
    """
    total = int(prob_seq.numel())
    wanted = int(math.ceil(ratio * total))
    k = min(max(1, wanted), total)
    return torch.topk(prob_seq, k=k).values.mean()


def window_defect_probs(model, waveform, chunk_samples, defect_idx, device, batch_size=256):
    """Score every full-length window of a recording and return its defect probability.

    Multi-channel input is averaged over dimension 0. The audio is cut into
    back-to-back windows of ``chunk_samples`` samples and any trailing remainder is
    discarded. The model is switched to eval mode and run without gradients.

    Args:
        model: Callable module that takes a batch of windows shaped (T, 1, S).
        waveform: Audio tensor; assumed (channels, samples) — TODO confirm for other callers.
        chunk_samples: Window length in samples.
        defect_idx: Softmax column holding the defect class (ignored for single-logit output).
        device: Device the window batches are moved to.
        batch_size: Number of windows per forward pass.

    Returns:
        CPU tensor with one probability per window.

    Raises:
        ValueError: The audio is shorter than one window.
    """
    mono = waveform.mean(dim=0, keepdim=True) if waveform.shape[0] > 1 else waveform

    total_samples = mono.shape[-1]
    n_windows = total_samples // chunk_samples
    if n_windows < 1:
        raise ValueError(
            f"Audio is too short: got {total_samples} samples, needs at least {chunk_samples}."
        )

    # Keep only whole windows, then lay them out as (T, 1, S).
    usable = mono[:, : n_windows * chunk_samples]
    windows = usable.reshape(n_windows, 1, chunk_samples)

    model.eval()
    collected = []
    with torch.no_grad():
        for start in range(0, n_windows, batch_size):
            logits = model(windows[start: start + batch_size].to(device))
            single_logit = logits.dim() == 2 and logits.size(1) == 1
            if single_logit:
                # One logit per window: the sigmoid output is used as the defect probability.
                p_def = torch.sigmoid(logits).squeeze(1)
            else:
                p_def = torch.softmax(logits, dim=1)[:, defect_idx]
            collected.append(p_def.cpu())

    return torch.cat(collected, dim=0)


def predict_single_flac(checkpoint_path: str, audio_file_path: str, threshold=None, eval_pool_ratio=None, batch_size=256):
    """Classify one audio file as ``defect`` or ``good_weld`` using a saved checkpoint.

    The model is rebuilt from the checkpoint's ``model_state_dict`` plus the settings
    from ``load_runtime_cfg``. The audio is resampled to the configured rate if needed
    and scored window by window. The window probabilities are pooled with
    ``aggregate_topk`` and compared against the decision threshold.

    Args:
        checkpoint_path: Path to the checkpoint file.
        audio_file_path: Path to the audio file to classify.
        threshold: Decision threshold override. When ``None``, the checkpoint's
            ``threshold`` entry is used, falling back to the runtime config value.
        eval_pool_ratio: Pooling ratio override. When ``None``, the runtime config
            value is used.
        batch_size: Number of windows per forward pass.

    Returns:
        Dict with keys ``prediction``, ``p_defect``, ``threshold``, ``pool_ratio``,
        ``num_windows`` and ``device``.

    Raises:
        FileNotFoundError: The checkpoint or the audio file does not exist.
        ValueError: The task is not binary, or the label mapping lacks ``defect`` /
            ``good_weld``.
    """
    ckpt_path = Path(checkpoint_path)
    if not ckpt_path.exists():
        raise FileNotFoundError(f"Checkpoint not found: {ckpt_path}")
    audio_path = Path(audio_file_path)
    if not audio_path.exists():
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    runtime = load_runtime_cfg(ckpt_path)
    task = runtime["task"]
    if task != "binary":
        raise ValueError("This helper currently supports binary task only.")

    label_to_idx = load_label_mapping(ckpt_path, task=task)
    if not ("defect" in label_to_idx and "good_weld" in label_to_idx):
        raise ValueError("Binary label mapping must contain 'defect' and 'good_weld'.")
    defect_idx = int(label_to_idx["defect"])

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    try:
        checkpoint = torch.load(ckpt_path, map_location=device, weights_only=True)
    except TypeError:
        # Retry without the keyword (presumably an older torch.load that rejects it).
        checkpoint = torch.load(ckpt_path, map_location=device)

    # Rebuild the network with the head size recorded in the saved weights.
    state_dict = checkpoint["model_state_dict"]
    audio_cfg = runtime["audio_cfg"]
    backbone = AudioCNN(
        num_classes=infer_num_classes_from_state_dict(state_dict),
        dropout=runtime["dropout"],
    )
    model = WeldModel(backbone, cfg=audio_cfg)
    model.load_state_dict(state_dict)
    model.to(device)

    waveform, sr = torchaudio.load(audio_path)
    target_sr = int(audio_cfg["sampling_rate"])
    if sr != target_sr:
        waveform = torchaudio.functional.resample(waveform, sr, target_sr)

    chunk_samples = int(float(audio_cfg["chunk_length_in_s"]) * target_sr)
    probs = window_defect_probs(
        model=model,
        waveform=waveform,
        chunk_samples=chunk_samples,
        defect_idx=defect_idx,
        device=device,
        batch_size=batch_size,
    )

    if eval_pool_ratio is None:
        pool_ratio = float(runtime["eval_pool_ratio"])
    else:
        pool_ratio = float(eval_pool_ratio)
    p_defect = float(aggregate_topk(probs, pool_ratio).item())

    # Threshold precedence: explicit argument, then checkpoint entry, then runtime config.
    if threshold is None:
        thr = float(checkpoint.get("threshold", runtime["threshold"]))
    else:
        thr = float(threshold)

    return {
        "prediction": "defect" if p_defect >= thr else "good_weld",
        "p_defect": p_defect,
        "threshold": thr,
        "pool_ratio": pool_ratio,
        "num_windows": int(probs.numel()),
        "device": str(device),
    }


# ---- Set only these two paths ----
# Checkpoint to evaluate; `predict_single_flac` also looks for a `config.json`
# sidecar in the same directory.
CHECKPOINT_PATH = "checkpoints/audio/best_model.pt"
# Recording to classify.
AUDIO_FILE_PATH = "sampleData/08-17-22-0011-00/08-17-22-0011-00.flac"

# Run file-level inference. Uncomment the overrides below to replace the threshold /
# pooling ratio that otherwise come from the checkpoint and its sidecar config.
result = predict_single_flac(
    checkpoint_path=CHECKPOINT_PATH,
    audio_file_path=AUDIO_FILE_PATH,
    # threshold=0.35,
    # eval_pool_ratio=0.05,
    batch_size=256,
 )

# Show the full result dict, then the final label on its own line.
print("Single prediction result:")
print(result)
print(f"Final label: {result['prediction']}")


Single prediction result:
{'prediction': 'defect', 'p_defect': 0.9113511443138123, 'threshold': 0.44999999999999996, 'pool_ratio': 0.05, 'num_windows': 38, 'device': 'cuda'}
Final label: defect


## 8. Deployment Guide for Friend (Two Inference Settings)
Use the exported deploy model: `checkpoints/audio/deploy_single_label.pt`.

> Important: this deploy model expects raw waveform with shape `(channels, samples)`.
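> It always outputs one decision dict per input (file-level for a full recording, window-level for a single window): `{'label': 0/1, 'p_defect': float}` where `0=good_weld`, `1=defect`. Note that this output encoding is the reverse of the Section 7 fallback label mapping (`defect=0`, `good_weld=1`), so do not mix the two.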

> The *same* `.pt` supports both modes below:
- **Mode A (Full audio):** pass entire audio waveform; model does split->window inference->aggregation internally.
- **Mode B (Single window):** pass exactly one window (`chunk_samples`, usually 16000 for 1s at 16kHz); model outputs one label for that window.

In [None]:
from pathlib import Path

import torch
import torchaudio

# Exported deploy model and the recording used by the Section 8 demo cells.
DEPLOY_PT = Path("checkpoints/audio/deploy_single_label.pt")
# NOTE(review): absolute, machine-specific path — point this at a local .flac before running.
AUDIO_PATH = Path("/data1/malto/therness/data/Hackathon/defect-weld/burnthrough_weld_10_12-04-22_butt_joint/12-04-22-0161-02/12-04-22-0161-02.flac")

# Fail early with a readable message if either input is missing.
assert DEPLOY_PT.exists(), f"Deploy model not found: {DEPLOY_PT}"
assert AUDIO_PATH.exists(), f"Audio file not found: {AUDIO_PATH}"

# Load the TorchScript module on CPU and read the whole recording.
# `deploy_model`, `waveform` and `sr` are reused by the following cells.
deploy_model = torch.jit.load(str(DEPLOY_PT), map_location="cpu")
waveform, sr = torchaudio.load(str(AUDIO_PATH))

print(f"Loaded deploy model: {DEPLOY_PT}")
print(f"Loaded audio: {AUDIO_PATH}")
print(f"Sample rate: {sr} | waveform shape: {tuple(waveform.shape)}")

def decode_label(label_id: int) -> str:
    """Translate the deploy model's numeric label: 1 is ``"defect"``, anything else ``"good_weld"``."""
    if int(label_id) == 1:
        return "defect"
    return "good_weld"

def run_deploy(wave: torch.Tensor):
    """Run the notebook-level ``deploy_model`` on ``wave`` and unpack its output.

    Args:
        wave: Waveform tensor passed straight to ``deploy_model``.

    Returns:
        Dict with ``label_id`` (int), ``label`` (decoded name) and ``p_defect`` (float).
    """
    raw = deploy_model(wave)
    result = {"label_id": int(raw["label"].item())}
    result["label"] = decode_label(result["label_id"])
    result["p_defect"] = float(raw["p_defect"].item())
    return result

In [None]:
# Mode A: full audio -> one label
# The waveform loaded in the setup cell is passed as-is; this cell does no resampling
# or mono mix-down of its own.
full_result = run_deploy(waveform)
print("Mode A (full audio):", full_result)

In [None]:
# Mode B: single window -> one label
CHUNK_SAMPLES = 16000  # expected by current deploy model (1s at 16kHz)
assert waveform.shape[-1] >= CHUNK_SAMPLES, "Audio shorter than one full window"

# Take the first CHUNK_SAMPLES samples of every channel.
# NOTE(review): `sr` is not checked here, so this is a 1 s window only if the file is
# 16 kHz — confirm for other recordings.
single_window = waveform[:, :CHUNK_SAMPLES]
window_result = run_deploy(single_window)

print("Mode B (single window):", window_result)
print(f"single_window shape: {tuple(single_window.shape)}")

### Section 8B — Explicit Single-Window Example
This cell builds one exact window from a chosen start time and runs the deploy `.pt` on that single window only.

In [None]:
# Explicit single-window inference from a chosen time offset
AUDIO_PATH_WINDOW = AUDIO_PATH
START_SEC = 5.0  # choose the second where the 1-window slice starts

wave_w, sr_w = torchaudio.load(str(AUDIO_PATH_WINDOW))

# Check the sample rate first: the sample arithmetic below assumes 16 kHz input.
assert sr_w == 16000, f"Expected 16kHz input for this deploy model, got {sr_w}"

# Current deploy model uses 1s windows at 16kHz => 16000 samples
chunk_samples = 16000
if hasattr(deploy_model, "chunk_samples"):
    try:
        chunk_samples = int(deploy_model.chunk_samples)
    except Exception as exc:
        # Best-effort read: keep the default, but say so instead of failing silently.
        print(f"Could not read deploy_model.chunk_samples ({exc!r}); using default {chunk_samples}")

start_sample = int(START_SEC * sr_w)
end_sample = start_sample + chunk_samples

# A negative start would be interpreted by slicing as an offset from the END of the
# audio and could silently select the wrong window, so reject it explicitly.
assert start_sample >= 0, f"START_SEC must be >= 0, got {START_SEC}"
assert end_sample <= wave_w.shape[-1], (
    f"Requested window [{start_sample}:{end_sample}] exceeds audio length {wave_w.shape[-1]}"
)

single_window_explicit = wave_w[:, start_sample:end_sample]
assert single_window_explicit.shape[-1] == chunk_samples

window_explicit_result = run_deploy(single_window_explicit)
print(f"Mode B explicit | start_sec={START_SEC} | window_shape={tuple(single_window_explicit.shape)}")
print(window_explicit_result)

## 9. Robust Single-Window Workflow (Friend-Ready)
This section is a complete reference for single-window inference with checks, mode comparison, and multi-offset scan.

In [None]:
from pathlib import Path

import torch
import torchaudio

# Parameters friend can edit
DEPLOY_PT_2 = Path("checkpoints/audio/deploy_single_label.pt")
# AUDIO_PATH is defined in the Section 8 setup cell, which must be run first.
AUDIO_PATH_2 = AUDIO_PATH  # reuse from Section 8
START_SEC_2 = 5.0  # offset (seconds) where the single window starts
CHUNK_SAMPLES_2 = 16000  # window length in samples

# Fail early with a readable message if either input is missing.
assert DEPLOY_PT_2.exists(), f"Deploy model not found: {DEPLOY_PT_2}"
assert Path(AUDIO_PATH_2).exists(), f"Audio file not found: {AUDIO_PATH_2}"

# Load the TorchScript module on CPU and read the whole recording.
# `deploy_model_2`, `wave_2` and `sr_2` are reused by the following cells.
deploy_model_2 = torch.jit.load(str(DEPLOY_PT_2), map_location="cpu")
wave_2, sr_2 = torchaudio.load(str(AUDIO_PATH_2))
print(f"Loaded deploy model: {DEPLOY_PT_2}")
print(f"Loaded audio: {AUDIO_PATH_2}")
print(f"Sample rate: {sr_2} | waveform shape: {tuple(wave_2.shape)}")

def decode_label_2(label_id: int) -> str:
    """Translate the deploy model's numeric label: 1 is ``"defect"``, anything else ``"good_weld"``."""
    is_defect = int(label_id) == 1
    if is_defect:
        return "defect"
    return "good_weld"

def run_deploy_window(wave: torch.Tensor):
    """Run the notebook-level ``deploy_model_2`` on ``wave`` and unpack its output.

    Args:
        wave: Waveform tensor passed straight to ``deploy_model_2``.

    Returns:
        Dict with ``label_id`` (int), ``label`` (decoded name) and ``p_defect`` (float).
    """
    raw = deploy_model_2(wave)
    result = {"label_id": int(raw["label"].item())}
    result["label"] = decode_label_2(result["label_id"])
    result["p_defect"] = float(raw["p_defect"].item())
    return result

In [None]:
# Extract one exact window with validation + error handling
TARGET_SR_2 = 16000

# Normalise the shared audio in place: later cells see the resampled, mono `wave_2`.
# Updating `sr_2` alongside makes re-running this cell a no-op for the resample step.
if sr_2 != TARGET_SR_2:
    wave_2 = torchaudio.functional.resample(wave_2, sr_2, TARGET_SR_2)
    sr_2 = TARGET_SR_2
    print(f"Resampled audio to {sr_2} Hz")

# Average channels so the window has exactly one channel.
if wave_2.shape[0] > 1:
    wave_2 = wave_2.mean(dim=0, keepdim=True)
    print("Converted multi-channel audio to mono")

audio_len = wave_2.shape[-1]
start_sample_2 = int(START_SEC_2 * sr_2)
end_sample_2 = start_sample_2 + CHUNK_SAMPLES_2

# Validate length and offset before slicing so failures carry a readable message.
assert audio_len >= CHUNK_SAMPLES_2, (
    f"Audio too short: {audio_len} samples, need at least {CHUNK_SAMPLES_2}"
)
assert start_sample_2 >= 0, "start_sec must be >= 0"
assert end_sample_2 <= audio_len, (
    f"Invalid offset: window [{start_sample_2}:{end_sample_2}] exceeds audio length {audio_len}"
)

one_window = wave_2[:, start_sample_2:end_sample_2]
# Final guard: exactly one channel and exactly CHUNK_SAMPLES_2 samples.
assert one_window.shape == (1, CHUNK_SAMPLES_2), (
    f"Unexpected window shape {tuple(one_window.shape)}"
)

single_window_result = run_deploy_window(one_window)
print(f"offset_sec={START_SEC_2} | samples=[{start_sample_2}:{end_sample_2}] | shape={tuple(one_window.shape)}")
print(single_window_result)

In [None]:
# Compare Mode A (full audio) vs Mode B (single window)
# Both calls reuse `wave_2` / `one_window` from the previous cell, so that cell must be
# run first.
mode_a_result = run_deploy_window(wave_2)
mode_b_result = run_deploy_window(one_window)

# Print one compact dict per mode; probabilities are rounded to 6 decimals for display.
print("Mode comparison")
print("-" * 80)
print({
    "mode": "A_full_audio",
    "label": mode_a_result["label"],
    "p_defect": round(mode_a_result["p_defect"], 6),
})
print({
    "mode": "B_single_window",
    "label": mode_b_result["label"],
    "p_defect": round(mode_b_result["p_defect"], 6),
    "start_sec": START_SEC_2,
})

In [None]:
# Optional: multi-offset single-window scan (compact table)
# Each offset that leaves room for a full window is scored on its own; offsets whose
# window would run past the end of the audio are skipped.
offsets_sec = [0.0, 2.0, 5.0, 10.0, 15.0]
rows = []

for offset in offsets_sec:
    first = int(offset * sr_2)
    last = first + CHUNK_SAMPLES_2
    if last <= wave_2.shape[-1]:
        scan_result = run_deploy_window(wave_2[:, first:last])
        rows.append({
            "start_sec": offset,
            "start_sample": first,
            "end_sample": last,
            "label": scan_result["label"],
            "p_defect": round(scan_result["p_defect"], 6),
        })

if not rows:
    print("No valid offsets for this audio length.")
else:
    print(f"{'start_sec':>9s} {'start_sample':>12s} {'end_sample':>10s} {'label':>10s} {'p_defect':>10s}")
    print("-" * 62)
    for row in rows:
        print(f"{row['start_sec']:9.2f} {row['start_sample']:12d} {row['end_sample']:10d} {row['label']:>10s} {row['p_defect']:10.6f}")