# Best 100-epoch full pipeline (Audio → Video → Fusion)

This notebook prepares a dedicated config and runs three training stages in sequence:
1. Audio (MIL multiclass) for 100 epochs
2. Video window model for 100 epochs
3. Temporal fusion for 100 epochs using the best audio+video checkpoints produced above

**Config split**: each model has its own config file so teammates can train audio & video independently and push without git conflicts:
- `configs/audio_pipeline.json` — audio model params
- `configs/video_pipeline.json` — video model params
- `configs/fusion_pipeline.json` — merged at fusion time from the two above + fusion-specific params

In [15]:
from pathlib import Path
import json
import os
import subprocess
import sys

# The notebook must be launched from the repository root: every later path
# (configs/, checkpoints/, data/) is resolved relative to ROOT.
ROOT = Path.cwd()
if not ROOT.joinpath("configs", "master_config.json").exists():
    raise RuntimeError(f"Run this notebook from repo root. Current dir: {ROOT}")

# Echo the working directory and interpreter so the captured output records them.
print(
    f"Repo root: {ROOT}",
    f"Python: {sys.executable}",
    sep="\n",
)

Repo root: /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito
Python: /home/alolli/miniconda3/envs/therness_env/bin/python


In [16]:
# Base config plus the three per-model config files this notebook writes,
# all of them under <repo>/configs.
base_cfg_path, audio_cfg_path, video_cfg_path, fusion_cfg_path = (
    ROOT / "configs" / filename
    for filename in (
        "master_config.json",
        "audio_pipeline.json",
        "video_pipeline.json",
        "fusion_pipeline.json",
    )
)

cfg = json.loads(base_cfg_path.read_text())

# Candidate dataset locations, tried in order; the first directory that
# exists and contains at least one .flac file (searched recursively) wins.
data_root_candidates = [
    Path("/data1/malto/therness/data/Hackathon"),
    Path("/root/data/train_data"),
    ROOT / "sampleData",
]
resolved_data_root = None
for candidate in data_root_candidates:
    if not (candidate.exists() and any(candidate.rglob("*.flac"))):
        continue
    resolved_data_root = str(candidate)
    break
else:
    # Loop finished without a break: no candidate qualified.
    raise FileNotFoundError(
        "No valid data root found. Checked: "
        + ", ".join(str(p) for p in data_root_candidates)
    )

# Both keys are pointed at the same resolved directory.
cfg["train_data_root"] = cfg["data_root"] = resolved_data_root
print(f"Using data_root: {resolved_data_root}")

# ---- Target metric threshold: stop early if metric >= this ----
# The same value is written into the audio, video and fusion training sections.
TARGET_METRIC_THRESHOLD = 0.95

# ---- AUDIO: best-known multiclass MIL config, 100 epochs ----
# setdefault() creates any missing section in place, so the overrides below
# land inside `cfg` itself rather than in detached dicts.
ac = cfg.setdefault("audio", {})
am = ac.setdefault("model", {})
at = ac.setdefault("training", {})
amil = at.setdefault("sequence_mil", {})

am.update({"dropout": 0.15})
at.update({
    "task": "multiclass",
    "num_epochs": 100,
    "lr": 7e-05,
    "weight_decay": 3e-05,
    "patience": 30,
    "checkpoint_dir": "checkpoints/audio_best_100",
    "metric": "macro_f1",
    "target_metric_threshold": TARGET_METRIC_THRESHOLD,
})
amil.update({
    "enabled": True,
    "batch_size": 8,
    "topk_ratio_pos": 0.12,
    "topk_ratio_neg": 0.2,
    "eval_pool_ratio": 0.12,
    "auto_threshold": True,
    "good_window_weight": 0.0,
    "use_class_weights": True,
    "class_weight_power": 0.65,
    "multiclass_eval_mode": "topk_per_class",
    "use_balanced_sampler": True,
    "balanced_sampler_power": 0.35,
})

# ---- VIDEO: best-known window config, 100 epochs ----
vw = cfg.setdefault("video_window", {})
vwm = vw.setdefault("model", {})
vwt = vw.setdefault("training", {})

vwm.update({"dropout": 0.12})
vwt.update({
    "epochs": 100,
    "lr": 1e-4,
    "weight_decay": 7e-5,
    "class_weight_power": 1.0,
    "use_balanced_sampler": True,
    "balanced_sampler_power": 0.3,
    "patience": 30,
    "checkpoint_dir": "checkpoints/video_best_100",
    "checkpoint_path": "checkpoints/video_best_100/window_classifier.pth",
    "target_metric_threshold": TARGET_METRIC_THRESHOLD,
})

# ---- FUSION: attention fusion (no RNN), 100 epochs ----
fc = cfg.setdefault("fusion", {})
fm = fc.setdefault("model", {})
ft = fc.setdefault("training", {})

fm.update({
    "arch": "attention",      # per-window attention fusion (no GRU)
    "audio_dim": 128,
    "video_dim": 128,
    "hidden_dim": 192,
    "dropout": 0.2,
    "num_heads": 4,           # multi-head self-attention heads
    "attn_layers": 2,         # number of transformer encoder layers
})
ft.update({
    "sequence_len": 12,       # temporal steps (read by run_fusion.py from training config)
    "num_epochs": 100,
    "lr": 1e-4,
    "weight_decay": 1e-4,
    "batch_size": 64,
    "patience": 30,
    "checkpoint_dir": "checkpoints/fusion_best_100",
    "target_metric_threshold": TARGET_METRIC_THRESHOLD,
})

# ── Write separate config files ──
import copy as _copy

# Each of the three files receives the same full merged `cfg`; it is
# serialised once and the identical text is written to every path.
_cfg_json = json.dumps(cfg, indent=2)
for _cfg_path in (audio_cfg_path, video_cfg_path, fusion_cfg_path):
    _cfg_path.write_text(_cfg_json)
del _cfg_json, _cfg_path  # keep the notebook namespace free of temporaries

# One print call, one line per argument.
print(
    f"Wrote configs:",
    f"  audio  → {audio_cfg_path.name}",
    f"  video  → {video_cfg_path.name}",
    f"  fusion → {fusion_cfg_path.name}",
    f"Target metric threshold: {TARGET_METRIC_THRESHOLD} (all models)",
    f"Fusion arch: {fm['arch']} (per-window MLP + self-attention, no RNN)",
    sep="\n",
)

Using data_root: /data1/malto/therness/data/Hackathon
Wrote configs:
  audio  → audio_pipeline.json
  video  → video_pipeline.json
  fusion → fusion_pipeline.json
Target metric threshold: 0.95 (all models)
Fusion arch: attention (per-window MLP + self-attention, no RNN)


## Pre-extract video frames (one-time, speeds up training ~3×)

Extracts N uniformly-sampled JPEG frames per .avi video file into `data/video_frames/`.
Skips videos already extracted. Safe to re-run.

In [17]:
# Extracted frames live under <repo>/data/video_frames; the presence of
# manifest.json is what marks the extraction as already done.
frames_dir = ROOT.joinpath("data", "video_frames")
manifest_path = frames_dir.joinpath("manifest.json")

if not manifest_path.exists():
    # No manifest yet: run the extractor module in a subprocess from the repo root.
    print(f"Extracting video frames to {frames_dir} ...")
    _cmd = [
        sys.executable, "-u", "-m", "video.extract_frames",
        "--data_root", resolved_data_root,
        "--out_dir", str(frames_dir),
        "--num_frames", "8",
        "--img_size", "160",
        "--workers", "16",
    ]
    print('RUN:', ' '.join(_cmd))
    subprocess.run(_cmd, check=True, cwd=ROOT)
    # A zero exit code alone is not trusted; the manifest must now exist.
    assert manifest_path.exists(), f"Frame extraction failed — no manifest at {manifest_path}"
    print(f"Done. Frames saved to {frames_dir}")
else:
    # Manifest present: report how many entries it lists and skip the work.
    _manifest = json.loads(manifest_path.read_text())
    n_entries = len(_manifest.get("entries", []))
    print(f"Video frames already extracted: {frames_dir}")
    print(f"  manifest.json has {n_entries} entries — skipping extraction")

Video frames already extracted: /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/data/video_frames
  manifest.json has 1551 entries — skipping extraction


In [18]:
def run_cmd(cmd):
    """Echo a command between two banner rules, then run it from the repo root.

    Args:
        cmd: Sequence of string arguments. It is joined with spaces for
            display and passed unchanged to ``subprocess.run``.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (``check=True``).
    """
    rule = '=' * 90
    print('\n' + rule)
    print('RUN:', ' '.join(cmd))
    print(rule)
    subprocess.run(cmd, check=True, cwd=ROOT)

# Best-checkpoint paths the three training stages are expected to produce.
# NOTE(review): the video training config sets checkpoint_path to
# "checkpoints/video_best_100/window_classifier.pth"; confirm the video stage
# also writes best_model.pt in that directory.
audio_ckpt, video_ckpt, fusion_ckpt = (
    ROOT / "checkpoints" / stage_dir / "best_model.pt"
    for stage_dir in ("audio_best_100", "video_best_100", "fusion_best_100")
)

print('Expected outputs:')
print(' -', audio_ckpt)
print(' -', video_ckpt)
print(' -', fusion_ckpt)

Expected outputs:
 - /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_best_100/best_model.pt
 - /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt
 - /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/fusion_best_100/best_model.pt


## 0) Hyperparameter tuning (short trials before full training)

Run 15-epoch trials with random search to find the best hyperparameters for each model.
Results are stored in the `tuning_results` dict and auto-applied to the full 100-epoch config.

In [19]:
import copy, random as _rnd, shutil, time, torch

# Budget of a single tuning trial.
TUNING_EPOCHS = 15      # epochs per short trial
TUNING_PATIENCE = 8     # early-stop patience inside a trial

# Number of random-search trials per model.
N_AUDIO_TRIALS = 6
N_VIDEO_TRIALS = 6
N_FUSION_TRIALS = 6

# One result list per model; each list starts empty and is independent.
tuning_results = {stage: [] for stage in ("audio", "video", "fusion")}

def _sample_uniform(lo, hi, log=False):
    """Draw one float between ``lo`` and ``hi`` from the shared ``_rnd`` state.

    Args:
        lo: Lower bound of the range.
        hi: Upper bound of the range.
        log: When true, sample uniformly between ``log(lo)`` and ``log(hi)``
            and exponentiate the draw (log-uniform); both bounds must then be
            positive for ``math.log`` to accept them.

    Returns:
        The sampled float.
    """
    if not log:
        return _rnd.uniform(lo, hi)

    import math
    log_lo, log_hi = math.log(lo), math.log(hi)
    return math.exp(_rnd.uniform(log_lo, log_hi))

def _sample_choice(options):
    """Return one element of ``options`` picked with the shared ``_rnd`` state."""
    picked = _rnd.choice(options)
    return picked

def run_trial(module, config_overrides, trial_cfg, trial_name, extra_args=None):
    """Run one short training trial in a subprocess and report its best score.

    A deep copy of the notebook-level ``cfg`` is patched with
    ``config_overrides``, written to a temporary JSON file under ``configs/``
    and passed to ``python -u -m <module> --config <file>``. The temporary
    config file is removed on every exit path, including unexpected errors.

    Args:
        module: Dotted module name to run with ``-m`` (e.g. "audio.run_audio").
        config_overrides: Mapping of dot-separated key paths
            (e.g. "audio.training.lr") to the value to set; missing
            intermediate sections are created.
        trial_cfg: The sampled hyperparameters. They are only printed and
            echoed back under "params", not applied to the config.
        trial_name: Unique trial label. It names the temporary config file and
            the directory ``checkpoints/_tuning/<trial_name>`` from which the
            best checkpoint is read, so the caller's checkpoint_dir override
            is expected to point at that same directory.
        extra_args: Optional extra command-line arguments appended to the
            command.

    Returns:
        A dict with keys "name", "score", "epoch", "params" and "elapsed"
        (seconds), or None when the subprocess exits non-zero or no
        ``best_model.pt`` is found afterwards.
    """
    trial_dir = f"checkpoints/_tuning/{trial_name}"
    trial_cfg_path = ROOT / "configs" / f"_tuning_{trial_name}.json"

    # Deep copy so overrides never leak into the shared notebook config.
    tcfg = copy.deepcopy(cfg)
    # data_root is required; train_data_root falls back to it when absent.
    tcfg.setdefault("train_data_root", tcfg["data_root"])

    # Walk each dotted path, creating sections as needed, and set the leaf.
    for key_path, value in config_overrides.items():
        *sections, leaf = key_path.split(".")
        node = tcfg
        for section in sections:
            node = node.setdefault(section, {})
        node[leaf] = value

    cmd = [sys.executable, '-u', '-m', module, '--config', str(trial_cfg_path)]
    if extra_args:
        cmd.extend(extra_args)

    try:
        trial_cfg_path.write_text(json.dumps(tcfg, indent=2))

        print(f"\n{'─'*70}")
        print(f"TRIAL: {trial_name}")
        print(f"  Params: {trial_cfg}")
        print(f"{'─'*70}")
        t0 = time.time()

        try:
            subprocess.run(cmd, check=True, cwd=ROOT)
        except subprocess.CalledProcessError:
            # A failed trial is reported and skipped, not fatal to the search.
            print(f"  TRIAL FAILED: {trial_name}")
            return None

        elapsed = time.time() - t0
        best_ckpt = ROOT / trial_dir / "best_model.pt"
        if not best_ckpt.exists():
            print(f"  No best checkpoint produced for {trial_name}")
            return None

        ckpt = torch.load(best_ckpt, map_location="cpu")
        # Audio uses val_f1, video/fusion use hackathon_score.
        # A key stored as None falls through to the next candidate so the
        # numeric formatting below cannot fail on it.
        score = ckpt.get("hackathon_score")
        if score is None:
            score = ckpt.get("val_f1")
        score = 0.0 if score is None else float(score)
        epoch = ckpt.get("epoch", "?")
        print(f"  RESULT: score={score:.4f}  epoch={epoch}  time={elapsed:.0f}s")

        return {"name": trial_name, "score": score, "epoch": epoch,
                "params": trial_cfg, "elapsed": elapsed}
    finally:
        # Single cleanup point for the temporary trial config file.
        trial_cfg_path.unlink(missing_ok=True)

# Summarise the tuning budget: one print call, one line per argument.
print(
    f"Tuning config: {TUNING_EPOCHS} epochs, patience {TUNING_PATIENCE}",
    f"Trials: audio={N_AUDIO_TRIALS}, video={N_VIDEO_TRIALS}, fusion={N_FUSION_TRIALS}",
    sep="\n",
)

Tuning config: 15 epochs, patience 8
Trials: audio=6, video=6, fusion=6


### 0a) Audio tuning

In [20]:

# ── Round 2: refined search around best region ──
# Fixed seed: every execution of this cell samples the same hyperparameters
# and therefore produces the same trial names.
_rnd.seed(99)

# Because the trial names repeat, re-running this cell would append duplicate
# entries. Drop results left by a previous execution of this round first;
# results stored under other names (e.g. an earlier round) are kept.
tuning_results["audio"] = [
    r for r in tuning_results["audio"]
    if not r["name"].startswith("audio_tune2_")
]

for i in range(N_AUDIO_TRIALS):
    trial_params = {
        "lr":           _sample_uniform(3.5e-5, 7e-5, log=True),   # narrowed around best 4.9e-5
        "weight_decay": _sample_uniform(1.5e-5, 4.5e-5, log=True), # narrowed around best 2.9e-5
        "dropout":      _sample_choice([0.08, 0.10, 0.12]),         # 0.10 dominated, try slightly lower too
        "cwp":          _sample_uniform(0.60, 0.75),                # narrowed around best 0.67
        "bsp":          _sample_uniform(0.35, 0.45),                # higher bsp was better (0.39)
        "topk_pos":     _sample_choice([0.12, 0.15, 0.18]),         # 0.15 dominated, test neighbors
    }
    # The name encodes only lr / weight decay / dropout; the index keeps it unique.
    trial_name = (
        f"audio_tune2_{i:02d}_lr={trial_params['lr']:.1e}"
        f"_wd={trial_params['weight_decay']:.1e}"
        f"_do={trial_params['dropout']}"
    )
    overrides = {
        "audio.training.num_epochs":                        TUNING_EPOCHS,
        "audio.training.patience":                          TUNING_PATIENCE,
        "audio.training.lr":                                trial_params["lr"],
        "audio.training.weight_decay":                      trial_params["weight_decay"],
        "audio.model.dropout":                              trial_params["dropout"],
        "audio.training.sequence_mil.class_weight_power":   trial_params["cwp"],
        "audio.training.sequence_mil.balanced_sampler_power": trial_params["bsp"],
        "audio.training.sequence_mil.topk_ratio_pos":       trial_params["topk_pos"],
        # eval_pool_ratio deliberately follows the sampled topk_ratio_pos.
        "audio.training.sequence_mil.eval_pool_ratio":      trial_params["topk_pos"],
        # Must match the directory run_trial reads best_model.pt from.
        "audio.training.checkpoint_dir":                    f"checkpoints/_tuning/{trial_name}",
    }
    result = run_trial("audio.run_audio", overrides, trial_params, trial_name)
    # run_trial returns None for failed trials; only successes are recorded.
    if result is not None:
        tuning_results["audio"].append(result)

# Re-sort everything currently stored for audio, best score first.
tuning_results["audio"].sort(key=lambda r: r["score"], reverse=True)
print(f"\n{'='*70}")
print("AUDIO TUNING RESULTS — ALL ROUNDS (sorted by score):")
print(f"{'='*70}")
for r in tuning_results["audio"]:
    print(f"  {r['score']:.4f}  ep={r['epoch']}  {r['name']}  params={r['params']}")

if tuning_results["audio"]:
    best_audio = tuning_results["audio"][0]
    print(f"\nBEST AUDIO: {best_audio['score']:.4f} — {best_audio['params']}")
else:
    print("\nNo successful audio trials")



──────────────────────────────────────────────────────────────────────
TRIAL: audio_tune2_00_lr=4.6e-05_wd=1.9e-05_do=0.08
  Params: {'lr': 4.631029660038342e-05, 'weight_decay': 1.8687512932167e-05, 'dropout': 0.08, 'cwp': 0.6345394092594436, 'bsp': 0.363324440091757, 'topk_pos': 0.12}
──────────────────────────────────────────────────────────────────────
Device: cuda

Total weld files: 1551
File label distribution (multiclass): {'excessive_penetration': 259, 'good_weld': 731, 'excessive_convexity': 80, 'lack_of_fusion': 158, 'crater_cracks': 75, 'burnthrough': 169, 'overlap': 79}
Split strategy: stratified
Train welds: 1240 | Val welds: 311
File split stats | train={ burnthrough: 135 (10.9%), crater_cracks: 60 (4.8%), excessive_convexity: 64 (5.2%), excessive_penetration: 207 (16.7%), good_weld: 585 (47.2%), lack_of_fusion: 126 (10.2%), overlap: 63 (5.1%) } | val={ burnthrough: 34 (10.9%), crater_cracks: 15 (4.8%), excessive_convexity: 16 (5.1%), excessive_penetration: 52 (16.7%), g

Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.7908 | Val loss: 1.7713 | Val Macro F1: 0.0466 | LR: 3.09401e-05
New best model saved (val_f1=0.0466)

Epoch 2/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.3247 | Val loss: 1.1047 | Val Macro F1: 0.1494 | LR: 4.63103e-05
New best model saved (val_f1=0.1494)

Epoch 3/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.8461 | Val loss: 0.8171 | Val Macro F1: 0.3940 | LR: 4.63103e-05
New best model saved (val_f1=0.3940)

Epoch 4/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.6415 | Val loss: 0.6428 | Val Macro F1: 0.4229 | LR: 4.63103e-05
New best model saved (val_f1=0.4229)

Epoch 5/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5252 | Val loss: 0.5705 | Val Macro F1: 0.4295 | LR: 4.63103e-05
New best model saved (val_f1=0.4295)

Epoch 6/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4513 | Val loss: 0.5146 | Val Macro F1: 0.4085 | LR: 4.63103e-05
No improvement for 1 epoch(s)

Epoch 7/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4105 | Val loss: 0.5318 | Val Macro F1: 0.4282 | LR: 4.63103e-05
No improvement for 2 epoch(s)

Epoch 8/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4074 | Val loss: 0.5695 | Val Macro F1: 0.2677 | LR: 4.63103e-05
No improvement for 3 epoch(s)

Epoch 9/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3391 | Val loss: 0.3988 | Val Macro F1: 0.4633 | LR: 4.63103e-05
New best model saved (val_f1=0.4633)

Epoch 10/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3040 | Val loss: 0.4430 | Val Macro F1: 0.3933 | LR: 4.63103e-05
No improvement for 1 epoch(s)

Epoch 11/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2917 | Val loss: 0.3839 | Val Macro F1: 0.5219 | LR: 4.63103e-05
New best model saved (val_f1=0.5219)

Epoch 12/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2735 | Val loss: 0.4049 | Val Macro F1: 0.3277 | LR: 4.63103e-05
No improvement for 1 epoch(s)

Epoch 13/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2125 | Val loss: 0.2797 | Val Macro F1: 0.5438 | LR: 4.63103e-05
New best model saved (val_f1=0.5438)

Epoch 14/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1862 | Val loss: 0.2581 | Val Macro F1: 0.6583 | LR: 4.63103e-05
New best model saved (val_f1=0.6583)

Epoch 15/15
----------------------------------------


                                                                                      

Train loss: 0.1701 | Val loss: 0.1856 | Val Macro F1: 0.6914 | LR: 4.63103e-05
New best model saved (val_f1=0.6914)

Training complete. Best epoch: 15 (val_f1=0.6914)

Best epoch: 15
Train losses: ['1.7908', '1.3247', '0.8461', '0.6415', '0.5252', '0.4513', '0.4105', '0.4074', '0.3391', '0.3040', '0.2917', '0.2735', '0.2125', '0.1862', '0.1701']
Val losses:   ['1.7713', '1.1047', '0.8171', '0.6428', '0.5705', '0.5146', '0.5318', '0.5695', '0.3988', '0.4430', '0.3839', '0.4049', '0.2797', '0.2581', '0.1856']
  RESULT: score=0.6914  epoch=15  time=72s

──────────────────────────────────────────────────────────────────────
TRIAL: audio_tune2_01_lr=4.2e-05_wd=2.3e-05_do=0.12
  Params: {'lr': 4.16554692634056e-05, 'weight_decay': 2.2848671008910937e-05, 'dropout': 0.12, 'cwp': 0.7050732298254547, 'bsp': 0.35898362989602745, 'topk_pos': 0.18}
──────────────────────────────────────────────────────────────────────
Device: cuda

Total weld files: 1551
File label distribution (multiclass): {'exc

Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.6475 | Val loss: 1.7038 | Val Macro F1: 0.0268 | LR: 2.78302e-05
New best model saved (val_f1=0.0268)

Epoch 2/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.4036 | Val loss: 1.3562 | Val Macro F1: 0.0989 | LR: 4.16555e-05
New best model saved (val_f1=0.0989)

Epoch 3/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.0802 | Val loss: 1.0927 | Val Macro F1: 0.2081 | LR: 4.16555e-05
New best model saved (val_f1=0.2081)

Epoch 4/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.8947 | Val loss: 0.9267 | Val Macro F1: 0.3654 | LR: 4.16555e-05
New best model saved (val_f1=0.3654)

Epoch 5/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.7614 | Val loss: 0.8196 | Val Macro F1: 0.3725 | LR: 4.16555e-05
New best model saved (val_f1=0.3725)

Epoch 6/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.6512 | Val loss: 0.7589 | Val Macro F1: 0.3664 | LR: 4.16555e-05
No improvement for 1 epoch(s)

Epoch 7/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5926 | Val loss: 0.7592 | Val Macro F1: 0.3940 | LR: 4.16555e-05
New best model saved (val_f1=0.3940)

Epoch 8/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5973 | Val loss: 0.7657 | Val Macro F1: 0.1441 | LR: 4.16555e-05
No improvement for 1 epoch(s)

Epoch 9/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5207 | Val loss: 0.6449 | Val Macro F1: 0.5075 | LR: 4.16555e-05
New best model saved (val_f1=0.5075)

Epoch 10/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5110 | Val loss: 0.5707 | Val Macro F1: 0.5425 | LR: 4.16555e-05
New best model saved (val_f1=0.5425)

Epoch 11/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4627 | Val loss: 0.4428 | Val Macro F1: 0.6858 | LR: 4.16555e-05
New best model saved (val_f1=0.6858)

Epoch 12/15
----------------------------------------


                                                                                      

Train loss: 0.3555 | Val loss: 0.4442 | Val Macro F1: 0.7299 | LR: 4.16555e-05
New best model saved (val_f1=0.7299)

Epoch 13/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3107 | Val loss: 0.3735 | Val Macro F1: 0.7857 | LR: 4.16555e-05
New best model saved (val_f1=0.7857)

Epoch 14/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2722 | Val loss: 0.3167 | Val Macro F1: 0.7426 | LR: 4.16555e-05
No improvement for 1 epoch(s)

Epoch 15/15
----------------------------------------


                                                                                      

Train loss: 0.2480 | Val loss: 0.3364 | Val Macro F1: 0.8144 | LR: 4.16555e-05
New best model saved (val_f1=0.8144)

Training complete. Best epoch: 15 (val_f1=0.8144)

Best epoch: 15
Train losses: ['1.6475', '1.4036', '1.0802', '0.8947', '0.7614', '0.6512', '0.5926', '0.5973', '0.5207', '0.5110', '0.4627', '0.3555', '0.3107', '0.2722', '0.2480']
Val losses:   ['1.7038', '1.3562', '1.0927', '0.9267', '0.8196', '0.7589', '0.7592', '0.7657', '0.6449', '0.5707', '0.4428', '0.4442', '0.3735', '0.3167', '0.3364']
  RESULT: score=0.8144  epoch=15  time=73s

──────────────────────────────────────────────────────────────────────
TRIAL: audio_tune2_02_lr=4.9e-05_wd=2.4e-05_do=0.12
  Params: {'lr': 4.913087318577617e-05, 'weight_decay': 2.3837698735305468e-05, 'dropout': 0.12, 'cwp': 0.6882610751875031, 'bsp': 0.4292614012226457, 'topk_pos': 0.15}
──────────────────────────────────────────────────────────────────────
Device: cuda

Total weld files: 1551
File label distribution (multiclass): {'exc

Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.7470 | Val loss: 1.7527 | Val Macro F1: 0.0498 | LR: 3.28245e-05
New best model saved (val_f1=0.0498)

Epoch 2/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.3842 | Val loss: 1.1988 | Val Macro F1: 0.1897 | LR: 4.91309e-05
New best model saved (val_f1=0.1897)

Epoch 3/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.9464 | Val loss: 0.9087 | Val Macro F1: 0.2987 | LR: 4.91309e-05
New best model saved (val_f1=0.2987)

Epoch 4/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.7199 | Val loss: 0.7650 | Val Macro F1: 0.3275 | LR: 4.91309e-05
New best model saved (val_f1=0.3275)

Epoch 5/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.6312 | Val loss: 0.7457 | Val Macro F1: 0.3493 | LR: 4.91309e-05
New best model saved (val_f1=0.3493)

Epoch 6/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5672 | Val loss: 0.7126 | Val Macro F1: 0.3932 | LR: 4.91309e-05
New best model saved (val_f1=0.3932)

Epoch 7/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5251 | Val loss: 0.7564 | Val Macro F1: 0.4968 | LR: 4.91309e-05
New best model saved (val_f1=0.4968)

Epoch 8/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4964 | Val loss: 0.7030 | Val Macro F1: 0.5326 | LR: 4.91309e-05
New best model saved (val_f1=0.5326)

Epoch 9/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4567 | Val loss: 0.6679 | Val Macro F1: 0.5176 | LR: 4.91309e-05
No improvement for 1 epoch(s)

Epoch 10/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4307 | Val loss: 0.4589 | Val Macro F1: 0.5357 | LR: 4.91309e-05
New best model saved (val_f1=0.5357)

Epoch 11/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4186 | Val loss: 0.5743 | Val Macro F1: 0.6171 | LR: 4.91309e-05
New best model saved (val_f1=0.6171)

Epoch 12/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3885 | Val loss: 0.5710 | Val Macro F1: 0.5710 | LR: 4.91309e-05
No improvement for 1 epoch(s)

Epoch 13/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3318 | Val loss: 0.4812 | Val Macro F1: 0.6028 | LR: 4.91309e-05
No improvement for 2 epoch(s)

Epoch 14/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3053 | Val loss: 0.3458 | Val Macro F1: 0.6837 | LR: 4.91309e-05
New best model saved (val_f1=0.6837)

Epoch 15/15
----------------------------------------


                                                                                      

Train loss: 0.2737 | Val loss: 0.5297 | Val Macro F1: 0.5720 | LR: 4.91309e-05
No improvement for 1 epoch(s)

Training complete. Best epoch: 14 (val_f1=0.6837)

Best epoch: 14
Train losses: ['1.7470', '1.3842', '0.9464', '0.7199', '0.6312', '0.5672', '0.5251', '0.4964', '0.4567', '0.4307', '0.4186', '0.3885', '0.3318', '0.3053', '0.2737']
Val losses:   ['1.7527', '1.1988', '0.9087', '0.7650', '0.7457', '0.7126', '0.7564', '0.7030', '0.6679', '0.4589', '0.5743', '0.5710', '0.4812', '0.3458', '0.5297']
  RESULT: score=0.6837  epoch=14  time=73s

──────────────────────────────────────────────────────────────────────
TRIAL: audio_tune2_03_lr=6.2e-05_wd=3.9e-05_do=0.08
  Params: {'lr': 6.188888545052817e-05, 'weight_decay': 3.864804215007592e-05, 'dropout': 0.08, 'cwp': 0.6228414801450259, 'bsp': 0.4160567895911641, 'topk_pos': 0.18}
──────────────────────────────────────────────────────────────────────
Device: cuda

Total weld files: 1551
File label distribution (multiclass): {'excessive_p

Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.7707 | Val loss: 1.7225 | Val Macro F1: 0.1621 | LR: 4.13482e-05
New best model saved (val_f1=0.1621)

Epoch 2/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.4513 | Val loss: 1.2493 | Val Macro F1: 0.1893 | LR: 6.18889e-05
New best model saved (val_f1=0.1893)

Epoch 3/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.0455 | Val loss: 1.0331 | Val Macro F1: 0.3693 | LR: 6.18889e-05
New best model saved (val_f1=0.3693)

Epoch 4/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.8607 | Val loss: 0.9726 | Val Macro F1: 0.3577 | LR: 6.18889e-05
No improvement for 1 epoch(s)

Epoch 5/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.7749 | Val loss: 0.9418 | Val Macro F1: 0.3756 | LR: 6.18889e-05
New best model saved (val_f1=0.3756)

Epoch 6/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.7547 | Val loss: 0.9547 | Val Macro F1: 0.3244 | LR: 6.18889e-05
No improvement for 1 epoch(s)

Epoch 7/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.6840 | Val loss: 0.9417 | Val Macro F1: 0.4313 | LR: 6.18889e-05
New best model saved (val_f1=0.4313)

Epoch 8/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.7073 | Val loss: 0.8993 | Val Macro F1: 0.4406 | LR: 6.18889e-05
New best model saved (val_f1=0.4406)

Epoch 9/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.6938 | Val loss: 0.8968 | Val Macro F1: 0.5032 | LR: 6.18889e-05
New best model saved (val_f1=0.5032)

Epoch 10/15
----------------------------------------


                                                                                      

Train loss: 0.6131 | Val loss: 0.6293 | Val Macro F1: 0.5281 | LR: 6.18889e-05
New best model saved (val_f1=0.5281)

Epoch 11/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5678 | Val loss: 0.6333 | Val Macro F1: 0.5004 | LR: 6.18889e-05
No improvement for 1 epoch(s)

Epoch 12/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5170 | Val loss: 0.9812 | Val Macro F1: 0.3706 | LR: 6.18889e-05
No improvement for 2 epoch(s)

Epoch 13/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5364 | Val loss: 0.5569 | Val Macro F1: 0.6495 | LR: 6.18889e-05
New best model saved (val_f1=0.6495)

Epoch 14/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5555 | Val loss: 0.6357 | Val Macro F1: 0.7403 | LR: 6.18889e-05
New best model saved (val_f1=0.7403)

Epoch 15/15
----------------------------------------


                                                                                      

Train loss: 0.5337 | Val loss: 0.7571 | Val Macro F1: 0.4644 | LR: 6.18889e-05
No improvement for 1 epoch(s)

Training complete. Best epoch: 14 (val_f1=0.7403)

Best epoch: 14
Train losses: ['1.7707', '1.4513', '1.0455', '0.8607', '0.7749', '0.7547', '0.6840', '0.7073', '0.6938', '0.6131', '0.5678', '0.5170', '0.5364', '0.5555', '0.5337']
Val losses:   ['1.7225', '1.2493', '1.0331', '0.9726', '0.9418', '0.9547', '0.9417', '0.8993', '0.8968', '0.6293', '0.6333', '0.9812', '0.5569', '0.6357', '0.7571']
  RESULT: score=0.7403  epoch=14  time=73s

──────────────────────────────────────────────────────────────────────
TRIAL: audio_tune2_04_lr=4.4e-05_wd=4.2e-05_do=0.12
  Params: {'lr': 4.426485378973789e-05, 'weight_decay': 4.2448059869261876e-05, 'dropout': 0.12, 'cwp': 0.701606628046714, 'bsp': 0.358780221990525, 'topk_pos': 0.15}
──────────────────────────────────────────────────────────────────────
Device: cuda

Total weld files: 1551
File label distribution (multiclass): {'excessive_pe

Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.7038 | Val loss: 1.8607 | Val Macro F1: 0.0267 | LR: 2.95735e-05
New best model saved (val_f1=0.0267)

Epoch 2/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.5241 | Val loss: 1.4401 | Val Macro F1: 0.0343 | LR: 4.42649e-05
New best model saved (val_f1=0.0343)

Epoch 3/15
----------------------------------------


                                                                                     

Train loss: 1.1995 | Val loss: 1.1887 | Val Macro F1: 0.1791 | LR: 4.42649e-05
New best model saved (val_f1=0.1791)

Epoch 4/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.0432 | Val loss: 1.0480 | Val Macro F1: 0.2629 | LR: 4.42649e-05
New best model saved (val_f1=0.2629)

Epoch 5/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.9107 | Val loss: 0.9445 | Val Macro F1: 0.3184 | LR: 4.42649e-05
New best model saved (val_f1=0.3184)

Epoch 6/15
----------------------------------------


                                                                                      

Train loss: 0.7929 | Val loss: 0.8845 | Val Macro F1: 0.3456 | LR: 4.42649e-05
New best model saved (val_f1=0.3456)

Epoch 7/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.7421 | Val loss: 0.8948 | Val Macro F1: 0.4256 | LR: 4.42649e-05
New best model saved (val_f1=0.4256)

Epoch 8/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.7180 | Val loss: 0.7766 | Val Macro F1: 0.4436 | LR: 4.42649e-05
New best model saved (val_f1=0.4436)

Epoch 9/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5980 | Val loss: 0.6614 | Val Macro F1: 0.4883 | LR: 4.42649e-05
New best model saved (val_f1=0.4883)

Epoch 10/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5782 | Val loss: 0.5888 | Val Macro F1: 0.4969 | LR: 4.42649e-05
New best model saved (val_f1=0.4969)

Epoch 11/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4840 | Val loss: 0.4987 | Val Macro F1: 0.6317 | LR: 4.42649e-05
New best model saved (val_f1=0.6317)

Epoch 12/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4200 | Val loss: 0.5578 | Val Macro F1: 0.6356 | LR: 4.42649e-05
New best model saved (val_f1=0.6356)

Epoch 13/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4179 | Val loss: 0.4749 | Val Macro F1: 0.6667 | LR: 4.42649e-05
New best model saved (val_f1=0.6667)

Epoch 14/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4155 | Val loss: 0.4615 | Val Macro F1: 0.7586 | LR: 4.42649e-05
New best model saved (val_f1=0.7586)

Epoch 15/15
----------------------------------------


                                                                                      

Train loss: 0.4134 | Val loss: 0.5124 | Val Macro F1: 0.6213 | LR: 4.42649e-05
No improvement for 1 epoch(s)

Training complete. Best epoch: 14 (val_f1=0.7586)

Best epoch: 14
Train losses: ['1.7038', '1.5241', '1.1995', '1.0432', '0.9107', '0.7929', '0.7421', '0.7180', '0.5980', '0.5782', '0.4840', '0.4200', '0.4179', '0.4155', '0.4134']
Val losses:   ['1.8607', '1.4401', '1.1887', '1.0480', '0.9445', '0.8845', '0.8948', '0.7766', '0.6614', '0.5888', '0.4987', '0.5578', '0.4749', '0.4615', '0.5124']
  RESULT: score=0.7586  epoch=14  time=73s

──────────────────────────────────────────────────────────────────────
TRIAL: audio_tune2_05_lr=4.6e-05_wd=2.0e-05_do=0.08
  Params: {'lr': 4.6266810654077917e-05, 'weight_decay': 2.0166352328351125e-05, 'dropout': 0.08, 'cwp': 0.6637808988285412, 'bsp': 0.3862531453293338, 'topk_pos': 0.18}
──────────────────────────────────────────────────────────────────────
Device: cuda

Total weld files: 1551
File label distribution (multiclass): {'excessive

Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.8655 | Val loss: 1.7413 | Val Macro F1: 0.0524 | LR: 3.0911e-05
New best model saved (val_f1=0.0524)

Epoch 2/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.3815 | Val loss: 1.2323 | Val Macro F1: 0.1913 | LR: 4.62668e-05
New best model saved (val_f1=0.1913)

Epoch 3/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.9987 | Val loss: 0.9999 | Val Macro F1: 0.2494 | LR: 4.62668e-05
New best model saved (val_f1=0.2494)

Epoch 4/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.8550 | Val loss: 0.8359 | Val Macro F1: 0.4019 | LR: 4.62668e-05
New best model saved (val_f1=0.4019)

Epoch 5/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.7541 | Val loss: 0.7475 | Val Macro F1: 0.4242 | LR: 4.62668e-05
New best model saved (val_f1=0.4242)

Epoch 6/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.6816 | Val loss: 0.6707 | Val Macro F1: 0.4846 | LR: 4.62668e-05
New best model saved (val_f1=0.4846)

Epoch 7/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.6088 | Val loss: 0.5944 | Val Macro F1: 0.5019 | LR: 4.62668e-05
New best model saved (val_f1=0.5019)

Epoch 8/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5667 | Val loss: 0.6593 | Val Macro F1: 0.4968 | LR: 4.62668e-05
No improvement for 1 epoch(s)

Epoch 9/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5084 | Val loss: 0.6191 | Val Macro F1: 0.5097 | LR: 4.62668e-05
New best model saved (val_f1=0.5097)

Epoch 10/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5107 | Val loss: 0.5518 | Val Macro F1: 0.4708 | LR: 4.62668e-05
No improvement for 1 epoch(s)

Epoch 11/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4952 | Val loss: 0.4674 | Val Macro F1: 0.6360 | LR: 4.62668e-05
New best model saved (val_f1=0.6360)

Epoch 12/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4109 | Val loss: 0.4888 | Val Macro F1: 0.5152 | LR: 4.62668e-05
No improvement for 1 epoch(s)

Epoch 13/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4001 | Val loss: 0.3481 | Val Macro F1: 0.6599 | LR: 4.62668e-05
New best model saved (val_f1=0.6599)

Epoch 14/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3531 | Val loss: 0.4188 | Val Macro F1: 0.7050 | LR: 4.62668e-05
New best model saved (val_f1=0.7050)

Epoch 15/15
----------------------------------------


                                                                                      

Train loss: 0.3620 | Val loss: 0.5667 | Val Macro F1: 0.4884 | LR: 4.62668e-05
No improvement for 1 epoch(s)

Training complete. Best epoch: 14 (val_f1=0.7050)

Best epoch: 14
Train losses: ['1.8655', '1.3815', '0.9987', '0.8550', '0.7541', '0.6816', '0.6088', '0.5667', '0.5084', '0.5107', '0.4952', '0.4109', '0.4001', '0.3531', '0.3620']
Val losses:   ['1.7413', '1.2323', '0.9999', '0.8359', '0.7475', '0.6707', '0.5944', '0.6593', '0.6191', '0.5518', '0.4674', '0.4888', '0.3481', '0.4188', '0.5667']
  RESULT: score=0.7050  epoch=14  time=74s

AUDIO TUNING RESULTS — ALL ROUNDS (sorted by score):
  0.8144  ep=15  audio_tune2_01_lr=4.2e-05_wd=2.3e-05_do=0.12  params={'lr': 4.16554692634056e-05, 'weight_decay': 2.2848671008910937e-05, 'dropout': 0.12, 'cwp': 0.7050732298254547, 'bsp': 0.35898362989602745, 'topk_pos': 0.18}
  0.7586  ep=14  audio_tune2_04_lr=4.4e-05_wd=4.2e-05_do=0.12  params={'lr': 4.426485378973789e-05, 'weight_decay': 4.2448059869261876e-05, 'dropout': 0.12, 'cwp': 0.70

### 0b) Video tuning (round 2 — narrowed search)

Round 1 peaked at epoch 10 then oscillated wildly → LR too high after warmup, too little regularisation.
Round 2 targets: **lower LR, more dropout, shorter warmup, more LR-schedule patience**.

In [None]:
# ── Video tuning — round 2: narrowed + LR schedule params ──
# Random search over the video-window hyper-parameters with narrower ranges
# than round 1, plus two LR-schedule knobs (warmup_ratio, plateau_patience).
# Successful trials are merged into tuning_results["video"], then the combined
# list is sorted best-first and printed.
_rnd.seed(777)  # fixed seed; presumably the _sample_* helpers draw from _rnd — confirm
N_VIDEO_TRIALS_R2 = 10  # more trials for better coverage

for i in range(N_VIDEO_TRIALS_R2):
    # Keep the sampling order below unchanged: if the helpers share one seeded
    # RNG, reordering these calls would change every trial that follows.
    trial_params = {
        "lr":               _sample_uniform(3e-5, 1e-4, log=True),    # lower range — peak was at warmup end
        "weight_decay":     _sample_uniform(5e-5, 2e-4, log=True),    # slightly higher reg
        "dropout":          _sample_choice([0.15, 0.18, 0.20, 0.22, 0.25]),  # more dropout
        "cwp":              _sample_uniform(0.7, 1.2),                 # wider: try lighter class weights too
        "bsp":              _sample_uniform(0.30, 0.55),               # stronger balanced sampling
        "warmup_ratio":     _sample_choice([0.03, 0.05, 0.07]),       # shorter warmup (was 0.10)
        "plateau_patience": _sample_choice([6, 8, 10]),                # slower LR decay (was 4)
    }
    # The "video_tune2_" prefix keeps round-2 checkpoints and result names
    # distinct from other rounds.
    trial_name = (
        f"video_tune2_{i:02d}_lr={trial_params['lr']:.1e}"
        f"_wd={trial_params['weight_decay']:.1e}"
        f"_do={trial_params['dropout']}"
    )
    # Dotted keys address nested config entries; each trial writes to its own
    # checkpoint directory so trials never overwrite each other.
    overrides = {
        "video_window.training.epochs":                 TUNING_EPOCHS,
        "video_window.training.patience":               TUNING_PATIENCE,
        "video_window.training.lr":                     trial_params["lr"],
        "video_window.training.weight_decay":           trial_params["weight_decay"],
        "video_window.model.dropout":                   trial_params["dropout"],
        "video_window.training.class_weight_power":     trial_params["cwp"],
        "video_window.training.balanced_sampler_power": trial_params["bsp"],
        "video_window.training.lr_schedule.warmup_ratio":       trial_params["warmup_ratio"],
        "video_window.training.lr_schedule.plateau_patience":   trial_params["plateau_patience"],
        "video_window.training.checkpoint_dir":         f"checkpoints/_tuning/{trial_name}",
        "video_window.training.checkpoint_path":        f"checkpoints/_tuning/{trial_name}/window_classifier.pth",
    }
    result = run_trial("video.run_video", overrides, trial_params, trial_name)
    if result:
        # Re-running this cell re-seeds the RNG, so the same trial names (and
        # checkpoint paths) can be produced again. Drop any earlier entry with
        # the same name so the leaderboard holds one row per trial instead of
        # accumulating duplicates. Slice assignment keeps the same list object.
        tuning_results["video"][:] = [
            prev for prev in tuning_results["video"] if prev["name"] != result["name"]
        ]
        tuning_results["video"].append(result)

# Sort all rounds together (highest score first)
tuning_results["video"].sort(key=lambda r: r["score"], reverse=True)
print(f"\n{'='*70}")
print("VIDEO TUNING RESULTS — ALL ROUNDS (sorted by score):")
print(f"{'='*70}")
for r in tuning_results["video"]:
    print(f"  {r['score']:.4f}  ep={r['epoch']}  {r['name']}  params={r['params']}")

if tuning_results["video"]:
    # The list is sorted descending, so the first entry is the best trial.
    best_video = tuning_results["video"][0]
    print(f"\nBEST VIDEO: {best_video['score']:.4f} — {best_video['params']}")
else:
    print("\nNo successful video trials")


──────────────────────────────────────────────────────────────────────
TRIAL: video_tune_00_lr=7.3e-05_wd=3.5e-05_do=0.15
  Params: {'lr': 7.285008052564927e-05, 'weight_decay': 3.451931166808711e-05, 'dropout': 0.15, 'cwp': 0.929966143759415, 'bsp': 0.40962587911858944}
──────────────────────────────────────────────────────────────────────
Device: cuda

Discovering video files in /data1/malto/therness/data/Hackathon...
       Scanning good_weld...
       Scanning defect-weld...
Found 1551 videos
  Code 00 (class 0): 731 videos
  Code 01 (class 1): 259 videos
  Code 02 (class 2): 169 videos
  Code 06 (class 3): 79 videos
  Code 07 (class 4): 158 videos
  Code 08 (class 5): 80 videos
  Code 11 (class 6): 75 videos
Train: 1268 videos | Val: 283 videos
Using pre-extracted frames from data/video_frames
Train: 1268 | Val: 283 | num_frames=12 [JPEG]
Balanced sampler: enabled (power=0.40962587911858944, videos/epoch=1268)
Model parameters: 190,759
Class weights: ['0.173', '0.508', '0.744', '

Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9489 | Train F1: 0.0454 | Val loss: 1.9983 | Val Macro F1: 0.0789 | Val Binary F1: 0.7906 | Hackathon: 0.5059 | LR: 4.85667e-05
New best model (hackathon_score=0.5059)

Epoch 2/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.8973 | Train F1: 0.0646 | Val loss: 1.9604 | Val Macro F1: 0.1418 | Val Binary F1: 0.7906 | Hackathon: 0.5311 | LR: 7.28501e-05
New best model (hackathon_score=0.5311)

Epoch 3/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.8275 | Train F1: 0.1344 | Val loss: 1.8568 | Val Macro F1: 0.1122 | Val Binary F1: 0.7906 | Hackathon: 0.5193 | LR: 7.28501e-05
No improvement for 1 epoch(s)

Epoch 4/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.7400 | Train F1: 0.1746 | Val loss: 1.7732 | Val Macro F1: 0.1365 | Val Binary F1: 0.7906 | Hackathon: 0.5290 | LR: 7.28501e-05
No improvement for 2 epoch(s)

Epoch 5/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6707 | Train F1: 0.2073 | Val loss: 1.6941 | Val Macro F1: 0.2363 | Val Binary F1: 0.7906 | Hackathon: 0.5689 | LR: 7.28501e-05
New best model (hackathon_score=0.5689)

Epoch 6/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6220 | Train F1: 0.1953 | Val loss: 1.5983 | Val Macro F1: 0.3911 | Val Binary F1: 0.7906 | Hackathon: 0.6308 | LR: 7.28501e-05
New best model (hackathon_score=0.6308)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5769 | Train F1: 0.2374 | Val loss: 1.5997 | Val Macro F1: 0.2494 | Val Binary F1: 0.7906 | Hackathon: 0.5741 | LR: 7.28501e-05
No improvement for 1 epoch(s)

Epoch 8/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5738 | Train F1: 0.2838 | Val loss: 1.5704 | Val Macro F1: 0.4361 | Val Binary F1: 0.7906 | Hackathon: 0.6488 | LR: 7.28501e-05
New best model (hackathon_score=0.6488)

Epoch 9/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5033 | Train F1: 0.3305 | Val loss: 1.5708 | Val Macro F1: 0.2740 | Val Binary F1: 0.7906 | Hackathon: 0.5840 | LR: 7.28501e-05
No improvement for 1 epoch(s)

Epoch 10/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.4397 | Train F1: 0.3098 | Val loss: 1.5018 | Val Macro F1: 0.4182 | Val Binary F1: 0.7906 | Hackathon: 0.6416 | LR: 7.28501e-05
No improvement for 2 epoch(s)

Epoch 11/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3894 | Train F1: 0.3854 | Val loss: 1.4861 | Val Macro F1: 0.3562 | Val Binary F1: 0.7906 | Hackathon: 0.6168 | LR: 7.28501e-05
No improvement for 3 epoch(s)

Epoch 12/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3679 | Train F1: 0.4318 | Val loss: 1.4613 | Val Macro F1: 0.5183 | Val Binary F1: 0.7906 | Hackathon: 0.6817 | LR: 7.28501e-05
New best model (hackathon_score=0.6817)

Epoch 13/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3356 | Train F1: 0.4662 | Val loss: 1.4837 | Val Macro F1: 0.4866 | Val Binary F1: 0.7906 | Hackathon: 0.6690 | LR: 7.28501e-05
No improvement for 1 epoch(s)

Epoch 14/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3221 | Train F1: 0.4649 | Val loss: 1.4920 | Val Macro F1: 0.5291 | Val Binary F1: 0.7906 | Hackathon: 0.6860 | LR: 7.28501e-05
New best model (hackathon_score=0.6860)

Epoch 15/15
----------------------------------------


                                                                    

Train loss: 1.2980 | Train F1: 0.5001 | Val loss: 1.4174 | Val Macro F1: 0.5175 | Val Binary F1: 0.7906 | Hackathon: 0.6814 | LR: 7.28501e-05
No improvement for 1 epoch(s)

Training complete. Best epoch: 14 (score=0.6860)
  RESULT: score=0.6860  epoch=14  time=45s

──────────────────────────────────────────────────────────────────────
TRIAL: video_tune_01_lr=1.4e-04_wd=5.5e-05_do=0.18
  Params: {'lr': 0.00013635134611198366, 'weight_decay': 5.522271961852559e-05, 'dropout': 0.18, 'cwp': 0.9496593095529039, 'bsp': 0.4130216547323422}
──────────────────────────────────────────────────────────────────────
Device: cuda

Discovering video files in /data1/malto/therness/data/Hackathon...
       Scanning good_weld...
       Scanning defect-weld...
Found 1551 videos
  Code 00 (class 0): 731 videos
  Code 01 (class 1): 259 videos
  Code 02 (class 2): 169 videos
  Code 06 (class 3): 79 videos
  Code 07 (class 4): 158 videos
  Code 08 (class 5): 80 videos
  Code 11 (class 6): 75 videos
Train: 126

Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9554 | Train F1: 0.0470 | Val loss: 1.9839 | Val Macro F1: 0.1230 | Val Binary F1: 0.7906 | Hackathon: 0.5235 | LR: 9.09009e-05
New best model (hackathon_score=0.5235)

Epoch 2/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.8779 | Train F1: 0.0865 | Val loss: 1.9250 | Val Macro F1: 0.1163 | Val Binary F1: 0.7906 | Hackathon: 0.5209 | LR: 0.000136351
No improvement for 1 epoch(s)

Epoch 3/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.7843 | Train F1: 0.1678 | Val loss: 1.8062 | Val Macro F1: 0.1580 | Val Binary F1: 0.7906 | Hackathon: 0.5376 | LR: 0.000136351
New best model (hackathon_score=0.5376)

Epoch 4/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6876 | Train F1: 0.1954 | Val loss: 1.7294 | Val Macro F1: 0.2384 | Val Binary F1: 0.7906 | Hackathon: 0.5697 | LR: 0.000136351
New best model (hackathon_score=0.5697)

Epoch 5/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6320 | Train F1: 0.2284 | Val loss: 1.6624 | Val Macro F1: 0.2445 | Val Binary F1: 0.7906 | Hackathon: 0.5722 | LR: 0.000136351
New best model (hackathon_score=0.5722)

Epoch 6/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5820 | Train F1: 0.2258 | Val loss: 1.6208 | Val Macro F1: 0.4152 | Val Binary F1: 0.7906 | Hackathon: 0.6404 | LR: 0.000136351
New best model (hackathon_score=0.6404)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5375 | Train F1: 0.2850 | Val loss: 1.6058 | Val Macro F1: 0.2768 | Val Binary F1: 0.7906 | Hackathon: 0.5851 | LR: 0.000136351
No improvement for 1 epoch(s)

Epoch 8/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5176 | Train F1: 0.3262 | Val loss: 1.6131 | Val Macro F1: 0.4603 | Val Binary F1: 0.7880 | Hackathon: 0.6569 | LR: 0.000136351
New best model (hackathon_score=0.6569)

Epoch 9/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3883 | Train F1: 0.4116 | Val loss: 1.5487 | Val Macro F1: 0.5132 | Val Binary F1: 0.7906 | Hackathon: 0.6796 | LR: 0.000136351
New best model (hackathon_score=0.6796)

Epoch 10/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3298 | Train F1: 0.4317 | Val loss: 1.5209 | Val Macro F1: 0.2549 | Val Binary F1: 0.7906 | Hackathon: 0.5763 | LR: 0.000136351
No improvement for 1 epoch(s)

Epoch 11/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3080 | Train F1: 0.4246 | Val loss: 1.6070 | Val Macro F1: 0.2815 | Val Binary F1: 0.7906 | Hackathon: 0.5870 | LR: 0.000136351
No improvement for 2 epoch(s)

Epoch 12/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3245 | Train F1: 0.4319 | Val loss: 1.4904 | Val Macro F1: 0.4970 | Val Binary F1: 0.7974 | Hackathon: 0.6773 | LR: 0.000136351
No improvement for 3 epoch(s)

Epoch 13/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2901 | Train F1: 0.4722 | Val loss: 1.5129 | Val Macro F1: 0.4400 | Val Binary F1: 0.8696 | Hackathon: 0.6978 | LR: 0.000136351
New best model (hackathon_score=0.6978)

Epoch 14/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2258 | Train F1: 0.4896 | Val loss: 1.5541 | Val Macro F1: 0.2908 | Val Binary F1: 0.7906 | Hackathon: 0.5907 | LR: 0.000136351
No improvement for 1 epoch(s)

Epoch 15/15
----------------------------------------


                                                                    

Train loss: 1.1875 | Train F1: 0.4864 | Val loss: 1.5474 | Val Macro F1: 0.1596 | Val Binary F1: 0.7906 | Hackathon: 0.5382 | LR: 0.000136351
No improvement for 2 epoch(s)

Training complete. Best epoch: 13 (score=0.6978)
  RESULT: score=0.6978  epoch=13  time=45s

──────────────────────────────────────────────────────────────────────
TRIAL: video_tune_02_lr=7.9e-05_wd=5.2e-05_do=0.14
  Params: {'lr': 7.905788845643617e-05, 'weight_decay': 5.1620901294495306e-05, 'dropout': 0.14, 'cwp': 1.060506936710693, 'bsp': 0.24094421118809012}
──────────────────────────────────────────────────────────────────────
Device: cuda

Discovering video files in /data1/malto/therness/data/Hackathon...
       Scanning good_weld...
       Scanning defect-weld...
Found 1551 videos
  Code 00 (class 0): 731 videos
  Code 01 (class 1): 259 videos
  Code 02 (class 2): 169 videos
  Code 06 (class 3): 79 videos
  Code 07 (class 4): 158 videos
  Code 08 (class 5): 80 videos
  Code 11 (class 6): 75 videos
Train: 126

Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9618 | Train F1: 0.0300 | Val loss: 1.9996 | Val Macro F1: 0.0274 | Val Binary F1: 0.7906 | Hackathon: 0.4853 | LR: 5.27053e-05
New best model (hackathon_score=0.4853)

Epoch 2/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9156 | Train F1: 0.0422 | Val loss: 1.9724 | Val Macro F1: 0.1343 | Val Binary F1: 0.7906 | Hackathon: 0.5281 | LR: 7.90579e-05
New best model (hackathon_score=0.5281)

Epoch 3/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.8536 | Train F1: 0.0959 | Val loss: 1.8949 | Val Macro F1: 0.1147 | Val Binary F1: 0.7906 | Hackathon: 0.5202 | LR: 7.90579e-05
No improvement for 1 epoch(s)

Epoch 4/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.7802 | Train F1: 0.1225 | Val loss: 1.8163 | Val Macro F1: 0.1376 | Val Binary F1: 0.7906 | Hackathon: 0.5294 | LR: 7.90579e-05
New best model (hackathon_score=0.5294)

Epoch 5/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.7325 | Train F1: 0.1688 | Val loss: 1.7457 | Val Macro F1: 0.1883 | Val Binary F1: 0.7906 | Hackathon: 0.5497 | LR: 7.90579e-05
New best model (hackathon_score=0.5497)

Epoch 6/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6926 | Train F1: 0.1777 | Val loss: 1.6791 | Val Macro F1: 0.2177 | Val Binary F1: 0.7906 | Hackathon: 0.5614 | LR: 7.90579e-05
New best model (hackathon_score=0.5614)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6279 | Train F1: 0.2308 | Val loss: 1.6237 | Val Macro F1: 0.3639 | Val Binary F1: 0.7906 | Hackathon: 0.6199 | LR: 7.90579e-05
New best model (hackathon_score=0.6199)

Epoch 8/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6209 | Train F1: 0.2039 | Val loss: 1.6049 | Val Macro F1: 0.2449 | Val Binary F1: 0.7906 | Hackathon: 0.5723 | LR: 7.90579e-05
No improvement for 1 epoch(s)

Epoch 9/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5915 | Train F1: 0.2451 | Val loss: 1.5514 | Val Macro F1: 0.3190 | Val Binary F1: 0.7906 | Hackathon: 0.6020 | LR: 7.90579e-05
No improvement for 2 epoch(s)

Epoch 10/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5727 | Train F1: 0.2489 | Val loss: 1.5514 | Val Macro F1: 0.4521 | Val Binary F1: 0.7906 | Hackathon: 0.6552 | LR: 7.90579e-05
New best model (hackathon_score=0.6552)

Epoch 11/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.4914 | Train F1: 0.3175 | Val loss: 1.5706 | Val Macro F1: 0.3229 | Val Binary F1: 0.7906 | Hackathon: 0.6035 | LR: 7.90579e-05
No improvement for 1 epoch(s)

Epoch 12/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.4953 | Train F1: 0.3533 | Val loss: 1.5123 | Val Macro F1: 0.4703 | Val Binary F1: 0.7906 | Hackathon: 0.6625 | LR: 7.90579e-05
New best model (hackathon_score=0.6625)

Epoch 13/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.4886 | Train F1: 0.3603 | Val loss: 1.5475 | Val Macro F1: 0.4562 | Val Binary F1: 0.7906 | Hackathon: 0.6568 | LR: 7.90579e-05
No improvement for 1 epoch(s)

Epoch 14/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.4387 | Train F1: 0.3693 | Val loss: 1.3806 | Val Macro F1: 0.4421 | Val Binary F1: 0.7906 | Hackathon: 0.6512 | LR: 7.90579e-05
No improvement for 2 epoch(s)

Epoch 15/15
----------------------------------------


                                                                    

Train loss: 1.3743 | Train F1: 0.4199 | Val loss: 1.5493 | Val Macro F1: 0.5138 | Val Binary F1: 0.7906 | Hackathon: 0.6799 | LR: 7.90579e-05
New best model (hackathon_score=0.6799)

Training complete. Best epoch: 15 (score=0.6799)
  RESULT: score=0.6799  epoch=15  time=46s

──────────────────────────────────────────────────────────────────────
TRIAL: video_tune_03_lr=1.4e-04_wd=1.0e-04_do=0.18
  Params: {'lr': 0.00013961968004333973, 'weight_decay': 0.00010419240159677894, 'dropout': 0.18, 'cwp': 0.9632784330916634, 'bsp': 0.20164876005697804}
──────────────────────────────────────────────────────────────────────
Device: cuda

Discovering video files in /data1/malto/therness/data/Hackathon...
       Scanning good_weld...
       Scanning defect-weld...
Found 1551 videos
  Code 00 (class 0): 731 videos
  Code 01 (class 1): 259 videos
  Code 02 (class 2): 169 videos
  Code 06 (class 3): 79 videos
  Code 07 (class 4): 158 videos
  Code 08 (class 5): 80 videos
  Code 11 (class 6): 75 video

Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9889 | Train F1: 0.0492 | Val loss: 1.9922 | Val Macro F1: 0.0350 | Val Binary F1: 0.7906 | Hackathon: 0.4884 | LR: 9.30798e-05
New best model (hackathon_score=0.4884)

Epoch 2/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9130 | Train F1: 0.0633 | Val loss: 1.9447 | Val Macro F1: 0.1433 | Val Binary F1: 0.7906 | Hackathon: 0.5317 | LR: 0.00013962
New best model (hackathon_score=0.5317)

Epoch 3/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.8859 | Train F1: 0.1149 | Val loss: 1.8847 | Val Macro F1: 0.1105 | Val Binary F1: 0.7906 | Hackathon: 0.5186 | LR: 0.00013962
No improvement for 1 epoch(s)

Epoch 4/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.8080 | Train F1: 0.1511 | Val loss: 1.8149 | Val Macro F1: 0.2411 | Val Binary F1: 0.7906 | Hackathon: 0.5708 | LR: 0.00013962
New best model (hackathon_score=0.5708)

Epoch 5/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.7555 | Train F1: 0.2014 | Val loss: 1.7514 | Val Macro F1: 0.3196 | Val Binary F1: 0.7906 | Hackathon: 0.6022 | LR: 0.00013962
New best model (hackathon_score=0.6022)

Epoch 6/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.7104 | Train F1: 0.1980 | Val loss: 1.7054 | Val Macro F1: 0.3790 | Val Binary F1: 0.7906 | Hackathon: 0.6260 | LR: 0.00013962
New best model (hackathon_score=0.6260)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6663 | Train F1: 0.2403 | Val loss: 1.7058 | Val Macro F1: 0.1486 | Val Binary F1: 0.7906 | Hackathon: 0.5338 | LR: 0.00013962
No improvement for 1 epoch(s)

Epoch 8/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6460 | Train F1: 0.2517 | Val loss: 1.7058 | Val Macro F1: 0.1046 | Val Binary F1: 0.7819 | Hackathon: 0.5109 | LR: 0.00013962
No improvement for 2 epoch(s)

Epoch 9/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6203 | Train F1: 0.2651 | Val loss: 1.6766 | Val Macro F1: 0.5843 | Val Binary F1: 0.8048 | Hackathon: 0.7166 | LR: 0.00013962
New best model (hackathon_score=0.7166)

Epoch 10/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5912 | Train F1: 0.3241 | Val loss: 1.6009 | Val Macro F1: 0.3704 | Val Binary F1: 0.7557 | Hackathon: 0.6015 | LR: 0.00013962
No improvement for 1 epoch(s)

Epoch 11/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5111 | Train F1: 0.3763 | Val loss: 1.6226 | Val Macro F1: 0.5543 | Val Binary F1: 0.7720 | Hackathon: 0.6849 | LR: 0.00013962
No improvement for 2 epoch(s)

Epoch 12/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5248 | Train F1: 0.3906 | Val loss: 1.5915 | Val Macro F1: 0.4672 | Val Binary F1: 0.7800 | Hackathon: 0.6549 | LR: 0.00013962
No improvement for 3 epoch(s)

Epoch 13/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5191 | Train F1: 0.3782 | Val loss: 1.7147 | Val Macro F1: 0.1104 | Val Binary F1: 0.7906 | Hackathon: 0.5185 | LR: 0.00013962
No improvement for 4 epoch(s)

Epoch 14/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.4886 | Train F1: 0.3793 | Val loss: 1.6682 | Val Macro F1: 0.2470 | Val Binary F1: 0.7923 | Hackathon: 0.5742 | LR: 0.00013962
No improvement for 5 epoch(s)

Epoch 15/15
----------------------------------------


                                                                    

Train loss: 1.4538 | Train F1: 0.4070 | Val loss: 1.6033 | Val Macro F1: 0.3590 | Val Binary F1: 0.7906 | Hackathon: 0.6180 | LR: 0.00013962
No improvement for 6 epoch(s)

Training complete. Best epoch: 9 (score=0.7166)
  RESULT: score=0.7166  epoch=9  time=46s

──────────────────────────────────────────────────────────────────────
TRIAL: video_tune_04_lr=1.2e-04_wd=3.5e-05_do=0.1
  Params: {'lr': 0.00012229200603954495, 'weight_decay': 3.535426901910123e-05, 'dropout': 0.1, 'cwp': 0.8778293684135485, 'bsp': 0.23554376610787528}
──────────────────────────────────────────────────────────────────────
Device: cuda

Discovering video files in /data1/malto/therness/data/Hackathon...
       Scanning good_weld...
       Scanning defect-weld...
Found 1551 videos
  Code 00 (class 0): 731 videos
  Code 01 (class 1): 259 videos
  Code 02 (class 2): 169 videos
  Code 06 (class 3): 79 videos
  Code 07 (class 4): 158 videos
  Code 08 (class 5): 80 videos
  Code 11 (class 6): 75 videos
Train: 1268 vi

Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9732 | Train F1: 0.0306 | Val loss: 1.9725 | Val Macro F1: 0.1230 | Val Binary F1: 0.7906 | Hackathon: 0.5235 | LR: 8.1528e-05
New best model (hackathon_score=0.5235)

Epoch 2/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.8987 | Train F1: 0.0796 | Val loss: 1.8722 | Val Macro F1: 0.1159 | Val Binary F1: 0.7906 | Hackathon: 0.5207 | LR: 0.000122292
No improvement for 1 epoch(s)

Epoch 3/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.7552 | Train F1: 0.1544 | Val loss: 1.7099 | Val Macro F1: 0.2326 | Val Binary F1: 0.7906 | Hackathon: 0.5674 | LR: 0.000122292
New best model (hackathon_score=0.5674)

Epoch 4/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6465 | Train F1: 0.1873 | Val loss: 1.6096 | Val Macro F1: 0.2257 | Val Binary F1: 0.7906 | Hackathon: 0.5646 | LR: 0.000122292
No improvement for 1 epoch(s)

Epoch 5/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5903 | Train F1: 0.3205 | Val loss: 1.5530 | Val Macro F1: 0.3773 | Val Binary F1: 0.7805 | Hackathon: 0.6192 | LR: 0.000122292
New best model (hackathon_score=0.6192)

Epoch 6/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5606 | Train F1: 0.3667 | Val loss: 1.5739 | Val Macro F1: 0.4782 | Val Binary F1: 0.7948 | Hackathon: 0.6682 | LR: 0.000122292
New best model (hackathon_score=0.6682)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.4951 | Train F1: 0.4301 | Val loss: 1.4964 | Val Macro F1: 0.4431 | Val Binary F1: 0.7983 | Hackathon: 0.6562 | LR: 0.000122292
No improvement for 1 epoch(s)

Epoch 8/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.4155 | Train F1: 0.4860 | Val loss: 1.4902 | Val Macro F1: 0.4248 | Val Binary F1: 0.7888 | Hackathon: 0.6432 | LR: 0.000122292
No improvement for 2 epoch(s)

Epoch 9/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3748 | Train F1: 0.5012 | Val loss: 1.5537 | Val Macro F1: 0.2344 | Val Binary F1: 0.7906 | Hackathon: 0.5681 | LR: 0.000122292
No improvement for 3 epoch(s)

Epoch 10/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3471 | Train F1: 0.5447 | Val loss: 1.4415 | Val Macro F1: 0.7223 | Val Binary F1: 0.9019 | Hackathon: 0.8300 | LR: 0.000122292
New best model (hackathon_score=0.8300)

Epoch 11/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2186 | Train F1: 0.6359 | Val loss: 1.3465 | Val Macro F1: 0.5569 | Val Binary F1: 0.8894 | Hackathon: 0.7564 | LR: 0.000122292
No improvement for 1 epoch(s)

Epoch 12/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2497 | Train F1: 0.5957 | Val loss: 1.3540 | Val Macro F1: 0.3008 | Val Binary F1: 0.7991 | Hackathon: 0.5998 | LR: 0.000122292
No improvement for 2 epoch(s)

Epoch 13/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2445 | Train F1: 0.5849 | Val loss: 1.5653 | Val Macro F1: 0.1733 | Val Binary F1: 0.7906 | Hackathon: 0.5437 | LR: 0.000122292
No improvement for 3 epoch(s)

Epoch 14/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2422 | Train F1: 0.5643 | Val loss: 1.5216 | Val Macro F1: 0.2613 | Val Binary F1: 0.7906 | Hackathon: 0.5789 | LR: 0.000122292
No improvement for 4 epoch(s)

Epoch 15/15
----------------------------------------


                                                                    

Train loss: 1.1705 | Train F1: 0.6399 | Val loss: 1.5012 | Val Macro F1: 0.2634 | Val Binary F1: 0.7906 | Hackathon: 0.5797 | LR: 0.000122292
No improvement for 5 epoch(s)

Training complete. Best epoch: 10 (score=0.8300)
  RESULT: score=0.8300  epoch=10  time=46s

──────────────────────────────────────────────────────────────────────
TRIAL: video_tune_05_lr=1.3e-04_wd=3.1e-05_do=0.15
  Params: {'lr': 0.00012782225149341223, 'weight_decay': 3.104306284695298e-05, 'dropout': 0.15, 'cwp': 1.022022425704278, 'bsp': 0.26633042631539083}
──────────────────────────────────────────────────────────────────────
Device: cuda

Discovering video files in /data1/malto/therness/data/Hackathon...
       Scanning good_weld...
       Scanning defect-weld...
Found 1551 videos
  Code 00 (class 0): 731 videos
  Code 01 (class 1): 259 videos
  Code 02 (class 2): 169 videos
  Code 06 (class 3): 79 videos
  Code 07 (class 4): 158 videos
  Code 08 (class 5): 80 videos
  Code 11 (class 6): 75 videos
Train: 126

Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9442 | Train F1: 0.0309 | Val loss: 1.9668 | Val Macro F1: 0.1459 | Val Binary F1: 0.7906 | Hackathon: 0.5327 | LR: 8.52148e-05
New best model (hackathon_score=0.5327)

Epoch 2/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.8677 | Train F1: 0.1375 | Val loss: 1.8882 | Val Macro F1: 0.1099 | Val Binary F1: 0.7906 | Hackathon: 0.5183 | LR: 0.000127822
No improvement for 1 epoch(s)

Epoch 3/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.7322 | Train F1: 0.1885 | Val loss: 1.7203 | Val Macro F1: 0.1233 | Val Binary F1: 0.7906 | Hackathon: 0.5237 | LR: 0.000127822
No improvement for 2 epoch(s)

Epoch 4/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6298 | Train F1: 0.1819 | Val loss: 1.6348 | Val Macro F1: 0.3021 | Val Binary F1: 0.7906 | Hackathon: 0.5952 | LR: 0.000127822
New best model (hackathon_score=0.5952)

Epoch 5/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5770 | Train F1: 0.2630 | Val loss: 1.6199 | Val Macro F1: 0.2495 | Val Binary F1: 0.7906 | Hackathon: 0.5742 | LR: 0.000127822
No improvement for 1 epoch(s)

Epoch 6/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5349 | Train F1: 0.3128 | Val loss: 1.5741 | Val Macro F1: 0.3251 | Val Binary F1: 0.7906 | Hackathon: 0.6044 | LR: 0.000127822
New best model (hackathon_score=0.6044)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.4603 | Train F1: 0.3630 | Val loss: 1.5015 | Val Macro F1: 0.4756 | Val Binary F1: 0.7906 | Hackathon: 0.6646 | LR: 0.000127822
New best model (hackathon_score=0.6646)

Epoch 8/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3312 | Train F1: 0.4538 | Val loss: 1.5785 | Val Macro F1: 0.4382 | Val Binary F1: 0.7906 | Hackathon: 0.6497 | LR: 0.000127822
No improvement for 1 epoch(s)

Epoch 9/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3182 | Train F1: 0.4433 | Val loss: 1.4111 | Val Macro F1: 0.3653 | Val Binary F1: 0.8186 | Hackathon: 0.6373 | LR: 0.000127822
No improvement for 2 epoch(s)

Epoch 10/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2269 | Train F1: 0.5068 | Val loss: 1.2989 | Val Macro F1: 0.4517 | Val Binary F1: 0.7906 | Hackathon: 0.6551 | LR: 0.000127822
No improvement for 3 epoch(s)

Epoch 11/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.1657 | Train F1: 0.5424 | Val loss: 1.4802 | Val Macro F1: 0.4824 | Val Binary F1: 0.7906 | Hackathon: 0.6673 | LR: 0.000127822
New best model (hackathon_score=0.6673)

Epoch 12/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.1531 | Train F1: 0.5284 | Val loss: 1.3722 | Val Macro F1: 0.2726 | Val Binary F1: 0.7906 | Hackathon: 0.5834 | LR: 0.000127822
No improvement for 1 epoch(s)

Epoch 13/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.1099 | Train F1: 0.5395 | Val loss: 1.3862 | Val Macro F1: 0.4344 | Val Binary F1: 0.8070 | Hackathon: 0.6580 | LR: 0.000127822
No improvement for 2 epoch(s)

Epoch 14/15
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.0357 | Train F1: 0.5529 | Val loss: 1.3983 | Val Macro F1: 0.5843 | Val Binary F1: 0.8204 | Hackathon: 0.7260 | LR: 0.000127822
New best model (hackathon_score=0.7260)

Epoch 15/15
----------------------------------------


                                                                     

Train loss: 0.9591 | Train F1: 0.5833 | Val loss: 2.4001 | Val Macro F1: 0.0071 | Val Binary F1: 0.7906 | Hackathon: 0.4772 | LR: 0.000127822
No improvement for 1 epoch(s)

Training complete. Best epoch: 14 (score=0.7260)
  RESULT: score=0.7260  epoch=14  time=46s

VIDEO TUNING RESULTS (sorted by score):
  0.8300  ep=10  video_tune_04_lr=1.2e-04_wd=3.5e-05_do=0.1  params={'lr': 0.00012229200603954495, 'weight_decay': 3.535426901910123e-05, 'dropout': 0.1, 'cwp': 0.8778293684135485, 'bsp': 0.23554376610787528}
  0.7260  ep=14  video_tune_05_lr=1.3e-04_wd=3.1e-05_do=0.15  params={'lr': 0.00012782225149341223, 'weight_decay': 3.104306284695298e-05, 'dropout': 0.15, 'cwp': 1.022022425704278, 'bsp': 0.26633042631539083}
  0.7166  ep=9  video_tune_03_lr=1.4e-04_wd=1.0e-04_do=0.18  params={'lr': 0.00013961968004333973, 'weight_decay': 0.00010419240159677894, 'dropout': 0.18, 'cwp': 0.9632784330916634, 'bsp': 0.20164876005697804}
  0.6978  ep=13  video_tune_01_lr=1.4e-04_wd=5.5e-05_do=0.18  pa

### 0c) Apply best tuning results to full-training config

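This cell copies the best hyperparameters found above into the pipeline config, restores the full-training settings (100 epochs, patience 30, dedicated checkpoint directories), and writes the result to `configs/audio_pipeline.json` and `configs/video_pipeline.json` for the 100-epoch runs. Fusion tuning runs *after* audio + video training because it needs their checkpoints.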

In [None]:
# ── Apply tuned hyperparameters and write the per-model configs ──
# Inputs from earlier cells: `tuning_results` (per-modality lists of trial
# dicts carrying "score" and "params"), `cfg` (mutated in place here), and the
# `audio_cfg_path` / `video_cfg_path` output locations.


def _best_trial(trials):
    """Return the highest-scoring trial dict, or None if `trials` is empty/missing.

    Picking by score explicitly (rather than taking index 0) keeps this cell
    correct even when the list was not sorted in place by an earlier cell.
    On a list already sorted best-first the result is the same element,
    because `max` returns the first maximal item.
    """
    if not trials:
        return None
    return max(trials, key=lambda trial: trial["score"])


# ── Apply best audio params ──
best_audio = _best_trial(tuning_results.get("audio"))
if best_audio is not None:
    ba = best_audio["params"]
    audio_training = cfg["audio"]["training"]
    audio_training["lr"] = ba["lr"]
    audio_training["weight_decay"] = ba["weight_decay"]
    cfg["audio"]["model"]["dropout"] = ba["dropout"]
    audio_mil = audio_training["sequence_mil"]
    audio_mil["class_weight_power"] = ba["cwp"]
    audio_mil["balanced_sampler_power"] = ba["bsp"]
    # Both the training top-k ratio and the eval pooling ratio are set from
    # the same tuned `topk_pos` value.
    audio_mil["topk_ratio_pos"] = ba["topk_pos"]
    audio_mil["eval_pool_ratio"] = ba["topk_pos"]
    print(f"Applied best AUDIO params (tuning score {best_audio['score']:.4f}):")
    print(f"  lr={ba['lr']:.2e}  wd={ba['weight_decay']:.2e}  dropout={ba['dropout']}")
    print(f"  cwp={ba['cwp']:.3f}  bsp={ba['bsp']:.3f}  topk_pos={ba['topk_pos']}")
else:
    print("No audio tuning results — keeping default params")

# ── Apply best video params ──
best_video = _best_trial(tuning_results.get("video"))
if best_video is not None:
    bv = best_video["params"]
    video_training = cfg["video_window"]["training"]
    video_training["lr"] = bv["lr"]
    video_training["weight_decay"] = bv["weight_decay"]
    cfg["video_window"]["model"]["dropout"] = bv["dropout"]
    video_training["class_weight_power"] = bv["cwp"]
    video_training["balanced_sampler_power"] = bv["bsp"]
    # Apply LR schedule params if present (from round 2 tuning). Each key is
    # handled on its own so a trial that tuned only one of them cannot raise
    # KeyError on the other.
    lr_schedule_keys = [
        key for key in ("warmup_ratio", "plateau_patience") if key in bv
    ]
    if lr_schedule_keys:
        lr_sched = video_training.setdefault("lr_schedule", {})
        for key in lr_schedule_keys:
            lr_sched[key] = bv[key]
    print(f"\nApplied best VIDEO params (tuning score {best_video['score']:.4f}):")
    print(f"  lr={bv['lr']:.2e}  wd={bv['weight_decay']:.2e}  dropout={bv['dropout']}")
    print(f"  cwp={bv['cwp']:.3f}  bsp={bv['bsp']:.3f}")
    if lr_schedule_keys:
        print("  " + "  ".join(f"{key}={bv[key]}" for key in lr_schedule_keys))
else:
    print("\nNo video tuning results — keeping default params")

# ── Restore full-training settings ──
# Note the differing key names: audio uses "num_epochs", video uses "epochs".
cfg["audio"]["training"]["num_epochs"]       = 100
cfg["audio"]["training"]["patience"]         = 30
cfg["audio"]["training"]["checkpoint_dir"]   = "checkpoints/audio_best_100"
cfg["video_window"]["training"]["epochs"]    = 100
cfg["video_window"]["training"]["patience"]  = 30
cfg["video_window"]["training"]["checkpoint_dir"]   = "checkpoints/video_best_100"
cfg["video_window"]["training"]["checkpoint_path"]  = "checkpoints/video_best_100/window_classifier.pth"

# ── Write each model's config to its OWN file (no git conflicts) ──
# Both files receive the same full `cfg` snapshot, so serialise it once.
cfg_json = json.dumps(cfg, indent=2)
audio_cfg_path.write_text(cfg_json)
video_cfg_path.write_text(cfg_json)
print("\nUpdated configs:")
print(f"  audio  → {audio_cfg_path.name}")
print(f"  video  → {video_cfg_path.name}")
print("(fusion config will be merged before fusion training)")

Applied best AUDIO params (tuning score 0.8144):
  lr=4.17e-05  wd=2.28e-05  dropout=0.12
  cwp=0.705  bsp=0.359  topk_pos=0.18

Applied best VIDEO params (tuning score 0.8300):
  lr=1.22e-04  wd=3.54e-05  dropout=0.1
  cwp=0.878  bsp=0.236

Updated configs:
  audio  → audio_pipeline.json
  video  → video_pipeline.json
(fusion config will be merged before fusion training)


## 1) Train audio model (100 epochs)

In [23]:
# Launch the audio training module as a subprocess of the current interpreter,
# pointing it at the audio-specific pipeline config written earlier.
audio_train_cmd = [sys.executable, '-u', '-m', 'audio.run_audio']
audio_train_cmd += ['--config', str(audio_cfg_path)]
run_cmd(audio_train_cmd)

# Fail fast if the run did not leave the expected checkpoint on disk.
assert audio_ckpt.exists(), f"Audio checkpoint not found: {audio_ckpt}"
print(f"Audio best checkpoint: {audio_ckpt}")


RUN: /home/alolli/miniconda3/envs/therness_env/bin/python -u -m audio.run_audio --config /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/configs/audio_pipeline.json
Device: cuda

Total weld files: 1551
File label distribution (multiclass): {'excessive_penetration': 259, 'good_weld': 731, 'excessive_convexity': 80, 'lack_of_fusion': 158, 'crater_cracks': 75, 'burnthrough': 169, 'overlap': 79}
Split strategy: stratified
Train welds: 1240 | Val welds: 311
File split stats | train={ burnthrough: 135 (10.9%), crater_cracks: 60 (4.8%), excessive_convexity: 64 (5.2%), excessive_penetration: 207 (16.7%), good_weld: 585 (47.2%), lack_of_fusion: 126 (10.2%), overlap: 63 (5.1%) } | val={ burnthrough: 34 (10.9%), crater_cracks: 15 (4.8%), excessive_convexity: 16 (5.1%), excessive_penetration: 52 (16.7%), good_weld: 146 (46.9%), lack_of_fusion: 32 (10.3%), overlap: 16 (5.1%) }
Train files: 1240 | Val files: 311
Classes (7): {'burnthrough': 0, 'crater_cracks': 1, 'excessive_convexity

Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.8592 | Val loss: 2.0505 | Val Macro F1: 0.0132 | LR: 4.16555e-06
New best model saved (val_f1=0.0132)

Epoch 2/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.8482 | Val loss: 1.9777 | Val Macro F1: 0.0133 | LR: 8.33109e-06
New best model saved (val_f1=0.0133)

Epoch 3/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.7272 | Val loss: 1.8524 | Val Macro F1: 0.0131 | LR: 1.24966e-05
No improvement for 1 epoch(s)

Epoch 4/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.6007 | Val loss: 1.6080 | Val Macro F1: 0.0319 | LR: 1.66622e-05
New best model saved (val_f1=0.0319)

Epoch 5/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.3466 | Val loss: 1.3115 | Val Macro F1: 0.1573 | LR: 2.08277e-05
New best model saved (val_f1=0.1573)

Epoch 6/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.1167 | Val loss: 1.1233 | Val Macro F1: 0.2458 | LR: 2.49933e-05
New best model saved (val_f1=0.2458)

Epoch 7/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.9202 | Val loss: 0.8866 | Val Macro F1: 0.3367 | LR: 2.91588e-05
New best model saved (val_f1=0.3367)

Epoch 8/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.7244 | Val loss: 0.7673 | Val Macro F1: 0.3376 | LR: 3.33244e-05
New best model saved (val_f1=0.3376)

Epoch 9/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.6407 | Val loss: 0.6974 | Val Macro F1: 0.3058 | LR: 3.74899e-05
No improvement for 1 epoch(s)

Epoch 10/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.6169 | Val loss: 0.6567 | Val Macro F1: 0.3675 | LR: 4.16555e-05
New best model saved (val_f1=0.3675)

Epoch 11/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4896 | Val loss: 0.5011 | Val Macro F1: 0.4154 | LR: 4.16555e-05
New best model saved (val_f1=0.4154)

Epoch 12/100
----------------------------------------


                                                                                      

Train loss: 0.3811 | Val loss: 0.4342 | Val Macro F1: 0.4813 | LR: 4.16555e-05
New best model saved (val_f1=0.4813)

Epoch 13/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3527 | Val loss: 0.4429 | Val Macro F1: 0.6237 | LR: 4.16555e-05
New best model saved (val_f1=0.6237)

Epoch 14/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3354 | Val loss: 0.4201 | Val Macro F1: 0.6047 | LR: 4.16555e-05
No improvement for 1 epoch(s)

Epoch 15/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3299 | Val loss: 0.3378 | Val Macro F1: 0.6729 | LR: 4.16555e-05
New best model saved (val_f1=0.6729)

Epoch 16/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3265 | Val loss: 0.4699 | Val Macro F1: 0.6702 | LR: 4.16555e-05
No improvement for 1 epoch(s)

Epoch 17/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3342 | Val loss: 0.5126 | Val Macro F1: 0.3080 | LR: 4.16555e-05
No improvement for 2 epoch(s)

Epoch 18/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3418 | Val loss: 0.3549 | Val Macro F1: 0.7316 | LR: 4.16555e-05
New best model saved (val_f1=0.7316)

Epoch 19/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3026 | Val loss: 0.3688 | Val Macro F1: 0.6795 | LR: 4.16555e-05
No improvement for 1 epoch(s)

Epoch 20/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2767 | Val loss: 0.3218 | Val Macro F1: 0.6736 | LR: 4.16555e-05
No improvement for 2 epoch(s)

Epoch 21/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2699 | Val loss: 0.4003 | Val Macro F1: 0.6924 | LR: 4.16555e-05
No improvement for 3 epoch(s)

Epoch 22/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2556 | Val loss: 0.3587 | Val Macro F1: 0.7593 | LR: 4.16555e-05
New best model saved (val_f1=0.7593)

Epoch 23/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2614 | Val loss: 0.2871 | Val Macro F1: 0.7702 | LR: 4.16555e-05
New best model saved (val_f1=0.7702)

Epoch 24/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2466 | Val loss: 0.3813 | Val Macro F1: 0.5826 | LR: 4.16555e-05
No improvement for 1 epoch(s)

Epoch 25/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2785 | Val loss: 0.4022 | Val Macro F1: 0.6783 | LR: 4.16555e-05
No improvement for 2 epoch(s)

Epoch 26/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2796 | Val loss: 0.2981 | Val Macro F1: 0.6285 | LR: 4.16555e-05
No improvement for 3 epoch(s)

Epoch 27/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2233 | Val loss: 0.2881 | Val Macro F1: 0.7270 | LR: 4.16555e-05
No improvement for 4 epoch(s)

Epoch 28/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1658 | Val loss: 0.2048 | Val Macro F1: 0.8665 | LR: 4.16555e-05
New best model saved (val_f1=0.8665)

Epoch 29/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1506 | Val loss: 0.2081 | Val Macro F1: 0.7999 | LR: 4.16555e-05
No improvement for 1 epoch(s)

Epoch 30/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1341 | Val loss: 0.2374 | Val Macro F1: 0.7033 | LR: 4.16555e-05
No improvement for 2 epoch(s)

Epoch 31/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1234 | Val loss: 0.1496 | Val Macro F1: 0.7099 | LR: 4.16555e-05
No improvement for 3 epoch(s)

Epoch 32/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1268 | Val loss: 0.1564 | Val Macro F1: 0.8315 | LR: 4.16555e-05
No improvement for 4 epoch(s)

Epoch 33/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1117 | Val loss: 0.1259 | Val Macro F1: 0.7904 | LR: 4.16555e-05
No improvement for 5 epoch(s)

Epoch 34/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0898 | Val loss: 0.0901 | Val Macro F1: 0.8551 | LR: 2.08277e-05
No improvement for 6 epoch(s)

Epoch 35/100
----------------------------------------


                                                                                       

Train loss: 0.0831 | Val loss: 0.1218 | Val Macro F1: 0.9025 | LR: 2.08277e-05
New best model saved (val_f1=0.9025)

Epoch 36/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0926 | Val loss: 0.1006 | Val Macro F1: 0.8282 | LR: 2.08277e-05
No improvement for 1 epoch(s)

Epoch 37/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0811 | Val loss: 0.0977 | Val Macro F1: 0.9066 | LR: 2.08277e-05
New best model saved (val_f1=0.9066)

Epoch 38/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0799 | Val loss: 0.1071 | Val Macro F1: 0.8754 | LR: 2.08277e-05
No improvement for 1 epoch(s)

Epoch 39/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0761 | Val loss: 0.1299 | Val Macro F1: 0.8581 | LR: 2.08277e-05
No improvement for 2 epoch(s)

Epoch 40/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0885 | Val loss: 0.1290 | Val Macro F1: 0.8598 | LR: 2.08277e-05
No improvement for 3 epoch(s)

Epoch 41/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1040 | Val loss: 0.1232 | Val Macro F1: 0.9077 | LR: 2.08277e-05
New best model saved (val_f1=0.9077)

Epoch 42/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1097 | Val loss: 0.1515 | Val Macro F1: 0.8593 | LR: 2.08277e-05
No improvement for 1 epoch(s)

Epoch 43/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1214 | Val loss: 0.1570 | Val Macro F1: 0.8158 | LR: 2.08277e-05
No improvement for 2 epoch(s)

Epoch 44/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1431 | Val loss: 0.1774 | Val Macro F1: 0.8806 | LR: 2.08277e-05
No improvement for 3 epoch(s)

Epoch 45/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1364 | Val loss: 0.1738 | Val Macro F1: 0.7943 | LR: 2.08277e-05
No improvement for 4 epoch(s)

Epoch 46/100
----------------------------------------


                                                                                      

Train loss: 0.1374 | Val loss: 0.2075 | Val Macro F1: 0.8928 | LR: 2.08277e-05
No improvement for 5 epoch(s)

Epoch 47/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1091 | Val loss: 0.1554 | Val Macro F1: 0.9309 | LR: 1.04139e-05
New best model saved (val_f1=0.9309)

Epoch 48/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0965 | Val loss: 0.1593 | Val Macro F1: 0.8900 | LR: 1.04139e-05
No improvement for 1 epoch(s)

Epoch 49/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1005 | Val loss: 0.1336 | Val Macro F1: 0.8489 | LR: 1.04139e-05
No improvement for 2 epoch(s)

Epoch 50/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0970 | Val loss: 0.1359 | Val Macro F1: 0.7978 | LR: 1.04139e-05
No improvement for 3 epoch(s)

Epoch 51/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0957 | Val loss: 0.1483 | Val Macro F1: 0.8923 | LR: 1.04139e-05
No improvement for 4 epoch(s)

Epoch 52/100
----------------------------------------


                                                                                       

Train loss: 0.0998 | Val loss: 0.1206 | Val Macro F1: 0.9349 | LR: 1.04139e-05
New best model saved (val_f1=0.9349)

Epoch 53/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.1006 | Val loss: 0.1338 | Val Macro F1: 0.9022 | LR: 1.04139e-05
No improvement for 1 epoch(s)

Epoch 54/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0983 | Val loss: 0.2080 | Val Macro F1: 0.8971 | LR: 1.04139e-05
No improvement for 2 epoch(s)

Epoch 55/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.1036 | Val loss: 0.2228 | Val Macro F1: 0.8469 | LR: 1.04139e-05
No improvement for 3 epoch(s)

Epoch 56/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1062 | Val loss: 0.1834 | Val Macro F1: 0.9105 | LR: 1.04139e-05
No improvement for 4 epoch(s)

Epoch 57/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0914 | Val loss: 0.1075 | Val Macro F1: 0.8484 | LR: 1.04139e-05
No improvement for 5 epoch(s)

Epoch 58/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0736 | Val loss: 0.1086 | Val Macro F1: 0.9389 | LR: 5.20693e-06
New best model saved (val_f1=0.9389)

Epoch 59/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0690 | Val loss: 0.0989 | Val Macro F1: 0.9195 | LR: 5.20693e-06
No improvement for 1 epoch(s)

Epoch 60/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0670 | Val loss: 0.1169 | Val Macro F1: 0.9048 | LR: 5.20693e-06
No improvement for 2 epoch(s)

Epoch 61/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0632 | Val loss: 0.1093 | Val Macro F1: 0.8981 | LR: 5.20693e-06
No improvement for 3 epoch(s)

Epoch 62/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0592 | Val loss: 0.1026 | Val Macro F1: 0.8914 | LR: 5.20693e-06
No improvement for 4 epoch(s)

Epoch 63/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0593 | Val loss: 0.0916 | Val Macro F1: 0.9297 | LR: 5.20693e-06
No improvement for 5 epoch(s)

Epoch 64/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0529 | Val loss: 0.0966 | Val Macro F1: 0.9237 | LR: 2.60347e-06
No improvement for 6 epoch(s)

Epoch 65/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0546 | Val loss: 0.0934 | Val Macro F1: 0.9363 | LR: 2.60347e-06
No improvement for 7 epoch(s)

Epoch 66/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0508 | Val loss: 0.0774 | Val Macro F1: 0.9386 | LR: 2.60347e-06
No improvement for 8 epoch(s)

Epoch 67/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0529 | Val loss: 0.0810 | Val Macro F1: 0.9474 | LR: 2.60347e-06
New best model saved (val_f1=0.9474)

Epoch 68/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0541 | Val loss: 0.0808 | Val Macro F1: 0.9488 | LR: 2.60347e-06
New best model saved (val_f1=0.9488)

Epoch 69/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0484 | Val loss: 0.0894 | Val Macro F1: 0.9311 | LR: 2.60347e-06
No improvement for 1 epoch(s)

Epoch 70/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0485 | Val loss: 0.0842 | Val Macro F1: 0.9336 | LR: 2.60347e-06
No improvement for 2 epoch(s)

Epoch 71/100
----------------------------------------


                                                                                       

Train loss: 0.0513 | Val loss: 0.0857 | Val Macro F1: 0.9539 | LR: 2.60347e-06
New best model saved (val_f1=0.9539)

Target metric reached (0.9539 >= 0.95). Stopping at epoch 71

Training complete. Best epoch: 71 (val_f1=0.9539)

Best epoch: 71
Train losses: ['1.8592', '1.8482', '1.7272', '1.6007', '1.3466', '1.1167', '0.9202', '0.7244', '0.6407', '0.6169', '0.4896', '0.3811', '0.3527', '0.3354', '0.3299', '0.3265', '0.3342', '0.3418', '0.3026', '0.2767', '0.2699', '0.2556', '0.2614', '0.2466', '0.2785', '0.2796', '0.2233', '0.1658', '0.1506', '0.1341', '0.1234', '0.1268', '0.1117', '0.0898', '0.0831', '0.0926', '0.0811', '0.0799', '0.0761', '0.0885', '0.1040', '0.1097', '0.1214', '0.1431', '0.1364', '0.1374', '0.1091', '0.0965', '0.1005', '0.0970', '0.0957', '0.0998', '0.1006', '0.0983', '0.1036', '0.1062', '0.0914', '0.0736', '0.0690', '0.0670', '0.0632', '0.0592', '0.0593', '0.0529', '0.0546', '0.0508', '0.0529', '0.0541', '0.0484', '0.0485', '0.0513']
Val losses:   ['2.0505', '1.97

## 2) Train video model (100 epochs)

In [26]:
# Stage 2: train the video window model in a child process using the
# video-only config file. '-u' keeps the child's stdout unbuffered so the
# training log streams into the notebook as it is produced.
_video_cmd = [sys.executable, '-u', '-m', 'video.run_video']
_video_cmd += ['--config', str(video_cfg_path)]
run_cmd(_video_cmd)

# Post-condition: the fusion stage needs this checkpoint file, so fail here
# with a clear message rather than later inside the fusion run.
assert video_ckpt.exists(), f"Video checkpoint not found: {video_ckpt}"
print(f"Video best checkpoint: {video_ckpt}")


RUN: /home/alolli/miniconda3/envs/therness_env/bin/python -u -m video.run_video --config /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/configs/video_pipeline.json
Device: cuda

Discovering video files in /data1/malto/therness/data/Hackathon...
       Scanning good_weld...
       Scanning defect-weld...
Found 1551 videos
  Code 00 (class 0): 731 videos
  Code 01 (class 1): 259 videos
  Code 02 (class 2): 169 videos
  Code 06 (class 3): 79 videos
  Code 07 (class 4): 158 videos
  Code 08 (class 5): 80 videos
  Code 11 (class 6): 75 videos
Train: 1268 videos | Val: 283 videos
Using pre-extracted frames from data/video_frames
Train: 1268 | Val: 283 | num_frames=12 [JPEG]
Balanced sampler: enabled (power=0.23554376610787528, videos/epoch=1268)
Model parameters: 190,759
Class weights: ['0.193', '0.533', '0.764', '1.549', '0.785', '1.791', '1.385']
Config saved to checkpoints/video_best_100/config.json

  TRAINING START — 100 epochs
  Checkpoint dir: checkpoints/video_best_1

Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9872 | Train F1: 0.0291 | Val loss: 2.0322 | Val Macro F1: 0.0274 | Val Binary F1: 0.7906 | Hackathon: 0.4853 | LR: 1.22292e-05
New best model (hackathon_score=0.4853)

Epoch 2/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9892 | Train F1: 0.0262 | Val loss: 2.0588 | Val Macro F1: 0.0274 | Val Binary F1: 0.7906 | Hackathon: 0.4853 | LR: 2.44584e-05
No improvement for 1 epoch(s)

Epoch 3/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9486 | Train F1: 0.0486 | Val loss: 1.9955 | Val Macro F1: 0.1499 | Val Binary F1: 0.7906 | Hackathon: 0.5343 | LR: 3.66876e-05
New best model (hackathon_score=0.5343)

Epoch 4/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9089 | Train F1: 0.0960 | Val loss: 1.9160 | Val Macro F1: 0.1211 | Val Binary F1: 0.7906 | Hackathon: 0.5228 | LR: 4.89168e-05
No improvement for 1 epoch(s)

Epoch 5/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.8511 | Train F1: 0.1330 | Val loss: 1.8420 | Val Macro F1: 0.1147 | Val Binary F1: 0.7906 | Hackathon: 0.5202 | LR: 6.1146e-05
No improvement for 2 epoch(s)

Epoch 6/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.7922 | Train F1: 0.1638 | Val loss: 1.7626 | Val Macro F1: 0.3098 | Val Binary F1: 0.7906 | Hackathon: 0.5983 | LR: 7.33752e-05
New best model (hackathon_score=0.5983)

Epoch 7/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.7010 | Train F1: 0.2051 | Val loss: 1.6185 | Val Macro F1: 0.3994 | Val Binary F1: 0.7906 | Hackathon: 0.6341 | LR: 8.56044e-05
New best model (hackathon_score=0.6341)

Epoch 8/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6385 | Train F1: 0.2383 | Val loss: 1.5669 | Val Macro F1: 0.4208 | Val Binary F1: 0.7906 | Hackathon: 0.6427 | LR: 9.78336e-05
New best model (hackathon_score=0.6427)

Epoch 9/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5831 | Train F1: 0.3076 | Val loss: 1.5059 | Val Macro F1: 0.3823 | Val Binary F1: 0.7773 | Hackathon: 0.6193 | LR: 0.000110063
No improvement for 1 epoch(s)

Epoch 10/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5603 | Train F1: 0.3406 | Val loss: 1.5378 | Val Macro F1: 0.6852 | Val Binary F1: 0.8359 | Hackathon: 0.7756 | LR: 0.000122292
New best model (hackathon_score=0.7756)

Epoch 11/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.4749 | Train F1: 0.4231 | Val loss: 1.4731 | Val Macro F1: 0.5836 | Val Binary F1: 0.8021 | Hackathon: 0.7147 | LR: 0.000122292
No improvement for 1 epoch(s)

Epoch 12/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.4621 | Train F1: 0.4545 | Val loss: 1.3991 | Val Macro F1: 0.4507 | Val Binary F1: 0.7778 | Hackathon: 0.6470 | LR: 0.000122292
No improvement for 2 epoch(s)

Epoch 13/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.4160 | Train F1: 0.4310 | Val loss: 1.7045 | Val Macro F1: 0.1915 | Val Binary F1: 0.7906 | Hackathon: 0.5510 | LR: 0.000122292
No improvement for 3 epoch(s)

Epoch 14/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3707 | Train F1: 0.4661 | Val loss: 1.3659 | Val Macro F1: 0.3146 | Val Binary F1: 0.7906 | Hackathon: 0.6002 | LR: 0.000122292
No improvement for 4 epoch(s)

Epoch 15/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3173 | Train F1: 0.5521 | Val loss: 1.5769 | Val Macro F1: 0.2728 | Val Binary F1: 0.7472 | Hackathon: 0.5574 | LR: 0.000122292
No improvement for 5 epoch(s)

Epoch 16/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2712 | Train F1: 0.5306 | Val loss: 1.3551 | Val Macro F1: 0.4231 | Val Binary F1: 0.7957 | Hackathon: 0.6467 | LR: 0.000122292
No improvement for 6 epoch(s)

Epoch 17/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2622 | Train F1: 0.5893 | Val loss: 1.3829 | Val Macro F1: 0.4975 | Val Binary F1: 0.8037 | Hackathon: 0.6812 | LR: 0.000122292
No improvement for 7 epoch(s)

Epoch 18/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.1862 | Train F1: 0.5895 | Val loss: 1.4335 | Val Macro F1: 0.4255 | Val Binary F1: 0.7957 | Hackathon: 0.6476 | LR: 0.000122292
No improvement for 8 epoch(s)

Epoch 19/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2109 | Train F1: 0.6327 | Val loss: 1.3339 | Val Macro F1: 0.4437 | Val Binary F1: 0.7906 | Hackathon: 0.6519 | LR: 0.000122292
No improvement for 9 epoch(s)

Epoch 20/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.1900 | Train F1: 0.6063 | Val loss: 1.4436 | Val Macro F1: 0.2077 | Val Binary F1: 0.7906 | Hackathon: 0.5575 | LR: 0.000122292
No improvement for 10 epoch(s)

Epoch 21/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2722 | Train F1: 0.5463 | Val loss: 1.5179 | Val Macro F1: 0.3953 | Val Binary F1: 0.7906 | Hackathon: 0.6325 | LR: 0.000122292
No improvement for 11 epoch(s)

Epoch 22/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2524 | Train F1: 0.5326 | Val loss: 1.3662 | Val Macro F1: 0.4517 | Val Binary F1: 0.7906 | Hackathon: 0.6550 | LR: 0.000122292
No improvement for 12 epoch(s)

Epoch 23/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.1802 | Train F1: 0.5895 | Val loss: 1.9290 | Val Macro F1: 0.2269 | Val Binary F1: 0.7906 | Hackathon: 0.5651 | LR: 0.000122292
No improvement for 13 epoch(s)

Epoch 24/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.1071 | Train F1: 0.5835 | Val loss: 1.3377 | Val Macro F1: 0.3478 | Val Binary F1: 0.7906 | Hackathon: 0.6135 | LR: 0.000122292
No improvement for 14 epoch(s)

Epoch 25/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.0253 | Train F1: 0.6239 | Val loss: 1.2987 | Val Macro F1: 0.4196 | Val Binary F1: 0.7906 | Hackathon: 0.6422 | LR: 6.1146e-05
No improvement for 15 epoch(s)

Epoch 26/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.9483 | Train F1: 0.6737 | Val loss: 1.3067 | Val Macro F1: 0.5050 | Val Binary F1: 0.7906 | Hackathon: 0.6764 | LR: 6.1146e-05
No improvement for 16 epoch(s)

Epoch 27/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.9265 | Train F1: 0.6640 | Val loss: 1.5048 | Val Macro F1: 0.3610 | Val Binary F1: 0.7906 | Hackathon: 0.6188 | LR: 6.1146e-05
No improvement for 17 epoch(s)

Epoch 28/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.9857 | Train F1: 0.6318 | Val loss: 1.6048 | Val Macro F1: 0.2375 | Val Binary F1: 0.7906 | Hackathon: 0.5694 | LR: 6.1146e-05
No improvement for 18 epoch(s)

Epoch 29/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.9333 | Train F1: 0.6729 | Val loss: 1.3938 | Val Macro F1: 0.4135 | Val Binary F1: 0.7906 | Hackathon: 0.6398 | LR: 6.1146e-05
No improvement for 19 epoch(s)

Epoch 30/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.9174 | Train F1: 0.6904 | Val loss: 1.2156 | Val Macro F1: 0.6920 | Val Binary F1: 0.7906 | Hackathon: 0.7512 | LR: 6.1146e-05
No improvement for 20 epoch(s)

Epoch 31/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.8997 | Train F1: 0.6878 | Val loss: 1.3234 | Val Macro F1: 0.4305 | Val Binary F1: 0.7906 | Hackathon: 0.6466 | LR: 6.1146e-05
No improvement for 21 epoch(s)

Epoch 32/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.9922 | Train F1: 0.6712 | Val loss: 1.4226 | Val Macro F1: 0.3500 | Val Binary F1: 0.7906 | Hackathon: 0.6143 | LR: 6.1146e-05
No improvement for 22 epoch(s)

Epoch 33/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.9206 | Train F1: 0.6689 | Val loss: 1.5546 | Val Macro F1: 0.5132 | Val Binary F1: 0.7906 | Hackathon: 0.6796 | LR: 6.1146e-05
No improvement for 23 epoch(s)

Epoch 34/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.8484 | Train F1: 0.6815 | Val loss: 1.4335 | Val Macro F1: 0.1621 | Val Binary F1: 0.7906 | Hackathon: 0.5392 | LR: 6.1146e-05
No improvement for 24 epoch(s)

Epoch 35/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.7970 | Train F1: 0.6938 | Val loss: 1.3575 | Val Macro F1: 0.4094 | Val Binary F1: 0.7906 | Hackathon: 0.6381 | LR: 6.1146e-05
No improvement for 25 epoch(s)

Epoch 36/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.7229 | Train F1: 0.6899 | Val loss: 1.4394 | Val Macro F1: 0.2898 | Val Binary F1: 0.7906 | Hackathon: 0.5903 | LR: 3.0573e-05
No improvement for 26 epoch(s)

Epoch 37/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.7011 | Train F1: 0.6840 | Val loss: 1.1142 | Val Macro F1: 0.6353 | Val Binary F1: 0.7906 | Hackathon: 0.7285 | LR: 3.0573e-05
No improvement for 27 epoch(s)

Epoch 38/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.6451 | Train F1: 0.7024 | Val loss: 1.1500 | Val Macro F1: 0.7475 | Val Binary F1: 0.7906 | Hackathon: 0.7734 | LR: 3.0573e-05
No improvement for 28 epoch(s)

Epoch 39/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.6069 | Train F1: 0.7166 | Val loss: 1.0628 | Val Macro F1: 0.6668 | Val Binary F1: 0.7906 | Hackathon: 0.7411 | LR: 3.0573e-05
No improvement for 29 epoch(s)

Epoch 40/100
----------------------------------------


                                                                     

Train loss: 0.6080 | Train F1: 0.7158 | Val loss: 1.0663 | Val Macro F1: 0.6643 | Val Binary F1: 0.7906 | Hackathon: 0.7401 | LR: 3.0573e-05
No improvement for 30 epoch(s)

Early stopping (patience) at epoch 40

Training complete. Best epoch: 10 (score=0.7756)
Video best checkpoint: /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt


### 0d) Merge audio + video configs → fusion config, then tune fusion

If a teammate trained the audio or video model separately, run `git pull` first so that `audio_pipeline.json` and `video_pipeline.json` are up to date before they are merged below.

In [8]:
# ── Merge audio + video configs into fusion config ──
# This cell (1) builds configs/fusion_pipeline.json from the saved audio and
# video configs plus the in-memory fusion section, (2) runs N_FUSION_TRIALS
# random-search trials of the attention fusion model, and (3) writes the best
# trial's hyper-parameters back into `cfg` and the fusion config file.
# It relies on names created by earlier cells: cfg, audio_cfg_path,
# video_cfg_path, fusion_cfg_path, audio_ckpt, video_ckpt, run_trial,
# _sample_uniform, _sample_choice, _rnd, tuning_results, N_FUSION_TRIALS,
# TUNING_EPOCHS and TUNING_PATIENCE.
import copy as _copy

# Architecture trained by every trial in this cell. It is also written back
# together with the winning hyper-parameters: num_heads / attn_layers are only
# meaningful for this architecture, so the final run must use the same one.
FUSION_TUNING_ARCH = "attention"

audio_saved = json.loads(audio_cfg_path.read_text())
video_saved = json.loads(video_cfg_path.read_text())

# Fail with an actionable message if the video config is stale or incomplete
# (still a KeyError, as the plain lookup below would raise).
if "video_window" not in video_saved:
    raise KeyError(
        f"'video_window' section missing from {video_cfg_path}; "
        "regenerate or pull the latest video config before merging"
    )

# Start from the audio config so all of its sections carry over unchanged.
fusion_merged = _copy.deepcopy(audio_saved)
# Take video sections from video config (may have different tuned params)
fusion_merged["video_window"] = _copy.deepcopy(video_saved["video_window"])
# Keep fusion section from in-memory cfg (latest fusion defaults/tuning)
fusion_merged["fusion"] = _copy.deepcopy(cfg["fusion"])

fusion_cfg_path.write_text(json.dumps(fusion_merged, indent=2))
print(f"Merged config written → {fusion_cfg_path.name}")
print(f"  audio params from:  {audio_cfg_path.name}")
print(f"  video params from:  {video_cfg_path.name}")
print("  fusion params from: in-memory defaults")

# ── Fusion attention tuning ──
# Fixed seed so re-running this cell draws the same trial parameters
# (assumes the _sample_* helpers draw from _rnd — TODO confirm).
_rnd.seed(456)

# Both backbone checkpoints are passed to every trial, so check them up front.
assert audio_ckpt.exists(), f"Audio checkpoint needed for fusion tuning: {audio_ckpt}"
assert video_ckpt.exists(), f"Video checkpoint needed for fusion tuning: {video_ckpt}"

for i in range(N_FUSION_TRIALS):
    # NOTE: the order of these draws determines the values obtained from the
    # seeded generator; do not reorder the keys.
    trial_params = {
        "lr":           _sample_uniform(5e-5, 2e-4, log=True),
        "weight_decay": _sample_uniform(5e-5, 2e-4, log=True),
        "dropout":      _sample_choice([0.15, 0.2, 0.25, 0.3]),
        "hidden_dim":   _sample_choice([128, 160, 192, 256]),
        "num_heads":    _sample_choice([2, 4]),
        "attn_layers":  _sample_choice([1, 2, 3]),
    }
    # Human-readable trial id; also used as the per-trial checkpoint folder.
    trial_name = (
        f"fusion_attn_{i:02d}_lr={trial_params['lr']:.1e}"
        f"_hd={trial_params['hidden_dim']}"
        f"_nh={trial_params['num_heads']}"
        f"_al={trial_params['attn_layers']}"
    )
    # Keys look like dotted paths into the config; presumably run_trial applies
    # them on top of a base config — confirm against run_trial.
    overrides = {
        "fusion.model.arch":                FUSION_TUNING_ARCH,
        "fusion.training.num_epochs":       TUNING_EPOCHS,
        "fusion.training.patience":         TUNING_PATIENCE,
        "fusion.training.lr":               trial_params["lr"],
        "fusion.training.weight_decay":     trial_params["weight_decay"],
        "fusion.model.dropout":             trial_params["dropout"],
        "fusion.model.hidden_dim":          trial_params["hidden_dim"],
        "fusion.model.num_heads":           trial_params["num_heads"],
        "fusion.model.attn_layers":         trial_params["attn_layers"],
        "fusion.training.checkpoint_dir":   f"checkpoints/_tuning/{trial_name}",
    }
    result = run_trial(
        "fusion.run_fusion", overrides, trial_params, trial_name,
        extra_args=["--audio_checkpoint", str(audio_ckpt),
                     "--video_checkpoint", str(video_ckpt)],
    )
    # Only truthy results are recorded (presumably a failed trial returns a
    # falsy value — verify against run_trial).
    if result:
        tuning_results["fusion"].append(result)

# Best score first; each result is read below via its
# 'score', 'epoch', 'name' and 'params' keys.
tuning_results["fusion"].sort(key=lambda r: r["score"], reverse=True)
print(f"\n{'='*70}")
print("FUSION TUNING RESULTS (sorted by score):")
print(f"{'='*70}")
for r in tuning_results["fusion"]:
    print(f"  {r['score']:.4f}  ep={r['epoch']}  {r['name']}  params={r['params']}")

if tuning_results["fusion"]:
    bf = tuning_results["fusion"][0]["params"]
    # Fix: persist the architecture the parameters were tuned for, so the full
    # training run does not silently fall back to a different default arch.
    cfg["fusion"]["model"]["arch"]            = FUSION_TUNING_ARCH
    cfg["fusion"]["training"]["lr"]           = bf["lr"]
    cfg["fusion"]["training"]["weight_decay"] = bf["weight_decay"]
    cfg["fusion"]["model"]["dropout"]         = bf["dropout"]
    cfg["fusion"]["model"]["hidden_dim"]      = bf["hidden_dim"]
    cfg["fusion"]["model"]["num_heads"]       = bf["num_heads"]
    cfg["fusion"]["model"]["attn_layers"]     = bf["attn_layers"]
    # Restore full-training settings
    cfg["fusion"]["training"]["num_epochs"]     = 100
    cfg["fusion"]["training"]["patience"]       = 30
    cfg["fusion"]["training"]["checkpoint_dir"] = "checkpoints/fusion_best_100"

    # Update the merged fusion config with tuned params
    fusion_merged["fusion"] = _copy.deepcopy(cfg["fusion"])
    fusion_cfg_path.write_text(json.dumps(fusion_merged, indent=2))
    print(f"\nApplied best FUSION params (tuning score {tuning_results['fusion'][0]['score']:.4f}):")
    print(f"  lr={bf['lr']:.2e}  wd={bf['weight_decay']:.2e}  dropout={bf['dropout']}")
    print(f"  hidden_dim={bf['hidden_dim']}  num_heads={bf['num_heads']}  attn_layers={bf['attn_layers']}")
    print(f"  arch={FUSION_TUNING_ARCH}")
    print(f"Updated config: {fusion_cfg_path.name}")
else:
    # No tuning results — still write fusion config with defaults
    fusion_merged["fusion"] = _copy.deepcopy(cfg["fusion"])
    fusion_cfg_path.write_text(json.dumps(fusion_merged, indent=2))
    print("\nNo successful fusion trials — keeping default params")
    print(f"Wrote default fusion config: {fusion_cfg_path.name}")


──────────────────────────────────────────────────────────────────────
TRIAL: fusion_attn_00_lr=1.4e-04_hd=256_nh=2_al=3
  Params: {'lr': 0.00014106939915668838, 'weight_decay': 0.00019094731280229325, 'dropout': 0.3, 'hidden_dim': 256, 'num_heads': 2, 'attn_layers': 3}
──────────────────────────────────────────────────────────────────────
Device: cuda
Loading audio backbone...
  Audio backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_best_100/best_model.pt
Loading video backbone...
  Video backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt
Classes (7): {'burnthrough': 0, 'crater_cracks': 1, 'excessive_convexity': 2, 'excessive_penetration': 3, 'good_weld': 4, 'lack_of_fusion': 5, 'overlap': 6}
Train: 1240 files | Val: 311 files

Extracting temporal audio sequences (T=12)...
  Train: torch.Size([1240, 12, 128]) | Val: torch.Size([311, 12, 128])
Building audio↔v

  self.attn_encoder = nn.TransformerEncoder(
Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.95]         

Train loss: 2.0115 | Train F1: 0.1228 | Val Macro F1: 0.0940 | Val Binary F1: 0.1461 | Hackathon: 0.1252
New best model (hackathon_score=0.1252)

Epoch 2/15
----------------------------------------
Train loss: 1.9363 | Train F1: 0.1255 | Val Macro F1: 0.0944 | Val Binary F1: 0.1667 | Hackathon: 0.1378
New best model (hackathon_score=0.1378)

Epoch 3/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.7]           

Train loss: 1.8837 | Train F1: 0.1534 | Val Macro F1: 0.0942 | Val Binary F1: 0.1564 | Hackathon: 0.1315
No improvement for 1 epoch(s)

Epoch 4/15
----------------------------------------
Train loss: 1.7770 | Train F1: 0.1985 | Val Macro F1: 0.2162 | Val Binary F1: 0.1356 | Hackathon: 0.1678
New best model (hackathon_score=0.1678)

Epoch 5/15
----------------------------------------


Training:  85%|████████▌ | 17/20 [00:00<00:00, 165.33it/s, loss=1.57]

Train loss: 1.6772 | Train F1: 0.2853 | Val Macro F1: 0.2326 | Val Binary F1: 0.1667 | Hackathon: 0.1931
New best model (hackathon_score=0.1931)

Epoch 6/15
----------------------------------------
Train loss: 1.5684 | Train F1: 0.4006 | Val Macro F1: 0.3132 | Val Binary F1: 0.2812 | Hackathon: 0.2940
New best model (hackathon_score=0.2940)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.36]          

Train loss: 1.4427 | Train F1: 0.5401 | Val Macro F1: 0.6466 | Val Binary F1: 0.6929 | Hackathon: 0.6744
New best model (hackathon_score=0.6744)

Epoch 8/15
----------------------------------------
Train loss: 1.3402 | Train F1: 0.6420 | Val Macro F1: 0.7989 | Val Binary F1: 0.7971 | Hackathon: 0.7978
New best model (hackathon_score=0.7978)

Epoch 9/15
----------------------------------------


                                                                     

Train loss: 1.0817 | Train F1: 0.8547 | Val Macro F1: 0.9527 | Val Binary F1: 0.9664 | Hackathon: 0.9609
New best model (hackathon_score=0.9609)

Target metric reached (0.9609 >= 0.95). Stopping at epoch 9

Training complete. Best epoch: 9 (score=0.9609)
  RESULT: score=0.9609  epoch=9  time=30s

──────────────────────────────────────────────────────────────────────
TRIAL: fusion_attn_01_lr=1.4e-04_hd=192_nh=4_al=1
  Params: {'lr': 0.0001377235425660948, 'weight_decay': 0.0001739301417385073, 'dropout': 0.15, 'hidden_dim': 192, 'num_heads': 4, 'attn_layers': 1}
──────────────────────────────────────────────────────────────────────
Device: cuda
Loading audio backbone...
  Audio backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_best_100/best_model.pt
Loading video backbone...
  Video backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt
Classes (7): {'burnthrough': 0

  self.attn_encoder = nn.TransformerEncoder(
Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.75]        

Train loss: 1.8935 | Train F1: 0.1181 | Val Macro F1: 0.1642 | Val Binary F1: 0.7660 | Hackathon: 0.5252
New best model (hackathon_score=0.5252)

Epoch 2/15
----------------------------------------
Train loss: 1.8207 | Train F1: 0.1765 | Val Macro F1: 0.1442 | Val Binary F1: 0.4566 | Hackathon: 0.3317
No improvement for 1 epoch(s)

Epoch 3/15
----------------------------------------
Train loss: 1.7476 | Train F1: 0.2086 | Val Macro F1: 0.1509 | Val Binary F1: 0.3981 | Hackathon: 0.2992
No improvement for 2 epoch(s)

Epoch 4/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.61]

Train loss: 1.6806 | Train F1: 0.2730 | Val Macro F1: 0.1693 | Val Binary F1: 0.2812 | Hackathon: 0.2365
No improvement for 3 epoch(s)

Epoch 5/15
----------------------------------------
Train loss: 1.6161 | Train F1: 0.3291 | Val Macro F1: 0.2711 | Val Binary F1: 0.3902 | Hackathon: 0.3426
No improvement for 4 epoch(s)

Epoch 6/15
----------------------------------------
Train loss: 1.5657 | Train F1: 0.4190 | Val Macro F1: 0.3331 | Val Binary F1: 0.5333 | Hackathon: 0.4532
No improvement for 5 epoch(s)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.42]

Train loss: 1.5008 | Train F1: 0.4924 | Val Macro F1: 0.5625 | Val Binary F1: 0.6475 | Hackathon: 0.6135
New best model (hackathon_score=0.6135)

Epoch 8/15
----------------------------------------
Train loss: 1.4247 | Train F1: 0.5769 | Val Macro F1: 0.6553 | Val Binary F1: 0.7109 | Hackathon: 0.6887
New best model (hackathon_score=0.6887)

Epoch 9/15
----------------------------------------
Train loss: 1.2046 | Train F1: 0.7319 | Val Macro F1: 0.8714 | Val Binary F1: 0.8990 | Hackathon: 0.8880
New best model (hackathon_score=0.8880)

Epoch 10/15
----------------------------------------


                                                            

Train loss: 0.8210 | Train F1: 0.9144 | Val Macro F1: 0.9587 | Val Binary F1: 0.9536 | Hackathon: 0.9556
New best model (hackathon_score=0.9556)

Target metric reached (0.9556 >= 0.95). Stopping at epoch 10

Training complete. Best epoch: 10 (score=0.9556)
  RESULT: score=0.9556  epoch=10  time=31s

──────────────────────────────────────────────────────────────────────
TRIAL: fusion_attn_02_lr=5.6e-05_hd=192_nh=2_al=1
  Params: {'lr': 5.620424750892037e-05, 'weight_decay': 0.0001311798207968172, 'dropout': 0.15, 'hidden_dim': 192, 'num_heads': 2, 'attn_layers': 1}
──────────────────────────────────────────────────────────────────────
Device: cuda
Loading audio backbone...
  Audio backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_best_100/best_model.pt
Loading video backbone...
  Video backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt
Classes (7): {'burnthrough'

  self.attn_encoder = nn.TransformerEncoder(
Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.83]        

Train loss: 1.9178 | Train F1: 0.0942 | Val Macro F1: 0.0665 | Val Binary F1: 0.6943 | Hackathon: 0.4432
New best model (hackathon_score=0.4432)

Epoch 2/15
----------------------------------------
Train loss: 1.8662 | Train F1: 0.1396 | Val Macro F1: 0.1657 | Val Binary F1: 0.7344 | Hackathon: 0.5069
New best model (hackathon_score=0.5069)

Epoch 3/15
----------------------------------------
Train loss: 1.8255 | Train F1: 0.1762 | Val Macro F1: 0.1328 | Val Binary F1: 0.5217 | Hackathon: 0.3662
No improvement for 1 epoch(s)

Epoch 4/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.79]

Train loss: 1.7830 | Train F1: 0.1906 | Val Macro F1: 0.1394 | Val Binary F1: 0.5217 | Hackathon: 0.3688
No improvement for 2 epoch(s)

Epoch 5/15
----------------------------------------
Train loss: 1.7582 | Train F1: 0.1801 | Val Macro F1: 0.1306 | Val Binary F1: 0.4131 | Hackathon: 0.3001
No improvement for 3 epoch(s)

Epoch 6/15
----------------------------------------
Train loss: 1.7135 | Train F1: 0.2415 | Val Macro F1: 0.1720 | Val Binary F1: 0.4706 | Hackathon: 0.3511
No improvement for 4 epoch(s)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.63]

Train loss: 1.6828 | Train F1: 0.2521 | Val Macro F1: 0.2064 | Val Binary F1: 0.4206 | Hackathon: 0.3349
No improvement for 5 epoch(s)

Epoch 8/15
----------------------------------------
Train loss: 1.6422 | Train F1: 0.2933 | Val Macro F1: 0.2091 | Val Binary F1: 0.3942 | Hackathon: 0.3202
No improvement for 6 epoch(s)

Epoch 9/15
----------------------------------------
Train loss: 1.6270 | Train F1: 0.3327 | Val Macro F1: 0.2655 | Val Binary F1: 0.4608 | Hackathon: 0.3827
No improvement for 7 epoch(s)

Epoch 10/15
----------------------------------------


                                                           

Train loss: 1.5627 | Train F1: 0.4113 | Val Macro F1: 0.3256 | Val Binary F1: 0.5045 | Hackathon: 0.4330
No improvement for 8 epoch(s)

Early stopping (patience) at epoch 10

Training complete. Best epoch: 2 (score=0.5069)
  RESULT: score=0.5069  epoch=2  time=30s

──────────────────────────────────────────────────────────────────────
TRIAL: fusion_attn_03_lr=6.5e-05_hd=192_nh=2_al=3
  Params: {'lr': 6.507669643172911e-05, 'weight_decay': 0.0001376527180043546, 'dropout': 0.3, 'hidden_dim': 192, 'num_heads': 2, 'attn_layers': 3}
──────────────────────────────────────────────────────────────────────
Device: cuda
Loading audio backbone...
  Audio backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_best_100/best_model.pt
Loading video backbone...
  Video backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt
Classes (7): {'burnthrough': 0, 'crater_cracks': 1, 'excessive_

  self.attn_encoder = nn.TransformerEncoder(
Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.87]         

Train loss: 1.9262 | Train F1: 0.1638 | Val Macro F1: 0.2607 | Val Binary F1: 0.6566 | Hackathon: 0.4983
New best model (hackathon_score=0.4983)

Epoch 2/15
----------------------------------------
Train loss: 1.8793 | Train F1: 0.1873 | Val Macro F1: 0.2598 | Val Binary F1: 0.5000 | Hackathon: 0.4039
No improvement for 1 epoch(s)

Epoch 3/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.83]          

Train loss: 1.8336 | Train F1: 0.2130 | Val Macro F1: 0.2575 | Val Binary F1: 0.3500 | Hackathon: 0.3130
No improvement for 2 epoch(s)

Epoch 4/15
----------------------------------------
Train loss: 1.8250 | Train F1: 0.2032 | Val Macro F1: 0.2653 | Val Binary F1: 0.3902 | Hackathon: 0.3403
No improvement for 3 epoch(s)

Epoch 5/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.81]          

Train loss: 1.7776 | Train F1: 0.2412 | Val Macro F1: 0.2788 | Val Binary F1: 0.3500 | Hackathon: 0.3215
No improvement for 4 epoch(s)

Epoch 6/15
----------------------------------------
Train loss: 1.7538 | Train F1: 0.2542 | Val Macro F1: 0.3031 | Val Binary F1: 0.3902 | Hackathon: 0.3554
No improvement for 5 epoch(s)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.67]          

Train loss: 1.7136 | Train F1: 0.2685 | Val Macro F1: 0.2981 | Val Binary F1: 0.3582 | Hackathon: 0.3342
No improvement for 6 epoch(s)

Epoch 8/15
----------------------------------------
Train loss: 1.6563 | Train F1: 0.3205 | Val Macro F1: 0.3553 | Val Binary F1: 0.4651 | Hackathon: 0.4212
No improvement for 7 epoch(s)

Epoch 9/15
----------------------------------------


                                                                     

Train loss: 1.5708 | Train F1: 0.3819 | Val Macro F1: 0.3919 | Val Binary F1: 0.5652 | Hackathon: 0.4959
No improvement for 8 epoch(s)

Early stopping (patience) at epoch 9

Training complete. Best epoch: 1 (score=0.4983)
  RESULT: score=0.4983  epoch=1  time=31s

──────────────────────────────────────────────────────────────────────
TRIAL: fusion_attn_04_lr=1.0e-04_hd=256_nh=2_al=2
  Params: {'lr': 0.00010470272294668067, 'weight_decay': 8.269509567573889e-05, 'dropout': 0.25, 'hidden_dim': 256, 'num_heads': 2, 'attn_layers': 2}
──────────────────────────────────────────────────────────────────────
Device: cuda
Loading audio backbone...
  Audio backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_best_100/best_model.pt
Loading video backbone...
  Video backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt
Classes (7): {'burnthrough': 0, 'crater_cracks': 1, 'excessive

  self.attn_encoder = nn.TransformerEncoder(
Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.75]        

Train loss: 1.9954 | Train F1: 0.1121 | Val Macro F1: 0.0931 | Val Binary F1: 0.1034 | Hackathon: 0.0993
New best model (hackathon_score=0.0993)

Epoch 2/15
----------------------------------------
Train loss: 1.8614 | Train F1: 0.1701 | Val Macro F1: 0.0937 | Val Binary F1: 0.1356 | Hackathon: 0.1189
New best model (hackathon_score=0.1189)

Epoch 3/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.57]

Train loss: 1.7301 | Train F1: 0.2285 | Val Macro F1: 0.1263 | Val Binary F1: 0.1768 | Hackathon: 0.1566
New best model (hackathon_score=0.1566)

Epoch 4/15
----------------------------------------
Train loss: 1.5642 | Train F1: 0.3346 | Val Macro F1: 0.3580 | Val Binary F1: 0.3725 | Hackathon: 0.3667
New best model (hackathon_score=0.3667)

Epoch 5/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.23]

Train loss: 1.3955 | Train F1: 0.5251 | Val Macro F1: 0.6958 | Val Binary F1: 0.7328 | Hackathon: 0.7180
New best model (hackathon_score=0.7180)

Epoch 6/15
----------------------------------------
Train loss: 1.1941 | Train F1: 0.7429 | Val Macro F1: 0.8089 | Val Binary F1: 0.8269 | Hackathon: 0.8197
New best model (hackathon_score=0.8197)

Epoch 7/15
----------------------------------------


                                                           

Train loss: 1.0057 | Train F1: 0.8635 | Val Macro F1: 0.9708 | Val Binary F1: 0.9702 | Hackathon: 0.9705
New best model (hackathon_score=0.9705)

Target metric reached (0.9705 >= 0.95). Stopping at epoch 7

Training complete. Best epoch: 7 (score=0.9705)
  RESULT: score=0.9705  epoch=7  time=30s

──────────────────────────────────────────────────────────────────────
TRIAL: fusion_attn_05_lr=1.5e-04_hd=160_nh=4_al=2
  Params: {'lr': 0.00015112725127357162, 'weight_decay': 5.9795266194611525e-05, 'dropout': 0.3, 'hidden_dim': 160, 'num_heads': 4, 'attn_layers': 2}
──────────────────────────────────────────────────────────────────────
Device: cuda
Loading audio backbone...
  Audio backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_best_100/best_model.pt
Loading video backbone...
  Video backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt
Classes (7): {'burnthrough': 

  self.attn_encoder = nn.TransformerEncoder(
Validation:   0%|          | 0/5 [00:00<?, ?it/s]                    

Train loss: 1.9416 | Train F1: 0.1402 | Val Macro F1: 0.1712 | Val Binary F1: 0.2902 | Hackathon: 0.2426
New best model (hackathon_score=0.2426)

Epoch 2/15
----------------------------------------
Train loss: 1.7847 | Train F1: 0.2412 | Val Macro F1: 0.3028 | Val Binary F1: 0.3077 | Hackathon: 0.3057
New best model (hackathon_score=0.3057)

Epoch 3/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.52]          

Train loss: 1.6509 | Train F1: 0.3521 | Val Macro F1: 0.3897 | Val Binary F1: 0.4722 | Hackathon: 0.4392
New best model (hackathon_score=0.4392)

Epoch 4/15
----------------------------------------
Train loss: 1.4896 | Train F1: 0.4934 | Val Macro F1: 0.6523 | Val Binary F1: 0.6902 | Hackathon: 0.6750
New best model (hackathon_score=0.6750)

Epoch 5/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.21]          

Train loss: 1.3003 | Train F1: 0.6918 | Val Macro F1: 0.7939 | Val Binary F1: 0.8198 | Hackathon: 0.8094
New best model (hackathon_score=0.8094)

Epoch 6/15
----------------------------------------
Train loss: 1.1593 | Train F1: 0.7938 | Val Macro F1: 0.8975 | Val Binary F1: 0.8952 | Hackathon: 0.8961
New best model (hackathon_score=0.8961)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=0.966]         

Train loss: 1.0154 | Train F1: 0.8696 | Val Macro F1: 0.9346 | Val Binary F1: 0.9333 | Hackathon: 0.9338
New best model (hackathon_score=0.9338)

Epoch 8/15
----------------------------------------
Train loss: 0.8542 | Train F1: 0.9173 | Val Macro F1: 0.9487 | Val Binary F1: 0.9547 | Hackathon: 0.9523
New best model (hackathon_score=0.9523)

Target metric reached (0.9523 >= 0.95). Stopping at epoch 8

Training complete. Best epoch: 8 (score=0.9523)


                                                            

  RESULT: score=0.9523  epoch=8  time=30s

FUSION TUNING RESULTS (sorted by score):
  0.9705  ep=7  fusion_attn_04_lr=1.0e-04_hd=256_nh=2_al=2  params={'lr': 0.00010470272294668067, 'weight_decay': 8.269509567573889e-05, 'dropout': 0.25, 'hidden_dim': 256, 'num_heads': 2, 'attn_layers': 2}
  0.9609  ep=9  fusion_attn_00_lr=1.4e-04_hd=256_nh=2_al=3  params={'lr': 0.00014106939915668838, 'weight_decay': 0.00019094731280229325, 'dropout': 0.3, 'hidden_dim': 256, 'num_heads': 2, 'attn_layers': 3}
  0.9556  ep=10  fusion_attn_01_lr=1.4e-04_hd=192_nh=4_al=1  params={'lr': 0.0001377235425660948, 'weight_decay': 0.0001739301417385073, 'dropout': 0.15, 'hidden_dim': 192, 'num_heads': 4, 'attn_layers': 1}
  0.9523  ep=8  fusion_attn_05_lr=1.5e-04_hd=160_nh=4_al=2  params={'lr': 0.00015112725127357162, 'weight_decay': 5.9795266194611525e-05, 'dropout': 0.3, 'hidden_dim': 160, 'num_heads': 4, 'attn_layers': 2}
  0.5069  ep=2  fusion_attn_02_lr=5.6e-05_hd=192_nh=2_al=1  params={'lr': 5.620424750892

## 3) Train fusion model (100 epochs) with the best audio and video checkpoints and the tuned hyperparameters

In [None]:
# Stage 3: fusion training. The argument vector is assembled piece by piece
# and handed to run_cmd (defined earlier in this notebook). `-u` asks the
# child interpreter for unbuffered stdout/stderr.
fusion_train_cmd = [sys.executable, "-u", "-m", "fusion.run_fusion"]
fusion_train_cmd += ["--config", str(fusion_cfg_path)]
fusion_train_cmd += ["--audio_checkpoint", str(audio_ckpt)]
fusion_train_cmd += ["--video_checkpoint", str(video_ckpt)]
run_cmd(fusion_train_cmd)

# Stop here, rather than in a later cell, if no fusion checkpoint is on disk.
assert fusion_ckpt.exists(), f"Fusion checkpoint not found: {fusion_ckpt}"
print(f"Fusion best checkpoint: {fusion_ckpt}")

## 4) Optional: evaluate best fusion checkpoint

In [None]:
# Stage 4 (optional): invoke the same fusion entry point with --test_only,
# pointing --checkpoint at the fusion checkpoint path held in fusion_ckpt.
fusion_eval_cmd = [sys.executable, "-u", "-m", "fusion.run_fusion"]
fusion_eval_cmd += ["--config", str(fusion_cfg_path)]
fusion_eval_cmd += ["--audio_checkpoint", str(audio_ckpt)]
fusion_eval_cmd += ["--video_checkpoint", str(video_ckpt)]
fusion_eval_cmd += ["--test_only", "--checkpoint", str(fusion_ckpt)]
run_cmd(fusion_eval_cmd)