# Best 100-epoch full pipeline (Audio → Video → Fusion)

This notebook prepares a dedicated config and runs three training stages in sequence:
1. Audio (MIL multiclass) for 100 epochs
2. Video window model for 100 epochs
3. Temporal fusion for 100 epochs using the best audio+video checkpoints produced above

**Config split**: each model has its own config file so teammates can train audio & video independently and push without git conflicts:
- `configs/audio_pipeline.json` — audio model params
- `configs/video_pipeline.json` — video model params
- `configs/fusion_pipeline.json` — merged at fusion time from the two above + fusion-specific params

In [15]:
from pathlib import Path
import json
import os
import subprocess
import sys

# The notebook must be launched from the repository root: every later cell
# builds paths relative to ROOT and runs subprocesses with cwd=ROOT.
ROOT = Path.cwd()
if not ROOT.joinpath("configs", "master_config.json").exists():
    raise RuntimeError(f"Run this notebook from repo root. Current dir: {ROOT}")

print(f"Repo root: {ROOT}", f"Python: {sys.executable}", sep="\n")

Repo root: /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito
Python: /home/alolli/miniconda3/envs/therness_env/bin/python


In [16]:
# Base config that every stage starts from.
base_cfg_path = ROOT.joinpath("configs", "master_config.json")

# ── Separate config files: one per model (no git conflicts) ──
audio_cfg_path = ROOT.joinpath("configs", "audio_pipeline.json")
video_cfg_path = ROOT.joinpath("configs", "video_pipeline.json")
fusion_cfg_path = ROOT.joinpath("configs", "fusion_pipeline.json")

cfg = json.loads(base_cfg_path.read_text())

# Candidate dataset locations, tried in order. The first one that exists and
# contains at least one .flac file (searched recursively) is used.
data_root_candidates = [
    Path("/data1/malto/therness/data/Hackathon"),
    Path("/root/data/train_data"),
    ROOT.joinpath("sampleData"),
]
resolved_data_root = None
for candidate in data_root_candidates:
    if not candidate.exists():
        continue
    if any(candidate.rglob("*.flac")):
        resolved_data_root = str(candidate)
        break
else:
    # Loop finished without a break: none of the candidates qualified.
    raise FileNotFoundError(
        "No valid data root found. Checked: "
        + ", ".join(map(str, data_root_candidates))
    )

# Both keys point at the same directory.
cfg.update(train_data_root=resolved_data_root, data_root=resolved_data_root)
print(f"Using data_root: {resolved_data_root}")

# ---- Target metric threshold: stop early if metric >= this ----
# The same value is written into the audio, video and fusion training sections.
TARGET_METRIC_THRESHOLD = 0.95

# ---- AUDIO: best-known multiclass MIL config, 100 epochs ----
# setdefault keeps whatever master_config.json already provides and only
# creates the nested sections when they are missing.
ac = cfg.setdefault("audio", {})
am = ac.setdefault("model", {})
at = ac.setdefault("training", {})
amil = at.setdefault("sequence_mil", {})

am.update(dropout=0.15)
at.update(
    task="multiclass",
    num_epochs=100,
    lr=7e-05,
    weight_decay=3e-05,
    patience=30,
    checkpoint_dir="checkpoints/audio_best_100",
    metric="macro_f1",
    target_metric_threshold=TARGET_METRIC_THRESHOLD,
)
amil.update(
    enabled=True,
    batch_size=8,
    topk_ratio_pos=0.12,
    topk_ratio_neg=0.2,
    eval_pool_ratio=0.12,
    auto_threshold=True,
    good_window_weight=0.0,
    use_class_weights=True,
    class_weight_power=0.65,
    multiclass_eval_mode="topk_per_class",
    use_balanced_sampler=True,
    balanced_sampler_power=0.35,
)

# ---- VIDEO: best-known window config, 100 epochs ----
vw = cfg.setdefault("video_window", {})
vwm = vw.setdefault("model", {})
vwt = vw.setdefault("training", {})

vwm.update(dropout=0.12)
vwt.update(
    epochs=100,
    lr=1e-4,
    weight_decay=7e-5,
    class_weight_power=1.0,
    use_balanced_sampler=True,
    balanced_sampler_power=0.3,
    patience=30,
    checkpoint_dir="checkpoints/video_best_100",
    checkpoint_path="checkpoints/video_best_100/window_classifier.pth",
    target_metric_threshold=TARGET_METRIC_THRESHOLD,
)

# ---- FUSION: attention fusion (no RNN), 100 epochs ----
fc = cfg.setdefault("fusion", {})
fm = fc.setdefault("model", {})
ft = fc.setdefault("training", {})

fm.update(
    arch="attention",   # <-- per-window attention fusion (no GRU)
    audio_dim=128,
    video_dim=128,
    hidden_dim=192,
    dropout=0.2,
    num_heads=4,        # multi-head self-attention heads
    attn_layers=2,      # number of transformer encoder layers
)
ft.update(
    sequence_len=12,    # temporal steps (read by run_fusion.py from training config)
    num_epochs=100,
    lr=1e-4,
    weight_decay=1e-4,
    batch_size=64,
    patience=30,
    checkpoint_dir="checkpoints/fusion_best_100",
    target_metric_threshold=TARGET_METRIC_THRESHOLD,
)

# ── Write separate config files ──
# NOTE(review): every file receives the same full `cfg`; the split is by file
# name only in this cell. `_copy` is not used in this cell — confirm whether a
# later cell relies on it before removing the import.
import copy as _copy
for _cfg_path in (audio_cfg_path, video_cfg_path, fusion_cfg_path):
    _cfg_path.write_text(json.dumps(cfg, indent=2))

print("Wrote configs:")
for _label, _cfg_path in (
    ("audio", audio_cfg_path),
    ("video", video_cfg_path),
    ("fusion", fusion_cfg_path),
):
    # Left-pad the label to six characters so the arrows line up.
    print(f"  {_label:<6} → {_cfg_path.name}")
print(f"Target metric threshold: {TARGET_METRIC_THRESHOLD} (all models)")
print(f"Fusion arch: {fm['arch']} (per-window MLP + self-attention, no RNN)")

Using data_root: /data1/malto/therness/data/Hackathon
Wrote configs:
  audio  → audio_pipeline.json
  video  → video_pipeline.json
  fusion → fusion_pipeline.json
Target metric threshold: 0.95 (all models)
Fusion arch: attention (per-window MLP + self-attention, no RNN)


## Pre-extract video frames (one-time, speeds up training ~3×)

Extracts N uniformly-sampled JPEG frames per .avi video file into `data/video_frames/`.
The cell below skips extraction entirely when `data/video_frames/manifest.json` already exists, so it is safe to re-run.

In [17]:
frames_dir = ROOT / "data" / "video_frames"
manifest_path = frames_dir / "manifest.json"

# The manifest file is the only marker checked: if it is present the whole
# extraction step is skipped, otherwise the extractor module is run once.
if not manifest_path.exists():
    print(f"Extracting video frames to {frames_dir} ...")
    _cmd = [sys.executable, '-u', '-m', 'video.extract_frames']
    _cmd += ['--data_root', resolved_data_root]
    _cmd += ['--out_dir', str(frames_dir)]
    _cmd += ['--num_frames', '8']
    _cmd += ['--img_size', '160']
    _cmd += ['--workers', '16']
    print('RUN:', ' '.join(_cmd))
    # check=True raises if the extractor exits with a non-zero status.
    subprocess.run(_cmd, check=True, cwd=ROOT)
    assert manifest_path.exists(), f"Frame extraction failed — no manifest at {manifest_path}"
    print(f"Done. Frames saved to {frames_dir}")
else:
    _manifest = json.loads(manifest_path.read_text())
    n_entries = len(_manifest.get("entries", []))
    print(f"Video frames already extracted: {frames_dir}")
    print(f"  manifest.json has {n_entries} entries — skipping extraction")

Video frames already extracted: /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/data/video_frames
  manifest.json has 1551 entries — skipping extraction


In [18]:
def run_cmd(cmd):
    """Echo a command between banner lines, then run it from the repo root.

    Args:
        cmd: list of string arguments passed to ``subprocess.run``.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status (``check=True``).
    """
    banner = '=' * 90
    print(f"\n{banner}\nRUN: {' '.join(cmd)}\n{banner}")
    subprocess.run(cmd, check=True, cwd=ROOT)

# Checkpoint files that the three training stages are expected to produce.
# NOTE(review): the video config sets checkpoint_path to
# "checkpoints/video_best_100/window_classifier.pth" — confirm the video
# trainer also writes best_model.pt into its checkpoint_dir.
audio_ckpt = ROOT.joinpath("checkpoints", "audio_best_100", "best_model.pt")
video_ckpt = ROOT.joinpath("checkpoints", "video_best_100", "best_model.pt")
fusion_ckpt = ROOT.joinpath("checkpoints", "fusion_best_100", "best_model.pt")

print('Expected outputs:')
for _ckpt in (audio_ckpt, video_ckpt, fusion_ckpt):
    print(' -', _ckpt)

Expected outputs:
 - /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_best_100/best_model.pt
 - /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt
 - /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/fusion_best_100/best_model.pt


## 0) Hyperparameter tuning (short trials before full training)

Run 15-epoch trials with random search to find the best hyperparameters for each model.
Results are stored in the `tuning_results` dict and applied to the full 100-epoch config by the cell in section 0c.

In [19]:
import copy, random as _rnd, shutil, time, torch

# Budget for each short tuning trial.
TUNING_EPOCHS = 15          # short trials
TUNING_PATIENCE = 8         # early stop within each trial

# Number of random-search trials per model (same for all three).
N_AUDIO_TRIALS = N_VIDEO_TRIALS = N_FUSION_TRIALS = 6

# One independent result list per model; trial result dicts are appended here.
tuning_results = {stage: [] for stage in ("audio", "video", "fusion")}

def _sample_uniform(lo, hi, log=False):
    """Draw one float between ``lo`` and ``hi`` from the shared ``_rnd`` RNG.

    With ``log=True`` the draw is uniform in log-space (both bounds are passed
    through ``math.log``) and mapped back with ``math.exp``.
    """
    if not log:
        return _rnd.uniform(lo, hi)
    import math
    log_lo, log_hi = math.log(lo), math.log(hi)
    return math.exp(_rnd.uniform(log_lo, log_hi))

def _sample_choice(options):
    """Pick one element of ``options`` using the shared ``_rnd`` RNG."""
    picked = _rnd.choice(options)
    return picked

def run_trial(module, config_overrides, trial_cfg, trial_name, extra_args=None):
    """Run one short training trial in a subprocess and read back its best score.

    A deep copy of the notebook-level ``cfg`` is patched with
    ``config_overrides``, written to ``configs/_tuning_<trial_name>.json`` and
    passed to ``python -u -m <module> --config <file>`` (cwd = ``ROOT``). The
    temporary config file is always removed afterwards, including when the
    trial is interrupted or the checkpoint cannot be loaded.

    The result is read from ``checkpoints/_tuning/<trial_name>/best_model.pt``,
    so callers are expected to point the trainer's checkpoint directory there
    through ``config_overrides``.

    Args:
        module: dotted module name to run with ``python -m``.
        config_overrides: mapping of dot-separated key paths (for example
            ``"audio.training.lr"``) to values; missing intermediate dicts are
            created.
        trial_cfg: the sampled hyperparameters. Only printed and echoed back
            under ``"params"``; not applied to the config.
        trial_name: unique name used for the temp config file and the
            checkpoint directory.
        extra_args: optional list of extra command-line arguments.

    Returns:
        ``{"name", "score", "epoch", "params", "elapsed"}`` on success, or
        ``None`` when the subprocess exits non-zero or no ``best_model.pt``
        exists afterwards.
    """
    trial_dir = f"checkpoints/_tuning/{trial_name}"
    trial_cfg_path = ROOT / "configs" / f"_tuning_{trial_name}.json"

    # Deep copy so a trial never mutates the notebook-level cfg.
    tcfg = copy.deepcopy(cfg)
    tcfg["data_root"] = cfg["data_root"]
    tcfg["train_data_root"] = cfg.get("train_data_root", cfg["data_root"])

    # Apply dotted-path overrides, creating intermediate dicts on demand.
    for key_path, value in config_overrides.items():
        *parents, leaf = key_path.split(".")
        node = tcfg
        for part in parents:
            node = node.setdefault(part, {})
        node[leaf] = value

    cmd = [sys.executable, '-u', '-m', module, '--config', str(trial_cfg_path)]
    if extra_args:
        cmd.extend(extra_args)

    print(f"\n{'─'*70}")
    print(f"TRIAL: {trial_name}")
    print(f"  Params: {trial_cfg}")
    print(f"{'─'*70}")

    # try/finally guarantees the temp config is removed on every exit path
    # (failure, missing checkpoint, torch.load error, KeyboardInterrupt).
    try:
        trial_cfg_path.write_text(json.dumps(tcfg, indent=2))
        t0 = time.time()

        try:
            subprocess.run(cmd, check=True, cwd=ROOT)
        except subprocess.CalledProcessError:
            print(f"  TRIAL FAILED: {trial_name}")
            return None

        elapsed = time.time() - t0
        best_ckpt = ROOT / trial_dir / "best_model.pt"
        if not best_ckpt.exists():
            print(f"  No best checkpoint produced for {trial_name}")
            return None

        ckpt = torch.load(best_ckpt, map_location="cpu")
    finally:
        trial_cfg_path.unlink(missing_ok=True)

    # Audio uses val_f1, video/fusion use hackathon_score. A key that is
    # present but None falls through to the next candidate, then to 0.0.
    score = ckpt.get("hackathon_score")
    if score is None:
        score = ckpt.get("val_f1")
    # Coerce before formatting so tensor / numpy scalars print and sort
    # like plain floats.
    score = float(score) if score is not None else 0.0
    epoch = ckpt.get("epoch", "?")
    print(f"  RESULT: score={score:.4f}  epoch={epoch}  time={elapsed:.0f}s")

    return {"name": trial_name, "score": score, "epoch": epoch,
            "params": trial_cfg, "elapsed": elapsed}

# Echo the tuning budget so it is visible in the cell output.
print(
    f"Tuning config: {TUNING_EPOCHS} epochs, patience {TUNING_PATIENCE}",
    f"Trials: audio={N_AUDIO_TRIALS}, video={N_VIDEO_TRIALS}, fusion={N_FUSION_TRIALS}",
    sep="\n",
)

Tuning config: 15 epochs, patience 8
Trials: audio=6, video=6, fusion=6


### 0a) Audio tuning

In [None]:

# ── Round 2: refined search around best region ──
# Fixed seed: the same sequence of hyperparameters (and therefore the same
# trial names) is produced every time this cell runs.
_rnd.seed(99)

for i in range(N_AUDIO_TRIALS):
    # Sampling order matters for reproducibility: keep the keys in this order.
    trial_params = {
        "lr":           _sample_uniform(3.5e-5, 7e-5, log=True),   # narrowed around best 4.9e-5
        "weight_decay": _sample_uniform(1.5e-5, 4.5e-5, log=True), # narrowed around best 2.9e-5
        "dropout":      _sample_choice([0.08, 0.10, 0.12]),         # 0.10 dominated, try slightly lower too
        "cwp":          _sample_uniform(0.60, 0.75),                # narrowed around best 0.67
        "bsp":          _sample_uniform(0.35, 0.45),                # higher bsp was better (0.39)
        "topk_pos":     _sample_choice([0.12, 0.15, 0.18]),         # 0.15 dominated, test neighbors
    }
    trial_name = (
        f"audio_tune2_{i:02d}_lr={trial_params['lr']:.1e}"
        f"_wd={trial_params['weight_decay']:.1e}"
        f"_do={trial_params['dropout']}"
    )
    overrides = {
        "audio.training.num_epochs":                        TUNING_EPOCHS,
        "audio.training.patience":                          TUNING_PATIENCE,
        "audio.training.lr":                                trial_params["lr"],
        "audio.training.weight_decay":                      trial_params["weight_decay"],
        "audio.model.dropout":                              trial_params["dropout"],
        "audio.training.sequence_mil.class_weight_power":   trial_params["cwp"],
        "audio.training.sequence_mil.balanced_sampler_power": trial_params["bsp"],
        "audio.training.sequence_mil.topk_ratio_pos":       trial_params["topk_pos"],
        # The evaluation pool ratio is tied to the sampled positive top-k ratio.
        "audio.training.sequence_mil.eval_pool_ratio":      trial_params["topk_pos"],
        "audio.training.checkpoint_dir":                    f"checkpoints/_tuning/{trial_name}",
    }
    result = run_trial("audio.run_audio", overrides, trial_params, trial_name)
    if result:
        # Replace (in place) any earlier result with the same trial name so
        # that re-running this cell, e.g. after an interrupted run, does not
        # leave duplicate entries in the ranking.
        tuning_results["audio"][:] = [
            prev for prev in tuning_results["audio"] if prev["name"] != trial_name
        ]
        tuning_results["audio"].append(result)

# Re-sort including round-1 results
# NOTE(review): no round-1 cell is visible in this notebook; earlier results
# are only present if they are already in tuning_results["audio"].
tuning_results["audio"].sort(key=lambda r: r["score"], reverse=True)
print(f"\n{'='*70}")
print("AUDIO TUNING RESULTS — ALL ROUNDS (sorted by score):")
print(f"{'='*70}")
for r in tuning_results["audio"]:
    print(f"  {r['score']:.4f}  ep={r['epoch']}  {r['name']}  params={r['params']}")

if tuning_results["audio"]:
    best_audio = tuning_results["audio"][0]
    print(f"\nBEST AUDIO: {best_audio['score']:.4f} — {best_audio['params']}")
else:
    print("\nNo successful audio trials")



──────────────────────────────────────────────────────────────────────
TRIAL: audio_tune2_00_lr=4.6e-05_wd=1.9e-05_do=0.08
  Params: {'lr': 4.631029660038342e-05, 'weight_decay': 1.8687512932167e-05, 'dropout': 0.08, 'cwp': 0.6345394092594436, 'bsp': 0.363324440091757, 'topk_pos': 0.12}
──────────────────────────────────────────────────────────────────────
Device: cuda

Total weld files: 1551
File label distribution (multiclass): {'excessive_penetration': 259, 'good_weld': 731, 'excessive_convexity': 80, 'lack_of_fusion': 158, 'crater_cracks': 75, 'burnthrough': 169, 'overlap': 79}
Split strategy: stratified
Train welds: 1240 | Val welds: 311
File split stats | train={ burnthrough: 135 (10.9%), crater_cracks: 60 (4.8%), excessive_convexity: 64 (5.2%), excessive_penetration: 207 (16.7%), good_weld: 585 (47.2%), lack_of_fusion: 126 (10.2%), overlap: 63 (5.1%) } | val={ burnthrough: 34 (10.9%), crater_cracks: 15 (4.8%), excessive_convexity: 16 (5.1%), excessive_penetration: 52 (16.7%), g

Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.7908 | Val loss: 1.7713 | Val Macro F1: 0.0466 | LR: 3.09401e-05
New best model saved (val_f1=0.0466)

Epoch 2/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.3247 | Val loss: 1.1047 | Val Macro F1: 0.1494 | LR: 4.63103e-05
New best model saved (val_f1=0.1494)

Epoch 3/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.8461 | Val loss: 0.8171 | Val Macro F1: 0.3940 | LR: 4.63103e-05
New best model saved (val_f1=0.3940)

Epoch 4/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.6415 | Val loss: 0.6428 | Val Macro F1: 0.4229 | LR: 4.63103e-05
New best model saved (val_f1=0.4229)

Epoch 5/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5252 | Val loss: 0.5705 | Val Macro F1: 0.4295 | LR: 4.63103e-05
New best model saved (val_f1=0.4295)

Epoch 6/15
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4513 | Val loss: 0.5146 | Val Macro F1: 0.4085 | LR: 4.63103e-05
No improvement for 1 epoch(s)

Epoch 7/15
----------------------------------------


Training MIL multiclass:  65%|██████▌   | 101/155 [00:02<00:01, 43.55it/s, loss=0.426]

### 0b) Video tuning

In [None]:
# Fixed seed: the same sequence of hyperparameters (and therefore the same
# trial names) is produced every time this cell runs.
_rnd.seed(123)

for i in range(N_VIDEO_TRIALS):
    # Sampling order matters for reproducibility: keep the keys in this order.
    trial_params = {
        "lr":           _sample_uniform(7e-5, 1.5e-4, log=True),
        "weight_decay": _sample_uniform(3e-5, 1.5e-4, log=True),
        "dropout":      _sample_choice([0.10, 0.12, 0.14, 0.15, 0.18]),
        "cwp":          _sample_uniform(0.85, 1.15),
        "bsp":          _sample_uniform(0.2, 0.45),
    }
    trial_name = (
        f"video_tune_{i:02d}_lr={trial_params['lr']:.1e}"
        f"_wd={trial_params['weight_decay']:.1e}"
        f"_do={trial_params['dropout']}"
    )
    # NOTE(review): run_trial reads "<checkpoint_dir>/best_model.pt", while
    # checkpoint_path below points at window_classifier.pth — confirm that
    # video.run_video also writes best_model.pt, otherwise every trial
    # returns None.
    overrides = {
        "video_window.training.epochs":                 TUNING_EPOCHS,
        "video_window.training.patience":               TUNING_PATIENCE,
        "video_window.training.lr":                     trial_params["lr"],
        "video_window.training.weight_decay":           trial_params["weight_decay"],
        "video_window.model.dropout":                   trial_params["dropout"],
        "video_window.training.class_weight_power":     trial_params["cwp"],
        "video_window.training.balanced_sampler_power": trial_params["bsp"],
        "video_window.training.checkpoint_dir":         f"checkpoints/_tuning/{trial_name}",
        "video_window.training.checkpoint_path":        f"checkpoints/_tuning/{trial_name}/window_classifier.pth",
    }
    result = run_trial("video.run_video", overrides, trial_params, trial_name)
    if result:
        # Replace (in place) any earlier result with the same trial name so
        # that re-running this cell does not leave duplicate entries.
        tuning_results["video"][:] = [
            prev for prev in tuning_results["video"] if prev["name"] != trial_name
        ]
        tuning_results["video"].append(result)

# Best score first.
tuning_results["video"].sort(key=lambda r: r["score"], reverse=True)
print(f"\n{'='*70}")
print("VIDEO TUNING RESULTS (sorted by score):")
print(f"{'='*70}")
for r in tuning_results["video"]:
    print(f"  {r['score']:.4f}  ep={r['epoch']}  {r['name']}  params={r['params']}")

if tuning_results["video"]:
    best_video = tuning_results["video"][0]
    print(f"\nBEST VIDEO: {best_video['score']:.4f} — {best_video['params']}")
else:
    print("\nNo successful video trials")

### 0c) Apply best tuning results to full-training config

This cell updates the pipeline config with the best hyperparameters found above, then rewrites the config JSON for the 100-epoch runs. Fusion tuning runs *after* audio + video training (needs their checkpoints).

In [13]:
# ── Apply best audio params ──
# tuning_results["audio"] is sorted best-first by the tuning cell, so index 0
# is the winning trial.
if tuning_results["audio"]:
    ba = tuning_results["audio"][0]["params"]
    cfg["audio"]["training"].update(lr=ba["lr"], weight_decay=ba["weight_decay"])
    cfg["audio"]["model"].update(dropout=ba["dropout"])
    cfg["audio"]["training"]["sequence_mil"].update(
        class_weight_power=ba["cwp"],
        balanced_sampler_power=ba["bsp"],
        topk_ratio_pos=ba["topk_pos"],
        eval_pool_ratio=ba["topk_pos"],   # kept equal to topk_ratio_pos, as in tuning
    )
    print(f"Applied best AUDIO params (tuning score {tuning_results['audio'][0]['score']:.4f}):")
    print(f"  lr={ba['lr']:.2e}  wd={ba['weight_decay']:.2e}  dropout={ba['dropout']}")
    print(f"  cwp={ba['cwp']:.3f}  bsp={ba['bsp']:.3f}  topk_pos={ba['topk_pos']}")
else:
    print("No audio tuning results — keeping default params")

# ── Apply best video params ──
if tuning_results["video"]:
    bv = tuning_results["video"][0]["params"]
    cfg["video_window"]["training"].update(lr=bv["lr"], weight_decay=bv["weight_decay"])
    cfg["video_window"]["model"].update(dropout=bv["dropout"])
    cfg["video_window"]["training"].update(
        class_weight_power=bv["cwp"],
        balanced_sampler_power=bv["bsp"],
    )
    print(f"\nApplied best VIDEO params (tuning score {tuning_results['video'][0]['score']:.4f}):")
    print(f"  lr={bv['lr']:.2e}  wd={bv['weight_decay']:.2e}  dropout={bv['dropout']}")
    print(f"  cwp={bv['cwp']:.3f}  bsp={bv['bsp']:.3f}")
else:
    print("\nNo video tuning results — keeping default params")

# ── Restore full-training settings ──
cfg["audio"]["training"].update(
    num_epochs=100,
    patience=30,
    checkpoint_dir="checkpoints/audio_best_100",
)
cfg["video_window"]["training"].update(
    epochs=100,
    patience=30,
    checkpoint_dir="checkpoints/video_best_100",
    checkpoint_path="checkpoints/video_best_100/window_classifier.pth",
)

# ── Write each model's config to its OWN file (no git conflicts) ──
# Only the audio and video files are rewritten here; both receive the full cfg.
for _cfg_path in (audio_cfg_path, video_cfg_path):
    _cfg_path.write_text(json.dumps(cfg, indent=2))
print("\nUpdated configs:")
print(f"  audio  → {audio_cfg_path.name}")
print(f"  video  → {video_cfg_path.name}")
print("(fusion config will be merged before fusion training)")

Applied best AUDIO params (tuning score 0.8293):
  lr=4.17e-05  wd=2.28e-05  dropout=0.12
  cwp=0.705  bsp=0.359  topk_pos=0.18

No video tuning results — keeping default params

Updated config written: /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/configs/master_config.best_100_pipeline.json


## 1) Train audio model (100 epochs)

In [14]:
# Stage 1: train the audio model in a subprocess with the audio pipeline config.
run_cmd([sys.executable, '-u', '-m', 'audio.run_audio', '--config', str(audio_cfg_path)])

# A clean exit is not enough: the later stages need the best checkpoint file.
assert audio_ckpt.exists(), f"Audio checkpoint not found: {audio_ckpt}"
print(f"Audio best checkpoint: {audio_ckpt}")


RUN: /home/alolli/miniconda3/envs/therness_env/bin/python -u -m audio.run_audio --config /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/configs/master_config.best_100_pipeline.json
Device: cuda

Total weld files: 1551
File label distribution (multiclass): {'excessive_penetration': 259, 'good_weld': 731, 'excessive_convexity': 80, 'lack_of_fusion': 158, 'crater_cracks': 75, 'burnthrough': 169, 'overlap': 79}
Split strategy: stratified
Train welds: 1240 | Val welds: 311
File split stats | train={ burnthrough: 135 (10.9%), crater_cracks: 60 (4.8%), excessive_convexity: 64 (5.2%), excessive_penetration: 207 (16.7%), good_weld: 585 (47.2%), lack_of_fusion: 126 (10.2%), overlap: 63 (5.1%) } | val={ burnthrough: 34 (10.9%), crater_cracks: 15 (4.8%), excessive_convexity: 16 (5.1%), excessive_penetration: 52 (16.7%), good_weld: 146 (46.9%), lack_of_fusion: 32 (10.3%), overlap: 16 (5.1%) }
Train files: 1240 | Val files: 311
Classes (7): {'burnthrough': 0, 'crater_cracks': 1, 'ex

Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.7846 | Val loss: 1.8934 | Val Macro F1: 0.0283 | LR: 4.16555e-06
New best model saved (val_f1=0.0283)

Epoch 2/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.7505 | Val loss: 1.8472 | Val Macro F1: 0.0280 | LR: 8.33109e-06
No improvement for 1 epoch(s)

Epoch 3/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.6918 | Val loss: 1.7117 | Val Macro F1: 0.0982 | LR: 1.24966e-05
New best model saved (val_f1=0.0982)

Epoch 4/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.5477 | Val loss: 1.4906 | Val Macro F1: 0.1350 | LR: 1.66622e-05
New best model saved (val_f1=0.1350)

Epoch 5/100
----------------------------------------


                                                                                     

Train loss: 1.3397 | Val loss: 1.2974 | Val Macro F1: 0.1775 | LR: 2.08277e-05
New best model saved (val_f1=0.1775)

Epoch 6/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                     

Train loss: 1.1326 | Val loss: 1.1502 | Val Macro F1: 0.1818 | LR: 2.49933e-05
New best model saved (val_f1=0.1818)

Epoch 7/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.9470 | Val loss: 0.9835 | Val Macro F1: 0.2508 | LR: 2.91588e-05
New best model saved (val_f1=0.2508)

Epoch 8/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.7961 | Val loss: 0.8529 | Val Macro F1: 0.3118 | LR: 3.33244e-05
New best model saved (val_f1=0.3118)

Epoch 9/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.6639 | Val loss: 0.7468 | Val Macro F1: 0.3488 | LR: 3.74899e-05
New best model saved (val_f1=0.3488)

Epoch 10/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.6243 | Val loss: 0.7127 | Val Macro F1: 0.4328 | LR: 4.16555e-05
New best model saved (val_f1=0.4328)

Epoch 11/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5729 | Val loss: 0.6547 | Val Macro F1: 0.4677 | LR: 4.16555e-05
New best model saved (val_f1=0.4677)

Epoch 12/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5612 | Val loss: 0.7063 | Val Macro F1: 0.4984 | LR: 4.16555e-05
New best model saved (val_f1=0.4984)

Epoch 13/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.5044 | Val loss: 0.7376 | Val Macro F1: 0.3533 | LR: 4.16555e-05
No improvement for 1 epoch(s)

Epoch 14/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.4800 | Val loss: 0.6055 | Val Macro F1: 0.5188 | LR: 4.16555e-05
New best model saved (val_f1=0.5188)

Epoch 15/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3796 | Val loss: 0.4277 | Val Macro F1: 0.5880 | LR: 4.16555e-05
New best model saved (val_f1=0.5880)

Epoch 16/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3224 | Val loss: 0.3653 | Val Macro F1: 0.6643 | LR: 4.16555e-05
New best model saved (val_f1=0.6643)

Epoch 17/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2868 | Val loss: 0.3631 | Val Macro F1: 0.6976 | LR: 4.16555e-05
New best model saved (val_f1=0.6976)

Epoch 18/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2438 | Val loss: 0.3094 | Val Macro F1: 0.7259 | LR: 4.16555e-05
New best model saved (val_f1=0.7259)

Epoch 19/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2429 | Val loss: 0.3373 | Val Macro F1: 0.7481 | LR: 4.16555e-05
New best model saved (val_f1=0.7481)

Epoch 20/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2435 | Val loss: 0.3396 | Val Macro F1: 0.6477 | LR: 4.16555e-05
No improvement for 1 epoch(s)

Epoch 21/100
----------------------------------------


                                                                                      

Train loss: 0.2483 | Val loss: 0.4341 | Val Macro F1: 0.6398 | LR: 4.16555e-05
No improvement for 2 epoch(s)

Epoch 22/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2297 | Val loss: 0.3856 | Val Macro F1: 0.7937 | LR: 4.16555e-05
New best model saved (val_f1=0.7937)

Epoch 23/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2518 | Val loss: 0.3411 | Val Macro F1: 0.7513 | LR: 4.16555e-05
No improvement for 1 epoch(s)

Epoch 24/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2439 | Val loss: 0.4091 | Val Macro F1: 0.6658 | LR: 4.16555e-05
No improvement for 2 epoch(s)

Epoch 25/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2587 | Val loss: 0.3886 | Val Macro F1: 0.8044 | LR: 4.16555e-05
New best model saved (val_f1=0.8044)

Epoch 26/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2412 | Val loss: 0.2793 | Val Macro F1: 0.8243 | LR: 4.16555e-05
New best model saved (val_f1=0.8243)

Epoch 27/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2646 | Val loss: 0.4005 | Val Macro F1: 0.4123 | LR: 4.16555e-05
No improvement for 1 epoch(s)

Epoch 28/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2570 | Val loss: 0.4665 | Val Macro F1: 0.6717 | LR: 4.16555e-05
No improvement for 2 epoch(s)

Epoch 29/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.3116 | Val loss: 0.4570 | Val Macro F1: 0.6264 | LR: 4.16555e-05
No improvement for 3 epoch(s)

Epoch 30/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.2195 | Val loss: 0.2316 | Val Macro F1: 0.8147 | LR: 4.16555e-05
No improvement for 4 epoch(s)

Epoch 31/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1604 | Val loss: 0.2328 | Val Macro F1: 0.8060 | LR: 4.16555e-05
No improvement for 5 epoch(s)

Epoch 32/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                      

Train loss: 0.1154 | Val loss: 0.1330 | Val Macro F1: 0.8744 | LR: 2.08277e-05
New best model saved (val_f1=0.8744)

Epoch 33/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0974 | Val loss: 0.1223 | Val Macro F1: 0.8826 | LR: 2.08277e-05
New best model saved (val_f1=0.8826)

Epoch 34/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0987 | Val loss: 0.1241 | Val Macro F1: 0.8857 | LR: 2.08277e-05
New best model saved (val_f1=0.8857)

Epoch 35/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0962 | Val loss: 0.1859 | Val Macro F1: 0.8532 | LR: 2.08277e-05
No improvement for 1 epoch(s)

Epoch 36/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0979 | Val loss: 0.1405 | Val Macro F1: 0.8659 | LR: 2.08277e-05
No improvement for 2 epoch(s)

Epoch 37/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0864 | Val loss: 0.1353 | Val Macro F1: 0.8906 | LR: 2.08277e-05
New best model saved (val_f1=0.8906)

Epoch 38/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0920 | Val loss: 0.1388 | Val Macro F1: 0.8944 | LR: 2.08277e-05
New best model saved (val_f1=0.8944)

Epoch 39/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0897 | Val loss: 0.1484 | Val Macro F1: 0.8523 | LR: 2.08277e-05
No improvement for 1 epoch(s)

Epoch 40/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0874 | Val loss: 0.2085 | Val Macro F1: 0.8708 | LR: 2.08277e-05
No improvement for 2 epoch(s)

Epoch 41/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0961 | Val loss: 0.1396 | Val Macro F1: 0.8400 | LR: 2.08277e-05
No improvement for 3 epoch(s)

Epoch 42/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0873 | Val loss: 0.2008 | Val Macro F1: 0.8249 | LR: 2.08277e-05
No improvement for 4 epoch(s)

Epoch 43/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0955 | Val loss: 0.1717 | Val Macro F1: 0.8498 | LR: 2.08277e-05
No improvement for 5 epoch(s)

Epoch 44/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0813 | Val loss: 0.1565 | Val Macro F1: 0.8855 | LR: 1.04139e-05
No improvement for 6 epoch(s)

Epoch 45/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0784 | Val loss: 0.1191 | Val Macro F1: 0.9141 | LR: 1.04139e-05
New best model saved (val_f1=0.9141)

Epoch 46/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0789 | Val loss: 0.1217 | Val Macro F1: 0.8946 | LR: 1.04139e-05
No improvement for 1 epoch(s)

Epoch 47/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0815 | Val loss: 0.1476 | Val Macro F1: 0.9128 | LR: 1.04139e-05
No improvement for 2 epoch(s)

Epoch 48/100
----------------------------------------


                                                                                       

Train loss: 0.0776 | Val loss: 0.1639 | Val Macro F1: 0.8131 | LR: 1.04139e-05
No improvement for 3 epoch(s)

Epoch 49/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0782 | Val loss: 0.1288 | Val Macro F1: 0.8888 | LR: 1.04139e-05
No improvement for 4 epoch(s)

Epoch 50/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0795 | Val loss: 0.1265 | Val Macro F1: 0.9002 | LR: 1.04139e-05
No improvement for 5 epoch(s)

Epoch 51/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0688 | Val loss: 0.1138 | Val Macro F1: 0.8890 | LR: 5.20693e-06
No improvement for 6 epoch(s)

Epoch 52/100
----------------------------------------


                                                                                       

Train loss: 0.0745 | Val loss: 0.1148 | Val Macro F1: 0.8869 | LR: 5.20693e-06
No improvement for 7 epoch(s)

Epoch 53/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0672 | Val loss: 0.1064 | Val Macro F1: 0.9142 | LR: 5.20693e-06
New best model saved (val_f1=0.9142)

Epoch 54/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0635 | Val loss: 0.1157 | Val Macro F1: 0.8860 | LR: 5.20693e-06
No improvement for 1 epoch(s)

Epoch 55/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0602 | Val loss: 0.1235 | Val Macro F1: 0.8944 | LR: 5.20693e-06
No improvement for 2 epoch(s)

Epoch 56/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0576 | Val loss: 0.1088 | Val Macro F1: 0.8917 | LR: 5.20693e-06
No improvement for 3 epoch(s)

Epoch 57/100
----------------------------------------


                                                                                       

Train loss: 0.0538 | Val loss: 0.0988 | Val Macro F1: 0.9194 | LR: 5.20693e-06
New best model saved (val_f1=0.9194)

Epoch 58/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0522 | Val loss: 0.0934 | Val Macro F1: 0.9051 | LR: 5.20693e-06
No improvement for 1 epoch(s)

Epoch 59/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0520 | Val loss: 0.1194 | Val Macro F1: 0.9328 | LR: 5.20693e-06
New best model saved (val_f1=0.9328)

Epoch 60/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0519 | Val loss: 0.0978 | Val Macro F1: 0.9043 | LR: 5.20693e-06
No improvement for 1 epoch(s)

Epoch 61/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0474 | Val loss: 0.1262 | Val Macro F1: 0.8168 | LR: 5.20693e-06
No improvement for 2 epoch(s)

Epoch 62/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0464 | Val loss: 0.1096 | Val Macro F1: 0.8913 | LR: 5.20693e-06
No improvement for 3 epoch(s)

Epoch 63/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0448 | Val loss: 0.1092 | Val Macro F1: 0.8854 | LR: 5.20693e-06
No improvement for 4 epoch(s)

Epoch 64/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0454 | Val loss: 0.1172 | Val Macro F1: 0.8590 | LR: 5.20693e-06
No improvement for 5 epoch(s)

Epoch 65/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0433 | Val loss: 0.0890 | Val Macro F1: 0.9272 | LR: 2.60347e-06
No improvement for 6 epoch(s)

Epoch 66/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0414 | Val loss: 0.0867 | Val Macro F1: 0.9162 | LR: 2.60347e-06
No improvement for 7 epoch(s)

Epoch 67/100
----------------------------------------


                                                                                       

Train loss: 0.0417 | Val loss: 0.0841 | Val Macro F1: 0.9072 | LR: 2.60347e-06
No improvement for 8 epoch(s)

Epoch 68/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0411 | Val loss: 0.1068 | Val Macro F1: 0.9258 | LR: 2.60347e-06
No improvement for 9 epoch(s)

Epoch 69/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0385 | Val loss: 0.0989 | Val Macro F1: 0.9183 | LR: 2.60347e-06
No improvement for 10 epoch(s)

Epoch 70/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0377 | Val loss: 0.0922 | Val Macro F1: 0.9157 | LR: 1.30173e-06
No improvement for 11 epoch(s)

Epoch 71/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0376 | Val loss: 0.0883 | Val Macro F1: 0.9267 | LR: 1.30173e-06
No improvement for 12 epoch(s)

Epoch 72/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0378 | Val loss: 0.0869 | Val Macro F1: 0.9138 | LR: 1.30173e-06
No improvement for 13 epoch(s)

Epoch 73/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0376 | Val loss: 0.0920 | Val Macro F1: 0.9090 | LR: 1.30173e-06
No improvement for 14 epoch(s)

Epoch 74/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0362 | Val loss: 0.0882 | Val Macro F1: 0.9120 | LR: 1.30173e-06
No improvement for 15 epoch(s)

Epoch 75/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0350 | Val loss: 0.0920 | Val Macro F1: 0.9081 | LR: 1e-06
No improvement for 16 epoch(s)

Epoch 76/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0345 | Val loss: 0.0918 | Val Macro F1: 0.8996 | LR: 1e-06
No improvement for 17 epoch(s)

Epoch 77/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0347 | Val loss: 0.0852 | Val Macro F1: 0.9123 | LR: 1e-06
No improvement for 18 epoch(s)

Epoch 78/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0365 | Val loss: 0.0841 | Val Macro F1: 0.9151 | LR: 1e-06
No improvement for 19 epoch(s)

Epoch 79/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0352 | Val loss: 0.0910 | Val Macro F1: 0.9079 | LR: 1e-06
No improvement for 20 epoch(s)

Epoch 80/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0379 | Val loss: 0.0879 | Val Macro F1: 0.9141 | LR: 1e-06
No improvement for 21 epoch(s)

Epoch 81/100
----------------------------------------


                                                                                       

Train loss: 0.0368 | Val loss: 0.0902 | Val Macro F1: 0.9169 | LR: 1e-06
No improvement for 22 epoch(s)

Epoch 82/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0365 | Val loss: 0.0910 | Val Macro F1: 0.9151 | LR: 1e-06
No improvement for 23 epoch(s)

Epoch 83/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0365 | Val loss: 0.0833 | Val Macro F1: 0.9223 | LR: 1e-06
No improvement for 24 epoch(s)

Epoch 84/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0358 | Val loss: 0.0952 | Val Macro F1: 0.9078 | LR: 1e-06
No improvement for 25 epoch(s)

Epoch 85/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0375 | Val loss: 0.0906 | Val Macro F1: 0.9158 | LR: 1e-06
No improvement for 26 epoch(s)

Epoch 86/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0375 | Val loss: 0.0984 | Val Macro F1: 0.9035 | LR: 1e-06
No improvement for 27 epoch(s)

Epoch 87/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0389 | Val loss: 0.0927 | Val Macro F1: 0.9048 | LR: 1e-06
No improvement for 28 epoch(s)

Epoch 88/100
----------------------------------------


Training MIL multiclass:   0%|          | 0/155 [00:00<?, ?it/s]                       

Train loss: 0.0406 | Val loss: 0.0949 | Val Macro F1: 0.9080 | LR: 1e-06
No improvement for 29 epoch(s)

Epoch 89/100
----------------------------------------


                                                                                       

Train loss: 0.0392 | Val loss: 0.0881 | Val Macro F1: 0.9230 | LR: 1e-06
No improvement for 30 epoch(s)

Early stopping (patience) at epoch 89

Training complete. Best epoch: 59 (val_f1=0.9328)

Best epoch: 59
Train losses: ['1.7846', '1.7505', '1.6918', '1.5477', '1.3397', '1.1326', '0.9470', '0.7961', '0.6639', '0.6243', '0.5729', '0.5612', '0.5044', '0.4800', '0.3796', '0.3224', '0.2868', '0.2438', '0.2429', '0.2435', '0.2483', '0.2297', '0.2518', '0.2439', '0.2587', '0.2412', '0.2646', '0.2570', '0.3116', '0.2195', '0.1604', '0.1154', '0.0974', '0.0987', '0.0962', '0.0979', '0.0864', '0.0920', '0.0897', '0.0874', '0.0961', '0.0873', '0.0955', '0.0813', '0.0784', '0.0789', '0.0815', '0.0776', '0.0782', '0.0795', '0.0688', '0.0745', '0.0672', '0.0635', '0.0602', '0.0576', '0.0538', '0.0522', '0.0520', '0.0519', '0.0474', '0.0464', '0.0448', '0.0454', '0.0433', '0.0414', '0.0417', '0.0411', '0.0385', '0.0377', '0.0376', '0.0378', '0.0376', '0.0362', '0.0350', '0.0345', '0.0347', '0.03

## 2) Train video model (100 epochs)

In [15]:
# Stage 2: run the video trainer (`python -u -m video.run_video`) against the
# dedicated video config. `-u` asks the child interpreter for unbuffered output.
run_cmd([sys.executable, "-u", "-m", "video.run_video", "--config", str(video_cfg_path)])

# The fusion stage consumes this checkpoint, so stop here if training did not produce it.
assert video_ckpt.exists(), f"Video checkpoint not found: {video_ckpt}"
print(f"Video best checkpoint: {video_ckpt}")


RUN: /home/alolli/miniconda3/envs/therness_env/bin/python -u -m video.run_video --config /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/configs/master_config.best_100_pipeline.json
Device: cuda

Discovering video files in /data1/malto/therness/data/Hackathon...
       Scanning good_weld...
       Scanning defect-weld...
Found 1551 videos
  Code 00 (class 0): 731 videos
  Code 01 (class 1): 259 videos
  Code 02 (class 2): 169 videos
  Code 06 (class 3): 79 videos
  Code 07 (class 4): 158 videos
  Code 08 (class 5): 80 videos
  Code 11 (class 6): 75 videos
Train: 1268 videos | Val: 283 videos
Using pre-extracted frames from data/video_frames
Train: 1268 | Val: 283 | num_frames=12 [JPEG]
Balanced sampler: enabled (power=0.3, videos/epoch=1268)
Model parameters: 190,759
Class weights: ['0.150', '0.476', '0.718', '1.606', '0.740', '1.895', '1.414']
Config saved to checkpoints/video_best_100/config.json

  TRAINING START — 100 epochs
  Checkpoint dir: checkpoints/video_best_

Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9901 | Train F1: 0.0228 | Val loss: 2.0306 | Val Macro F1: 0.0274 | Val Binary F1: 0.7906 | Hackathon: 0.4853 | LR: 1e-05
New best model (hackathon_score=0.4853)

Epoch 2/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9629 | Train F1: 0.0209 | Val loss: 2.0754 | Val Macro F1: 0.0274 | Val Binary F1: 0.7906 | Hackathon: 0.4853 | LR: 2e-05
No improvement for 1 epoch(s)

Epoch 3/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9433 | Train F1: 0.0351 | Val loss: 2.0454 | Val Macro F1: 0.0275 | Val Binary F1: 0.7906 | Hackathon: 0.4853 | LR: 3e-05
New best model (hackathon_score=0.4853)

Epoch 4/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.9468 | Train F1: 0.0416 | Val loss: 2.0123 | Val Macro F1: 0.0843 | Val Binary F1: 0.7906 | Hackathon: 0.5081 | LR: 4e-05
New best model (hackathon_score=0.5081)

Epoch 5/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.8944 | Train F1: 0.0700 | Val loss: 1.9724 | Val Macro F1: 0.1413 | Val Binary F1: 0.7906 | Hackathon: 0.5309 | LR: 5e-05
New best model (hackathon_score=0.5309)

Epoch 6/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.8735 | Train F1: 0.0904 | Val loss: 1.9130 | Val Macro F1: 0.1253 | Val Binary F1: 0.7906 | Hackathon: 0.5245 | LR: 6e-05
No improvement for 1 epoch(s)

Epoch 7/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.8011 | Train F1: 0.1218 | Val loss: 1.8554 | Val Macro F1: 0.1143 | Val Binary F1: 0.7906 | Hackathon: 0.5201 | LR: 7e-05
No improvement for 2 epoch(s)

Epoch 8/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.7516 | Train F1: 0.1338 | Val loss: 1.8163 | Val Macro F1: 0.2428 | Val Binary F1: 0.7906 | Hackathon: 0.5715 | LR: 8e-05
New best model (hackathon_score=0.5715)

Epoch 9/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.7083 | Train F1: 0.1338 | Val loss: 1.7213 | Val Macro F1: 0.1098 | Val Binary F1: 0.7906 | Hackathon: 0.5183 | LR: 9e-05
No improvement for 1 epoch(s)

Epoch 10/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6872 | Train F1: 0.1158 | Val loss: 1.6883 | Val Macro F1: 0.2520 | Val Binary F1: 0.7906 | Hackathon: 0.5752 | LR: 0.0001
New best model (hackathon_score=0.5752)

Epoch 11/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6539 | Train F1: 0.1998 | Val loss: 1.6613 | Val Macro F1: 0.2794 | Val Binary F1: 0.7906 | Hackathon: 0.5861 | LR: 0.0001
New best model (hackathon_score=0.5861)

Epoch 12/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6145 | Train F1: 0.2278 | Val loss: 1.6266 | Val Macro F1: 0.3585 | Val Binary F1: 0.7906 | Hackathon: 0.6177 | LR: 0.0001
New best model (hackathon_score=0.6177)

Epoch 13/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.6028 | Train F1: 0.2013 | Val loss: 1.5987 | Val Macro F1: 0.4184 | Val Binary F1: 0.7906 | Hackathon: 0.6417 | LR: 0.0001
New best model (hackathon_score=0.6417)

Epoch 14/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5791 | Train F1: 0.2769 | Val loss: 1.6395 | Val Macro F1: 0.2597 | Val Binary F1: 0.7880 | Hackathon: 0.5767 | LR: 0.0001
No improvement for 1 epoch(s)

Epoch 15/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.5526 | Train F1: 0.2732 | Val loss: 1.6261 | Val Macro F1: 0.2429 | Val Binary F1: 0.7906 | Hackathon: 0.5715 | LR: 0.0001
No improvement for 2 epoch(s)

Epoch 16/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.4923 | Train F1: 0.3904 | Val loss: 1.6573 | Val Macro F1: 0.3763 | Val Binary F1: 0.7906 | Hackathon: 0.6249 | LR: 0.0001
No improvement for 3 epoch(s)

Epoch 17/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.4500 | Train F1: 0.4059 | Val loss: 1.5709 | Val Macro F1: 0.1482 | Val Binary F1: 0.7906 | Hackathon: 0.5336 | LR: 0.0001
No improvement for 4 epoch(s)

Epoch 18/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3790 | Train F1: 0.4428 | Val loss: 1.4506 | Val Macro F1: 0.4889 | Val Binary F1: 0.7906 | Hackathon: 0.6699 | LR: 0.0001
New best model (hackathon_score=0.6699)

Epoch 19/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3741 | Train F1: 0.4186 | Val loss: 1.5910 | Val Macro F1: 0.2341 | Val Binary F1: 0.7906 | Hackathon: 0.5680 | LR: 0.0001
No improvement for 1 epoch(s)

Epoch 20/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3566 | Train F1: 0.4589 | Val loss: 1.4706 | Val Macro F1: 0.4011 | Val Binary F1: 0.7897 | Hackathon: 0.6343 | LR: 0.0001
No improvement for 2 epoch(s)

Epoch 21/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3143 | Train F1: 0.4462 | Val loss: 1.5286 | Val Macro F1: 0.5092 | Val Binary F1: 0.8277 | Hackathon: 0.7003 | LR: 0.0001
New best model (hackathon_score=0.7003)

Epoch 22/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.3054 | Train F1: 0.4459 | Val loss: 1.5022 | Val Macro F1: 0.6158 | Val Binary F1: 0.8371 | Hackathon: 0.7486 | LR: 0.0001
New best model (hackathon_score=0.7486)

Epoch 23/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2718 | Train F1: 0.4778 | Val loss: 1.5041 | Val Macro F1: 0.3660 | Val Binary F1: 0.7906 | Hackathon: 0.6208 | LR: 0.0001
No improvement for 1 epoch(s)

Epoch 24/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2235 | Train F1: 0.4924 | Val loss: 1.4500 | Val Macro F1: 0.5306 | Val Binary F1: 0.8204 | Hackathon: 0.7045 | LR: 5e-05
No improvement for 2 epoch(s)

Epoch 25/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.1998 | Train F1: 0.5212 | Val loss: 1.3822 | Val Macro F1: 0.4464 | Val Binary F1: 0.7906 | Hackathon: 0.6529 | LR: 5e-05
No improvement for 3 epoch(s)

Epoch 26/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.1586 | Train F1: 0.5278 | Val loss: 1.3938 | Val Macro F1: 0.4583 | Val Binary F1: 0.7906 | Hackathon: 0.6577 | LR: 5e-05
No improvement for 4 epoch(s)

Epoch 27/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.1910 | Train F1: 0.5149 | Val loss: 1.3760 | Val Macro F1: 0.3733 | Val Binary F1: 0.7906 | Hackathon: 0.6237 | LR: 5e-05
No improvement for 5 epoch(s)

Epoch 28/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2094 | Train F1: 0.5062 | Val loss: 1.3713 | Val Macro F1: 0.4318 | Val Binary F1: 0.7906 | Hackathon: 0.6471 | LR: 5e-05
No improvement for 6 epoch(s)

Epoch 29/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.1843 | Train F1: 0.5214 | Val loss: 1.5294 | Val Macro F1: 0.3547 | Val Binary F1: 0.7906 | Hackathon: 0.6162 | LR: 5e-05
No improvement for 7 epoch(s)

Epoch 30/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.1879 | Train F1: 0.5269 | Val loss: 1.4756 | Val Macro F1: 0.4675 | Val Binary F1: 0.7906 | Hackathon: 0.6614 | LR: 5e-05
No improvement for 8 epoch(s)

Epoch 31/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2004 | Train F1: 0.5050 | Val loss: 1.4142 | Val Macro F1: 0.2786 | Val Binary F1: 0.7906 | Hackathon: 0.5858 | LR: 5e-05
No improvement for 9 epoch(s)

Epoch 32/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.2509 | Train F1: 0.4962 | Val loss: 1.5550 | Val Macro F1: 0.4198 | Val Binary F1: 0.7906 | Hackathon: 0.6423 | LR: 5e-05
No improvement for 10 epoch(s)

Epoch 33/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.1749 | Train F1: 0.4943 | Val loss: 1.5186 | Val Macro F1: 0.4604 | Val Binary F1: 0.7906 | Hackathon: 0.6585 | LR: 5e-05
No improvement for 11 epoch(s)

Epoch 34/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.0922 | Train F1: 0.5676 | Val loss: 1.3175 | Val Macro F1: 0.4991 | Val Binary F1: 0.7906 | Hackathon: 0.6740 | LR: 2.5e-05
No improvement for 12 epoch(s)

Epoch 35/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.0474 | Train F1: 0.5791 | Val loss: 1.2683 | Val Macro F1: 0.5687 | Val Binary F1: 0.7906 | Hackathon: 0.7018 | LR: 2.5e-05
No improvement for 13 epoch(s)

Epoch 36/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.0408 | Train F1: 0.5974 | Val loss: 1.2575 | Val Macro F1: 0.5280 | Val Binary F1: 0.7906 | Hackathon: 0.6856 | LR: 2.5e-05
No improvement for 14 epoch(s)

Epoch 37/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.0212 | Train F1: 0.6166 | Val loss: 1.2971 | Val Macro F1: 0.5824 | Val Binary F1: 0.7906 | Hackathon: 0.7073 | LR: 2.5e-05
No improvement for 15 epoch(s)

Epoch 38/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                    

Train loss: 1.0090 | Train F1: 0.5966 | Val loss: 1.2679 | Val Macro F1: 0.5238 | Val Binary F1: 0.7906 | Hackathon: 0.6839 | LR: 2.5e-05
No improvement for 16 epoch(s)

Epoch 39/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.9661 | Train F1: 0.6308 | Val loss: 1.2456 | Val Macro F1: 0.5447 | Val Binary F1: 0.7906 | Hackathon: 0.6922 | LR: 2.5e-05
No improvement for 17 epoch(s)

Epoch 40/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.9484 | Train F1: 0.6313 | Val loss: 1.2960 | Val Macro F1: 0.6051 | Val Binary F1: 0.7906 | Hackathon: 0.7164 | LR: 2.5e-05
No improvement for 18 epoch(s)

Epoch 41/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.9488 | Train F1: 0.6283 | Val loss: 1.2824 | Val Macro F1: 0.4486 | Val Binary F1: 0.7906 | Hackathon: 0.6538 | LR: 2.5e-05
No improvement for 19 epoch(s)

Epoch 42/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.9102 | Train F1: 0.6467 | Val loss: 1.2777 | Val Macro F1: 0.5176 | Val Binary F1: 0.7906 | Hackathon: 0.6814 | LR: 2.5e-05
No improvement for 20 epoch(s)

Epoch 43/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.9272 | Train F1: 0.6416 | Val loss: 1.2485 | Val Macro F1: 0.5399 | Val Binary F1: 0.7906 | Hackathon: 0.6903 | LR: 2.5e-05
No improvement for 21 epoch(s)

Epoch 44/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.9307 | Train F1: 0.6390 | Val loss: 1.6218 | Val Macro F1: 0.1750 | Val Binary F1: 0.7906 | Hackathon: 0.5443 | LR: 2.5e-05
No improvement for 22 epoch(s)

Epoch 45/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.9114 | Train F1: 0.6724 | Val loss: 1.2135 | Val Macro F1: 0.6278 | Val Binary F1: 0.7906 | Hackathon: 0.7255 | LR: 1.25e-05
No improvement for 23 epoch(s)

Epoch 46/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.8725 | Train F1: 0.6743 | Val loss: 1.2792 | Val Macro F1: 0.6812 | Val Binary F1: 0.7906 | Hackathon: 0.7468 | LR: 1.25e-05
No improvement for 24 epoch(s)

Epoch 47/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.8571 | Train F1: 0.6605 | Val loss: 1.1868 | Val Macro F1: 0.6368 | Val Binary F1: 0.7906 | Hackathon: 0.7291 | LR: 1.25e-05
No improvement for 25 epoch(s)

Epoch 48/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.8551 | Train F1: 0.6720 | Val loss: 1.2133 | Val Macro F1: 0.5848 | Val Binary F1: 0.7906 | Hackathon: 0.7083 | LR: 1.25e-05
No improvement for 26 epoch(s)

Epoch 49/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.8536 | Train F1: 0.6735 | Val loss: 1.1995 | Val Macro F1: 0.5505 | Val Binary F1: 0.7906 | Hackathon: 0.6946 | LR: 1.25e-05
No improvement for 27 epoch(s)

Epoch 50/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.8547 | Train F1: 0.6878 | Val loss: 1.2635 | Val Macro F1: 0.5743 | Val Binary F1: 0.7906 | Hackathon: 0.7041 | LR: 1.25e-05
No improvement for 28 epoch(s)

Epoch 51/100
----------------------------------------


Training:   0%|          | 0/40 [00:00<?, ?it/s]                     

Train loss: 0.8567 | Train F1: 0.6834 | Val loss: 1.2122 | Val Macro F1: 0.5746 | Val Binary F1: 0.7906 | Hackathon: 0.7042 | LR: 1.25e-05
No improvement for 29 epoch(s)

Epoch 52/100
----------------------------------------


                                                                     

Train loss: 0.8617 | Train F1: 0.6650 | Val loss: 1.2558 | Val Macro F1: 0.6734 | Val Binary F1: 0.7906 | Hackathon: 0.7437 | LR: 1.25e-05
No improvement for 30 epoch(s)

Early stopping (patience) at epoch 52

Training complete. Best epoch: 22 (score=0.7486)
Video best checkpoint: /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt


### 0d) Merge audio + video configs → fusion config, then tune fusion

If a teammate trained the audio or video model separately, run `git pull` first so that `configs/audio_pipeline.json` and `configs/video_pipeline.json` are up to date.

In [8]:
# ── Build the fusion config out of the saved audio and video configs ──
import copy as _copy

# Re-read both per-model configs from disk so the merge reflects what is saved there.
audio_saved = json.loads(audio_cfg_path.read_text())
video_saved = json.loads(video_cfg_path.read_text())

# Start from an independent copy of the audio config, then overlay:
#   * "video_window" — taken from the video config (it may carry different tuned params)
#   * "fusion"       — taken from the in-memory `cfg` (latest fusion defaults/tuning)
fusion_merged = _copy.deepcopy(audio_saved)
fusion_merged.update(
    video_window=_copy.deepcopy(video_saved["video_window"]),
    fusion=_copy.deepcopy(cfg["fusion"]),
)

# Persist the merged config and report where each part came from.
fusion_cfg_path.write_text(json.dumps(fusion_merged, indent=2))
print(
    "\n".join(
        (
            f"Merged config written → {fusion_cfg_path.name}",
            f"  audio params from:  {audio_cfg_path.name}",
            f"  video params from:  {video_cfg_path.name}",
            "  fusion params from: in-memory defaults",
        )
    )
)

# ── Fusion attention tuning ──
_rnd.seed(456)

assert audio_ckpt.exists(), f"Audio checkpoint needed for fusion tuning: {audio_ckpt}"
assert video_ckpt.exists(), f"Video checkpoint needed for fusion tuning: {video_ckpt}"

for i in range(N_FUSION_TRIALS):
    trial_params = {
        "lr":           _sample_uniform(5e-5, 2e-4, log=True),
        "weight_decay": _sample_uniform(5e-5, 2e-4, log=True),
        "dropout":      _sample_choice([0.15, 0.2, 0.25, 0.3]),
        "hidden_dim":   _sample_choice([128, 160, 192, 256]),
        "num_heads":    _sample_choice([2, 4]),
        "attn_layers":  _sample_choice([1, 2, 3]),
    }
    trial_name = (
        f"fusion_attn_{i:02d}_lr={trial_params['lr']:.1e}"
        f"_hd={trial_params['hidden_dim']}"
        f"_nh={trial_params['num_heads']}"
        f"_al={trial_params['attn_layers']}"
    )
    overrides = {
        "fusion.model.arch":                "attention",
        "fusion.training.num_epochs":       TUNING_EPOCHS,
        "fusion.training.patience":         TUNING_PATIENCE,
        "fusion.training.lr":               trial_params["lr"],
        "fusion.training.weight_decay":     trial_params["weight_decay"],
        "fusion.model.dropout":             trial_params["dropout"],
        "fusion.model.hidden_dim":          trial_params["hidden_dim"],
        "fusion.model.num_heads":           trial_params["num_heads"],
        "fusion.model.attn_layers":         trial_params["attn_layers"],
        "fusion.training.checkpoint_dir":   f"checkpoints/_tuning/{trial_name}",
    }
    result = run_trial(
        "fusion.run_fusion", overrides, trial_params, trial_name,
        extra_args=["--audio_checkpoint", str(audio_ckpt),
                     "--video_checkpoint", str(video_ckpt)],
    )
    if result:
        tuning_results["fusion"].append(result)

tuning_results["fusion"].sort(key=lambda r: r["score"], reverse=True)
print(f"\n{'='*70}")
print("FUSION TUNING RESULTS (sorted by score):")
print(f"{'='*70}")
for r in tuning_results["fusion"]:
    print(f"  {r['score']:.4f}  ep={r['epoch']}  {r['name']}  params={r['params']}")

if tuning_results["fusion"]:
    bf = tuning_results["fusion"][0]["params"]
    cfg["fusion"]["training"]["lr"]           = bf["lr"]
    cfg["fusion"]["training"]["weight_decay"] = bf["weight_decay"]
    cfg["fusion"]["model"]["dropout"]         = bf["dropout"]
    cfg["fusion"]["model"]["hidden_dim"]      = bf["hidden_dim"]
    cfg["fusion"]["model"]["num_heads"]       = bf["num_heads"]
    cfg["fusion"]["model"]["attn_layers"]     = bf["attn_layers"]
    # Restore full-training settings
    cfg["fusion"]["training"]["num_epochs"]     = 100
    cfg["fusion"]["training"]["patience"]       = 30
    cfg["fusion"]["training"]["checkpoint_dir"] = "checkpoints/fusion_best_100"

    # Update the merged fusion config with tuned params
    fusion_merged["fusion"] = _copy.deepcopy(cfg["fusion"])
    fusion_cfg_path.write_text(json.dumps(fusion_merged, indent=2))
    print(f"\nApplied best FUSION params (tuning score {tuning_results['fusion'][0]['score']:.4f}):")
    print(f"  lr={bf['lr']:.2e}  wd={bf['weight_decay']:.2e}  dropout={bf['dropout']}")
    print(f"  hidden_dim={bf['hidden_dim']}  num_heads={bf['num_heads']}  attn_layers={bf['attn_layers']}")
    print(f"Updated config: {fusion_cfg_path.name}")
else:
    # No tuning results — still write fusion config with defaults
    fusion_merged["fusion"] = _copy.deepcopy(cfg["fusion"])
    fusion_cfg_path.write_text(json.dumps(fusion_merged, indent=2))
    print("\nNo successful fusion trials — keeping default params")
    print(f"Wrote default fusion config: {fusion_cfg_path.name}")


──────────────────────────────────────────────────────────────────────
TRIAL: fusion_attn_00_lr=1.4e-04_hd=256_nh=2_al=3
  Params: {'lr': 0.00014106939915668838, 'weight_decay': 0.00019094731280229325, 'dropout': 0.3, 'hidden_dim': 256, 'num_heads': 2, 'attn_layers': 3}
──────────────────────────────────────────────────────────────────────
Device: cuda
Loading audio backbone...
  Audio backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_best_100/best_model.pt
Loading video backbone...
  Video backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt
Classes (7): {'burnthrough': 0, 'crater_cracks': 1, 'excessive_convexity': 2, 'excessive_penetration': 3, 'good_weld': 4, 'lack_of_fusion': 5, 'overlap': 6}
Train: 1240 files | Val: 311 files

Extracting temporal audio sequences (T=12)...
  Train: torch.Size([1240, 12, 128]) | Val: torch.Size([311, 12, 128])
Building audio↔v

  self.attn_encoder = nn.TransformerEncoder(
Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.95]         

Train loss: 2.0115 | Train F1: 0.1228 | Val Macro F1: 0.0940 | Val Binary F1: 0.1461 | Hackathon: 0.1252
New best model (hackathon_score=0.1252)

Epoch 2/15
----------------------------------------
Train loss: 1.9363 | Train F1: 0.1255 | Val Macro F1: 0.0944 | Val Binary F1: 0.1667 | Hackathon: 0.1378
New best model (hackathon_score=0.1378)

Epoch 3/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.7]           

Train loss: 1.8837 | Train F1: 0.1534 | Val Macro F1: 0.0942 | Val Binary F1: 0.1564 | Hackathon: 0.1315
No improvement for 1 epoch(s)

Epoch 4/15
----------------------------------------
Train loss: 1.7770 | Train F1: 0.1985 | Val Macro F1: 0.2162 | Val Binary F1: 0.1356 | Hackathon: 0.1678
New best model (hackathon_score=0.1678)

Epoch 5/15
----------------------------------------


Training:  85%|████████▌ | 17/20 [00:00<00:00, 165.33it/s, loss=1.57]

Train loss: 1.6772 | Train F1: 0.2853 | Val Macro F1: 0.2326 | Val Binary F1: 0.1667 | Hackathon: 0.1931
New best model (hackathon_score=0.1931)

Epoch 6/15
----------------------------------------
Train loss: 1.5684 | Train F1: 0.4006 | Val Macro F1: 0.3132 | Val Binary F1: 0.2812 | Hackathon: 0.2940
New best model (hackathon_score=0.2940)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.36]          

Train loss: 1.4427 | Train F1: 0.5401 | Val Macro F1: 0.6466 | Val Binary F1: 0.6929 | Hackathon: 0.6744
New best model (hackathon_score=0.6744)

Epoch 8/15
----------------------------------------
Train loss: 1.3402 | Train F1: 0.6420 | Val Macro F1: 0.7989 | Val Binary F1: 0.7971 | Hackathon: 0.7978
New best model (hackathon_score=0.7978)

Epoch 9/15
----------------------------------------


                                                                     

Train loss: 1.0817 | Train F1: 0.8547 | Val Macro F1: 0.9527 | Val Binary F1: 0.9664 | Hackathon: 0.9609
New best model (hackathon_score=0.9609)

Target metric reached (0.9609 >= 0.95). Stopping at epoch 9

Training complete. Best epoch: 9 (score=0.9609)
  RESULT: score=0.9609  epoch=9  time=30s

──────────────────────────────────────────────────────────────────────
TRIAL: fusion_attn_01_lr=1.4e-04_hd=192_nh=4_al=1
  Params: {'lr': 0.0001377235425660948, 'weight_decay': 0.0001739301417385073, 'dropout': 0.15, 'hidden_dim': 192, 'num_heads': 4, 'attn_layers': 1}
──────────────────────────────────────────────────────────────────────
Device: cuda
Loading audio backbone...
  Audio backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_best_100/best_model.pt
Loading video backbone...
  Video backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt
Classes (7): {'burnthrough': 0

  self.attn_encoder = nn.TransformerEncoder(
Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.75]        

Train loss: 1.8935 | Train F1: 0.1181 | Val Macro F1: 0.1642 | Val Binary F1: 0.7660 | Hackathon: 0.5252
New best model (hackathon_score=0.5252)

Epoch 2/15
----------------------------------------
Train loss: 1.8207 | Train F1: 0.1765 | Val Macro F1: 0.1442 | Val Binary F1: 0.4566 | Hackathon: 0.3317
No improvement for 1 epoch(s)

Epoch 3/15
----------------------------------------
Train loss: 1.7476 | Train F1: 0.2086 | Val Macro F1: 0.1509 | Val Binary F1: 0.3981 | Hackathon: 0.2992
No improvement for 2 epoch(s)

Epoch 4/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.61]

Train loss: 1.6806 | Train F1: 0.2730 | Val Macro F1: 0.1693 | Val Binary F1: 0.2812 | Hackathon: 0.2365
No improvement for 3 epoch(s)

Epoch 5/15
----------------------------------------
Train loss: 1.6161 | Train F1: 0.3291 | Val Macro F1: 0.2711 | Val Binary F1: 0.3902 | Hackathon: 0.3426
No improvement for 4 epoch(s)

Epoch 6/15
----------------------------------------
Train loss: 1.5657 | Train F1: 0.4190 | Val Macro F1: 0.3331 | Val Binary F1: 0.5333 | Hackathon: 0.4532
No improvement for 5 epoch(s)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.42]

Train loss: 1.5008 | Train F1: 0.4924 | Val Macro F1: 0.5625 | Val Binary F1: 0.6475 | Hackathon: 0.6135
New best model (hackathon_score=0.6135)

Epoch 8/15
----------------------------------------
Train loss: 1.4247 | Train F1: 0.5769 | Val Macro F1: 0.6553 | Val Binary F1: 0.7109 | Hackathon: 0.6887
New best model (hackathon_score=0.6887)

Epoch 9/15
----------------------------------------
Train loss: 1.2046 | Train F1: 0.7319 | Val Macro F1: 0.8714 | Val Binary F1: 0.8990 | Hackathon: 0.8880
New best model (hackathon_score=0.8880)

Epoch 10/15
----------------------------------------


                                                            

Train loss: 0.8210 | Train F1: 0.9144 | Val Macro F1: 0.9587 | Val Binary F1: 0.9536 | Hackathon: 0.9556
New best model (hackathon_score=0.9556)

Target metric reached (0.9556 >= 0.95). Stopping at epoch 10

Training complete. Best epoch: 10 (score=0.9556)
  RESULT: score=0.9556  epoch=10  time=31s

──────────────────────────────────────────────────────────────────────
TRIAL: fusion_attn_02_lr=5.6e-05_hd=192_nh=2_al=1
  Params: {'lr': 5.620424750892037e-05, 'weight_decay': 0.0001311798207968172, 'dropout': 0.15, 'hidden_dim': 192, 'num_heads': 2, 'attn_layers': 1}
──────────────────────────────────────────────────────────────────────
Device: cuda
Loading audio backbone...
  Audio backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_best_100/best_model.pt
Loading video backbone...
  Video backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt
Classes (7): {'burnthrough'

  self.attn_encoder = nn.TransformerEncoder(
Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.83]        

Train loss: 1.9178 | Train F1: 0.0942 | Val Macro F1: 0.0665 | Val Binary F1: 0.6943 | Hackathon: 0.4432
New best model (hackathon_score=0.4432)

Epoch 2/15
----------------------------------------
Train loss: 1.8662 | Train F1: 0.1396 | Val Macro F1: 0.1657 | Val Binary F1: 0.7344 | Hackathon: 0.5069
New best model (hackathon_score=0.5069)

Epoch 3/15
----------------------------------------
Train loss: 1.8255 | Train F1: 0.1762 | Val Macro F1: 0.1328 | Val Binary F1: 0.5217 | Hackathon: 0.3662
No improvement for 1 epoch(s)

Epoch 4/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.79]

Train loss: 1.7830 | Train F1: 0.1906 | Val Macro F1: 0.1394 | Val Binary F1: 0.5217 | Hackathon: 0.3688
No improvement for 2 epoch(s)

Epoch 5/15
----------------------------------------
Train loss: 1.7582 | Train F1: 0.1801 | Val Macro F1: 0.1306 | Val Binary F1: 0.4131 | Hackathon: 0.3001
No improvement for 3 epoch(s)

Epoch 6/15
----------------------------------------
Train loss: 1.7135 | Train F1: 0.2415 | Val Macro F1: 0.1720 | Val Binary F1: 0.4706 | Hackathon: 0.3511
No improvement for 4 epoch(s)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.63]

Train loss: 1.6828 | Train F1: 0.2521 | Val Macro F1: 0.2064 | Val Binary F1: 0.4206 | Hackathon: 0.3349
No improvement for 5 epoch(s)

Epoch 8/15
----------------------------------------
Train loss: 1.6422 | Train F1: 0.2933 | Val Macro F1: 0.2091 | Val Binary F1: 0.3942 | Hackathon: 0.3202
No improvement for 6 epoch(s)

Epoch 9/15
----------------------------------------
Train loss: 1.6270 | Train F1: 0.3327 | Val Macro F1: 0.2655 | Val Binary F1: 0.4608 | Hackathon: 0.3827
No improvement for 7 epoch(s)

Epoch 10/15
----------------------------------------


                                                           

Train loss: 1.5627 | Train F1: 0.4113 | Val Macro F1: 0.3256 | Val Binary F1: 0.5045 | Hackathon: 0.4330
No improvement for 8 epoch(s)

Early stopping (patience) at epoch 10

Training complete. Best epoch: 2 (score=0.5069)
  RESULT: score=0.5069  epoch=2  time=30s

──────────────────────────────────────────────────────────────────────
TRIAL: fusion_attn_03_lr=6.5e-05_hd=192_nh=2_al=3
  Params: {'lr': 6.507669643172911e-05, 'weight_decay': 0.0001376527180043546, 'dropout': 0.3, 'hidden_dim': 192, 'num_heads': 2, 'attn_layers': 3}
──────────────────────────────────────────────────────────────────────
Device: cuda
Loading audio backbone...
  Audio backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_best_100/best_model.pt
Loading video backbone...
  Video backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt
Classes (7): {'burnthrough': 0, 'crater_cracks': 1, 'excessive_

  self.attn_encoder = nn.TransformerEncoder(
Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.87]         

Train loss: 1.9262 | Train F1: 0.1638 | Val Macro F1: 0.2607 | Val Binary F1: 0.6566 | Hackathon: 0.4983
New best model (hackathon_score=0.4983)

Epoch 2/15
----------------------------------------
Train loss: 1.8793 | Train F1: 0.1873 | Val Macro F1: 0.2598 | Val Binary F1: 0.5000 | Hackathon: 0.4039
No improvement for 1 epoch(s)

Epoch 3/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.83]          

Train loss: 1.8336 | Train F1: 0.2130 | Val Macro F1: 0.2575 | Val Binary F1: 0.3500 | Hackathon: 0.3130
No improvement for 2 epoch(s)

Epoch 4/15
----------------------------------------
Train loss: 1.8250 | Train F1: 0.2032 | Val Macro F1: 0.2653 | Val Binary F1: 0.3902 | Hackathon: 0.3403
No improvement for 3 epoch(s)

Epoch 5/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.81]          

Train loss: 1.7776 | Train F1: 0.2412 | Val Macro F1: 0.2788 | Val Binary F1: 0.3500 | Hackathon: 0.3215
No improvement for 4 epoch(s)

Epoch 6/15
----------------------------------------
Train loss: 1.7538 | Train F1: 0.2542 | Val Macro F1: 0.3031 | Val Binary F1: 0.3902 | Hackathon: 0.3554
No improvement for 5 epoch(s)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.67]          

Train loss: 1.7136 | Train F1: 0.2685 | Val Macro F1: 0.2981 | Val Binary F1: 0.3582 | Hackathon: 0.3342
No improvement for 6 epoch(s)

Epoch 8/15
----------------------------------------
Train loss: 1.6563 | Train F1: 0.3205 | Val Macro F1: 0.3553 | Val Binary F1: 0.4651 | Hackathon: 0.4212
No improvement for 7 epoch(s)

Epoch 9/15
----------------------------------------


                                                                     

Train loss: 1.5708 | Train F1: 0.3819 | Val Macro F1: 0.3919 | Val Binary F1: 0.5652 | Hackathon: 0.4959
No improvement for 8 epoch(s)

Early stopping (patience) at epoch 9

Training complete. Best epoch: 1 (score=0.4983)
  RESULT: score=0.4983  epoch=1  time=31s

──────────────────────────────────────────────────────────────────────
TRIAL: fusion_attn_04_lr=1.0e-04_hd=256_nh=2_al=2
  Params: {'lr': 0.00010470272294668067, 'weight_decay': 8.269509567573889e-05, 'dropout': 0.25, 'hidden_dim': 256, 'num_heads': 2, 'attn_layers': 2}
──────────────────────────────────────────────────────────────────────
Device: cuda
Loading audio backbone...
  Audio backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_best_100/best_model.pt
Loading video backbone...
  Video backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt
Classes (7): {'burnthrough': 0, 'crater_cracks': 1, 'excessive

  self.attn_encoder = nn.TransformerEncoder(
Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.75]        

Train loss: 1.9954 | Train F1: 0.1121 | Val Macro F1: 0.0931 | Val Binary F1: 0.1034 | Hackathon: 0.0993
New best model (hackathon_score=0.0993)

Epoch 2/15
----------------------------------------
Train loss: 1.8614 | Train F1: 0.1701 | Val Macro F1: 0.0937 | Val Binary F1: 0.1356 | Hackathon: 0.1189
New best model (hackathon_score=0.1189)

Epoch 3/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.57]

Train loss: 1.7301 | Train F1: 0.2285 | Val Macro F1: 0.1263 | Val Binary F1: 0.1768 | Hackathon: 0.1566
New best model (hackathon_score=0.1566)

Epoch 4/15
----------------------------------------
Train loss: 1.5642 | Train F1: 0.3346 | Val Macro F1: 0.3580 | Val Binary F1: 0.3725 | Hackathon: 0.3667
New best model (hackathon_score=0.3667)

Epoch 5/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.23]

Train loss: 1.3955 | Train F1: 0.5251 | Val Macro F1: 0.6958 | Val Binary F1: 0.7328 | Hackathon: 0.7180
New best model (hackathon_score=0.7180)

Epoch 6/15
----------------------------------------
Train loss: 1.1941 | Train F1: 0.7429 | Val Macro F1: 0.8089 | Val Binary F1: 0.8269 | Hackathon: 0.8197
New best model (hackathon_score=0.8197)

Epoch 7/15
----------------------------------------


                                                           

Train loss: 1.0057 | Train F1: 0.8635 | Val Macro F1: 0.9708 | Val Binary F1: 0.9702 | Hackathon: 0.9705
New best model (hackathon_score=0.9705)

Target metric reached (0.9705 >= 0.95). Stopping at epoch 7

Training complete. Best epoch: 7 (score=0.9705)
  RESULT: score=0.9705  epoch=7  time=30s

──────────────────────────────────────────────────────────────────────
TRIAL: fusion_attn_05_lr=1.5e-04_hd=160_nh=4_al=2
  Params: {'lr': 0.00015112725127357162, 'weight_decay': 5.9795266194611525e-05, 'dropout': 0.3, 'hidden_dim': 160, 'num_heads': 4, 'attn_layers': 2}
──────────────────────────────────────────────────────────────────────
Device: cuda
Loading audio backbone...
  Audio backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_best_100/best_model.pt
Loading video backbone...
  Video backbone loaded from /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/video_best_100/best_model.pt
Classes (7): {'burnthrough': 

  self.attn_encoder = nn.TransformerEncoder(
Validation:   0%|          | 0/5 [00:00<?, ?it/s]                    

Train loss: 1.9416 | Train F1: 0.1402 | Val Macro F1: 0.1712 | Val Binary F1: 0.2902 | Hackathon: 0.2426
New best model (hackathon_score=0.2426)

Epoch 2/15
----------------------------------------
Train loss: 1.7847 | Train F1: 0.2412 | Val Macro F1: 0.3028 | Val Binary F1: 0.3077 | Hackathon: 0.3057
New best model (hackathon_score=0.3057)

Epoch 3/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.52]          

Train loss: 1.6509 | Train F1: 0.3521 | Val Macro F1: 0.3897 | Val Binary F1: 0.4722 | Hackathon: 0.4392
New best model (hackathon_score=0.4392)

Epoch 4/15
----------------------------------------
Train loss: 1.4896 | Train F1: 0.4934 | Val Macro F1: 0.6523 | Val Binary F1: 0.6902 | Hackathon: 0.6750
New best model (hackathon_score=0.6750)

Epoch 5/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=1.21]          

Train loss: 1.3003 | Train F1: 0.6918 | Val Macro F1: 0.7939 | Val Binary F1: 0.8198 | Hackathon: 0.8094
New best model (hackathon_score=0.8094)

Epoch 6/15
----------------------------------------
Train loss: 1.1593 | Train F1: 0.7938 | Val Macro F1: 0.8975 | Val Binary F1: 0.8952 | Hackathon: 0.8961
New best model (hackathon_score=0.8961)

Epoch 7/15
----------------------------------------


Training:   0%|          | 0/20 [00:00<?, ?it/s, loss=0.966]         

Train loss: 1.0154 | Train F1: 0.8696 | Val Macro F1: 0.9346 | Val Binary F1: 0.9333 | Hackathon: 0.9338
New best model (hackathon_score=0.9338)

Epoch 8/15
----------------------------------------
Train loss: 0.8542 | Train F1: 0.9173 | Val Macro F1: 0.9487 | Val Binary F1: 0.9547 | Hackathon: 0.9523
New best model (hackathon_score=0.9523)

Target metric reached (0.9523 >= 0.95). Stopping at epoch 8

Training complete. Best epoch: 8 (score=0.9523)


                                                            

  RESULT: score=0.9523  epoch=8  time=30s

FUSION TUNING RESULTS (sorted by score):
  0.9705  ep=7  fusion_attn_04_lr=1.0e-04_hd=256_nh=2_al=2  params={'lr': 0.00010470272294668067, 'weight_decay': 8.269509567573889e-05, 'dropout': 0.25, 'hidden_dim': 256, 'num_heads': 2, 'attn_layers': 2}
  0.9609  ep=9  fusion_attn_00_lr=1.4e-04_hd=256_nh=2_al=3  params={'lr': 0.00014106939915668838, 'weight_decay': 0.00019094731280229325, 'dropout': 0.3, 'hidden_dim': 256, 'num_heads': 2, 'attn_layers': 3}
  0.9556  ep=10  fusion_attn_01_lr=1.4e-04_hd=192_nh=4_al=1  params={'lr': 0.0001377235425660948, 'weight_decay': 0.0001739301417385073, 'dropout': 0.15, 'hidden_dim': 192, 'num_heads': 4, 'attn_layers': 1}
  0.9523  ep=8  fusion_attn_05_lr=1.5e-04_hd=160_nh=4_al=2  params={'lr': 0.00015112725127357162, 'weight_decay': 5.9795266194611525e-05, 'dropout': 0.3, 'hidden_dim': 160, 'num_heads': 4, 'attn_layers': 2}
  0.5069  ep=2  fusion_attn_02_lr=5.6e-05_hd=192_nh=2_al=1  params={'lr': 5.620424750892

## 3) Train fusion model (100 epochs) with best audio+video and tuned params

In [None]:
# Stage 3: launch fusion training as a module, using the merged fusion config
# and the audio/video checkpoints as inputs.
run_cmd(
    [sys.executable, "-u", "-m", "fusion.run_fusion"]
    + ["--config", str(fusion_cfg_path)]
    + ["--audio_checkpoint", str(audio_ckpt)]
    + ["--video_checkpoint", str(video_ckpt)]
)

# Stop here if the run did not leave a checkpoint at the expected path.
assert fusion_ckpt.exists(), f"Fusion checkpoint not found: {fusion_ckpt}"
print(f"Fusion best checkpoint: {fusion_ckpt}")

## 4) Optional: evaluate best fusion checkpoint

In [None]:
# Stage 4 (optional): re-run the fusion module in --test_only mode against the
# fusion checkpoint, with the same config and audio/video checkpoints.
run_cmd(
    [sys.executable, "-u", "-m", "fusion.run_fusion"]
    + ["--config", str(fusion_cfg_path)]
    + ["--audio_checkpoint", str(audio_ckpt)]
    + ["--video_checkpoint", str(video_ckpt)]
    + ["--test_only"]
    + ["--checkpoint", str(fusion_ckpt)]
)