# Audio Training in 5 Cells (Single Config File)
This notebook uses only one config: `configs/master_config.json`.

Rules:
- No extra config files are created.
- Training/eval/export always read from `configs/master_config.json`.
- To change parameters, edit that one file directly, then rerun cells 2-5.

Decision rule:
- Accept the model if `F1_DEFECT >= 0.70` and `RECALL_DEFECT >= 0.80`, as reported by your evaluation script.
- Otherwise improve data quality/split policy first, then revisit parameters.

In [None]:
import json
import os
import shlex
import subprocess
import sys
from pathlib import Path

# Directory the kernel was started in; run_live() uses it as the default working directory.
PROJECT_ROOT = Path(os.getcwd())
# Interpreter running this kernel, reused for every subprocess command built in this notebook.
PYTHON_BIN = sys.executable
# The one configuration file that the cells in this notebook read.
RUN_CFG = Path('configs') / 'master_config.json'

def run_live(cmd, cwd=None):
    """Run a command as a subprocess and echo its output line by line as it arrives.

    Args:
        cmd: Sequence of command parts; each item is converted with ``str()``.
        cwd: Working directory for the child. Falls back to ``PROJECT_ROOT``
            when omitted or falsy.

    Raises:
        subprocess.CalledProcessError: If the child exits with a non-zero code.

    If anything interrupts the streaming loop (for example a kernel interrupt
    raising ``KeyboardInterrupt``), the child is terminated and reaped before
    the exception is re-raised, so the command does not keep running in the
    background.
    """
    cmd = [str(c) for c in cmd]
    # Echo a shell-quoted form of the command so it can be copy-pasted into a terminal.
    print('$ ' + ' '.join(shlex.quote(c) for c in cmd))
    env = os.environ.copy()
    # Ask Python children not to buffer their output, so lines show up immediately.
    env['PYTHONUNBUFFERED'] = '1'
    p = subprocess.Popen(
        cmd,
        cwd=str(cwd or PROJECT_ROOT),
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge stderr into the same stream
        text=True,
        bufsize=1,  # line-buffered pipe
    )
    try:
        assert p.stdout is not None
        for line in p.stdout:
            print(line, end='')
        rc = p.wait()
    except BaseException:
        # Do not leave the child running if we are interrupted mid-stream.
        p.terminate()
        try:
            p.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # The child ignored the polite request; force it down and reap it.
            p.kill()
            p.wait()
        raise
    finally:
        if p.stdout is not None:
            p.stdout.close()
    print(f'\n[exit_code={rc}]')
    if rc != 0:
        raise subprocess.CalledProcessError(rc, cmd)

# Parse the config file so the currently active parameters can be echoed below.
cfg = json.loads(RUN_CFG.read_text())

t = cfg['audio']['training']
mil = t.get('sequence_mil', {})

print(f'Python: {PYTHON_BIN}')
print(f'Using single config: {RUN_CFG.resolve()}')
print('Current params from master_config.json:')

# Keys are listed in display order; a key missing from the config prints as None.
training_keys = ('task', 'train_fraction', 'num_epochs', 'patience', 'lr', 'checkpoint_dir')
mil_keys = (
    'topk_ratio_pos',
    'topk_ratio_neg',
    'eval_pool_ratio',
    'auto_threshold',
    'threshold',
    'good_window_weight',
)
current_params = {name: t.get(name) for name in training_keys}
# The MIL on/off flag is stored as 'enabled' but shown under a clearer label.
current_params['mil_enabled'] = mil.get('enabled')
current_params.update({name: mil.get(name) for name in mil_keys})
print(current_params)

Python: /home/alolli/miniconda3/envs/therness_env/bin/python
Run config: /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/configs/master_config_5cells.json
Fixed params (no tuning):
{'lr': 0.0003, 'topk_ratio_pos': 0.05, 'topk_ratio_neg': 0.2, 'eval_pool_ratio': 0.05, 'good_window_weight': 0.25, 'auto_threshold': True, 'patience': 10, 'checkpoint_dir': 'checkpoints/audio_5cells'}


In [2]:
# Run the audio.run_audio module with the config file and stream its output.
train_cmd = [
    PYTHON_BIN, '-u',
    '-m', 'audio.run_audio',
    '--config', str(RUN_CFG),
]
run_live(train_cmd)

$ /home/alolli/miniconda3/envs/therness_env/bin/python -u -m audio.run_audio --config configs/master_config_5cells.json
Device: cuda

Total weld files: 1163
File label distribution (binary): {'defect': 617, 'good_weld': 546}
Split strategy: stratified
Train welds: 930 | Val welds: 233
File split stats | train={ defect: 493 (53.0%), good_weld: 437 (47.0%) } | val={ defect: 124 (53.2%), good_weld: 109 (46.8%) }
Train files: 930 | Val files: 233
Classes (2): {'defect': 0, 'good_weld': 1}
DataLoader stats | train_batches=117 | val_batches=30 | batch_size=8
MIL mode enabled | topk_ratio_pos=0.05 | topk_ratio_neg=0.2 | eval_pool_ratio=0.05 | auto_threshold=True | threshold=0.5 | good_window_weight=0.25
Model parameters: 185,698
LR schedule: base_lr=0.0003 | warmup_steps=526/5265 (10.0%) | plateau_factor=0.5 | plateau_patience=3
Config saved to checkpoints/audio_5cells/config.json

Epoch 1/45
----------------------------------------

Training (MIL):   0%|          | 0/117 [00:00<?, ?it/s]
Tra

In [None]:
import torch

# Re-read the config from disk rather than reusing an earlier in-memory copy.
# The `with` block closes the file handle, which a bare `open()` call would leak.
with open(RUN_CFG) as cfg_file:
    ckpt_dir = Path(json.load(cfg_file)['audio']['training']['checkpoint_dir'])
best_ckpt = ckpt_dir / 'best_model.pt'
print(f'Best checkpoint: {best_ckpt.resolve()}')

# Fail early with a clear message instead of letting the subprocess fail later.
if not best_ckpt.exists():
    raise FileNotFoundError(f'best_model.pt not found in {ckpt_dir}')

# Same entry point as training, run in test-only mode against the best checkpoint.
test_cmd = [
    PYTHON_BIN, '-u', '-m', 'audio.run_audio',
    '--config', str(RUN_CFG),
    '--test_only',
    '--checkpoint', str(best_ckpt),
]
run_live(test_cmd)

try:
    # Prefer the restricted loader.
    ck = torch.load(best_ckpt, map_location='cpu', weights_only=True)
except TypeError:
    # Presumably targets older torch releases whose torch.load() does not accept
    # `weights_only` -- TODO confirm the minimum supported torch version.
    # NOTE(review): this fallback performs unrestricted unpickling; only use it
    # on checkpoints you produced yourself.
    ck = torch.load(best_ckpt, map_location='cpu')

print('--- Best checkpoint stats (single-config run) ---')
# .get() is used so a field missing from the checkpoint prints as None.
print({
    'best_epoch': ck.get('epoch'),
    'val_f1': ck.get('val_f1'),
    'val_auc': ck.get('val_auc'),
    'threshold': ck.get('threshold'),
})

Best checkpoint: /home/alolli/src/malto/hackathon/therness-hackaton-2026-polito/checkpoints/audio_5cells/best_model.pt
$ /home/alolli/miniconda3/envs/therness_env/bin/python -u -m audio.run_audio --config configs/master_config_5cells.json --test_only --checkpoint checkpoints/audio_5cells/best_model.pt
Device: cuda

Total weld files: 1163
File label distribution (binary): {'defect': 617, 'good_weld': 546}
Split strategy: stratified
Train welds: 930 | Val welds: 233
File split stats | train={ defect: 493 (53.0%), good_weld: 437 (47.0%) } | val={ defect: 124 (53.2%), good_weld: 109 (46.8%) }
Train files: 930 | Val files: 233
Classes (2): {'defect': 0, 'good_weld': 1}
DataLoader stats | train_batches=117 | val_batches=30 | batch_size=8
MIL mode enabled | topk_ratio_pos=0.05 | topk_ratio_neg=0.2 | eval_pool_ratio=0.05 | auto_threshold=True | threshold=0.5 | good_window_weight=0.25
Model parameters: 185,698
LR schedule: base_lr=0.0003 | warmup_steps=526/5265 (10.0%) | plateau_factor=0.5 | pl

In [None]:
# Re-read the config from disk. The `with` block closes the file handle,
# which a bare `open()` call would leak.
with open(RUN_CFG) as cfg_file:
    cfg = json.load(cfg_file)
ckpt_dir = Path(cfg['audio']['training']['checkpoint_dir'])
best_ckpt = ckpt_dir / 'best_model.pt'
# The exported artifact is written next to the checkpoint it is built from.
deploy_pt = ckpt_dir / 'deploy_single_label.pt'

export_cmd = [
    PYTHON_BIN, '-u', '-m', 'audio.export_deploy_pt',
    '--checkpoint', str(best_ckpt),
    '--output', str(deploy_pt),
]
run_live(export_cmd)

print(f'Deploy artifact ready: {deploy_pt.resolve()}')
print('Optional final check (same as before):')
# Print the interpreter actually in use rather than one machine's absolute path.
print(f'{PYTHON_BIN} /tmp/eval_deploy_dataset.py')