# Environement Setup

In [1]:
import sys, os
try:
    from google.colab import drive, userdata
    IS_COLAB = True
except ImportError:
    IS_COLAB = False

REPO_NAME = 'AML_Mistake_Detection'

if IS_COLAB:
    print("☁️ Colab rilevato.")
    if not os.path.exists('/content/drive'): drive.mount('/content/drive')
    
    GITHUB_USER = 'TUO_USER' # <--- SOSTITUISCI
    try:
        TOKEN = userdata.get('GITHUB_TOKEN')
        REPO_URL = f'https://{TOKEN}@github.com/{GITHUB_USER}/{REPO_NAME}.git'
    except:
        REPO_URL = f'https://github.com/{GITHUB_USER}/{REPO_NAME}.git'

    ROOT_DIR = f'/content/{REPO_NAME}'
    if not os.path.exists(ROOT_DIR):
        !git clone {REPO_URL}
    else:
        %cd {ROOT_DIR}
        !git pull
        %cd /content
else:
    print("Ambiente locale rilevato.")
    ROOT_DIR = os.getcwd()
    while not os.path.exists(os.path.join(ROOT_DIR, '.gitignore')) and ROOT_DIR != os.path.dirname(ROOT_DIR):
        ROOT_DIR = os.path.dirname(ROOT_DIR)

if ROOT_DIR not in sys.path: 
    sys.path.append(ROOT_DIR)

Ambiente locale rilevato.


# Dataset Setup

In [2]:
from utils.setup_project import initialize

# Esegue: Installazione, Setup Dati (unzip/copy), Login WandB, Setup Device
device = initialize(ROOT_DIR)

# Ora puoi passare agli import del modello
from dataset.loader import CaptainCook4D_Dataset
from models.BaselineV1_MLP import BaselineV1_MLP

Setup Progetto in: c:\Users\enric\Desktop\AML_Mistake_Detection
Setup Dati da: c:\Users\enric\Desktop\AML_Mistake_Detection\_file
Inizio setup dati...
   Sorgente: c:\Users\enric\Desktop\AML_Mistake_Detection\_file
   Destinazione: c:\Users\enric\Desktop\AML_Mistake_Detection\data
Copia cartella: annotation_json...
Estrazione ZIP: omnivore.zip...
✅ Setup completato! Dati pronti in: c:\Users\enric\Desktop\AML_Mistake_Detection\data
Device: cuda
✅ Setup completato! Dati pronti in: c:\Users\enric\Desktop\AML_Mistake_Detection\data
Device: cuda


# Dataset Split

In [3]:
import os
from dataset.loader import CaptainCook4D_Dataset, get_loaders

# Percorsi (usando ROOT_DIR dal bootstrap)
DATA_DIR = os.path.join(ROOT_DIR, 'data')
FEATURES_DIR = os.path.join(DATA_DIR, 'omnivore')
ANNOTATIONS_DIR = os.path.join(DATA_DIR, 'annotation_json')

try:
    print(f"Loading from: {FEATURES_DIR}...")
    full_dataset = CaptainCook4D_Dataset(FEATURES_DIR, ANNOTATIONS_DIR)
    train_loader, val_loader, test_loader = get_loaders(
        full_dataset, 
        batch_size=512, 
        seed=42
    )

except Exception as e:
    print(f"❌ Errore: {e}")

Loading from: c:\Users\enric\Desktop\AML_Mistake_Detection\data\omnivore...

DATASET INFO
   Shape: torch.Size([340320, 1024]) -> 340320 Campioni, 1024 Features
FULL DATASET       | Tot: 340320 | OK: 257978 (75.8%) | ERR: 82342 (24.2%) | Ratio: 1:3.1
-----------------------------------------------------------------
TRAIN SET          | Tot: 238224 | OK: 180594 (75.8%) | ERR: 57630 (24.2%) | Ratio: 1:3.1
VALIDATION SET     | Tot: 34032  | OK: 25725 (75.6%) | ERR: 8307  (24.4%) | Ratio: 1:3.1
TEST SET           | Tot: 68064  | OK: 51659 (75.9%) | ERR: 16405 (24.1%) | Ratio: 1:3.1


DATASET INFO
   Shape: torch.Size([340320, 1024]) -> 340320 Campioni, 1024 Features
FULL DATASET       | Tot: 340320 | OK: 257978 (75.8%) | ERR: 82342 (24.2%) | Ratio: 1:3.1
-----------------------------------------------------------------
TRAIN SET          | Tot: 238224 | OK: 180594 (75.8%) | ERR: 57630 (24.2%) | Ratio: 1:3.1
VALIDATION SET     | Tot: 34032  | OK: 25725 (75.6%) | ERR: 8307  (24.4%) | Ratio: 

In [4]:
import numpy as np
import os

def inspect_npz(npz_path, n_rows=5):
    """
    Mostra il contenuto di un file .npz.

    Args:
        npz_path (str): percorso del file .npz
        n_rows (int): numero di righe da stampare per ogni array
    """
    if not os.path.exists(npz_path):
        print(f"[ERROR] File non trovato: {npz_path}")
        return

    data = np.load(npz_path)
    print(f"File: {npz_path}")
    print("Chiavi presenti nel file:", list(data.keys()))

    for key in data.keys():
        arr = data[key]
        print(f"\nArray '{key}' - shape: {arr.shape}, dtype: {arr.dtype}")
        print(arr[:n_rows])  # stampa le prime n_rows righe


# Esempio di utilizzo
npz_file = os.path.join(ROOT_DIR, "data", "omnivore", "1_7_360p.mp4_1s_1s.npz")

# Esegui l'ispezione (se il file esiste)
if os.path.exists(npz_file):
    inspect_npz(npz_file)
else:
    print(f"⚠️ File di esempio non trovato in: {npz_file}")


File: c:\Users\enric\Desktop\AML_Mistake_Detection\data\omnivore\1_7_360p.mp4_1s_1s.npz
Chiavi presenti nel file: ['arr_0']

Array 'arr_0' - shape: (604, 1024), dtype: float32
[[ 0.6910985   0.09298898 -0.6608225  ... -0.75679165  1.2401273
  -0.5683658 ]
 [ 0.40254688 -0.4466254  -0.8645446  ... -1.2709565   0.7917245
  -0.5052321 ]
 [ 0.643613   -0.48683766 -0.88651866 ... -1.0358062   0.658605
  -0.27201462]
 [ 0.8338395  -0.51338077 -0.8236387  ... -0.8753807   0.51246065
  -0.5449421 ]
 [ 0.98503673 -0.4786031  -0.6167189  ... -1.0904019   0.94557023
  -0.4631019 ]]


# MLP (Version 1)

In [5]:
import torch
import torch.nn as nn

model = BaselineV1_MLP(1024).to(device)

In [6]:
lr = 0.0001
optimizer = torch.optim.Adam(model.parameters(), lr)

# Quanto pesa la classe “positiva” = classe "1" = classe "error":
# - CASO 1: rapporto effettivo del dataset
#train_pos_weight = train_cnt_0 / train_cnt_1

# - CASO 2: rapporto usato nel paper
train_pos_weight = 1.5

print(f"Peso classe positiva: {train_pos_weight}")
train_pos_weight = torch.tensor([train_pos_weight], device=device)

criterion = nn.BCEWithLogitsLoss(pos_weight=train_pos_weight)

epochs = 50

Peso classe positiva: 1.5


In [7]:
from sklearn.metrics import accuracy_score, f1_score

for epoch in range(epochs):

    # -------------------------
    #        TRAIN
    # -------------------------
    model.train()
    total_loss = 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device).float()

        outputs = model(inputs)            # [B, 1]
        outputs = outputs.squeeze(1)       # [B]

        loss = criterion(outputs, labels)
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    avg_train_loss = total_loss / len(train_loader)

    # -------------------------
    #        EVAL
    # -------------------------
    model.eval()
    total_val_loss = 0
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device).float()

            outputs = model(inputs).squeeze(1)  # logits

            # same loss as train
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()

            # convert logits → probabilities → binary predictions
            probs = torch.sigmoid(outputs)
            preds = (probs >= 0.5).long()

            all_preds.append(preds.cpu())
            all_targets.append(labels.cpu())

    # concat
    all_preds = torch.cat(all_preds).numpy()
    all_targets = torch.cat(all_targets).numpy()

    avg_val_loss = total_val_loss / len(test_loader)
    acc = accuracy_score(all_targets, all_preds)
    f1  = f1_score(all_targets, all_preds, zero_division=0)

    print(f"Epoch {epoch+1}/{epochs} "
          f"- Train Loss: {avg_train_loss:.4f} "
          f"- Val Loss: {avg_val_loss:.4f} "
          f"- Acc: {acc:.4f} "
          f"- F1: {f1:.4f}")


Epoch 1/50 - Train Loss: 0.6742 - Val Loss: 0.6473 - Acc: 0.7695 - F1: 0.2296
Epoch 2/50 - Train Loss: 0.6384 - Val Loss: 0.6208 - Acc: 0.7792 - F1: 0.3565
Epoch 3/50 - Train Loss: 0.6129 - Val Loss: 0.5986 - Acc: 0.7875 - F1: 0.3801
Epoch 4/50 - Train Loss: 0.5914 - Val Loss: 0.5797 - Acc: 0.7948 - F1: 0.4353
Epoch 5/50 - Train Loss: 0.5733 - Val Loss: 0.5626 - Acc: 0.8021 - F1: 0.4907
Epoch 6/50 - Train Loss: 0.5576 - Val Loss: 0.5481 - Acc: 0.8060 - F1: 0.4953
Epoch 7/50 - Train Loss: 0.5437 - Val Loss: 0.5364 - Acc: 0.8110 - F1: 0.5333
Epoch 8/50 - Train Loss: 0.5307 - Val Loss: 0.5274 - Acc: 0.8164 - F1: 0.5175
Epoch 9/50 - Train Loss: 0.5202 - Val Loss: 0.5155 - Acc: 0.8177 - F1: 0.5556


KeyboardInterrupt: 