# Environment Setup

In [1]:
import sys, os
try:
    from google.colab import drive, userdata
    IS_COLAB = True
except ImportError:
    IS_COLAB = False

REPO_NAME = 'MistakeDetection'

if IS_COLAB:
    print("‚òÅÔ∏è Colab rilevato.")
    if not os.path.exists('/content/drive'): drive.mount('/content/drive')

    GITHUB_USER = 'MarcoPernoVDP'
    try:
        TOKEN = userdata.get('GITHUB_TOKEN')
        REPO_URL = f'https://{TOKEN}@github.com/{GITHUB_USER}/{REPO_NAME}.git'
    except:
        REPO_URL = f'https://github.com/{GITHUB_USER}/{REPO_NAME}.git'

    ROOT_DIR = f'/content/{REPO_NAME}'
    if not os.path.exists(ROOT_DIR):
        !git clone {REPO_URL}
    else:
        %cd {ROOT_DIR}
        !git pull
        %cd /content


else:
    print("Ambiente locale rilevato.")
    ROOT_DIR = os.getcwd()
    while not os.path.exists(os.path.join(ROOT_DIR, '.gitignore')) and ROOT_DIR != os.path.dirname(ROOT_DIR):
        ROOT_DIR = os.path.dirname(ROOT_DIR)

if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)


☁️ Colab rilevato.
Mounted at /content/drive
Cloning into 'MistakeDetection'...
remote: Enumerating objects: 400, done.[K
remote: Counting objects: 100% (185/185), done.[K
remote: Compressing objects: 100% (130/130), done.[K
remote: Total 400 (delta 93), reused 136 (delta 49), pack-reused 215 (from 1)[K
Receiving objects: 100% (400/400), 50.02 MiB | 24.64 MiB/s, done.
Resolving deltas: 100% (174/174), done.
Updating files: 100% (59/59), done.


# Dataset Setup

In [2]:
from utils import setup_project
# Project-local imports for the dataset and the model
from dataset.utils import SplitType
from dataset.capitain_cook_4d_mlp_dataset import CaptainCook4DMLP_Dataset, DatasetSource
from models.BaselineV1_MLP import BaselineV1_MLP

# Runs: data setup (unzip/copy), W&B login, device setup; the returned value is used as the torch device
device = setup_project.initialize(ROOT_DIR)
# Import wandb
import wandb

Setup Progetto in: /content/MistakeDetection
source_path: /content/drive/MyDrive/MistakeDetection
Setup Dati da: /content/drive/MyDrive/MistakeDetection
Inizio setup dati...
   Sorgente: /content/drive/MyDrive/MistakeDetection
   Destinazione: /content/MistakeDetection/data
Estrazione ZIP: omnivore.zip...
Copia cartella: annotation_json...
Estrazione ZIP: slowfast.zip...
Estrazione ZIP: 3dresnet.zip...
Estrazione ZIP: x3d.zip...
Estrazione ZIP: omnivore_test.zip...
✅ Setup completato! Dati pronti in: /content/MistakeDetection/data


  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33ms339450[0m ([33ms339450-politecnico-di-torino[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


WandB Logged in.
Device: cuda


# Configuration

In [3]:
# Experiment configuration: which pre-extracted features to use and how to split the data
DATASET_SOURCE = DatasetSource.OMNIVORE
SPLIT_TYPE = SplitType.VIDEO_ID

# Hyperparameters and run metadata, shared by the data loaders, the optimizer/loss
# setup and the W&B run.
config = dict(
    architecture=f"BaselineV1_MLP_{DATASET_SOURCE.value}_{SPLIT_TYPE.value}",
    dataset="CaptainCook4D",
    feature_extractor=DATASET_SOURCE.value,
    input_dim=DATASET_SOURCE.input_dims(),
    batch_size=512,
    learning_rate=0.001,
    epochs=50,
    pos_weight=1.5,
    optimizer="Adam",
    loss_function="BCEWithLogitsLoss",
    seed=42,
    split_type=SPLIT_TYPE.value,
)

# Dataset Split

In [14]:
import os
from dataset.capitain_cook_4d_mlp_dataset import CaptainCook4DMLP_Dataset, DatasetSource
from dataset.utils import get_mlp_loaders

# Build the full dataset for the selected feature source and derive the
# train / validation / test loaders from it.
# NOTE(review): test_ratio=0 is passed here, yet the recorded output shows a
# non-empty test set and later cells evaluate on test_loader — confirm how
# get_mlp_loaders treats test_ratio for this split type.
try:
    full_dataset = CaptainCook4DMLP_Dataset(dataset_source=DATASET_SOURCE, root_dir=ROOT_DIR)
    train_loader, val_loader, test_loader = get_mlp_loaders(
        full_dataset,
        batch_size=config["batch_size"],
        seed=config["seed"],
        split_type=SPLIT_TYPE,
        test_ratio=0
    )

except Exception as e:
    print(f"❌ Errore: {e}")
    # Re-raise: every later cell needs full_dataset and the loaders, so
    # swallowing the error would only produce a confusing NameError later, or
    # silently reuse stale objects from a previous run of this cell.
    raise

Loading from: c:\Users\marco\Desktop\Marco\Programmazione\C\EsPoli\Advanced Machine Learning\MistakeDetection\data\omnivore...

DATASET INFO [V1 - SUBSECOND-BASED]
   Shape: torch.Size([8828, 1024]) -> 8828 Campioni, 1024 Features
FULL DATASET       | Tot: 8828   | OK: 4917  (55.7%) | ERR: 3911  (44.3%) | Ratio: 1:1.3
-------------------------------------------------------------------------------------
TRAIN SET          | Tot: 7706   | OK: 4412  (57.3%) | ERR: 3294  (42.7%) | Ratio: 1:1.3
VALIDATION SET     | Tot: 437    | OK: 437   (100.0%) | ERR: 0     (0.0%)
TEST SET           | Tot: 685    | OK: 68    (9.9%) | ERR: 617   (90.1%) | Ratio: 1:0.1



In [5]:
# Show the dataset's own summary of the item at index 0 as a quick sanity check
full_dataset.print_item(0)

V1 DATASET ITEM [0]
Features shape:       torch.Size([1024]) (features)
Label:                0 (OK)
Step id:              115
Video id:             10_16


In [6]:
from utils.inspect_npz import inspect_npz_from_dataset

# NOTE(review): dataset_folder is not read by any visible cell — confirm before removing
dataset_folder = DATASET_SOURCE.value
# Feature file to inspect (hard-coded); it is looked up in full_dataset.features_dir()
npz_filename = "1_7_360p.mp4_1s_1s.npz"

# Inspect the .npz file, showing the first 3 rows
inspect_npz_from_dataset(full_dataset.features_dir(), npz_filename, n_rows=3)

File: /content/MistakeDetection/data/omnivore/1_7_360p.mp4_1s_1s.npz
Chiavi presenti nel file: ['arr_0']

Array 'arr_0' - shape: (604, 1024), dtype: float32
[[ 0.6910985   0.09298898 -0.6608225  ... -0.75679165  1.2401273
  -0.5683658 ]
 [ 0.40254688 -0.4466254  -0.8645446  ... -1.2709565   0.7917245
  -0.5052321 ]
 [ 0.643613   -0.48683766 -0.88651866 ... -1.0358062   0.658605
  -0.27201462]]


In [15]:
# Inizializzazione W&B
run = wandb.init(
    project="mistake-detection",
    name=f"baseline-mlp-v1-{DATASET_SOURCE.value}-{SPLIT_TYPE.value}",
    config=config,
    tags=["baseline", "mlp", DATASET_SOURCE.value],
    notes=f"Baseline MLP with {DATASET_SOURCE.value} features for mistake detection and {SPLIT_TYPE.value} split"
)

print(f"üöÄ W&B Run: {run.name} (ID: {run.id})")

🚀 W&B Run: baseline-mlp-v1-omnivore-video_id (ID: 6csg5k2v)


# W&B Configuration

# MLP (Version 1)

In [16]:
import torch
import torch.nn as nn

# Seed torch's RNG with the configured seed before building the model, so the
# initial weights are reproducible across kernel restarts. The config declares
# a seed, but the visible cells only forward it to the data loaders.
torch.manual_seed(config["seed"])

# Build the baseline MLP for the selected feature dimensionality and move it to
# the device chosen during project setup.
model = BaselineV1_MLP(DATASET_SOURCE.input_dims()).to(device)

# Let W&B track the model's gradients and parameters
wandb.watch(model, log="all", log_freq=10)

In [17]:
# Training hyperparameters come from the shared experiment config.
epochs = config["epochs"]
lr = config["learning_rate"]

optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Weight of the "positive" class (class 1 = "error") in the loss.
# - Option 1 (disabled): the actual class ratio of the training set,
#   i.e. train_cnt_0 / train_cnt_1
# - Option 2 (active): the ratio used in the paper, read from the config
print(f"Peso classe positiva: {config['pos_weight']}")
train_pos_weight = torch.tensor([config["pos_weight"]], device=device)

criterion = nn.BCEWithLogitsLoss(pos_weight=train_pos_weight)

Peso classe positiva: 1.5


In [18]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, roc_auc_score
import numpy as np


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over ``loader`` and collect the loss and predictions.

    The pass is a training pass (train mode, gradients enabled, one optimizer
    step per batch) when ``optimizer`` is given, and an evaluation pass (eval
    mode, no gradients) otherwise.

    Args:
        model: Module mapping a batch of features to one logit per sample; the
            output is squeezed on dim 1 (the original author notes it is [B, 1]).
        loader: Iterable yielding 4-tuples whose first two items are the inputs
            and the labels; the remaining two items are ignored.
        criterion: Loss taking ``(logits, float_labels)``.
        device: Device the batches are moved to.
        optimizer: Optimizer to step after each batch, or ``None`` to evaluate.

    Returns:
        Tuple ``(mean_batch_loss, targets, preds, probs)``; the last three are
        NumPy arrays concatenated over all batches. ``preds`` thresholds the
        sigmoid probabilities at 0.5.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    target_chunks, pred_chunks, prob_chunks = [], [], []

    with torch.set_grad_enabled(training):
        for inputs, labels, _, _ in loader:
            inputs = inputs.to(device)
            labels = labels.to(device).float()

            logits = model(inputs).squeeze(1)
            loss = criterion(logits, labels)
            loss_sum += loss.item()

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Detach so the collected tensors do not keep the graph alive.
            probs = torch.sigmoid(logits.detach()).cpu()
            pred_chunks.append((probs >= 0.5).long())
            target_chunks.append(labels.cpu())
            prob_chunks.append(probs)

    return (
        loss_sum / len(loader),
        torch.cat(target_chunks).numpy(),
        torch.cat(pred_chunks).numpy(),
        torch.cat(prob_chunks).numpy(),
    )


def _binary_metrics(targets, preds, probs):
    """Compute accuracy, F1, precision, recall and ROC-AUC for binary labels.

    AUC is computed from the probabilities, not from the hard predictions, and
    falls back to 0.0 when ``roc_auc_score`` raises ``ValueError`` (for example
    when only one class is present in ``targets``).
    """
    try:
        auc = roc_auc_score(targets, probs)
    except ValueError:
        auc = 0.0

    return {
        "accuracy": accuracy_score(targets, preds),
        "f1": f1_score(targets, preds, zero_division=0),
        "precision": precision_score(targets, preds, zero_division=0),
        "recall": recall_score(targets, preds, zero_division=0),
        "auc": auc,
    }


best_f1 = 0.0

# NOTE(review): the per-epoch "val" metrics and the best-model selection below
# are computed on test_loader, not val_loader — confirm this is intended, since
# selecting checkpoints on the test split biases the reported test scores.
for epoch in range(epochs):

    # -------------------------
    #        TRAIN
    # -------------------------
    avg_train_loss, train_targets, train_preds, train_probs = _run_epoch(
        model, train_loader, criterion, device, optimizer=optimizer
    )
    train_metrics = _binary_metrics(train_targets, train_preds, train_probs)

    # -------------------------
    #        EVAL
    # -------------------------
    avg_val_loss, all_targets, all_preds, all_probs = _run_epoch(
        model, test_loader, criterion, device
    )
    val_metrics = _binary_metrics(all_targets, all_preds, all_probs)

    # Keep the individual names: the results cell reads val_acc, val_precision
    # and val_recall after training.
    val_acc = val_metrics["accuracy"]
    val_f1 = val_metrics["f1"]
    val_precision = val_metrics["precision"]
    val_recall = val_metrics["recall"]
    val_auc = val_metrics["auc"]

    # Log to W&B
    wandb.log({
        # Training metrics
        "train/loss": avg_train_loss,
        "train/accuracy": train_metrics["accuracy"],
        "train/f1": train_metrics["f1"],
        "train/precision": train_metrics["precision"],
        "train/recall": train_metrics["recall"],
        "train/auc": train_metrics["auc"],

        # Validation metrics
        "val/loss": avg_val_loss,
        "val/accuracy": val_acc,
        "val/f1": val_f1,
        "val/precision": val_precision,
        "val/recall": val_recall,
        "val/auc": val_auc,

        # Confusion matrix (built by W&B from targets and predictions)
        "val/confusion_matrix": wandb.plot.confusion_matrix(
            probs=None,
            y_true=all_targets,
            preds=all_preds,
            class_names=["No Error", "Error"]
        ),

        "learning_rate": optimizer.param_groups[0]['lr'],
        "epoch": epoch + 1
    })

    print(f"Epoch {epoch+1}/{epochs} "
          f"- Train Loss: {avg_train_loss:.4f} "
          f"- Val Loss: {avg_val_loss:.4f} "
          f"- Val Acc: {val_acc:.4f} "
          f"- Val F1: {val_f1:.4f} "
          f"- Val Precision: {val_precision:.4f} "
          f"- Val Recall: {val_recall:.4f} "
          f"- Val AUC: {val_auc:.4f}")

    # Save the best model so far (by F1) locally and as a W&B artifact
    if val_f1 > best_f1:
        best_f1 = val_f1
        checkpoint_path = os.path.join(ROOT_DIR, "checkpoints", f"best_model_f1_{best_f1:.4f}.pth")
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_f1': val_f1,
            'val_acc': val_acc,
            'val_auc': val_auc,
        }, checkpoint_path)

        artifact = wandb.Artifact(
            name=f"model-{run.id}",
            type="model",
            description=f"Best model with F1={best_f1:.4f}",
            metadata={
                "epoch": epoch + 1,
                "val_f1": val_f1,
                "val_acc": val_acc,
                "val_auc": val_auc,
                "architecture": config["architecture"]
            }
        )
        artifact.add_file(checkpoint_path)
        wandb.log_artifact(artifact)

        print(f"✅ Nuovo miglior modello salvato! F1: {best_f1:.4f}")

print("\n🎉 Training completato!")
print(f"Miglior F1 Score: {best_f1:.4f}")

Epoch 1/50 - Train Loss: 0.6602 - Val Loss: 0.7909 - Val Acc: 0.7255 - Val F1: 0.8256 - Val Precision: 0.9653 - Val Recall: 0.7212 - Val AUC: 0.8372
✅ Nuovo miglior modello salvato! F1: 0.8256


KeyboardInterrupt: 

# Results & Visualization

In [None]:
# Log a table with example predictions taken from the first batch of test_loader
model.eval()
predictions_table = wandb.Table(
    columns=["ID", "True Label", "Predicted", "Probability", "Correct"]
)

max_rows = 50  # cap on the number of examples logged

with torch.no_grad():
    # Only the first batch is needed; None means the loader is empty and the
    # table is logged without rows.
    first_batch = next(iter(test_loader), None)

    if first_batch is not None:
        inputs, labels, _, _ = first_batch

        logits = model(inputs.to(device)).squeeze(1)
        # Move to CPU once and convert to Python lists, instead of calling
        # .item() on the device tensors for every row.
        probs = torch.sigmoid(logits).cpu()
        preds = (probs >= 0.5).long()

        rows = zip(
            labels[:max_rows].float().tolist(),
            preds[:max_rows].tolist(),
            probs[:max_rows].tolist(),
        )
        for row_id, (true_label, pred, prob) in enumerate(rows):
            predictions_table.add_data(row_id, true_label, pred, prob, pred == true_label)

wandb.log({"predictions/sample_table": predictions_table})

# Summary
# NOTE(review): val_acc / val_precision / val_recall are whatever the training
# cell last assigned, i.e. the last completed epoch — not necessarily the epoch
# that produced best_f1.
wandb.run.summary["best_f1"] = best_f1
wandb.run.summary["final_val_acc"] = val_acc
wandb.run.summary["final_val_precision"] = val_precision
wandb.run.summary["final_val_recall"] = val_recall

print("✅ Tabella predizioni e summary caricati su W&B")

✅ Tabella predizioni e summary caricati su W&B


In [None]:
# Close the W&B run
wandb.finish()
# The emoji was stored as mis-decoded bytes; restored to the intended character.
print("🏁 W&B run terminato")

0,1
epoch,‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà
learning_rate,‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
train/accuracy,‚ñÅ‚ñÇ‚ñÉ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
train/f1,‚ñÅ‚ñÇ‚ñÉ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
train/loss,‚ñà‚ñá‚ñÜ‚ñÜ‚ñÖ‚ñÖ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
train/precision,‚ñÅ‚ñÇ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
train/recall,‚ñÅ‚ñÇ‚ñÉ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
val/accuracy,‚ñÉ‚ñÑ‚ñÖ‚ñÅ‚ñÑ‚ñÖ‚ñÜ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÖ‚ñá‚ñá‚ñÜ‚ñá‚ñÜ‚ñá‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñá‚ñÜ‚ñá‚ñà‚ñá‚ñá‚ñà
val/f1,‚ñÅ‚ñÇ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÖ‚ñÜ‚ñÜ‚ñÉ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñÜ‚ñÖ‚ñà‚ñà‚ñá‚ñá‚ñÖ‚ñÜ‚ñÜ‚ñá‚ñá‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñÜ‚ñÖ
val/loss,‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà

0,1
best_f1,0.51274
epoch,50
final_val_acc,0.71616
final_val_precision,0.5216
final_val_recall,0.45811
learning_rate,0.001
train/accuracy,0.92597
train/f1,0.8799
train/loss,0.19655
train/precision,0.85301


🏁 W&B run terminato
