In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import zipfile
import os

# Setup envoirement

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device in uso:", device)

Device in uso: cuda


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Data loading

In [4]:
zip_path = "/content/drive/MyDrive/AML_MistakeDetection_DATA/features/gopro/segments/1s/video/omnivore.zip"
extract_dir = "/content/omnivore_extracted"

os.makedirs(extract_dir, exist_ok=True)

with zipfile.ZipFile(zip_path, 'r') as z:
    z.extractall(extract_dir)

extract_dir = "/content/omnivore_extracted/omnivore"

print("Extracted files:", len(os.listdir(extract_dir)))

Extracted files: 384


In [6]:
import json

with open("/content/drive/MyDrive/AML_MistakeDetection_DATA/annotation_json/complete_step_annotations.json") as f:
    annotations = json.load(f)

In [12]:
class VideoFeatureDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X).float()
        self.y = torch.from_numpy(y).long()

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [8]:
def get_labels_for_npz(npz_file, annotations):
    # es: "10_3_360.mp4_1s_1s.npz"
    base = os.path.basename(npz_file)
    activity, attempt = base.split("_")[:2]  # "10", "3"
    recording_id = f"{activity}_{attempt}"

    # carica feature
    data = np.load(npz_file)
    arr = data[list(data.keys())[0]]  # shape (N, 400)
    N = arr.shape[0]

    labels = np.zeros(N, dtype=np.int64)  # default: no-error = 0

    # trova annotation di questo recording
    info = annotations[recording_id]
    steps = info["steps"]

    # assegnazione label per ogni secondo
    for step in steps:
        has_error = int(step["has_errors"])  # True→1, False→0
        start = step["start_time"]
        end   = step["end_time"]

        if start == -1 or end == -1 or has_error == 0:
            continue

        for sec in range(int(start), int(end) + 1, 1):
            sec_start = sec
            sec_end   = sec + 1

            # check overlap
            if sec_start >= start and sec_end <= end:
                labels[sec] = has_error

    return arr, labels

In [9]:
all_X = []
all_y = []

extract_dir = "/content/omnivore_extracted/omnivore"

for f in sorted(os.listdir(extract_dir)):
    if f.endswith(".npz"):
        X, y = get_labels_for_npz(os.path.join(extract_dir, f), annotations)
        all_X.append(X)
        all_y.append(y)

X = np.concatenate(all_X, axis=0)
y = np.concatenate(all_y, axis=0)

print(X.shape, y.shape)


(340320, 1024) (340320,)


In [14]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    shuffle=True,
)

In [17]:
train_dataset = VideoFeatureDataset(X_train, y_train)
test_dataset  = VideoFeatureDataset(X_test,  y_test)

In [20]:
batch_size = 512

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# MLP

In [75]:
import torch
import torch.nn as nn

class MLPCapitainCook(nn.Module):
    def __init__(self, in_features: int, p: float = 0.5) -> None:
        super().__init__()
        self.fc1 = nn.Linear(in_features, 256)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p)       # Dropout layer con probabilità p
        self.fc2 = nn.Linear(256, 1)       # Output logit (senza sigmoid)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)                # Applica Dropout solo in TRAIN
        x = self.fc2(x)                    # Output logit
        return x                           # no Sigmoid qui


In [76]:
model = MLPCapitainCook(1024).to(device)

In [77]:
lr = 0.0001
optimizer = torch.optim.Adam(model.parameters(), lr)
# count classi
neg = (y == 1).sum()   # classe “1” diventa negativa
pos = (y == 0).sum()   # classe “0” diventa positiva

# pos_weight = quanto pesa la classe “positiva” = classe 0
pos_weight_value = neg / pos
pos_weight = torch.tensor([pos_weight_value], device=device)

criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

epochs = 50

In [78]:
from sklearn.metrics import accuracy_score, f1_score

for epoch in range(epochs):

    # -------------------------
    #        TRAIN
    # -------------------------
    model.train()
    total_loss = 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device).float()

        outputs = model(inputs)            # [B, 1]
        outputs = outputs.squeeze(1)       # [B]

        loss = criterion(outputs, labels)
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    avg_train_loss = total_loss / len(train_loader)

    # -------------------------
    #        EVAL
    # -------------------------
    model.eval()
    total_val_loss = 0
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device).float()

            outputs = model(inputs).squeeze(1)  # logits

            # same loss as train
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()

            # convert logits → probabilities → binary predictions
            probs = torch.sigmoid(outputs)
            preds = (probs >= 0.5).long()

            all_preds.append(preds.cpu())
            all_targets.append(labels.cpu())

    # concat
    all_preds = torch.cat(all_preds).numpy()
    all_targets = torch.cat(all_targets).numpy()

    avg_val_loss = total_val_loss / len(test_loader)
    acc = accuracy_score(all_targets, all_preds)
    f1  = f1_score(all_targets, all_preds, zero_division=0)

    print(f"Epoch {epoch+1}/{epochs} "
          f"- Train Loss: {avg_train_loss:.4f} "
          f"- Val Loss: {avg_val_loss:.4f} "
          f"- Acc: {acc:.4f} "
          f"- F1: {f1:.4f}")


Epoch 1/50 - Train Loss: 0.2603 - Val Loss: 0.2469 - Acc: 0.7560 - F1: 0.0000
Epoch 2/50 - Train Loss: 0.2432 - Val Loss: 0.2389 - Acc: 0.7566 - F1: 0.0055
Epoch 3/50 - Train Loss: 0.2357 - Val Loss: 0.2312 - Acc: 0.7595 - F1: 0.0307
Epoch 4/50 - Train Loss: 0.2283 - Val Loss: 0.2240 - Acc: 0.7658 - F1: 0.0830
Epoch 5/50 - Train Loss: 0.2217 - Val Loss: 0.2177 - Acc: 0.7693 - F1: 0.1090
Epoch 6/50 - Train Loss: 0.2159 - Val Loss: 0.2119 - Acc: 0.7737 - F1: 0.1427
Epoch 7/50 - Train Loss: 0.2108 - Val Loss: 0.2076 - Acc: 0.7786 - F1: 0.1778
Epoch 8/50 - Train Loss: 0.2061 - Val Loss: 0.2033 - Acc: 0.7834 - F1: 0.2119
Epoch 9/50 - Train Loss: 0.2024 - Val Loss: 0.1995 - Acc: 0.7859 - F1: 0.2278
Epoch 10/50 - Train Loss: 0.1985 - Val Loss: 0.1963 - Acc: 0.7926 - F1: 0.2738
Epoch 11/50 - Train Loss: 0.1954 - Val Loss: 0.1934 - Acc: 0.7949 - F1: 0.2888
Epoch 12/50 - Train Loss: 0.1924 - Val Loss: 0.1913 - Acc: 0.7936 - F1: 0.2778
Epoch 13/50 - Train Loss: 0.1896 - Val Loss: 0.1886 - Acc: 0.

KeyboardInterrupt: 