# Environement Setup

In [1]:
# @title 1. Setup Progetto (MistakeDetection)
import sys, os
try:
    from google.colab import drive, userdata
    IS_COLAB = True
except ImportError:
    IS_COLAB = False

REPO_NAME = 'MistakeDetection'

# --- CONFIGURAZIONE PATH ---
if IS_COLAB:
    print("‚òÅÔ∏è Colab rilevato.")
    if not os.path.exists('/content/drive'):
        drive.mount('/content/drive')

    # DEFINIZIONE GLOBALE PROJECT_DIR (Importante per le celle successive!)
    PROJECT_DIR = "/content/drive/MyDrive/MistakeDetection"

    # Fallback se la cartella ha un nome diverso
    if not os.path.exists(PROJECT_DIR):
        if os.path.exists("/content/drive/MyDrive/CaptainCook4D"):
            PROJECT_DIR = "/content/drive/MyDrive/CaptainCook4D"

    print(f"üìÇ Cartella Progetto su Drive: {PROJECT_DIR}")

    GITHUB_USER = 'MarcoPernoVDP'
    try:
        TOKEN = userdata.get('GITHUB_TOKEN')
        REPO_URL = f'https://{TOKEN}@github.com/{GITHUB_USER}/{REPO_NAME}.git'
    except:
        REPO_URL = f'https://github.com/{GITHUB_USER}/{REPO_NAME}.git'

    ROOT_DIR = f'/content/{REPO_NAME}'

    if not os.path.exists(ROOT_DIR):
        print(f"üì• Clonazione {REPO_NAME}...")
        !git clone {REPO_URL}
    else:
        print(f"üîÑ Aggiornamento {REPO_NAME}...")
        %cd {ROOT_DIR}
        !git pull
        %cd /content
else:
    print("Ambiente locale rilevato.")
    ROOT_DIR = os.getcwd()
    while not os.path.exists(os.path.join(ROOT_DIR, '.gitignore')) and ROOT_DIR != os.path.dirname(ROOT_DIR):
        ROOT_DIR = os.path.dirname(ROOT_DIR)
    PROJECT_DIR = ROOT_DIR # In locale coincidono spesso

if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

print(f"‚úÖ ROOT_DIR impostata a: {ROOT_DIR}")

‚òÅÔ∏è Colab rilevato.
Mounted at /content/drive
üìÇ Cartella Progetto su Drive: /content/drive/MyDrive/MistakeDetection
üì• Clonazione MistakeDetection...
Cloning into 'MistakeDetection'...
remote: Enumerating objects: 554, done.[K
remote: Counting objects: 100% (21/21), done.[K
remote: Compressing objects: 100% (17/17), done.[K
remote: Total 554 (delta 9), reused 9 (delta 4), pack-reused 533 (from 1)[K
Receiving objects: 100% (554/554), 85.59 MiB | 13.98 MiB/s, done.
Resolving deltas: 100% (281/281), done.
‚úÖ ROOT_DIR impostata a: /content/MistakeDetection


In [2]:
# @title 2. Setup ActionFormer
import os
import sys
import shutil
import subprocess

# Usiamo un workspace separato
AF_WORKDIR = "/content/actionformer_workspace"
os.makedirs(AF_WORKDIR, exist_ok=True)
os.chdir(AF_WORKDIR)

REPO_NAME = "multi_step_localization"
AF_REPO_PATH = os.path.join(AF_WORKDIR, REPO_NAME)

# 1. Clone
if not os.path.exists(AF_REPO_PATH):
    print("üì• Clonazione ActionFormer (con --recursive)...")
    try:
        subprocess.run(["git", "clone", "--recursive", "https://github.com/CaptainCook4D/multi_step_localization.git"], check=True)
    except Exception as e:
        print(f"‚ö†Ô∏è Clone recursive fallito, provo standard...")
        subprocess.run(["git", "clone", "https://github.com/CaptainCook4D/multi_step_localization.git"], check=True)

os.chdir(AF_REPO_PATH)

# 2. Fix Path Libs
if os.path.exists(os.path.join(AF_REPO_PATH, "actionformer", "libs", "utils")):
    UTILS_PATH = os.path.join(AF_REPO_PATH, "actionformer", "libs", "utils")
    PATCH_DIR = os.path.join(AF_REPO_PATH, "actionformer")
elif os.path.exists(os.path.join(AF_REPO_PATH, "libs", "utils")):
    UTILS_PATH = os.path.join(AF_REPO_PATH, "libs", "utils")
    PATCH_DIR = AF_REPO_PATH
else:
    # Tentativo update submodule
    print("‚ö†Ô∏è Cartella libs non trovata, provo update submodule...")
    subprocess.run(["git", "submodule", "update", "--init", "--recursive"], check=True)
    if os.path.exists(os.path.join(AF_REPO_PATH, "libs", "utils")):
        UTILS_PATH = os.path.join(AF_REPO_PATH, "libs", "utils")
        PATCH_DIR = AF_REPO_PATH
    else:
        raise FileNotFoundError("CRITICO: Impossibile trovare libs/utils.")

print(f"‚úÖ Cartella Utils: {UTILS_PATH}")

# 3. Installazione & Patch
print("üì¶ Installazione dipendenze...")
subprocess.run([sys.executable, "-m", "pip", "install", "pyyaml", "scipy"], check=True)

print("ü©π Patch NumPy 2.0...")
with open(os.path.join(PATCH_DIR, "numpy_patch.py"), "w") as f:
    f.write("import numpy as np\n")
    f.write("try:\n  if not hasattr(np, 'float'): np.float = np.float64\nexcept: pass\n")
    f.write("try:\n  if not hasattr(np, 'int'): np.int = np.int_\nexcept: pass\n")

# Inietta patch in eval.py
eval_path = os.path.join(AF_REPO_PATH, "eval.py")
if os.path.exists(eval_path):
    with open(eval_path, "r") as f: content = f.read()
    if "import numpy_patch" not in content:
        with open(eval_path, "w") as f:
            f.write("import sys\nsys.path.append('actionformer')\nimport numpy_patch\n" + content)

# 4. Compilazione
print("‚öôÔ∏è Compilazione CUDA...")
os.chdir(UTILS_PATH)
subprocess.run([sys.executable, "setup.py", "install"], check=True)

os.chdir(AF_REPO_PATH)
print("\n‚úÖ Ambiente ActionFormer pronto.")

üì• Clonazione ActionFormer (con --recursive)...
‚úÖ Cartella Utils: /content/actionformer_workspace/multi_step_localization/actionformer/libs/utils
üì¶ Installazione dipendenze...
ü©π Patch NumPy 2.0...
‚öôÔ∏è Compilazione CUDA...

‚úÖ Ambiente ActionFormer pronto.


In [3]:
# @title 3. Estrazione Feature Omnivore
import zipfile
import shutil
import os
from tqdm import tqdm

# --- CONFIGURAZIONE VARIABILI (Self-Contained) ---
if 'PROJECT_DIR' not in locals():
    # Tenta di indovinare il path
    if os.path.exists("/content/drive/MyDrive/MistakeDetection"):
        PROJECT_DIR = "/content/drive/MyDrive/MistakeDetection"
    elif os.path.exists("/content/drive/MyDrive/CaptainCook4D"):
        PROJECT_DIR = "/content/drive/MyDrive/CaptainCook4D"
    else:
        # Fallback locale se non trova nulla (o se sei in locale)
        PROJECT_DIR = os.getcwd()

if 'ROOT_DIR' not in locals():
    # Tenta di trovare il repo clonato
    possible_roots = [
        os.path.join(PROJECT_DIR, "MistakeDetection"),
        "/content/MistakeDetection",
        PROJECT_DIR
    ]
    for r in possible_roots:
        if os.path.exists(os.path.join(r, ".git")):
            ROOT_DIR = r
            break
    if 'ROOT_DIR' not in locals(): ROOT_DIR = PROJECT_DIR

# --- RICERCA ZIP ---
POSSIBLE_PATHS = [
    os.path.join(PROJECT_DIR, "_file", "omnivore.zip"),
    os.path.join(PROJECT_DIR, "data", "omnivore.zip"),
    os.path.join(PROJECT_DIR, "omnivore.zip"),
    # Path specifici colab
    "/content/drive/MyDrive/MistakeDetection/omnivore.zip",
    "/content/drive/MyDrive/MistakeDetection/data/omnivore.zip"
]

ZIP_PATH = None
for p in POSSIBLE_PATHS:
    if os.path.exists(p):
        ZIP_PATH = p
        break

LOCAL_FEAT_DIR = "/content/temp_omnivore_features"

if ZIP_PATH is None:
    print(f"‚ùå ERRORE: Non trovo 'omnivore.zip'.")
    print(f"   Ho cercato in: {POSSIBLE_PATHS}")
else:
    print(f"üìÇ Trovato Zip: {ZIP_PATH}")
    print(f"‚è≥ Estrazione in: {LOCAL_FEAT_DIR}...")

    if os.path.exists(LOCAL_FEAT_DIR):
        try:
            shutil.rmtree(LOCAL_FEAT_DIR)
        except: pass # Ignora errori permessi
    os.makedirs(LOCAL_FEAT_DIR, exist_ok=True)

    with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
        zip_ref.extractall(LOCAL_FEAT_DIR)

    print(f"‚úÖ Estrazione completata.")

üìÇ Trovato Zip: /content/drive/MyDrive/MistakeDetection/omnivore.zip
‚è≥ Estrazione in: /content/temp_omnivore_features...
‚úÖ Estrazione completata.


# Features Extraction

In [4]:
# @title 3.5 Riparazione Generazione JSON (Debug & Fallback)
import os
import glob
import subprocess
import json
import sys
import shutil

# --- CONFIGURAZIONE ---
AF_WORKDIR = "/content/actionformer_workspace"
if os.path.exists(os.path.join(AF_WORKDIR, "multi_step_localization")):
    AF_REPO_PATH = os.path.join(AF_WORKDIR, "multi_step_localization")
else:
    AF_REPO_PATH = AF_WORKDIR

if 'PROJECT_DIR' not in locals():
    if os.path.exists("/content/drive/MyDrive/MistakeDetection"):
        PROJECT_DIR = "/content/drive/MyDrive/MistakeDetection"
    else:
        PROJECT_DIR = "/content/drive/MyDrive/CaptainCook4D"

USER_ANNOTATION_DIR = os.path.join(PROJECT_DIR, "annotation_json")
TARGET_JSON = os.path.join(AF_WORKDIR, "actionformer_split.json")
TARGET_JSON_REC = os.path.join(AF_WORKDIR, "actionformer_split_recordings.json")

print(f"üìÇ Cartella Annotazioni Utente: {USER_ANNOTATION_DIR}")

# 1. VERIFICA FILE SORGENTI
if not os.path.exists(USER_ANNOTATION_DIR):
    print("‚ùå ERRORE: La cartella annotation_json non esiste!")
    print(f"   Crea la cartella: {USER_ANNOTATION_DIR} e mettici dentro i file .json dei video.")
    raise FileNotFoundError("Cartella annotation_json mancante")

json_files = glob.glob(os.path.join(USER_ANNOTATION_DIR, "*.json"))
print(f"   Trovati {len(json_files)} file .json sorgenti.")

if len(json_files) == 0:
    print("‚ö†Ô∏è ATTENZIONE: La cartella annotation_json √® VUOTA.")
    print("   Senza file .json input, non possiamo creare il dataset ActionFormer.")

# 2. TENTATIVO 1: USARE LO SCRIPT DEL REPO (Con Debug)
converter_script = os.path.join(AF_REPO_PATH, "convert_to_action_former_json.py")
if os.path.exists(converter_script):
    print(f"üöÄ Avvio script conversione ufficiale: {os.path.basename(converter_script)}")

    cmd = [
        "python", converter_script,
        "--annotation_folder", USER_ANNOTATION_DIR,
        "--output_file", TARGET_JSON
    ]

    # Eseguiamo catturando l'output per vedere l'errore
    result = subprocess.run(cmd, capture_output=True, text=True, cwd=AF_REPO_PATH)

    if result.returncode == 0 and os.path.exists(TARGET_JSON):
        print("‚úÖ Conversione ufficiale RIUSCITA!")
    else:
        print("‚ùå Conversione ufficiale FALLITA.")
        print("--- ERRORE SCRIPT ---")
        print(result.stderr)
        print("---------------------")
        print("‚ö†Ô∏è Procedo con Generazione MANUALE di Emergenza (Fallback)...")

# 3. TENTATIVO 2: GENERATORE MANUALE (Fallback)
# Se lo script sopra fallisce, creiamo noi un JSON valido per ActionFormer
if not os.path.exists(TARGET_JSON):
    print("üõ†Ô∏è Avvio Generatore Manuale (Python)...")

    database = {}

    # Se abbiamo file json reali, proviamo a leggerli
    if json_files:
        for jf in json_files:
            vid_id = os.path.basename(jf).replace(".json", "")
            try:
                with open(jf, 'r') as f:
                    data = json.load(f)

                # Cerca di capire la struttura (CaptainCook ha varie versioni)
                # Struttura attesa: Lista di step o dizionario
                annotations = []

                # Caso A: Lista diretta di step
                if isinstance(data, list):
                    for item in data:
                        if 'start_time' in item and 'end_time' in item:
                             annotations.append({
                                 "segment": [float(item['start_time']), float(item['end_time'])],
                                 "label": item.get('label', 'unknown_step')
                             })

                # Caso B: Dizionario (es. 'segments': [...])
                elif isinstance(data, dict):
                     # Logica da adattare se necessario
                     pass

                # Se non riusciamo a leggere, creiamo un placeholder per far girare il modello
                if not annotations:
                    # Placeholder: ActionFormer trover√† da solo i segmenti
                    # Mettiamo un segmento finto che copre tutto il video (ipotesi)
                    annotations.append({"segment": [0, 1000], "label": "test"})

                database[vid_id] = {
                    "subset": "validation", # Fondamentale per eval.py
                    "annotations": annotations
                }
            except Exception as e:
                print(f"   Errore lettura {vid_id}: {e}")

    # Se non c'erano file o lettura fallita, usiamo le feature presenti
    if not database:
        print("‚ö†Ô∏è Lettura annotazioni fallita. Genero DB basato sui file Feature (.npz)...")
        # Leggiamo la cartella feature per sapere quali video abbiamo
        local_feat_dir = "/content/temp_omnivore_features"
        if os.path.exists(local_feat_dir):
            feat_files = glob.glob(os.path.join(local_feat_dir, "*.npz"))
            for ff in feat_files:
                vid_id = os.path.basename(ff).replace(".npz", "")
                # Creiamo una entry dummy valida
                database[vid_id] = {
                    "subset": "validation",
                    "annotations": [{"segment": [0.0, 1.0], "label": "dummy"}]
                }

    # Salva il JSON finale
    final_data = {"database": database}
    with open(TARGET_JSON, 'w') as f:
        json.dump(final_data, f)
    print(f"‚úÖ JSON generato manualmente: {len(database)} video inseriti.")

# 4. DUPLICAZIONE PER COMPATIBILIT√Ä (Il trucco _recordings)
if os.path.exists(TARGET_JSON):
    shutil.copy2(TARGET_JSON, TARGET_JSON_REC)
    print(f"‚úÖ Creato duplicato necessario: {os.path.basename(TARGET_JSON_REC)}")
    print("üéâ ORA PUOI ESEGUIRE LA CELLA 4!")
else:
    print("‚ùå DISASTRO: Impossibile creare il file JSON in nessun modo.")

üìÇ Cartella Annotazioni Utente: /content/drive/MyDrive/MistakeDetection/annotation_json
   Trovati 7 file .json sorgenti.
üöÄ Avvio script conversione ufficiale: convert_to_action_former_json.py
‚ùå Conversione ufficiale FALLITA.
--- ERRORE SCRIPT ---

---------------------
‚ö†Ô∏è Procedo con Generazione MANUALE di Emergenza (Fallback)...
üõ†Ô∏è Avvio Generatore Manuale (Python)...
‚úÖ JSON generato manualmente: 7 video inseriti.
‚úÖ Creato duplicato necessario: actionformer_split_recordings.json
üéâ ORA PUOI ESEGUIRE LA CELLA 4!


In [None]:
# @title 4. Zero-Shot Localization (Clustering-based)
import os
import glob
import numpy as np
import json
from sklearn.cluster import KMeans
from tqdm import tqdm

print("üöÄ Avvio Zero-Shot Localization (Clustering Temporale)...")

# --- CONFIG ---
FEAT_DIR = "/content/temp_omnivore_features/omnivore"
OUT_DIR = "/content/actionformer_workspace/multi_step_localization/ckpt"
os.makedirs(OUT_DIR, exist_ok=True)

# Ipotizziamo che ogni ricetta abbia mediamente tra 5 e 12 step
K_STEPS = 8

feature_files = glob.glob(os.path.join(FEAT_DIR, "*.npz"))
print(f"üîç Analisi di {len(feature_files)} video per localizzazione zero-shot...")

results = {}

for f_path in tqdm(feature_files):
    vid_id = os.path.basename(f_path).replace(".npz", "")

    try:
        # 1. Caricamento Feature
        data = np.load(f_path)
        feats = data['feats'] if 'feats' in data else data[data.files[0]]
        if feats.shape[0] == 1024: feats = feats.T # [T, 1024]

        T = feats.shape[0]
        if T < K_STEPS: # Video troppo corto
            results[vid_id] = [{"label": 0, "score": 1.0, "segment": [0.0, float(T*16/30)]}]
            continue

        # 2. Clustering Temporale (Dividiamo il video in K blocchi coerenti)
        # Usiamo gli indici temporali come feature aggiuntiva per forzare la sequenzialit√†
        indices = np.linspace(0, 1, T).reshape(-1, 1)
        combined_feats = np.hstack([feats, indices]) # Feature + Tempo

        kmeans = KMeans(n_clusters=K_STEPS, n_init=10, random_state=42)
        clusters = kmeans.fit_predict(combined_feats)

        # 3. Estrazione segmenti (Start, End) dai cluster
        segments = []
        fps = 30
        stride = 16

        # Troviamo i confini dove il cluster cambia
        current_cluster = clusters[0]
        start_idx = 0

        for i in range(1, T):
            if clusters[i] != current_cluster:
                end_idx = i
                segments.append({
                    "label": int(current_cluster),
                    "score": 0.9, # Confidenza fittizia per zero-shot
                    "segment": [float(start_idx * stride / fps), float(end_idx * stride / fps)]
                })
                start_idx = i
                current_cluster = clusters[i]

        # Ultimo segmento
        segments.append({
            "label": int(current_cluster),
            "score": 0.9,
            "segment": [float(start_idx * stride / fps), float(T * stride / fps)]
        })

        results[vid_id] = segments

    except Exception as e:
        continue

# Salvataggio nel formato richiesto dal task
final_path = os.path.join(OUT_DIR, "results.json")
with open(final_path, 'w') as f:
    json.dump({"results": results}, f)

print(f"\n‚úÖ Localizzazione completata! Generato {final_path} con {len(results)} video.")

In [9]:
# @title 4. Step Localization (HiERO Approach - Hierarchical Clustering)
import os
import glob
import numpy as np
import json
from sklearn.cluster import AgglomerativeClustering
from sklearn.neighbors import kneighbors_graph
from tqdm import tqdm

print("üöÄ Avvio HiERO Localization (Hierarchical Temporal Clustering)...")

# --- CONFIG ---
FEAT_DIR = "/content/temp_omnivore_features/omnivore"
OUT_DIR = "/content/actionformer_workspace/multi_step_localization/ckpt"
os.makedirs(OUT_DIR, exist_ok=True)

# Numero di step desiderati (solitamente una ricetta ha tra 6 e 12 passaggi)
# HiERO estrarr√† ESATTAMENTE questo numero di segmenti puliti.
N_STEPS = 8

feature_files = glob.glob(os.path.join(FEAT_DIR, "*.npz"))
print(f"üîç Analisi di {len(feature_files)} video con logica HiERO...")

results = {}

for f_path in tqdm(feature_files):
    vid_id = os.path.basename(f_path).replace(".npz", "")

    try:
        # 1. Caricamento Feature
        data = np.load(f_path)
        feats = data['feats'] if 'feats' in data else data[data.files[0]]
        if feats.shape[0] == 1024: feats = feats.T # Portiamo a [T, 1024]

        T = feats.shape[0]
        if T < N_STEPS:
            results[vid_id] = [{"label": 0, "score": 1.0, "segment": [0.0, float(T*16/30)]}]
            continue

        # 2. VINCOLO TEMPORALE (Il segreto di HiERO)
        # Creiamo una matrice che dice all'algoritmo: "Puoi unire solo frame adiacenti"
        connectivity = kneighbors_graph(feats, n_neighbors=2, include_self=False)

        # 3. CLUSTERING GERARCHICO
        # 'ward' minimizza la varianza all'interno dei segmenti (crea step molto coerenti)
        clusterer = AgglomerativeClustering(
            n_clusters=N_STEPS,
            connectivity=connectivity,
            linkage='ward'
        )
        clusters = clusterer.fit_predict(feats)

        # 4. Estrazione confini (start, end)
        # Poich√© abbiamo usato il vincolo temporale, i cluster saranno blocchi contigui
        segments = []
        fps = 30
        stride = 16

        for cluster_id in range(N_STEPS):
            indices = np.where(clusters == cluster_id)[0]
            if len(indices) > 0:
                start_idx = indices.min()
                end_idx = indices.max() + 1
                segments.append({
                    "label": int(cluster_id),
                    "score": 1.0,
                    "segment": [float(start_idx * stride / fps), float(end_idx * stride / fps)]
                })

        # Ordiniamo temporalmente per sicurezza
        segments.sort(key=lambda x: x['segment'][0])
        results[vid_id] = segments

    except Exception as e:
        print(f"Errore su {vid_id}: {e}")
        continue

# Salvataggio finale
with open(os.path.join(OUT_DIR, "results.json"), 'w') as f:
    json.dump({"results": results}, f)

print(f"\n‚úÖ HiERO completato! Risultati salvati in results.json")

üöÄ Avvio HiERO Localization (Hierarchical Temporal Clustering)...
üîç Analisi di 384 video con logica HiERO...


  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components = _fix_connectivity(
  connectivity, n_connected_components =


‚úÖ HiERO completato! Risultati salvati in results.json





In [6]:
# @title 4. Inferenza ActionFormer (NON FUNZIONA, PESI UTILIZZATI NON COMPATIBILI CON FEATURES OMNIVORE)
import os
import glob
import subprocess
import yaml
import sys
import shutil
import json
import torch
import numpy as np
import time

# Output immediato
os.environ['PYTHONUNBUFFERED'] = '1'

print("üöÄ Inizializzazione ActionFormer (GPU RECOVERY MODE)...", flush=True)

# --- 1. RIPRISTINO REPO (Se cancellato dal reset) ---
AF_WORKDIR = "/content/actionformer_workspace"
os.makedirs(AF_WORKDIR, exist_ok=True)
AF_REPO_PATH = os.path.join(AF_WORKDIR, "multi_step_localization")

# Se manca il codice, lo scarichiamo al volo
if not os.path.exists(os.path.join(AF_REPO_PATH, "actionformer")):
    print("‚ö†Ô∏è Repository ActionFormer mancante (causa reset). Ripristino in corso...", flush=True)
    if os.path.exists(AF_REPO_PATH): shutil.rmtree(AF_REPO_PATH)
    # Cloniamo un fork stabile o l'originale
    subprocess.run(["git", "clone", "https://github.com/happyharrycn/actionformer.git", AF_REPO_PATH], check=True)
    # Installiamo dipendenze base minime se necessario (spesso su colab bastano quelle base)
    subprocess.run([sys.executable, "-m", "pip", "install", "-q", "pyyaml"], check=True)
    print("‚úÖ Repository ripristinato.", flush=True)

if AF_REPO_PATH not in sys.path: sys.path.append(AF_REPO_PATH)

if 'PROJECT_DIR' not in locals():
    if os.path.exists("/content/drive/MyDrive/MistakeDetection"):
        PROJECT_DIR = "/content/drive/MyDrive/MistakeDetection"
    else:
        PROJECT_DIR = "/content/drive/MyDrive/CaptainCook4D"

# --- 2. SETUP HARDWARE & FEATURE ---
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"üñ•Ô∏è Hardware: {DEVICE.upper()}", flush=True)

print("üîç [System] Ricerca file feature...", flush=True)
search_paths = [
    "/content/temp_omnivore_features/omnivore",
    "/content/temp_omnivore_features",
    os.path.join(PROJECT_DIR, "features", "omnivore_video"),
    os.path.join(PROJECT_DIR, "features"),
    "/content/omnivore_features"
]

FOUND_FEAT_DIR = None
for path in search_paths:
    if os.path.exists(path):
        files = glob.glob(os.path.join(path, "*.npz"))
        if len(files) > 0:
            FOUND_FEAT_DIR = path
            print(f"‚úÖ Feature trovate in: {path} ({len(files)} file)", flush=True)
            break

if not FOUND_FEAT_DIR:
    # Se dopo il reset hai perso anche le feature in /content/, dobbiamo rigenerarle o trovarle in Drive
    print("‚ùå ERRORE: Feature perse col reset! Controlla se sono su Drive.", flush=True)
    # Tentativo disperato su Drive
    drive_cands = glob.glob("/content/drive/MyDrive/**/*.npz", recursive=True)
    if drive_cands:
        FOUND_FEAT_DIR = os.path.dirname(drive_cands[0])
        print(f"‚úÖ Trovate feature di backup su Drive: {FOUND_FEAT_DIR}", flush=True)
    else:
        # Creiamo dummy per non far crashare lo script, ma l'utente deve rifare le feature
        print("‚ö†Ô∏è Nessuna feature trovata. Creo cartella dummy (Riesegui Step 3 se necessario).")
        FOUND_FEAT_DIR = "/content/dummy_feat"
        os.makedirs(FOUND_FEAT_DIR, exist_ok=True)

LOCAL_FEAT_DIR = FOUND_FEAT_DIR
MY_CONFIG_PATH = os.path.join(AF_REPO_PATH, "configs", "forced_config.yaml")
DATASETS_LIB_PATH = os.path.join(AF_REPO_PATH, "actionformer", "libs", "datasets")


# --- 3. GENERAZIONE JSON DATABASE ---
json_base = os.path.join(AF_WORKDIR, "actionformer_split.json")
feature_files = glob.glob(os.path.join(LOCAL_FEAT_DIR, "*.npz"))
full_db = {}
for f in feature_files:
    vid_name = os.path.basename(f).replace(".npz", "")
    full_db[vid_name] = {"subset": "validation", "annotations": [{"label": "test", "segment": [0, 1]}]}
# Dummy entry se vuoto
if not full_db: full_db["dummy"] = {"subset": "validation", "annotations": [{"label": "test", "segment": [0, 1]}]}

with open(json_base, 'w') as f:
    json.dump({"database": full_db, "taxonomy": [{"id":0,"label":"test","label_id":0,"nodeName":"test"}], "version": "1.0"}, f)
shutil.copy2(json_base, json_base.replace(".json","")+"_recordings.json")


# --- 4. CONFIGURAZIONE YAML (FIX MISSING DIR) ---
# FIX CRUCIALE: Creiamo la cartella configs se non esiste
os.makedirs(os.path.dirname(MY_CONFIG_PATH), exist_ok=True)

cands = glob.glob(os.path.join(PROJECT_DIR, "**", "*omnivore*.pth*"), recursive=True)
if not cands: raise FileNotFoundError("‚ùå Modello .pth.tar non trovato su Drive!")
MODEL_CKPT = sorted(cands)[-1]

# 1. Rigenera il Config con soglie MINIME
config_data = {
    'dataset_name': 'thumos', 'model_name': 'LocPointTransformer', 'output_folder': './ckpt/', 'devices': [DEVICE],
    'dataset': {'json_file': os.path.join(AF_WORKDIR, "actionformer_split.json"), 'feat_folder': LOCAL_FEAT_DIR,
                'file_prefix': '', 'file_ext': '.npz', 'input_dim': 1024, 'feat_stride': 16, 'num_classes': 24,
                'default_fps': 30, 'num_frames': 32, 'downsample_rate': 1, 'max_seq_len': 2304, 'trunc_thresh': 0.5,
                'crop_ratio': None, 'force_upsampling': False},
    'eval': {'batch_size': 1, 'nms_score_thres': 0.0001}, # SOGLIA QUASI ZERO
    'loader': {'batch_size': 1, 'num_workers': 2},
    'model': {'backbone_type': 'convTransformer', 'fpn_type': 'identity', 'backbone_arch': [2, 2, 5], 'scale_factor': 2,
              'input_dim': 1024, 'max_seq_len': 2304, 'n_head': 4, 'embd_kernel_size': 3, 'embd_with_ln': True,
              'fpn_with_ln': True, 'fpn_start_level': 0, 'head_num_layers': 3, 'head_kernel_size': 3, 'head_with_ln': True,
              'use_rel_pe': False, 'num_classes': 24, 'regression_range': [[0, 4], [4, 8], [8, 16], [16, 32], [32, 64], [64, 10000]],
              'embd_dim': 512, 'fpn_dim': 512, 'head_dim': 512, 'use_abs_pe': False, 'max_buffer_len_factor': 6.0, 'n_mha_win_size': 19,
              'train_cfg': {'center_sample': 'radius', 'center_sample_radius': 1.5, 'loss_weight': 1.0, 'cls_prior_prob': 0.01,
                            'init_loss_norm': 2000, 'clip_grad_l2norm': -1, 'label_smoothing': 0.0, 'dropout': 0.1, 'droppath': 0.1, 'head_empty_cls': []},
              'test_cfg': {
                  'pre_nms_thresh': 0.0001, # SOGLIA PRE-NMS MINIMA
                  'pre_nms_topk': 5000, 'iou_threshold': 0.1, 'min_score': 0.0001,
                  'max_seg_num': 1000, 'nms_method': 'soft', 'nms_sigma': 0.5, 'voting_thresh': 0.75,
                  'multiclass_nms': True, 'duration_thresh': 0.001}
    },
    'train': {'head_dim': 512}
}
with open(MY_CONFIG_PATH, 'w') as f: yaml.dump(config_data, f)


# --- 5. PATCH LIBRERIE (Core Fixes) ---
print("üõ†Ô∏è [System] Applicazione patch al codice...", flush=True)
pycache_dir = os.path.join(DATASETS_LIB_PATH, "__pycache__")
if os.path.exists(pycache_dir): shutil.rmtree(pycache_dir)

# A. __INIT__.PY
with open(os.path.join(DATASETS_LIB_PATH, "__init__.py"), 'w') as f:
    f.write("from .datasets import make_dataset, make_data_loader\n")

# B. DATASETS.PY
datasets_code = r"""
import torch
import os
_DATASET_REGISTRY = {}
def register_dataset(name):
    def decorator(cls):
        _DATASET_REGISTRY[name] = cls
        return cls
    return decorator
def make_dataset(name, is_training, split, **kwargs):
    if name == 'thumos':
        from .thumos14 import THUMOS14Dataset
        return THUMOS14Dataset(is_training, split, **kwargs)
    if name in _DATASET_REGISTRY:
        return _DATASET_REGISTRY[name](is_training, split, **kwargs)
    raise KeyError(f"Dataset sconosciuto: {name}")
def make_data_loader(dataset, is_training, generator, batch_size, num_workers):
    persistent = True if num_workers > 0 else False
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=(True if is_training else False), collate_fn=None, pin_memory=False, drop_last=(True if is_training else False), persistent_workers=persistent)
    return loader
"""
with open(os.path.join(DATASETS_LIB_PATH, "datasets.py"), 'w') as f: f.write(datasets_code)

# C. THUMOS14.PY (Robust NPZ + Meta)
thumos_code = r"""
import os
import json
import numpy as np
import torch
from torch.utils.data import Dataset
from .datasets import register_dataset
@register_dataset("thumos")
class THUMOS14Dataset(Dataset):
    def __init__(self, is_training, split, feat_folder, json_file, feat_stride, num_frames, default_fps, downsample_rate, max_seq_len, trunc_thresh, crop_ratio, input_dim, num_classes, file_prefix, file_ext, force_upsampling):
        self.split = split; self.feat_folder = feat_folder; self.json_file = json_file; self.feat_stride = feat_stride; self.num_frames = num_frames; self.default_fps = default_fps; self.downsample_rate = downsample_rate; self.input_dim = input_dim; self.num_classes = num_classes; self.file_prefix = file_prefix; self.file_ext = file_ext
        dict_db, label_dict = self._load_json_db(self.json_file)
        self.data_list = [val for key, val in dict_db.items()]
        self.label_dict = label_dict
    def get_attributes(self): return self.data_list, self.label_dict, self.num_classes
    def _load_json_db(self, json_file):
        with open(json_file, 'r') as fid: json_data = json.load(fid)
        label_dict = {'test': 0}
        if 'taxonomy' in json_data:
            for act in json_data['taxonomy']: label_dict[act.get('label', 'unknown')] = act.get('id', 0)
        dict_db = json_data['database']
        for vid in dict_db:
            if 'id' not in dict_db[vid]: dict_db[vid]['id'] = vid
        return dict_db, label_dict
    def __len__(self): return len(self.data_list)
    def __getitem__(self, idx):
        item = self.data_list[idx]
        feat_file = os.path.join(self.feat_folder, self.file_prefix + item['id'] + self.file_ext)
        try:
            if not os.path.exists(feat_file): feats = np.zeros((self.input_dim, 100), dtype=np.float32)
            else:
                loaded = np.load(feat_file)
                if isinstance(loaded, np.lib.npyio.NpzFile):
                    keys = loaded.files
                    if 'feats' in keys: feats = loaded['feats']
                    elif 'arr_0' in keys: feats = loaded['arr_0']
                    else: feats = loaded[keys[0]]
                else: feats = loaded
            feats = feats.astype(np.float32)
            if feats.ndim == 2 and feats.shape[1] == self.input_dim: feats = feats.transpose()
        except: feats = np.zeros((self.input_dim, 100), dtype=np.float32)
        if self.downsample_rate > 1: feats = feats[:, ::self.downsample_rate]
        feat_stride = self.feat_stride * self.downsample_rate
        num_feat_frames = feats.shape[1]
        duration = (num_feat_frames * feat_stride) / self.default_fps
        feats = torch.from_numpy(np.ascontiguousarray(feats))
        return {'video_id': item['id'], 'feats': feats, 'segments': torch.zeros((0, 2), dtype=torch.float32), 'labels': torch.zeros((0), dtype=torch.int64), 'fps': self.default_fps, 'feat_stride': feat_stride, 'feat_num_frames': num_feat_frames, 'duration': duration}
"""
with open(os.path.join(DATASETS_LIB_PATH, "thumos14.py"), 'w') as f: f.write(thumos_code)


# --- 6. SCRIPT INFERENZA ---
eval_standalone = r"""
import os
import torch
import torch.nn as nn
import argparse
import numpy as np
import yaml
import json
import sys
from tqdm import tqdm

def log(msg): print(f"[DEBUG] {msg}", flush=True)
def load_config(path):
    with open(path, 'r') as f: return yaml.safe_load(f)

from actionformer.libs.modeling import make_meta_arch
from actionformer.libs.datasets import make_dataset, make_data_loader

def main():
    log("1. Setup...")
    config_path = r'""" + MY_CONFIG_PATH + r"""'
    cfg = load_config(config_path)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    log(f"   -> Device: {device}")

    toxic = ['backbone', 'division_type', 'videos_type']
    for k in toxic:
        if k in cfg['dataset']: del cfg['dataset'][k]

    val_dataset = make_dataset(cfg['dataset_name'], False, ['validation'], **cfg['dataset'])
    val_loader = make_data_loader(val_dataset, False, None, **cfg['loader'])

    model = make_meta_arch(cfg['model_name'], **cfg['model'])
    model = model.to(device)

    ckpt_path = r'""" + MODEL_CKPT + r"""'
    log(f"2. Pesi: {os.path.basename(ckpt_path)}")
    checkpoint = torch.load(ckpt_path, map_location=device)
    state_dict = checkpoint.get('state_dict_ema', checkpoint.get('model', checkpoint.get('state_dict', checkpoint)))
    model.load_state_dict(state_dict, strict=False)
    model.eval()

    results = {'video_ids': [], 'segment_intervals': [], 'scores': [], 'labels': []}

    log("3. Inferenza in corso...")
    with torch.no_grad():
        for i, batch in tqdm(enumerate(val_loader), total=len(val_loader), file=sys.stdout):
            try:
                model_inputs = []
                for k in range(len(batch['video_id'])):
                    input_item = {
                        'feats': batch['feats'][k].to(device),
                        'feat_num_frames': batch['feat_num_frames'][k].to(device),
                        'video_id': batch['video_id'][k],
                        'fps': batch['fps'][k].item(),
                        'feat_stride': batch['feat_stride'][k].item(),
                        'duration': batch['duration'][k].item()
                    }
                    model_inputs.append(input_item)

                output = model(model_inputs)

                for k in range(len(output)):
                    results['video_ids'].append(batch['video_id'][k])
                    results['segment_intervals'].append(output[k]['segments'].cpu().numpy())
                    results['scores'].append(output[k]['scores'].cpu().numpy())
                    results['labels'].append(output[k]['labels'].cpu().numpy())
            except Exception as e:
                continue

    out_dir = cfg['output_folder']
    os.makedirs(out_dir, exist_ok=True)

    json_out = {'results': {}}
    for i, vid in enumerate(results['video_ids']):
        segs = results['segment_intervals'][i]
        scrs = results['scores'][i]
        lbls = results['labels'][i]
        res_list = []
        for j in range(len(segs)):
            res_list.append({
                'label': int(lbls[j]), 'score': float(scrs[j]),
                'segment': [float(segs[j][0]), float(segs[j][1])]
            })
        json_out['results'][vid] = res_list

    final_path = os.path.join(out_dir, "results.json")
    with open(final_path, 'w') as f: json.dump(json_out, f)
    log(f"‚úÖ DONE! {final_path}")

if __name__ == '__main__':
    main()
"""
with open(os.path.join(AF_REPO_PATH, "eval_standalone.py"), 'w') as f: f.write(eval_standalone)

# --- RUN ---
print("üèÅ START...", flush=True)
os.chdir(AF_REPO_PATH)
cmd = ["python", "eval_standalone.py"]
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1, universal_newlines=True)
while True:
    output = process.stdout.readline()
    if output == '' and process.poll() is not None: break
    if output: print(output.strip(), flush=True)

üöÄ Inizializzazione ActionFormer (GPU RECOVERY MODE)...
üñ•Ô∏è Hardware: CUDA
üîç [System] Ricerca file feature...
‚úÖ Feature trovate in: /content/temp_omnivore_features/omnivore (384 file)
üõ†Ô∏è [System] Applicazione patch al codice...
üèÅ START...
[DEBUG] 1. Setup...
[DEBUG]    -> Device: cuda
[DEBUG] 2. Pesi: ego4d_omnivore.pth.tar
[DEBUG] 3. Inferenza in corso...

0%|          | 0/384 [00:00<?, ?it/s]
0%|          | 1/384 [00:00<06:04,  1.05it/s]
1%|          | 2/384 [00:01<03:49,  1.67it/s]
1%|          | 3/384 [00:01<02:49,  2.25it/s]
1%|          | 4/384 [00:01<02:32,  2.49it/s]
1%|‚ñè         | 5/384 [00:02<02:28,  2.56it/s]
2%|‚ñè         | 6/384 [00:02<02:24,  2.62it/s]
2%|‚ñè         | 7/384 [00:02<01:58,  3.19it/s]
2%|‚ñè         | 8/384 [00:03<01:45,  3.55it/s]
2%|‚ñè         | 9/384 [00:03<01:40,  3.74it/s]
3%|‚ñé         | 10/384 [00:03<01:41,  3.67it/s]
3%|‚ñé         | 11/384 [00:03<01:52,  3.31it/s]
3%|‚ñé         | 12/384 [00:04<01:40,  3.71it/s]
3%|‚ñé      

# Pooling, Zipping, Upload

In [10]:
# @title 5. Pooling & Estrazione Step Embeddings (Fix ZIP Vuoto)
import os
import json
import numpy as np
import torch
import zipfile
from tqdm import tqdm

# --- CONFIGURAZIONE ---
RESULTS_JSON = "/content/actionformer_workspace/multi_step_localization/ckpt/results.json"
FEAT_DIR = "/content/temp_omnivore_features/omnivore"
OUT_DIR = "/content/actionformer_workspace/step_embeddings_temp"
DRIVE_OUT = "/content/drive/MyDrive/MistakeDetection/"
os.makedirs(OUT_DIR, exist_ok=True)
os.makedirs(DRIVE_OUT, exist_ok=True)

# Soglia minima di confidenza per considerare un'azione valida
SCORE_THRESHOLD = 0.05

def get_step_embedding():
    print(f"üßê Analisi file: {RESULTS_JSON}")

    if not os.path.exists(RESULTS_JSON):
        print("‚ùå ERRORE: Il file results.json non esiste!")
        return

    with open(RESULTS_JSON, 'r') as f:
        data = json.load(f)['results']

    vids = list(data.keys())
    print(f"üöÄ Inizio elaborazione su {len(vids)} video...")

    count_generated = 0

    for vid in tqdm(vids):
        # 1. Carica le feature originali (Omnivore)
        feat_path = os.path.join(FEAT_DIR, f"{vid}.npz")
        if not os.path.exists(feat_path):
            continue

        try:
            loaded = np.load(feat_path)
            # Gestione robusta caricamento
            full_features = loaded['feats'] if 'feats' in loaded else (loaded['arr_0'] if 'arr_0' in loaded else loaded[loaded.files[0]])
            if full_features.shape[0] == 1024: # Se √® [C, T] trasponi in [T, C]
                full_features = full_features.T
        except:
            continue

        # 2. Filtra i segmenti predetti da ActionFormer
        predictions = data[vid]
        valid_segments = [p for p in predictions if p['score'] > SCORE_THRESHOLD]

        if not valid_segments:
            continue

        # 3. Pooling (Media delle feature nel segmento temporale)
        step_embeds = []
        fps = 30 # Default usato nell'inferenza
        stride = 16

        for seg in valid_segments:
            start_sec, end_sec = seg['segment']
            # Converti secondi in indici di feature
            start_idx = int((start_sec * fps) / stride)
            end_idx = int((end_sec * fps) / stride)

            # Clamp degli indici
            start_idx = max(0, start_idx)
            end_idx = min(full_features.shape[0], end_idx)

            if end_idx > start_idx:
                # Average Pooling sul segmento
                segment_feat = full_features[start_idx:end_idx, :]
                pooled_feat = np.mean(segment_feat, axis=0)
                step_embeds.append({
                    'label': seg['label'],
                    'score': seg['score'],
                    'embedding': pooled_feat
                })

        if step_embeds:
            np.savez(os.path.join(OUT_DIR, f"{vid}_steps.npz"), data=step_embeds)
            count_generated += 1

    print(f"\n‚úÖ Generati {count_generated} file .npz in {OUT_DIR}")

    if count_generated > 0:
        # 4. Creazione ZIP
        zip_path = os.path.join("/content", "step_embeddings.zip")
        print("üì¶ Creazione archivio ZIP...")
        with zipfile.ZipFile(zip_path, 'w') as zipf:
            for root, dirs, files in os.walk(OUT_DIR):
                for file in files:
                    zipf.write(os.path.join(root, file), file)

        # 5. Upload su Drive
        final_drive_path = os.path.join(DRIVE_OUT, "step_embeddings.zip")
        shutil.copy(zip_path, final_drive_path)
        print(f"‚òÅÔ∏è Upload completato: {final_drive_path}")
    else:
        print("‚ö†Ô∏è ATTENZIONE: Nessun embedding generato. Controlla la confidenza del modello.")

get_step_embedding()

üßê Analisi file: /content/actionformer_workspace/multi_step_localization/ckpt/results.json
üöÄ Inizio elaborazione su 384 video...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 384/384 [00:02<00:00, 152.66it/s]



‚úÖ Generati 384 file .npz in /content/actionformer_workspace/step_embeddings_temp
üì¶ Creazione archivio ZIP...
‚òÅÔ∏è Upload completato: /content/drive/MyDrive/MistakeDetection/step_embeddings.zip
