In [None]:
import sys, os
try:
    from google.colab import drive, userdata
    IS_COLAB = True
except ImportError:
    IS_COLAB = False

REPO_NAME = 'MistakeDetection'

if IS_COLAB:
    print("‚òÅÔ∏è Colab rilevato.")
    if not os.path.exists('/content/drive'): drive.mount('/content/drive')

    GITHUB_USER = 'MarcoPernoVDP'
    try:
        TOKEN = userdata.get('GITHUB_TOKEN')
        REPO_URL = f'https://{TOKEN}@github.com/{GITHUB_USER}/{REPO_NAME}.git'
    except:
        REPO_URL = f'https://github.com/{GITHUB_USER}/{REPO_NAME}.git'

    ROOT_DIR = f'/content/{REPO_NAME}'
    if not os.path.exists(ROOT_DIR):
        !git clone {REPO_URL}
    else:
        %cd {ROOT_DIR}
        !git pull
        %cd /content


else:
    print("Ambiente locale rilevato.")
    ROOT_DIR = os.getcwd()
    while not os.path.exists(os.path.join(ROOT_DIR, '.gitignore')) and ROOT_DIR != os.path.dirname(ROOT_DIR):
        ROOT_DIR = os.path.dirname(ROOT_DIR)

if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)


‚òÅÔ∏è Colab rilevato.
/content/MistakeDetection
Already up to date.
/content


In [None]:
# Environment check: show the resolved project root and which runtime is in use
env_label = 'Colab' if IS_COLAB else 'Local'
print(f"üìç Working directory: {ROOT_DIR}")
print(f"üêç Python environment: {env_label}")

# Task 2 - Subtask 3: Hungarian Matching

Matching tra video step embeddings e task graph text embeddings usando l'algoritmo Ungherese.

## 1. Installazione Dipendenze

In [None]:
# On Colab, install the third-party packages this notebook imports below
# (-q keeps pip's output quiet).
if IS_COLAB:
    !pip install scipy matplotlib seaborn -q

import numpy as np
import json
import zipfile
from pathlib import Path
from scipy.optimize import linear_sum_assignment
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Tuple
import warnings
# NOTE(review): this silences every warning category for the rest of the
# session, including NumPy runtime warnings (e.g. mean of an empty array).
warnings.filterwarnings('ignore')

print("‚úÖ Librerie caricate")

## 2. Configurazione Path

**Modifica questi path secondo la tua struttura:**

In [None]:
if not IS_COLAB:
    # ---------- LOCAL: paths are built from the repository root ----------
    VIDEO_FEATURES_ZIP = str(Path(ROOT_DIR, "data", "step_embeddings.zip"))
    TEXT_FEATURES_ZIP = str(Path(ROOT_DIR, "data", "text_embeddings.zip"))
    OUTPUT_DIR = str(Path(ROOT_DIR, "output", "hungarian_results"))
else:
    # ---------- COLAB: edit these Google Drive paths as needed ----------
    VIDEO_FEATURES_ZIP = "/content/drive/MyDrive/MistakeDetection/step_embeddings.zip"
    TEXT_FEATURES_ZIP = "/content/drive/MyDrive/MistakeDetection/text_embeddings.zip"
    OUTPUT_DIR = "/content/drive/MyDrive/MistakeDetection/hungarian_results"

# Ensure the output folder exists (intermediate folders included)
output_dir_path = Path(OUTPUT_DIR)
output_dir_path.mkdir(parents=True, exist_ok=True)

# Echo the effective configuration
print("üìÅ Configurazione:")
print(f"  Video features ZIP: {VIDEO_FEATURES_ZIP}")
print(f"  Text features ZIP:  {TEXT_FEATURES_ZIP}")
print(f"  Output directory:   {OUTPUT_DIR}")

## 3. Caricamento Features da ZIP

In [None]:
def load_features_from_zip(zip_path: str) -> Dict[str, np.ndarray]:
    """
    Load every ``features`` array stored in the .npz members of a ZIP archive.

    Each array is L2-normalized row by row (rows with zero norm are left as
    zeros). Loading is best-effort: a member that cannot be read or
    normalized, or that has no ``'features'`` key, is reported on stdout and
    skipped.

    Args:
        zip_path: Path to the ZIP file containing .npz files.

    Returns:
        Dict mapping each member's basename without extension to its
        normalized features array. Members sharing the same basename
        overwrite each other (the last one in the archive wins).
    """
    features_dict = {}

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        # Only .npz members are considered; everything else is ignored
        npz_files = [f for f in zip_ref.namelist() if f.endswith('.npz')]

        print(f"üì¶ Trovati {len(npz_files)} file .npz in {Path(zip_path).name}")

        for npz_file in npz_files:
            try:
                # NOTE(review): allow_pickle=True can execute arbitrary code when
                # the archive comes from an untrusted source; plain numeric
                # arrays do not need it. Kept for backward compatibility.
                # Opening np.load as a context manager closes the NpzFile handle.
                with zip_ref.open(npz_file) as f, \
                        np.load(f, allow_pickle=True) as data:
                    if 'features' not in data:
                        print(f"Attenzione: chiave 'features' assente in {npz_file}, file ignorato")
                        continue
                    features = data['features']

                # Row-wise L2 normalization; a zero norm is replaced by 1 so
                # all-zero rows stay zero instead of producing NaN
                norms = np.linalg.norm(features, axis=1, keepdims=True)
                norms = np.where(norms == 0, 1, norms)

                # Key is the member's basename without the .npz extension
                features_dict[Path(npz_file).stem] = features / norms

            except Exception as e:
                print(f"‚ö†Ô∏è Errore caricando {npz_file}: {e}")
                continue

    return features_dict

# Load both embedding sets from their ZIP archives
print("\nüîÑ Caricamento video features...")
video_features = load_features_from_zip(VIDEO_FEATURES_ZIP)

print("\nüîÑ Caricamento text features...")
text_features = load_features_from_zip(TEXT_FEATURES_ZIP)

n_video, n_text = len(video_features), len(text_features)
print("\n‚úÖ Caricate:")
print(f"  {n_video} video features")
print(f"  {n_text} text features")

# Show the first entry of each set as a quick shape check
if video_features:
    example_key = next(iter(video_features))
    example_shape = video_features[example_key].shape
    print(f"\nüìä Esempio video feature '{example_key}': shape {example_shape}")
if text_features:
    example_key = next(iter(text_features))
    example_shape = text_features[example_key].shape
    print(f"üìä Esempio text feature '{example_key}': shape {example_shape}")

## 4. Algoritmo di Hungarian Matching

In [None]:
def hungarian_matching(video_feats: np.ndarray, 
                      text_feats: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, float]:
    """
    Solve the optimal one-to-one assignment between video and text embeddings.

    The cost of pairing row i of ``video_feats`` with row j of ``text_feats``
    is ``1 - <video_i, text_j>``, which equals the cosine distance when both
    inputs are already L2-normalized (this function does not normalize them).

    Args:
        video_feats: Array (N, D) of video embeddings.
        text_feats: Array (M, D) of text embeddings.

    Returns:
        A tuple ``(video_indices, text_indices, distances, avg_distance)``:
        the matched row indices on each side, the cost of every matched
        pair, and the mean of those costs.
    """
    # Pairwise cost: one minus the dot-product similarity
    pairwise_cost = 1.0 - np.matmul(video_feats, text_feats.T)

    # Minimum-total-cost assignment (Hungarian algorithm)
    rows, cols = linear_sum_assignment(pairwise_cost)

    # Cost of each selected (video, text) pair
    matched_cost = pairwise_cost[rows, cols]

    return rows, cols, matched_cost, matched_cost.mean()

print("‚úÖ Funzione Hungarian matching definita")

## 5. Matching Video-Text Features

In [None]:
def _strip_text_suffix(text_id: str) -> str:
    """Return ``text_id`` without a trailing ``'_text'`` (suffix only)."""
    suffix = '_text'
    return text_id[:-len(suffix)] if text_id.endswith(suffix) else text_id


def _occurs_as_token(needle: str, haystack: str) -> bool:
    """Tell whether ``needle`` occurs in ``haystack`` delimited by non-alphanumerics."""
    start = haystack.find(needle)
    while start != -1:
        end = start + len(needle)
        left_ok = start == 0 or not haystack[start - 1].isalnum()
        right_ok = end == len(haystack) or not haystack[end].isalnum()
        if left_ok and right_ok:
            return True
        start = haystack.find(needle, start + 1)
    return False


def _find_text_id(video_id: str, recipe_ids: Dict[str, str]):
    """Pick the text id whose recipe id best matches ``video_id``, or None."""
    best_text_id = None
    best_rank = None
    for text_id, recipe_id in recipe_ids.items():
        # An empty recipe id is contained in every string: never a valid match
        if not recipe_id or recipe_id not in video_id:
            continue
        # Whole-token matches beat partial ones, then longer ids beat shorter
        # ones, so recipe '1' does not capture video '12_7' when '12' exists.
        rank = (_occurs_as_token(recipe_id, video_id), len(recipe_id))
        # Strict '>' keeps the first candidate (dict order) on ties
        if best_rank is None or rank > best_rank:
            best_text_id, best_rank = text_id, rank
    return best_text_id


def match_video_to_text(video_features: Dict[str, np.ndarray],
                       text_features: Dict[str, np.ndarray]) -> Dict:
    """
    Run Hungarian matching between every video and its corresponding text features.

    A text entry corresponds to a video when its recipe id (the text id with
    a trailing ``'_text'`` removed) is contained in the video id. When
    several text entries qualify, the most specific one is chosen: whole-token
    matches first, then the longest recipe id; ties keep dictionary order.
    Videos without any qualifying text entry are reported and skipped.

    Args:
        video_features: Dict {video_id: features}
        text_features: Dict {recipe_id: features}

    Returns:
        Dict {video_id: result} where each result holds ``'text_id'``,
        ``'video_indices'``, ``'text_indices'``, ``'distances'``,
        ``'avg_distance'`` and ``'num_matches'``.
    """
    results = {}

    print("\nüöÄ Esecuzione Hungarian matching...\n")

    # Recipe ids do not depend on the video: compute them once up front
    recipe_ids = {text_id: _strip_text_suffix(text_id) for text_id in text_features}

    for video_id, vid_feats in video_features.items():
        matched_text_id = _find_text_id(video_id, recipe_ids)

        if matched_text_id is None:
            print(f"‚ö†Ô∏è Nessuna text feature trovata per {video_id}")
            continue

        txt_feats = text_features[matched_text_id]

        # Optimal one-to-one assignment between video steps and text steps
        vid_idx, txt_idx, distances, avg_dist = hungarian_matching(vid_feats, txt_feats)

        results[video_id] = {
            'text_id': matched_text_id,
            'video_indices': vid_idx,
            'text_indices': txt_idx,
            'distances': distances,
            'avg_distance': avg_dist,
            'num_matches': len(vid_idx)
        }

        print(f"‚úì {video_id} ‚Üí {matched_text_id}")
        print(f"  Matches: {len(vid_idx)}, Avg distance: {avg_dist:.4f}\n")

    return results

# Run the matching over every loaded video
matching_results = match_video_to_text(video_features, text_features)

# Global statistics over the per-video average distances
if not matching_results:
    print("‚ö†Ô∏è Nessun matching trovato!")
else:
    all_avg_dists = [entry['avg_distance'] for entry in matching_results.values()]
    dist_mean, dist_std = np.mean(all_avg_dists), np.std(all_avg_dists)
    dist_min, dist_max = np.min(all_avg_dists), np.max(all_avg_dists)
    print("\nüìä STATISTICHE GLOBALI:")
    print(f"  Total matches: {len(matching_results)}")
    print(f"  Avg distance: {dist_mean:.4f} ¬± {dist_std:.4f}")
    print(f"  Min distance: {dist_min:.4f}")
    print(f"  Max distance: {dist_max:.4f}")

## 6. Salvataggio Risultati

In [None]:
# --- JSON export: NumPy arrays and scalars are converted to plain Python types ---
output_json = Path(OUTPUT_DIR) / "matching_results.json"

json_results = {
    video_id: {
        'text_id': result['text_id'],
        'video_indices': result['video_indices'].tolist(),
        'text_indices': result['text_indices'].tolist(),
        'distances': result['distances'].tolist(),
        'avg_distance': float(result['avg_distance']),
        'num_matches': int(result['num_matches'])
    }
    for video_id, result in matching_results.items()
}

with output_json.open('w') as f:
    json.dump(json_results, f, indent=2)

print(f"üíæ Risultati salvati in: {output_json}")

# --- NPZ export (for reuse from Python): one entry per video id ---
# NOTE(review): each value is a dict, so NumPy presumably stores it as a pickled
# object array and loading would need allow_pickle=True — confirm with consumers.
output_npz = Path(OUTPUT_DIR) / "matching_results.npz"
np.savez_compressed(output_npz, **matching_results)
print(f"üíæ Risultati salvati in: {output_npz}")

## 7. Visualizzazione (Opzionale)

In [None]:
# Distance plots: histogram of all matched pairs + per-video averages
if not matching_results:
    print("‚ö†Ô∏è Nessun risultato da visualizzare")
else:
    # Flatten the per-video distance arrays into one list
    all_distances = [
        dist
        for entry in matching_results.values()
        for dist in entry['distances'].tolist()
    ]

    plt.figure(figsize=(12, 5))

    # Left panel: histogram of every matched-pair distance
    plt.subplot(1, 2, 1)
    plt.hist(all_distances, bins=50, edgecolor='black', alpha=0.7)
    plt.xlabel('Cosine Distance', fontsize=12)
    plt.ylabel('Frequency', fontsize=12)
    plt.title('Distribuzione delle Distanze di Matching', fontsize=14)
    plt.grid(alpha=0.3)

    # Right panel: one bar per video with its average distance
    plt.subplot(1, 2, 2)
    video_ids = list(matching_results.keys())
    avg_dists = [matching_results[vid]['avg_distance'] for vid in video_ids]
    overall_mean = np.mean(avg_dists)

    plt.bar(range(len(avg_dists)), avg_dists, alpha=0.7, edgecolor='black')
    plt.xlabel('Video Index', fontsize=12)
    plt.ylabel('Average Distance', fontsize=12)
    plt.title('Distanza Media per Video', fontsize=14)
    # Dashed reference line at the mean of the per-video averages
    plt.axhline(overall_mean, color='red', linestyle='--',
                label=f'Media: {overall_mean:.3f}')
    plt.legend()
    plt.grid(alpha=0.3)

    plt.tight_layout()

    # Write the figure to disk before displaying it
    plot_path = Path(OUTPUT_DIR) / 'matching_distances.png'
    plt.savefig(plot_path, dpi=150, bbox_inches='tight')
    print(f"üíæ Plot salvato in: {plot_path}")

    plt.show()

## 8. Esempio di Analisi Dettagliata

In [None]:
# Detailed look at a single matching result
if not matching_results:
    print("‚ö†Ô∏è Nessun risultato disponibile")
else:
    # Use the first result in insertion order as the example
    example_video_id = next(iter(matching_results))
    result = matching_results[example_video_id]
    banner = '=' * 80

    print(banner)
    print(f"ESEMPIO DI MATCHING: {example_video_id}")
    print(banner)
    print(f"Text ID matched: {result['text_id']}")
    print(f"Numero di matches: {result['num_matches']}")
    print(f"Distanza media: {result['avg_distance']:.4f}\n")

    print(f"{'Video Step':<15} {'Text Step':<15} {'Distance':<12}")
    print('-' * 45)

    # Table preview limited to the first 10 matched pairs
    preview = zip(
        result['video_indices'][:10],
        result['text_indices'][:10],
        result['distances'][:10],
    )
    for vid_idx, txt_idx, dist in preview:
        print(f"{vid_idx:<15} {txt_idx:<15} {dist:<12.4f}")

    remaining = result['num_matches'] - 10
    if remaining > 0:
        print(f"... (altri {remaining} matches)")

    # Rebuild the cost matrix for this example (1 - dot-product similarity)
    vid_feats = video_features[example_video_id]
    txt_feats = text_features[result['text_id']]
    cost_matrix = 1.0 - vid_feats @ txt_feats.T

    plt.figure(figsize=(10, 8))
    plt.imshow(cost_matrix, cmap='YlOrRd', aspect='auto')

    # Mark every optimal (text, video) pair with a blue star
    for vid_idx, txt_idx in zip(result['video_indices'], result['text_indices']):
        plt.plot(txt_idx, vid_idx, 'b*', markersize=10, markeredgecolor='blue', markeredgewidth=2)

    plt.colorbar(label='Cosine Distance')
    plt.xlabel('Text Step Index', fontsize=12)
    plt.ylabel('Video Step Index', fontsize=12)
    plt.title(f'Cost Matrix - {example_video_id}\n(Blue stars = optimal matches)', fontsize=14)
    plt.tight_layout()

    # Write the figure to disk before displaying it
    cost_matrix_path = Path(OUTPUT_DIR) / f'cost_matrix_{example_video_id}.png'
    plt.savefig(cost_matrix_path, dpi=150, bbox_inches='tight')
    print(f"\nüíæ Cost matrix salvata in: {cost_matrix_path}")

    plt.show()

---

## 📝 Istruzioni d'Uso

### 1️⃣ Preparazione Dati

Assicurati di avere:
- **File ZIP con video features**: contiene file .npz con embeddings video step-level
- **File ZIP con text features**: contiene file .npz con embeddings testuali dei task graph

Ogni file .npz deve contenere almeno la chiave `'features'` con un array numpy.

### 2️⃣ Configurazione

Modifica la cella della **sezione 2** (Configurazione Path) e inserisci i percorsi corretti:

```python
# Per Colab
VIDEO_FEATURES_ZIP = "/content/drive/MyDrive/.../step_embeddings.zip"
TEXT_FEATURES_ZIP = "/content/drive/MyDrive/.../text_embeddings.zip"
OUTPUT_DIR = "/content/drive/MyDrive/.../hungarian_results"

# Per Locale
VIDEO_FEATURES_ZIP = "path/to/step_embeddings.zip"
TEXT_FEATURES_ZIP = "path/to/text_embeddings.zip"
OUTPUT_DIR = "path/to/output"
```

### 3️⃣ Esecuzione

Esegui tutte le celle in ordine. Il notebook:
1. Carica le features dai file ZIP
2. Normalizza le features (L2 normalization)
3. Esegue Hungarian matching usando cosine distance
4. Salva i risultati in JSON e NPZ
5. Genera visualizzazioni

### 4️⃣ Output

I risultati vengono salvati in `OUTPUT_DIR`:
- `matching_results.json`: risultati in formato leggibile
- `matching_results.npz`: risultati in formato numpy (per Python)
- `matching_distances.png`: grafici delle distanze
- `cost_matrix_*.png`: matrici di costo per esempi specifici

### 🔍 Cosa fa l'Hungarian Matching?

L'algoritmo Ungherese risolve il problema di assegnamento ottimale tra:
- **N video steps** (righe)
- **M task graph nodes** (colonne)

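Trova l'accoppiamento 1-a-1 che **minimizza la distanza coseno totale**. Se N ≠ M vengono accoppiati solo min(N, M) elementi: gli step video o i nodi in eccesso restano senza match.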

**Cosine Distance** = 1 - Cosine Similarity  
(più basso = più simili)

### ⚡ Note Importanti

- Le features sono **normalizzate L2** automaticamente
- I file vengono matchati per **nome**: il nome del video deve contenere l'ID della ricetta
- Se non trovi match, controlla la convenzione di naming dei tuoi file .npz