# Entrenament EUREKA

Aquest notebook entrena el model EUREKA utilitzant els fitxers d'anotació `.txt` (`Annot_TrainList.txt`, `classIdx.txt`) i els vídeos `.avi` locals.

In [None]:
import os
import cv2
import torch
import numpy as np
import pandas as pd
import mediapipe as mp
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm

# --- 1. MAC CONFIGURATION (MPS) ---
def get_device():
    """Pick the torch device used by the rest of the notebook.

    Returns:
        torch.device: ``mps`` when ``torch.backends.mps.is_available()``
        reports True, otherwise ``cpu``. The decision is also printed.
    """
    if not torch.backends.mps.is_available():
        print("No s'ha detectat GPU Metal. Utilitzant CPU.")
        return torch.device("cpu")

    print("Acceleraci√≥ Apple Silicon detectada: Utilitzant MPS")
    return torch.device("mps")

# Resolved once, when this cell runs.
DEVICE = get_device()

✅ Acceleració Apple Silicon detectada: Utilitzant MPS


In [None]:
# --- 2. CONFIGURATION AND PATHS ---
M_KEY_FRAMES = 15    # key frames sampled from each gesture clip
NUM_LANDMARKS = 21   # landmarks expected per detected hand
COMPONENTS = 2       # coordinates kept per landmark (x, y)

# Feature-vector length: one (dx, dy) pair for every
# (current landmark, previous landmark) combination, for each of the
# M_KEY_FRAMES - 1 transitions between consecutive key frames.
INPUT_SIZE = NUM_LANDMARKS * NUM_LANDMARKS * COMPONENTS * (M_KEY_FRAMES - 1)

# Change these if your files live somewhere else
PATH_VIDEOS_ROOT = "videos"
PATH_ANNOTATIONS_DIR = "annotations"

FILE_TRAIN_LIST, FILE_CLASS_IDX = [
    os.path.join(PATH_ANNOTATIONS_DIR, annotation_file)
    for annotation_file in ("Annot_TrainList.txt", "classIdx.txt")
]

# Sanity check: only reports the problem, it does not stop execution
if not os.path.exists(FILE_TRAIN_LIST):
    print(f"ERROR: No trobo {FILE_TRAIN_LIST}. Crea la carpeta 'annotations' i posa-hi els fitxers.")

In [None]:
# --- 3. READ CLASSES (hardened version) ---
def carregar_classes_txt(path):
    """Load the class-id -> class-name table from a text file.

    Each non-empty line is split on ',' if it contains one, else on ';',
    else on whitespace. The first field supplies the numeric id (only its
    digit characters are kept) and the second field the class name
    (surrounding spaces and quotes removed). Lines with fewer than two
    fields, or whose first field contains no digits (e.g. a header row),
    are skipped. When an id appears more than once the last line wins.

    The file is decoded as UTF-8 (a leading BOM is stripped, undecodable
    bytes are replaced) so the result does not depend on the platform's
    default encoding.

    Args:
        path: Path of the class index file.

    Returns:
        tuple[list[str], int]: class names ordered by ascending id, and the
        smallest id found. Falls back to ``(["NoGesture"], 1)`` when the
        file is missing, yields no class, or an exception is raised while
        reading it.
    """
    print(f"Llegint classes de: {path}")
    classes = {}

    if not os.path.exists(path):
        print(f"ERROR: El fitxer {path} no existeix.")
        return ["NoGesture"], 1

    try:
        # Explicit encoding: the default is locale-dependent and would decode
        # accented class names differently from one machine to another.
        with open(path, 'r', encoding="utf-8-sig", errors="replace") as f:
            lines = f.readlines()

        print(f" Primeres l√≠nies del fitxer (per depurar):\n   {lines[:3]}")

        for line in lines:
            line = line.strip()
            if not line:
                continue

            # 1. Try the supported separators in order of preference
            if ',' in line:
                parts = line.split(',')   # CSV format: 1,NoGesture
            elif ';' in line:
                parts = line.split(';')   # semicolon-separated variant
            else:
                parts = line.split()      # whitespace format: 1 NoGesture

            # 2. Need at least an id and a name
            if len(parts) < 2:
                continue

            # Keep only digits so stray spaces / invisible characters around
            # the id do not break the conversion; header rows end up empty.
            id_str = "".join(filter(str.isdigit, parts[0]))
            if not id_str:
                continue
            try:
                id_class = int(id_str)
            except ValueError:
                # str.isdigit accepts some characters int() rejects
                continue

            # The name is the second field, without quotes or padding
            classes[id_class] = parts[1].strip().strip('"').strip("'")

        # 3. Final validation
        if not classes:
            print("ALERTA: No s'ha pogut llegir cap classe. El fitxer est√† buit o t√© un format desconegut.")
            return ["NoGesture"], 1

        # 4. Order by id
        sorted_ids = sorted(classes)
        class_list = [classes[i] for i in sorted_ids]
        min_id = sorted_ids[0]

        # Callers turn an annotation id into a list index with `id - min_id`;
        # that only lines up with `class_list` when the ids have no gaps.
        if sorted_ids != list(range(min_id, min_id + len(sorted_ids))):
            print(f"ALERTA: IDs de classe no consecutius ({sorted_ids}): "
                  f"'id - {min_id}' pot quedar fora de la llista de classes.")

        return class_list, min_id

    except Exception as e:
        # Deliberate best effort: report the problem and fall back to a
        # single-class table rather than aborting the notebook.
        print(f"Error cr√≠tic (Excepci√≥): {e}")
        return ["NoGesture"], 1

# Load the class table once, when this cell runs.
# CLASSES: class names ordered by id; MIN_ID: smallest id found in the file.
CLASSES, MIN_ID = carregar_classes_txt(FILE_CLASS_IDX)
# Number of classes read; EurekaNet uses it as the size of its output layer.
NUM_CLASSES = len(CLASSES)
print(f"Classes carregades ({NUM_CLASSES}): {CLASSES}")
print(f"L'ID m√≠nim √©s {MIN_ID}.")

üìñ Llegint classes de: annotations/classIdx.txt
   ‚ÑπÔ∏è Primeres l√≠nies del fitxer (per depurar):
   ['id,label\n', '1,D0X\n', '2,B0A\n']
üìã Classes carregades (14): ['D0X', 'B0A', 'B0B', 'G01', 'G02', 'G03', 'G04', 'G05', 'G06', 'G07', 'G08', 'G09', 'G10', 'G11']
‚ÑπÔ∏è L'ID m√≠nim √©s 1.


In [13]:
# --- 4. EUREKA MODEL ---
class EurekaNet(nn.Module):
    """Three-layer fully connected classifier over a flat feature vector.

    Layout::

        Linear(input_size, 128) -> BatchNorm1d -> ReLU -> Dropout
        Linear(128, 64)         -> BatchNorm1d -> ReLU -> Dropout
        Linear(64, num_classes)

    ``forward`` returns raw logits (no softmax is applied).
    """

    def __init__(self, input_size=None, num_classes=None, dropout=0.25):
        """Build the layers.

        Args:
            input_size: Length of each input feature vector. ``None`` (the
                default) uses the notebook-level ``INPUT_SIZE``.
            num_classes: Number of output logits. ``None`` (the default)
                uses the notebook-level ``NUM_CLASSES``.
            dropout: Drop probability of both dropout layers.
        """
        super().__init__()
        # Resolve the defaults at call time so `EurekaNet()` keeps working
        # exactly as before, while callers without those globals (e.g. an
        # inference script) can pass the sizes explicitly.
        if input_size is None:
            input_size = INPUT_SIZE
        if num_classes is None:
            num_classes = NUM_CLASSES

        # Attribute names are kept so saved state_dicts remain loadable.
        self.fc1 = nn.Linear(input_size, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.dropout1 = nn.Dropout(dropout)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.dropout2 = nn.Dropout(dropout)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Map a ``(batch, input_size)`` tensor to ``(batch, num_classes)`` logits."""
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)
        return self.fc3(x)

In [14]:
# --- 5. FEATURE EXTRACTOR (optimized) ---
def extract_features(video_path, start, end, hands_model):
    """Turn one annotated gesture clip into a flat landmark-difference vector.

    Frames are read from ``start`` onward; each is converted to RGB and
    passed to ``hands_model``. The (x, y) landmarks of the first detected
    hand are collected for every frame with a detection. ``M_KEY_FRAMES``
    frames are then sampled evenly from that sequence, and for each pair of
    consecutive key frames the difference between every current landmark
    and every previous landmark is emitted as (dx, dy).

    Args:
        video_path: Path of the video file to open.
        start: Frame position to seek to before reading.
        end: Last annotated frame (inclusive). At least ``M_KEY_FRAMES``
            frames are read even when ``end - start + 1`` is smaller.
        hands_model: Already-initialised detector exposing
            ``process(rgb_frame)`` whose result has ``multi_hand_landmarks``.
            It is reused across calls and is NOT closed here; the caller
            owns it.

    Returns:
        np.ndarray | None: 1-D float32 array ordered as
        (transition, current landmark, previous landmark, component), or
        ``None`` when the video cannot be opened or fewer than two frames
        produced a detection.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        # Quick validation: bail out if the video cannot be opened
        if not cap.isOpened():
            return None

        cap.set(cv2.CAP_PROP_POS_FRAMES, start)

        # Read the annotated span, but never fewer than M_KEY_FRAMES frames
        frames_to_read = max(end - start + 1, M_KEY_FRAMES)

        lm_seq = []
        for _ in range(frames_to_read):
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV decodes to BGR; the detector is fed RGB
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            res = hands_model.process(frame_rgb)

            if res.multi_hand_landmarks:
                lm_seq.append([[lm.x, lm.y] for lm in res.multi_hand_landmarks[0].landmark])
    finally:
        # Always free the capture, even if decoding or detection raises
        cap.release()

    if len(lm_seq) < 2:
        return None

    # Pad short sequences by repeating the last detection
    if len(lm_seq) < M_KEY_FRAMES:
        lm_seq.extend([lm_seq[-1]] * (M_KEY_FRAMES - len(lm_seq)))

    indices = np.linspace(0, len(lm_seq) - 1, M_KEY_FRAMES, dtype=int)
    # Shape (M_KEY_FRAMES, landmarks, 2); computed in float64 and cast at the
    # end, matching the previous element-by-element Python arithmetic.
    key_frames = np.asarray([lm_seq[i] for i in indices], dtype=np.float64)

    # diffs[t, i, j, c] = curr_t[i, c] - prev_t[j, c] via broadcasting; the
    # flattened order equals the former nested loops (t, current, previous, c).
    diffs = key_frames[1:, :, None, :] - key_frames[:-1, None, :, :]
    return diffs.reshape(-1).astype(np.float32)

In [None]:
# --- 6. VIDEO INDEXER ---
def indexar_videos_local(root_dir):
    """Recursively map video base names (without extension) to file paths.

    Only files ending in ``.avi`` or ``.mp4`` (case-insensitive) are
    indexed. Directories and files are visited in sorted order so the result
    is the same on every run. When two files share a base name the one
    visited last is kept and the collision is reported, instead of being
    overwritten silently.

    Args:
        root_dir: Directory to scan. A missing directory yields an empty map.

    Returns:
        dict[str, str]: base name -> path (joined onto ``root_dir``).
    """
    print(f"üîç Indexant v√≠deos a: {os.path.abspath(root_dir)}")
    video_map = {}
    count = 0
    for root, dirs, files in os.walk(root_dir):
        # Sorting in place makes os.walk descend in a stable order
        dirs.sort()
        for file in sorted(files):
            if not file.lower().endswith(('.avi', '.mp4')):
                continue
            name_no_ext = os.path.splitext(file)[0]
            full_path = os.path.join(root, file)
            previous = video_map.get(name_no_ext)
            if previous is not None:
                print(f"ALERTA: nom duplicat '{name_no_ext}': {full_path} substitueix {previous}")
            video_map[name_no_ext] = full_path
            count += 1
    print(f"Trobats {count} fitxers de v√≠deo.")
    return video_map

In [None]:
# --- 7. PREPARE DATASET (optimized: detector loaded once) ---
def preparar_dataset_txt():
    """Build the training samples from the annotation list and local videos.

    Steps: index the videos under ``PATH_VIDEOS_ROOT``; read
    ``FILE_TRAIN_LIST`` as a header-less CSV with columns
    ``video, label_name, label_id, start, end, frames``; for every row,
    locate the video by name, extract its feature vector with
    ``extract_features`` and keep it when its length equals ``INPUT_SIZE``.

    Every row ends up in exactly one counter of the printed summary, so no
    row disappears silently.

    Returns:
        tuple[list[torch.Tensor], list[int]]: feature tensors and their
        zero-based labels (``label_id - MIN_ID``). Both lists are empty when
        no video is found or the annotation file cannot be read.
    """
    # 1. Index the disk
    video_map = indexar_videos_local(PATH_VIDEOS_ROOT)
    if not video_map:
        print("ERROR: Carpeta de v√≠deos buida.")
        return [], []

    # 2. Read Annot_TrainList.txt
    print(f"Llegint llista d'entrenament: {FILE_TRAIN_LIST}")
    try:
        df = pd.read_csv(FILE_TRAIN_LIST, header=None, sep=',')
        df.columns = ['video', 'label_name', 'label_id', 'start', 'end', 'frames']
    except Exception as e:
        print(f"Error llegint el CSV: {e}")
        return [], []

    X, Y = [], []
    stats = {"ok": 0, "fail_read": 0, "no_video": 0, "bad_label": 0, "error": 0}
    first_error = None

    print("Inicialitzant MediaPipe (una sola vegada)...")
    mp_hands = mp.solutions.hands

    # The detector is created once, outside the loop, and reused for every clip.
    # NOTE(review): with static_image_mode=False the same instance is fed
    # frames from unrelated clips back to back; confirm that any state it
    # keeps between frames does not leak across clips.
    with mp_hands.Hands(
        static_image_mode=False,  # video mode: faster than per-image detection
        max_num_hands=1,
        min_detection_confidence=0.4
    ) as hands_model:

        print(f"Processant {len(df)} gestos (Ara anir√† r√†pid)...")

        for idx, row in tqdm(df.iterrows(), total=df.shape[0]):
            try:
                vid_name = str(row['video']).strip()
                label_id = int(row['label_id']) - MIN_ID
                start = int(row['start'])
                end = int(row['end'])

                if label_id < 0 or label_id >= NUM_CLASSES:
                    stats["bad_label"] += 1
                    continue

                # Look the video up by name, then by name without extension
                vpath = video_map.get(vid_name)
                if not vpath:
                    clean = os.path.splitext(vid_name)[0]
                    vpath = video_map.get(clean)

                if not vpath:
                    stats["no_video"] += 1
                    continue

                # Hand the shared detector to the extractor
                feats = extract_features(vpath, start, end, hands_model)

                if feats is not None and len(feats) == INPUT_SIZE:
                    X.append(torch.tensor(feats))
                    Y.append(label_id)
                    stats["ok"] += 1
                else:
                    stats["fail_read"] += 1

            except Exception as e:
                # Best effort: keep processing, but account for the row and
                # remember the first failure so it can be investigated.
                stats["error"] += 1
                if first_error is None:
                    first_error = f"fila {idx}: {e!r}"
                continue

    print(f"\nRESUM FINAL:")
    print(f"   Processats OK: {stats['ok']}")
    print(f"   Fallats (MediaPipe): {stats['fail_read']}")
    print(f"   V√≠deo no trobat: {stats['no_video']}")
    print(f"   Etiqueta fora de rang: {stats['bad_label']}")
    print(f"   Files amb error: {stats['error']}")
    if first_error is not None:
        print(f"   Primer error: {first_error}")

    return X, Y

In [None]:
# --- 8. TRAINING ---
class HandDataset(Dataset):
    """Index-aligned pairing of feature rows (``x``) and their labels (``y``)."""

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        # Size is driven by the feature container
        return len(self.x)

    def __getitem__(self, i):
        sample = self.x[i]
        label = self.y[i]
        return sample, label

def train_local(epochs=100, batch_size=32, lr=0.001, save_path=None):
    """Prepare the dataset, train an ``EurekaNet`` and save its weights.

    Args:
        epochs: Number of passes over the dataset.
        batch_size: Samples per optimisation step; must be at least 2
            because the model uses BatchNorm1d in training mode.
        lr: Adam learning rate.
        save_path: Where to write the ``state_dict``. ``None`` (the default)
            uses ``eureka_model_{epochs}_epochs.pth``, i.e. the previous
            file name for the default 100 epochs.

    Returns:
        None. Progress (training accuracy and mean batch loss per epoch) is
        printed, and the weights are written to ``save_path``.

    Raises:
        ValueError: if ``batch_size`` is smaller than 2.
    """
    if batch_size < 2:
        raise ValueError("batch_size must be >= 2 (BatchNorm1d cannot train on single-sample batches)")

    X, Y = preparar_dataset_txt()

    if not X:
        print("No hi ha dades per entrenar.")
        return

    if len(X) < 2:
        print("Calen almenys 2 mostres per entrenar (BatchNorm1d no admet lots d'una sola mostra).")
        return

    if save_path is None:
        save_path = f"eureka_model_{epochs}_epochs.pth"

    dataset = HandDataset(torch.stack(X), torch.tensor(Y, dtype=torch.long))
    # BatchNorm1d raises in training mode when a batch holds a single sample.
    # That happens exactly when one sample is left over, so drop the trailing
    # batch only then; with shuffling, the dropped sample changes every epoch.
    drop_last = len(dataset) % batch_size == 1
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=drop_last)

    print(f"Iniciant entrenament al dispositiu: {DEVICE}")

    model = EurekaNet().to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    model.train()

    for ep in range(epochs):
        tot_loss = 0
        correct = 0
        total = 0

        for x, y in dataloader:
            x, y = x.to(DEVICE), y.to(DEVICE)

            optimizer.zero_grad()
            out = model(x)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()

            tot_loss += loss.item()
            # Accuracy is measured on the training batches themselves
            # (dropout active), so it is not a validation score.
            _, pred = torch.max(out, 1)
            correct += (pred == y).sum().item()
            total += y.size(0)

        print(f"Epoch {ep+1}/{epochs}: Acc {100*correct/total:.2f}% | Loss {tot_loss/len(dataloader):.4f}")

    torch.save(model.state_dict(), save_path)
    print(f"Model guardat a: {os.path.abspath(save_path)}")

In [18]:
# --- 9. RUN ---
# Start dataset preparation and training only when this code is executed as
# the main module (as it is when the cell is run), not when it is imported.
if __name__ == "__main__":
    train_local()

üîç Indexant v√≠deos a: /Users/jaumemil/Desktop/IA/TFG/Eureka/videos
‚úÖ Trobats 200 fitxers de v√≠deo.
üìñ Llegint llista d'entrenament: annotations/Annot_TrainList.txt
‚öôÔ∏è Inicialitzant MediaPipe (una sola vegada)...
üöÄ Processant 4039 gestos (Ara anir√† r√†pid)...


I0000 00:00:1765796948.343319 1305568 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M2


  0%|          | 0/4039 [00:00<?, ?it/s]

W0000 00:00:1765796948.370965 1645173 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1765796948.382114 1645173 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.



üìä RESUM FINAL:
   ‚úÖ Processats OK: 3956
   ‚ùå Fallats (MediaPipe): 83
   ‚ùå V√≠deo no trobat: 0
üöÄ Iniciant entrenament al dispositiu: mps
Epoch 1/100: Acc 49.70% | Loss 1.6651
Epoch 2/100: Acc 69.44% | Loss 1.0356
Epoch 3/100: Acc 75.35% | Loss 0.8172
Epoch 4/100: Acc 78.08% | Loss 0.7100
Epoch 5/100: Acc 77.93% | Loss 0.6722
Epoch 6/100: Acc 79.50% | Loss 0.6111
Epoch 7/100: Acc 80.86% | Loss 0.5742
Epoch 8/100: Acc 81.12% | Loss 0.5624
Epoch 9/100: Acc 81.72% | Loss 0.5386
Epoch 10/100: Acc 81.67% | Loss 0.5181
Epoch 11/100: Acc 83.14% | Loss 0.5021
Epoch 12/100: Acc 83.77% | Loss 0.4692
Epoch 13/100: Acc 83.97% | Loss 0.4750
Epoch 14/100: Acc 84.10% | Loss 0.4443
Epoch 15/100: Acc 84.48% | Loss 0.4473
Epoch 16/100: Acc 85.59% | Loss 0.4229
Epoch 17/100: Acc 85.19% | Loss 0.4246
Epoch 18/100: Acc 85.26% | Loss 0.4138
Epoch 19/100: Acc 84.98% | Loss 0.4158
Epoch 20/100: Acc 86.27% | Loss 0.3938
Epoch 21/100: Acc 86.50% | Loss 0.3887
Epoch 22/100: Acc 86.02% | Loss 0.3726
Ep