In [1]:
from google.colab import drive
import os

# Montar Google Drive (opcional pero recomendado)
drive.mount('/content/drive')

# Clonar tu repositorio
GITHUB_USER = "PieroCampos"
REPO_NAME = "deep-learning-retinal-classification"

# Si es primera vez
if not os.path.exists(REPO_NAME):
    !git clone https://github.com/PieroCampos/deep-learning-retinal-classification.git
    %cd {REPO_NAME}
else:
    %cd {REPO_NAME}
    !git pull

print("‚úÖ Repositorio conectado!")

Mounted at /content/drive
Cloning into 'deep-learning-retinal-classification'...
remote: Enumerating objects: 29, done.[K
remote: Counting objects: 100% (29/29), done.[K
remote: Compressing objects: 100% (27/27), done.[K
remote: Total 29 (delta 12), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (29/29), 19.28 KiB | 2.75 MiB/s, done.
Resolving deltas: 100% (12/12), done.
/content/deep-learning-retinal-classification
‚úÖ Repositorio conectado!


In [2]:
#Instalar dependencias

!pip install -q torch torchvision
!pip install -q timm  # Para modelos pre-entrenados
!pip install -q scikit-learn pandas matplotlib seaborn
!pip install -q kaggle  # Para descargar dataset

In [3]:
# ============================================
# CELL 3: Montar Google Drive
# ============================================
from google.colab import drive

print(" Montando Google Drive...")
drive.mount('/content/drive')

print("\n Google Drive montado!")
print("\n Verificando archivos en Drive...")

# Listar contenido de tu carpeta del proyecto
!ls -lh "/content/drive/MyDrive/DL_Project_ODIR/"

 Montando Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

 Google Drive montado!

 Verificando archivos en Drive...
total 64M
-rw------- 1 root root  64M Dec 28 23:27  final-project-deep-learning-fall-2025.zip
drwx------ 2 root root 4.0K Dec 29 14:03 'Oulu University'


In [4]:
# ============================================
# CELL 4: Extraer dataset desde Google Drive
# ============================================
import os
import zipfile

# Crear directorio para datos
!mkdir -p ./data

# Ruta al archivo ZIP en tu Drive
zip_path = "/content/drive/MyDrive/DL_Project_ODIR/final-project-deep-learning-fall-2025.zip"

# Verificar que existe
if os.path.exists(zip_path):
    print(f" Archivo encontrado: {zip_path}")
    print(f" Tama√±o: {os.path.getsize(zip_path) / (1024*1024):.2f} MB")
    print("\n Descomprimiendo... (esto puede tomar 2-3 minutos)")

    # Descomprimir
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall('./data')

    print("\n ¬°Dataset extra√≠do correctamente!")

else:
    print(f" No se encontr√≥ el archivo en: {zip_path}")
    print("\n Archivos disponibles en tu carpeta:")
    !ls "/content/drive/MyDrive/DL_Project_ODIR/"

 Archivo encontrado: /content/drive/MyDrive/DL_Project_ODIR/final-project-deep-learning-fall-2025.zip
 Tama√±o: 63.10 MB

 Descomprimiendo... (esto puede tomar 2-3 minutos)

 ¬°Dataset extra√≠do correctamente!


In [5]:
# ============================================
# Verificar estructura del dataset
# ============================================
import os

print(" ESTRUCTURA DEL DATASET:")
print("="*60)

# Funci√≥n para mostrar √°rbol de directorios
def show_tree(path, prefix="", max_files=10):
    try:
        items = sorted(os.listdir(path))
        dirs = [i for i in items if os.path.isdir(os.path.join(path, i))]
        files = [i for i in items if os.path.isfile(os.path.join(path, i))]

        # Mostrar directorios
        for d in dirs:
            print(f"{prefix}üìÅ {d}/")
            show_tree(os.path.join(path, d), prefix + "  ", max_files)

        # Mostrar archivos
        for i, f in enumerate(files):
            if i < max_files:
                size = os.path.getsize(os.path.join(path, f))
                size_str = f"{size/1024:.1f} KB" if size < 1024*1024 else f"{size/(1024*1024):.1f} MB"
                print(f"{prefix}üìÑ {f} ({size_str})")
            elif i == max_files:
                print(f"{prefix}   ... y {len(files) - max_files} archivos m√°s")
                break

    except PermissionError:
        print(f"{prefix} Sin permisos")

show_tree('./data')

print("\n" + "="*60)

# Contar im√°genes si existen
if os.path.exists('./data/images'):
    num_images = len([f for f in os.listdir('./data/images') if f.endswith(('.jpg', '.jpeg', '.png'))])
    print(f"üì∏ Total de im√°genes encontradas: {num_images}")

 ESTRUCTURA DEL DATASET:
üìÅ final_project_resources/
  üìÅ .idea/
    üìÅ inspectionProfiles/
      üìÑ profiles_settings.xml (0.2 KB)
    üìÑ .gitignore (0.0 KB)
    üìÑ final_project_resources.iml (0.3 KB)
    üìÑ misc.xml (0.3 KB)
    üìÑ modules.xml (0.3 KB)
    üìÑ workspace.xml (2.0 KB)
  üìÅ images/
    üìÅ offsite_test/
      üìÑ 102_left.jpg (7.3 KB)
      üìÑ 1071_left.jpg (7.4 KB)
      üìÑ 1147_right.jpg (8.5 KB)
      üìÑ 1210_left.jpg (5.1 KB)
      üìÑ 1212_left.jpg (6.4 KB)
      üìÑ 1221_right.jpg (4.5 KB)
      üìÑ 1229_left.jpg (5.2 KB)
      üìÑ 1234_left.jpg (6.6 KB)
      üìÑ 1237_right.jpg (6.3 KB)
      üìÑ 1239_left.jpg (6.2 KB)
         ... y 190 archivos m√°s
    üìÅ onsite_test/
      üìÑ 1022_left.jpg (5.6 KB)
      üìÑ 1022_right.jpg (5.3 KB)
      üìÑ 102_right.jpg (7.8 KB)
      üìÑ 1071_right.jpg (7.5 KB)
      üìÑ 1082_right.jpg (6.8 KB)
      üìÑ 1138_right.jpg (5.8 KB)
      üìÑ 1157_right.jpg (7.3 KB)
      üìÑ 1167_ri

In [6]:
# ============================================
# CELL 6: Verificar archivos CSV
# ============================================
import pandas as pd

print(" VERIFICANDO ARCHIVOS CSV:")
print("="*60)

# Lista de archivos esperados
csv_files = {
    'train.csv': './data/final_project_resources/train.csv',
    'val.csv': './data/final_project_resources/val.csv',
    'offsite_test.csv': './data/final_project_resources/offsite_test.csv',
    'onsite_test_submission.csv': './data/final_project_resources/onsite_test_submission.csv'
}

for name, path in csv_files.items():
    if os.path.exists(path):
        df = pd.read_csv(path)
        print(f"\n** {name}")
        print(f"   Filas: {len(df)}")
        print(f"   Columnas: {list(df.columns)}")
        print(f"   Primeras filas:")
        print(df.head(2).to_string(index=False))
    else:
        print(f"\n‚ùå {name} - NO ENCONTRADO")
        print(f"   Buscando en: {path}")

print("\n" + "="*60)

 VERIFICANDO ARCHIVOS CSV:

** train.csv
   Filas: 800
   Columnas: ['id', 'D', 'G', 'A']
   Primeras filas:
           id  D  G  A
913_right.jpg  1  0  0
 281_left.jpg  1  0  0

** val.csv
   Filas: 200
   Columnas: ['id', 'D', 'G', 'A']
   Primeras filas:
           id  D  G  A
184_right.jpg  1  0  0
4488_left.jpg  1  0  0

** offsite_test.csv
   Filas: 200
   Columnas: ['id', 'D', 'G', 'A']
   Primeras filas:
           id  D  G  A
568_right.jpg  1  0  0
 748_left.jpg  1  0  0

** onsite_test_submission.csv
   Filas: 250
   Columnas: ['id', 'D', 'G', 'A']
   Primeras filas:
            id  D  G  A
4595_right.jpg  0  0  0
 4155_left.jpg  0  0  0



# TASK 1.1

In [7]:
# ============================================
# ============================================
# TASK 1: TRANSFER LEARNING
# ============================================
# ============================================

print("\n" + "="*80)
print(" INICIANDO TASK 1: TRANSFER LEARNING")
print("="*80 + "\n")


 INICIANDO TASK 1: TRANSFER LEARNING



In [8]:
# ============================================
# Importar librer√≠as necesarias para Task 1
# ============================================
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np
from PIL import Image

print(" Librer√≠as importadas!")
print(f" Device disponible: {'GPU ' if torch.cuda.is_available() else 'CPU '}")

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

 Librer√≠as importadas!
 Device disponible: GPU 


In [9]:
# ============================================
# Clase Dataset (Multi-label)
# ============================================
class RetinaMultiLabelDataset(Dataset):
    def __init__(self, csv_file, image_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        img_path = os.path.join(self.image_dir, row.iloc[0])
        img = Image.open(img_path).convert("RGB")
        labels = torch.tensor(row[1:].values.astype("float32"))
        if self.transform:
            img = self.transform(img)
        return img, labels

print(" Clase Dataset definida!")

 Clase Dataset definida!


In [10]:
# ============================================
# Funci√≥n para construir modelos
# ============================================
def build_model(backbone="resnet18", num_classes=3, pretrained=False):
    """
    Construye un modelo con el backbone especificado.

    Args:
        backbone: 'resnet18' o 'efficientnet'
        num_classes: n√∫mero de clases (3 para DR, G, AMD)
        pretrained: usar pesos de ImageNet (no usado aqu√≠)
    """
    if backbone == "resnet18":
        model = models.resnet18(pretrained=pretrained)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    elif backbone == "efficientnet":
        model = models.efficientnet_b0(pretrained=pretrained)
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
    else:
        raise ValueError(f"Backbone no soportado: {backbone}")

    return model

print(" Funci√≥n build_model definida!")

 Funci√≥n build_model definida!


In [11]:
# ============================================
# Funci√≥n de evaluaci√≥n
# ============================================
def evaluate_model(model, test_loader, device, dataset_name="Test"):
    """
    Eval√∫a un modelo y retorna m√©tricas por enfermedad.
    """
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for imgs, labels in test_loader:
            imgs = imgs.to(device)
            outputs = model(imgs)
            probs = torch.sigmoid(outputs).cpu().numpy()
            preds = (probs > 0.5).astype(int)

            y_true.extend(labels.numpy())
            y_pred.extend(preds)

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    disease_names = ["DR", "G", "AMD"]
    results = {}

    print(f"\n{'='*70}")
    print(f" RESULTADOS - {dataset_name}")
    print(f"{'='*70}")

    f_scores = []
    for i, disease in enumerate(disease_names):
        y_t = y_true[:, i]
        y_p = y_pred[:, i]

        precision = precision_score(y_t, y_p, zero_division=0)
        recall = recall_score(y_t, y_p, zero_division=0)
        f1 = f1_score(y_t, y_p, zero_division=0)

        results[disease] = {
            'precision': precision,
            'recall': recall,
            'f1': f1
        }

        f_scores.append(f1)

        print(f"\n{disease}:")
        print(f"  Precision: {precision:.4f}")
        print(f"  Recall:    {recall:.4f}")
        print(f"  F-score:   {f1:.4f}")

    avg_f1 = np.mean(f_scores)
    results['average_f1'] = avg_f1

    print(f"\n{'='*70}")
    print(f" AVERAGE F-SCORE: {avg_f1:.4f}")
    print(f"{'='*70}\n")

    return results

print(" Funci√≥n evaluate_model definida!")

 Funci√≥n evaluate_model definida!


In [12]:
# ============================================
# TASK 1.1 - No Fine-tuning
# Evaluar modelos pre-entrenados sin modificaci√≥n
# ============================================

print("\n" + "="*80)
print(" TASK 1.1: NO FINE-TUNING")
print("   Evaluando modelos pre-entrenados directamente en ODIR test set")
print("="*80 + "\n")

# Configuraci√≥n de rutas
BASE_DIR = "./data/final_project_resources"
offsite_test_csv = f"{BASE_DIR}/offsite_test.csv"
offsite_test_dir = f"{BASE_DIR}/images/offsite_test"

# Transform (mismo que se us√≥ en pre-entrenamiento)
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                       std=[0.229, 0.224, 0.225]),
])

# Crear dataset y dataloader para offsite test
offsite_test_ds = RetinaMultiLabelDataset(
    offsite_test_csv,
    offsite_test_dir,
    transform
)

offsite_test_loader = DataLoader(
    offsite_test_ds,
    batch_size=32,
    shuffle=False,
    num_workers=2
)

print(f" Offsite test set cargado: {len(offsite_test_ds)} im√°genes\n")

# Almacenar resultados
task1_1_results = {}


 TASK 1.1: NO FINE-TUNING
   Evaluando modelos pre-entrenados directamente en ODIR test set

 Offsite test set cargado: 200 im√°genes



In [13]:
# ========================
# Evaluar ResNet18
# ========================
print(" EVALUANDO RESNET18")
print("-" * 70)

resnet_model = build_model("resnet18", num_classes=3, pretrained=False).to(DEVICE)
resnet_pretrained_path = f"{BASE_DIR}/pretrained_backbone/ckpt_resnet18_ep50.pt"

# Cargar pesos pre-entrenados
state_dict = torch.load(resnet_pretrained_path, map_location=DEVICE)
resnet_model.load_state_dict(state_dict)

print(f" Pesos cargados desde: {resnet_pretrained_path}\n")

# Evaluar
resnet_results = evaluate_model(
    resnet_model,
    offsite_test_loader,
    DEVICE,
    "ResNet18 - No Fine-tuning - Offsite Test"
)

task1_1_results['resnet18'] = resnet_results

 EVALUANDO RESNET18
----------------------------------------------------------------------




 Pesos cargados desde: ./data/final_project_resources/pretrained_backbone/ckpt_resnet18_ep50.pt


 RESULTADOS - ResNet18 - No Fine-tuning - Offsite Test

DR:
  Precision: 0.7172
  Recall:    0.5071
  F-score:   0.5941

G:
  Precision: 0.5750
  Recall:    0.4694
  F-score:   0.5169

AMD:
  Precision: 0.3019
  Recall:    0.7273
  F-score:   0.4267

 AVERAGE F-SCORE: 0.5126



In [14]:
# ========================
# Evaluar EfficientNet
# ========================
print("\n EVALUANDO EFFICIENTNET")
print("-" * 70)

effnet_model = build_model("efficientnet", num_classes=3, pretrained=False).to(DEVICE)
effnet_pretrained_path = f"{BASE_DIR}/pretrained_backbone/ckpt_efficientnet_ep50.pt"

# Cargar pesos pre-entrenados
state_dict = torch.load(effnet_pretrained_path, map_location=DEVICE)
effnet_model.load_state_dict(state_dict)

print(f" Pesos cargados desde: {effnet_pretrained_path}\n")

# Evaluar
effnet_results = evaluate_model(
    effnet_model,
    offsite_test_loader,
    DEVICE,
    "EfficientNet - No Fine-tuning - Offsite Test"
)

task1_1_results['efficientnet'] = effnet_results


 EVALUANDO EFFICIENTNET
----------------------------------------------------------------------




 Pesos cargados desde: ./data/final_project_resources/pretrained_backbone/ckpt_efficientnet_ep50.pt


 RESULTADOS - EfficientNet - No Fine-tuning - Offsite Test

DR:
  Precision: 0.7459
  Recall:    0.6500
  F-score:   0.6947

G:
  Precision: 0.5769
  Recall:    0.6122
  F-score:   0.5941

AMD:
  Precision: 0.2464
  Recall:    0.7727
  F-score:   0.3736

 AVERAGE F-SCORE: 0.5541



In [15]:
# ========================
# Resumen Task 1.1
# ========================
print("\n" + "="*80)
print(" RESUMEN TASK 1.1 - NO FINE-TUNING (Offsite Test Set)")
print("="*80)

print(f"\n{'Modelo':<15} {'Avg F-score':<15} {'Status':<20}")
print("-" * 50)

resnet_f1 = task1_1_results['resnet18']['average_f1']
effnet_f1 = task1_1_results['efficientnet']['average_f1']

# Comparar con referencias
resnet_ref = 56.7
effnet_ref = 60.4

resnet_status = " Comparable" if resnet_f1 >= resnet_ref * 0.9 else " Bajo"
effnet_status = " Comparable" if effnet_f1 >= effnet_ref * 0.9 else " Bajo"

print(f"{'ResNet18':<15} {resnet_f1*100:>6.2f}%         {resnet_status}")
print(f"{'EfficientNet':<15} {effnet_f1*100:>6.2f}%         {effnet_status}")

print(f"\n Referencias esperadas (onsite test):")
print(f"   ResNet18: {resnet_ref}%")
print(f"   EfficientNet: {effnet_ref}%")

print("\n Nota: Estos son resultados en offsite test.")
print("   Para onsite test, necesitas generar predicciones y submitir a Kaggle.")
print("="*80)


 RESUMEN TASK 1.1 - NO FINE-TUNING (Offsite Test Set)

Modelo          Avg F-score     Status              
--------------------------------------------------
ResNet18         51.26%          Bajo
EfficientNet     55.41%          Bajo

 Referencias esperadas (onsite test):
   ResNet18: 56.7%
   EfficientNet: 60.4%

 Nota: Estos son resultados en offsite test.
   Para onsite test, necesitas generar predicciones y submitir a Kaggle.


# TASK 1.2

In [16]:
# ============================================
# ============================================
# TASK 1.2: FROZEN BACKBONE - FINE-TUNE CLASSIFIER ONLY
# ============================================
# ============================================

print("\n" + "="*80)
print("üöÄ INICIANDO TASK 1.2: FROZEN BACKBONE - FINE-TUNE CLASSIFIER ONLY")
print("   Congelare el backbone y solo entrenar√© el clasificador final")
print("="*80 + "\n")


üöÄ INICIANDO TASK 1.2: FROZEN BACKBONE - FINE-TUNE CLASSIFIER ONLY
   Congelare el backbone y solo entrenar√© el clasificador final



In [17]:
# ============================================
# Funci√≥n de entrenamiento
# ============================================
import torch.optim as optim

def train_model(model, train_loader, val_loader, criterion, optimizer,
                num_epochs, device, model_name="Model"):
    """
    Entrena un modelo y retorna el mejor seg√∫n validation loss.
    """
    best_val_loss = float('inf')
    best_model_state = None
    train_losses = []
    val_losses = []

    print(f"\n Iniciando entrenamiento de {model_name}")
    print(f"   √âpocas: {num_epochs}")
    print(f"   Device: {device}")
    print("-" * 70)

    for epoch in range(num_epochs):
        # ========================
        # TRAINING
        # ========================
        model.train()
        train_loss = 0.0

        for batch_idx, (imgs, labels) in enumerate(train_loader):
            imgs, labels = imgs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * imgs.size(0)

        train_loss /= len(train_loader.dataset)
        train_losses.append(train_loss)

        # ========================
        # VALIDATION
        # ========================
        model.eval()
        val_loss = 0.0

        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                outputs = model(imgs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * imgs.size(0)

        val_loss /= len(val_loader.dataset)
        val_losses.append(val_loss)

        # Print progress
        print(f"Epoch {epoch+1:2d}/{num_epochs} | "
              f"Train Loss: {train_loss:.4f} | "
              f"Val Loss: {val_loss:.4f}", end="")

        # Save best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = model.state_dict().copy()
            print("  Best!")
        else:
            print()

    print("-" * 70)
    print(f" Entrenamiento completado!")
    print(f"   Best Val Loss: {best_val_loss:.4f}")

    # Load best model
    model.load_state_dict(best_model_state)

    return model, train_losses, val_losses

print(" Funci√≥n de entrenamiento definida!")

 Funci√≥n de entrenamiento definida!


In [18]:
# ============================================
# Preparar datasets de Train y Val
# ============================================

print("üì¶ Preparando datasets de entrenamiento y validaci√≥n...")

# Paths
train_csv = f"{BASE_DIR}/train.csv"
val_csv = f"{BASE_DIR}/val.csv"
train_dir = f"{BASE_DIR}/images/train"
val_dir = f"{BASE_DIR}/images/val"

# Transforms (con data augmentation para train)
train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                       std=[0.229, 0.224, 0.225]),
])

val_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                       std=[0.229, 0.224, 0.225]),
])

# Datasets
train_ds = RetinaMultiLabelDataset(train_csv, train_dir, train_transform)
val_ds = RetinaMultiLabelDataset(val_csv, val_dir, val_transform)

# Dataloaders
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False, num_workers=2)

print(f" Datasets cargados:")
print(f"   Train: {len(train_ds)} im√°genes")
print(f"   Val:   {len(val_ds)} im√°genes")
print(f"   Test:  {len(offsite_test_ds)} im√°genes")

üì¶ Preparando datasets de entrenamiento y validaci√≥n...
 Datasets cargados:
   Train: 800 im√°genes
   Val:   200 im√°genes
   Test:  200 im√°genes


In [19]:
# ============================================
# TASK 1.2 - ResNet18 - Frozen Backbone
# ============================================

print("\n" + "="*80)
print("üî¨ TASK 1.2 - RESNET18 - FROZEN BACKBONE")
print("="*80)

# 1. Cargar modelo pre-entrenado
resnet_frozen = build_model("resnet18", num_classes=3, pretrained=False).to(DEVICE)
resnet_frozen.load_state_dict(torch.load(
    f"{BASE_DIR}/pretrained_backbone/ckpt_resnet18_ep50.pt",
    map_location=DEVICE
))

print("‚úÖ Modelo pre-entrenado cargado")

# 2. CONGELAR todas las capas del backbone
for name, param in resnet_frozen.named_parameters():
    if 'fc' not in name:  # Congelar todo excepto la capa final
        param.requires_grad = False
    else:
        param.requires_grad = True

# Verificar qu√© est√° congelado
trainable_params = sum(p.numel() for p in resnet_frozen.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in resnet_frozen.parameters())

print(f"\nüìä Par√°metros del modelo:")
print(f"   Total:      {total_params:,}")
print(f"   Trainable:  {trainable_params:,} ({100*trainable_params/total_params:.2f}%)")
print(f"   Frozen:     {total_params - trainable_params:,}")

# 3. Configurar entrenamiento
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, resnet_frozen.parameters()),
                       lr=1e-3)  # Learning rate m√°s alto porque solo entrenamos classifier

# 4. Entrenar
NUM_EPOCHS = 20

resnet_frozen_trained, train_losses, val_losses = train_model(
    model=resnet_frozen,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=NUM_EPOCHS,
    device=DEVICE,
    model_name="ResNet18 - Frozen Backbone"
)


üî¨ TASK 1.2 - RESNET18 - FROZEN BACKBONE




‚úÖ Modelo pre-entrenado cargado

üìä Par√°metros del modelo:
   Total:      11,178,051
   Trainable:  1,539 (0.01%)
   Frozen:     11,176,512

 Iniciando entrenamiento de ResNet18 - Frozen Backbone
   √âpocas: 20
   Device: cuda
----------------------------------------------------------------------
Epoch  1/20 | Train Loss: 1.2138 | Val Loss: 0.7931  Best!
Epoch  2/20 | Train Loss: 0.6450 | Val Loss: 0.6006  Best!
Epoch  3/20 | Train Loss: 0.4862 | Val Loss: 0.5514  Best!
Epoch  4/20 | Train Loss: 0.4892 | Val Loss: 0.5652
Epoch  5/20 | Train Loss: 0.4757 | Val Loss: 0.5406  Best!
Epoch  6/20 | Train Loss: 0.4633 | Val Loss: 0.5403  Best!
Epoch  7/20 | Train Loss: 0.4709 | Val Loss: 0.5066  Best!
Epoch  8/20 | Train Loss: 0.4517 | Val Loss: 0.5188
Epoch  9/20 | Train Loss: 0.4677 | Val Loss: 0.5346
Epoch 10/20 | Train Loss: 0.4547 | Val Loss: 0.5185
Epoch 11/20 | Train Loss: 0.4638 | Val Loss: 0.5094
Epoch 12/20 | Train Loss: 0.4604 | Val Loss: 0.5323
Epoch 13/20 | Train Loss: 0.4484

In [20]:
# ========================
# Evaluar ResNet18 - Frozen Backbone
# ========================

print("\n EVALUANDO RESNET18 - FROZEN BACKBONE")

resnet_frozen_results = evaluate_model(
    resnet_frozen_trained,
    offsite_test_loader,
    DEVICE,
    "ResNet18 - Frozen Backbone - Offsite Test"
)


 EVALUANDO RESNET18 - FROZEN BACKBONE

 RESULTADOS - ResNet18 - Frozen Backbone - Offsite Test

DR:
  Precision: 0.7733
  Recall:    0.9500
  F-score:   0.8526

G:
  Precision: 0.8889
  Recall:    0.1633
  F-score:   0.2759

AMD:
  Precision: 0.7500
  Recall:    0.1364
  F-score:   0.2308

 AVERAGE F-SCORE: 0.4531



In [21]:
# ============================================
# TASK 1.2 - EfficientNet - Frozen Backbone
# ============================================

print("\n" + "="*80)
print("üî¨ TASK 1.2 - EFFICIENTNET - FROZEN BACKBONE")
print("="*80)

# 1. Cargar modelo pre-entrenado
effnet_frozen = build_model("efficientnet", num_classes=3, pretrained=False).to(DEVICE)
effnet_frozen.load_state_dict(torch.load(
    f"{BASE_DIR}/pretrained_backbone/ckpt_efficientnet_ep50.pt",
    map_location=DEVICE
))

print("‚úÖ Modelo pre-entrenado cargado")

# 2. CONGELAR todas las capas del backbone
for name, param in effnet_frozen.named_parameters():
    if 'classifier' not in name:  # Congelar todo excepto classifier
        param.requires_grad = False
    else:
        param.requires_grad = True

# Verificar
trainable_params = sum(p.numel() for p in effnet_frozen.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in effnet_frozen.parameters())

print(f"\nüìä Par√°metros del modelo:")
print(f"   Total:      {total_params:,}")
print(f"   Trainable:  {trainable_params:,} ({100*trainable_params/total_params:.2f}%)")
print(f"   Frozen:     {total_params - trainable_params:,}")

# 3. Configurar entrenamiento
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, effnet_frozen.parameters()),
                       lr=1e-3)

# 4. Entrenar
effnet_frozen_trained, train_losses_eff, val_losses_eff = train_model(
    model=effnet_frozen,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=NUM_EPOCHS,
    device=DEVICE,
    model_name="EfficientNet - Frozen Backbone"
)


üî¨ TASK 1.2 - EFFICIENTNET - FROZEN BACKBONE
‚úÖ Modelo pre-entrenado cargado

üìä Par√°metros del modelo:
   Total:      4,011,391
   Trainable:  3,843 (0.10%)
   Frozen:     4,007,548

 Iniciando entrenamiento de EfficientNet - Frozen Backbone
   √âpocas: 20
   Device: cuda
----------------------------------------------------------------------




Epoch  1/20 | Train Loss: 1.0481 | Val Loss: 0.7278  Best!
Epoch  2/20 | Train Loss: 0.4829 | Val Loss: 0.5484  Best!
Epoch  3/20 | Train Loss: 0.4373 | Val Loss: 0.4983  Best!
Epoch  4/20 | Train Loss: 0.4040 | Val Loss: 0.4753  Best!
Epoch  5/20 | Train Loss: 0.3943 | Val Loss: 0.4991
Epoch  6/20 | Train Loss: 0.3958 | Val Loss: 0.4643  Best!
Epoch  7/20 | Train Loss: 0.3816 | Val Loss: 0.4570  Best!
Epoch  8/20 | Train Loss: 0.3920 | Val Loss: 0.4430  Best!
Epoch  9/20 | Train Loss: 0.3616 | Val Loss: 0.4709
Epoch 10/20 | Train Loss: 0.3569 | Val Loss: 0.4423  Best!
Epoch 11/20 | Train Loss: 0.3713 | Val Loss: 0.4614
Epoch 12/20 | Train Loss: 0.3523 | Val Loss: 0.4479
Epoch 13/20 | Train Loss: 0.3699 | Val Loss: 0.4305  Best!
Epoch 14/20 | Train Loss: 0.3590 | Val Loss: 0.4510
Epoch 15/20 | Train Loss: 0.3534 | Val Loss: 0.4608
Epoch 16/20 | Train Loss: 0.3523 | Val Loss: 0.4507
Epoch 17/20 | Train Loss: 0.3411 | Val Loss: 0.4343
Epoch 18/20 | Train Loss: 0.3355 | Val Loss: 0.4320
E

In [22]:
# ========================
# Evaluar EfficientNet - Frozen Backbone
# ========================

print("\n EVALUANDO EFFICIENTNET - FROZEN BACKBONE")

effnet_frozen_results = evaluate_model(
    effnet_frozen_trained,
    offsite_test_loader,
    DEVICE,
    "EfficientNet - Frozen Backbone - Offsite Test"
)


 EVALUANDO EFFICIENTNET - FROZEN BACKBONE

 RESULTADOS - EfficientNet - Frozen Backbone - Offsite Test

DR:
  Precision: 0.8561
  Recall:    0.8500
  F-score:   0.8530

G:
  Precision: 0.7353
  Recall:    0.5102
  F-score:   0.6024

AMD:
  Precision: 0.7647
  Recall:    0.5909
  F-score:   0.6667

 AVERAGE F-SCORE: 0.7074



In [23]:
# ============================================
# Comparaci√≥n Task 1.1 vs 1.2
# ============================================

print("\n" + "="*80)
print("üìä COMPARACI√ìN TASK 1.1 vs TASK 1.2")
print("="*80)

comparison_data = {
    'ResNet18': {
        'No Fine-tuning': task1_1_results['resnet18']['average_f1'] * 100,
        'Frozen Backbone': resnet_frozen_results['average_f1'] * 100
    },
    'EfficientNet': {
        'No Fine-tuning': task1_1_results['efficientnet']['average_f1'] * 100,
        'Frozen Backbone': effnet_frozen_results['average_f1'] * 100
    }
}

print(f"\n{'Modelo':<15} {'Task 1.1':<15} {'Task 1.2':<15} {'Mejora':<15}")
print("-" * 60)

for model_name, scores in comparison_data.items():
    task11 = scores['No Fine-tuning']
    task12 = scores['Frozen Backbone']
    improvement = task12 - task11

    print(f"{model_name:<15} {task11:>6.2f}%        {task12:>6.2f}%        "
          f"{'+' if improvement > 0 else ''}{improvement:>5.2f}%")

print("\nüéØ Referencias Task 1.2 (onsite test):")
print("   ResNet18:     61.4%")
print("   EfficientNet: 73.5%")
print("="*80)


üìä COMPARACI√ìN TASK 1.1 vs TASK 1.2

Modelo          Task 1.1        Task 1.2        Mejora         
------------------------------------------------------------
ResNet18         51.26%         45.31%        -5.95%
EfficientNet     55.41%         70.74%        +15.33%

üéØ Referencias Task 1.2 (onsite test):
   ResNet18:     61.4%
   EfficientNet: 73.5%


# TASK 1.3

In [24]:
# ============================================
# ============================================
# TASK 1.3: FULL FINE-TUNING
# ============================================
# ============================================

print("\n" + "="*80)
print(" INICIANDO TASK 1.3: FULL FINE-TUNING")
print("   Entrenando TODO el modelo (backbone + classifier)")
print("="*80 + "\n")


 INICIANDO TASK 1.3: FULL FINE-TUNING
   Entrenando TODO el modelo (backbone + classifier)



In [25]:
# ============================================
# TASK 1.3 - ResNet18 - Full Fine-tuning
# ============================================

print("\n" + "="*80)
print("üî¨ TASK 1.3 - RESNET18 - FULL FINE-TUNING")
print("="*80)

# 1. Cargar modelo pre-entrenado (empezamos desde los pesos originales)
resnet_full = build_model("resnet18", num_classes=3, pretrained=False).to(DEVICE)
resnet_full.load_state_dict(torch.load(
    f"{BASE_DIR}/pretrained_backbone/ckpt_resnet18_ep50.pt",
    map_location=DEVICE
))

print("‚úÖ Modelo pre-entrenado cargado")

# 2. DESCONGELAR TODO - entrenar todas las capas
for param in resnet_full.parameters():
    param.requires_grad = True

# Verificar
trainable_params = sum(p.numel() for p in resnet_full.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in resnet_full.parameters())

print(f"\nüìä Par√°metros del modelo:")
print(f"   Total:      {total_params:,}")
print(f"   Trainable:  {trainable_params:,} ({100*trainable_params/total_params:.2f}%)")

# 3. Configurar entrenamiento con LR M√ÅS BAJO
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(resnet_full.parameters(), lr=1e-4)  # ‚Üê LR m√°s bajo

# 4. Entrenar
NUM_EPOCHS = 20

resnet_full_trained, train_losses_full, val_losses_full = train_model(
    model=resnet_full,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=NUM_EPOCHS,
    device=DEVICE,
    model_name="ResNet18 - Full Fine-tuning"
)


üî¨ TASK 1.3 - RESNET18 - FULL FINE-TUNING




‚úÖ Modelo pre-entrenado cargado

üìä Par√°metros del modelo:
   Total:      11,178,051
   Trainable:  11,178,051 (100.00%)

 Iniciando entrenamiento de ResNet18 - Full Fine-tuning
   √âpocas: 20
   Device: cuda
----------------------------------------------------------------------
Epoch  1/20 | Train Loss: 0.7860 | Val Loss: 0.6476  Best!
Epoch  2/20 | Train Loss: 0.3865 | Val Loss: 0.4457  Best!
Epoch  3/20 | Train Loss: 0.2939 | Val Loss: 0.4134  Best!
Epoch  4/20 | Train Loss: 0.2496 | Val Loss: 0.4107  Best!
Epoch  5/20 | Train Loss: 0.2031 | Val Loss: 0.3946  Best!
Epoch  6/20 | Train Loss: 0.1747 | Val Loss: 0.4039
Epoch  7/20 | Train Loss: 0.1346 | Val Loss: 0.4216
Epoch  8/20 | Train Loss: 0.1170 | Val Loss: 0.4469
Epoch  9/20 | Train Loss: 0.1039 | Val Loss: 0.4404
Epoch 10/20 | Train Loss: 0.0777 | Val Loss: 0.4052
Epoch 11/20 | Train Loss: 0.0679 | Val Loss: 0.5182
Epoch 12/20 | Train Loss: 0.0685 | Val Loss: 0.4444
Epoch 13/20 | Train Loss: 0.0607 | Val Loss: 0.5565
Epoch

In [26]:
# ========================
# Evaluar ResNet18 - Full Fine-tuning
# ========================

print("\nüìä EVALUANDO RESNET18 - FULL FINE-TUNING")

resnet_full_results = evaluate_model(
    resnet_full_trained,
    offsite_test_loader,
    DEVICE,
    "ResNet18 - Full Fine-tuning - Offsite Test"
)


üìä EVALUANDO RESNET18 - FULL FINE-TUNING

 RESULTADOS - ResNet18 - Full Fine-tuning - Offsite Test

DR:
  Precision: 0.8828
  Recall:    0.9143
  F-score:   0.8982

G:
  Precision: 0.8750
  Recall:    0.7143
  F-score:   0.7865

AMD:
  Precision: 0.7368
  Recall:    0.6364
  F-score:   0.6829

 AVERAGE F-SCORE: 0.7892



In [27]:
# ============================================
# TASK 1.3 - EfficientNet - Full Fine-tuning
# ============================================

print("\n" + "="*80)
print("üî¨ TASK 1.3 - EFFICIENTNET - FULL FINE-TUNING")
print("="*80)

# 1. Cargar modelo pre-entrenado
effnet_full = build_model("efficientnet", num_classes=3, pretrained=False).to(DEVICE)
effnet_full.load_state_dict(torch.load(
    f"{BASE_DIR}/pretrained_backbone/ckpt_efficientnet_ep50.pt",
    map_location=DEVICE
))

print("‚úÖ Modelo pre-entrenado cargado")

# 2. DESCONGELAR TODO
for param in effnet_full.parameters():
    param.requires_grad = True

# Verificar
trainable_params = sum(p.numel() for p in effnet_full.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in effnet_full.parameters())

print(f"\nüìä Par√°metros del modelo:")
print(f"   Total:      {total_params:,}")
print(f"   Trainable:  {trainable_params:,} ({100*trainable_params/total_params:.2f}%)")

# 3. Configurar entrenamiento con LR BAJO
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(effnet_full.parameters(), lr=1e-4)  # ‚Üê LR m√°s bajo

# 4. Entrenar
effnet_full_trained, train_losses_eff_full, val_losses_eff_full = train_model(
    model=effnet_full,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=NUM_EPOCHS,
    device=DEVICE,
    model_name="EfficientNet - Full Fine-tuning"
)


üî¨ TASK 1.3 - EFFICIENTNET - FULL FINE-TUNING




‚úÖ Modelo pre-entrenado cargado

üìä Par√°metros del modelo:
   Total:      4,011,391
   Trainable:  4,011,391 (100.00%)

 Iniciando entrenamiento de EfficientNet - Full Fine-tuning
   √âpocas: 20
   Device: cuda
----------------------------------------------------------------------
Epoch  1/20 | Train Loss: 0.9968 | Val Loss: 0.7834  Best!
Epoch  2/20 | Train Loss: 0.4794 | Val Loss: 0.6220  Best!
Epoch  3/20 | Train Loss: 0.3731 | Val Loss: 0.5435  Best!
Epoch  4/20 | Train Loss: 0.2751 | Val Loss: 0.5004  Best!
Epoch  5/20 | Train Loss: 0.2414 | Val Loss: 0.5002  Best!
Epoch  6/20 | Train Loss: 0.2182 | Val Loss: 0.4929  Best!
Epoch  7/20 | Train Loss: 0.1981 | Val Loss: 0.4792  Best!
Epoch  8/20 | Train Loss: 0.1613 | Val Loss: 0.5240
Epoch  9/20 | Train Loss: 0.1614 | Val Loss: 0.5086
Epoch 10/20 | Train Loss: 0.1356 | Val Loss: 0.5179
Epoch 11/20 | Train Loss: 0.1181 | Val Loss: 0.5111
Epoch 12/20 | Train Loss: 0.1120 | Val Loss: 0.5328
Epoch 13/20 | Train Loss: 0.1028 | Val Lo

In [28]:
# ========================
# Evaluar EfficientNet - Full Fine-tuning
# ========================

print("\nüìä EVALUANDO EFFICIENTNET - FULL FINE-TUNING")

effnet_full_results = evaluate_model(
    effnet_full_trained,
    offsite_test_loader,
    DEVICE,
    "EfficientNet - Full Fine-tuning - Offsite Test"
)


üìä EVALUANDO EFFICIENTNET - FULL FINE-TUNING

 RESULTADOS - EfficientNet - Full Fine-tuning - Offsite Test

DR:
  Precision: 0.8519
  Recall:    0.8214
  F-score:   0.8364

G:
  Precision: 0.7273
  Recall:    0.6531
  F-score:   0.6882

AMD:
  Precision: 0.4688
  Recall:    0.6818
  F-score:   0.5556

 AVERAGE F-SCORE: 0.6934



In [29]:
# ============================================
# COMPARACI√ìN COMPLETA: TASK 1.1, 1.2, 1.3
# ============================================

print("\n" + "="*80)
print("üìä RESUMEN COMPLETO - TASK 1: TRANSFER LEARNING")
print("="*80)

# Tabla de resultados
print(f"\n{'Modelo':<15} {'Task 1.1':<12} {'Task 1.2':<12} {'Task 1.3':<12} {'Mejor':<10}")
print("-" * 70)

# ResNet18
resnet_scores = [
    task1_1_results['resnet18']['average_f1'] * 100,
    resnet_frozen_results['average_f1'] * 100,
    resnet_full_results['average_f1'] * 100
]
best_resnet = max(resnet_scores)
print(f"{'ResNet18':<15} {resnet_scores[0]:>6.2f}%     {resnet_scores[1]:>6.2f}%     "
      f"{resnet_scores[2]:>6.2f}%     {best_resnet:>6.2f}%")

# EfficientNet
effnet_scores = [
    task1_1_results['efficientnet']['average_f1'] * 100,
    effnet_frozen_results['average_f1'] * 100,
    effnet_full_results['average_f1'] * 100
]
best_effnet = max(effnet_scores)
print(f"{'EfficientNet':<15} {effnet_scores[0]:>6.2f}%     {effnet_scores[1]:>6.2f}%     "
      f"{effnet_scores[2]:>6.2f}%     {best_effnet:>6.2f}%")

print("\n" + "-" * 70)
print("üéØ REFERENCIAS (onsite test):")
print(f"{'Modelo':<15} {'Task 1.1':<12} {'Task 1.2':<12} {'Task 1.3':<12}")
print("-" * 70)
print(f"{'ResNet18':<15} {'56.7%':<12} {'61.4%':<12} {'78.8%':<12}")
print(f"{'EfficientNet':<15} {'60.4%':<12} {'73.5%':<12} {'80.4%':<12}")

print("\nüí° Nota: Estos son resultados en offsite test.")
print("   Los resultados onsite se obtendr√°n al submitir a Kaggle.")
print("="*80)

# Guardar mejores modelos
task1_best_models = {
    'resnet18': resnet_full_trained if resnet_scores[2] == best_resnet else
                (resnet_frozen_trained if resnet_scores[1] == best_resnet else resnet_model),
    'efficientnet': effnet_full_trained if effnet_scores[2] == best_effnet else
                    (effnet_frozen_trained if effnet_scores[1] == best_effnet else effnet_model)
}

print("\n‚úÖ Mejores modelos guardados en memoria para predicciones onsite")


üìä RESUMEN COMPLETO - TASK 1: TRANSFER LEARNING

Modelo          Task 1.1     Task 1.2     Task 1.3     Mejor     
----------------------------------------------------------------------
ResNet18         51.26%      45.31%      78.92%      78.92%
EfficientNet     55.41%      70.74%      69.34%      70.74%

----------------------------------------------------------------------
üéØ REFERENCIAS (onsite test):
Modelo          Task 1.1     Task 1.2     Task 1.3    
----------------------------------------------------------------------
ResNet18        56.7%        61.4%        78.8%       
EfficientNet    60.4%        73.5%        80.4%       

üí° Nota: Estos son resultados en offsite test.
   Los resultados onsite se obtendr√°n al submitir a Kaggle.

‚úÖ Mejores modelos guardados en memoria para predicciones onsite


In [30]:
# ============================================
# GUARDAR MODELOS ENTRENADOS
# ============================================

print("\n" + "="*80)
print("üíæ GUARDANDO MODELOS ENTRENADOS")
print("="*80 + "\n")

import os

# Crear directorio para modelos
os.makedirs('./trained_models', exist_ok=True)

# Guardar todos los modelos de Task 1
models_to_save = {
    # Task 1.1 (ya los ten√≠amos pre-entrenados, no los guardamos de nuevo)

    # Task 1.2
    'resnet18_task1_2_frozen.pt': resnet_frozen_trained,
    'efficientnet_task1_2_frozen.pt': effnet_frozen_trained,

    # Task 1.3
    'resnet18_task1_3_full.pt': resnet_full_trained,
    'efficientnet_task1_3_full.pt': effnet_full_trained,
}

for filename, model in models_to_save.items():
    filepath = f'./trained_models/{filename}'
    torch.save(model.state_dict(), filepath)
    print(f"‚úÖ Guardado: {filename}")

print(f"\nüíæ Total: {len(models_to_save)} modelos guardados en ./trained_models/")
print("="*80)


üíæ GUARDANDO MODELOS ENTRENADOS

‚úÖ Guardado: resnet18_task1_2_frozen.pt
‚úÖ Guardado: efficientnet_task1_2_frozen.pt
‚úÖ Guardado: resnet18_task1_3_full.pt
‚úÖ Guardado: efficientnet_task1_3_full.pt

üíæ Total: 4 modelos guardados en ./trained_models/


In [35]:
# ============================================
# GENERAR PREDICCIONES ONSITE TEST
# ============================================

print("\n" + "="*80)
print("üì§ GENERANDO PREDICCIONES PARA ONSITE TEST (KAGGLE)")
print("="*80 + "\n")

# Configurar paths
onsite_test_csv = f"{BASE_DIR}/onsite_test_submission.csv"
onsite_test_dir = f"{BASE_DIR}/images/onsite_test"

# Cargar template
submission_template = pd.read_csv(onsite_test_csv)
print(f"üìã Template cargado: {len(submission_template)} im√°genes")
print(f"Columnas: {list(submission_template.columns)}")
print(f"\nPrimeras filas del template:")
print(submission_template.head())

# Crear dataset para onsite (SIN labels)
class OnsiteTestDataset(Dataset):
    def __init__(self, csv_file, image_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        img_name = row['id']
        img_path = os.path.join(self.image_dir, img_name)
        img = Image.open(img_path).convert("RGB")

        if self.transform:
            img = self.transform(img)

        return img, img_name

# Transform (sin augmentation para test)
test_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                       std=[0.229, 0.224, 0.225]),
])

# Crear dataset y loader
onsite_ds = OnsiteTestDataset(onsite_test_csv, onsite_test_dir, test_transform)
onsite_loader = DataLoader(onsite_ds, batch_size=32, shuffle=False, num_workers=2)

print(f"\n‚úÖ Onsite dataset cargado: {len(onsite_ds)} im√°genes")


üì§ GENERANDO PREDICCIONES PARA ONSITE TEST (KAGGLE)

üìã Template cargado: 250 im√°genes
Columnas: ['id', 'D', 'G', 'A']

Primeras filas del template:
               id  D  G  A
0  4595_right.jpg  0  0  0
1   4155_left.jpg  0  0  0
2    597_left.jpg  0  0  0
3  4268_right.jpg  0  0  0
4   579_right.jpg  0  0  0

‚úÖ Onsite dataset cargado: 250 im√°genes


In [32]:
# ============================================
# Funci√≥n para generar CSV de predicciones
# ============================================

def generate_predictions(model, dataloader, device, output_filename):
    """
    Genera predicciones y guarda CSV para Kaggle.
    """
    model.eval()
    predictions = []
    image_names = []

    print(f"\nüîÆ Generando predicciones con {output_filename}...")

    with torch.no_grad():
        for imgs, names in dataloader:
            imgs = imgs.to(device)
            outputs = model(imgs)
            probs = torch.sigmoid(outputs).cpu().numpy()
            preds = (probs > 0.5).astype(int)

            predictions.extend(preds)
            image_names.extend(names)

    # Crear DataFrame
    df = pd.DataFrame(predictions, columns=['D', 'G', 'A'])
    df.insert(0, 'id', image_names)

    # Guardar CSV
    output_path = f'./results/{output_filename}'
    os.makedirs('./results', exist_ok=True)
    df.to_csv(output_path, index=False)

    print(f"‚úÖ Predicciones guardadas en: {output_path}")
    print(f"   Total predicciones: {len(df)}")
    print(f"\nüìä Distribuci√≥n de predicciones:")
    print(f"   DR (D):  {df['D'].sum()} positivos")
    print(f"   G:       {df['G'].sum()} positivos")
    print(f"   AMD (A): {df['A'].sum()} positivos")

    return df

print(" Funci√≥n de predicciones definida!")

 Funci√≥n de predicciones definida!


In [33]:
# ============================================
# GENERAR PREDICCIONES PARA TODOS LOS MODELOS
# ============================================

print("\n" + "="*80)
print("üöÄ GENERANDO TODAS LAS PREDICCIONES ONSITE")
print("="*80)

# Diccionario de modelos a evaluar
models_for_prediction = {
    # Task 1.1
    'task1_1_resnet18.csv': resnet_model,
    'task1_1_efficientnet.csv': effnet_model,

    # Task 1.2
    'task1_2_resnet18.csv': resnet_frozen_trained,
    'task1_2_efficientnet.csv': effnet_frozen_trained,

    # Task 1.3
    'task1_3_resnet18.csv': resnet_full_trained,
    'task1_3_efficientnet.csv': effnet_full_trained,
}

# Generar predicciones
predictions_summary = {}

for filename, model in models_for_prediction.items():
    df = generate_predictions(model, onsite_loader, DEVICE, filename)
    predictions_summary[filename] = df

print("\n" + "="*80)
print("‚úÖ TODAS LAS PREDICCIONES GENERADAS")
print("="*80)
print(f"\nüìÅ Archivos creados en ./results/:")
for filename in models_for_prediction.keys():
    print(f"   ‚úÖ {filename}")

print("\nüí° Pr√≥ximo paso: Subir estos CSVs a Kaggle para obtener los scores reales")
print("="*80)


üöÄ GENERANDO TODAS LAS PREDICCIONES ONSITE

üîÆ Generando predicciones con task1_1_resnet18.csv...
‚úÖ Predicciones guardadas en: ./results/task1_1_resnet18.csv
   Total predicciones: 250

üìä Distribuci√≥n de predicciones:
   DR (D):  124 positivos
   G:       35 positivos
   AMD (A): 56 positivos

üîÆ Generando predicciones con task1_1_efficientnet.csv...
‚úÖ Predicciones guardadas en: ./results/task1_1_efficientnet.csv
   Total predicciones: 250

üìä Distribuci√≥n de predicciones:
   DR (D):  149 positivos
   G:       57 positivos
   AMD (A): 72 positivos

üîÆ Generando predicciones con task1_2_resnet18.csv...
‚úÖ Predicciones guardadas en: ./results/task1_2_resnet18.csv
   Total predicciones: 250

üìä Distribuci√≥n de predicciones:
   DR (D):  221 positivos
   G:       8 positivos
   AMD (A): 2 positivos

üîÆ Generando predicciones con task1_2_efficientnet.csv...
‚úÖ Predicciones guardadas en: ./results/task1_2_efficientnet.csv
   Total predicciones: 250

üìä Distribuci√

In [34]:
# ============================================
# DESCARGAR CSVs PARA KAGGLE
# ============================================

from google.colab import files

print("üì• DESCARGANDO ARCHIVOS CSV PARA KAGGLE SUBMISSION")
print("="*70 + "\n")

# Solo descargamos los mejores modelos (Task 1.3) + Task 1.1 y 1.2 para completar
csvs_to_download = [
    # Task 1.1 (baseline)
    './results/task1_1_resnet18.csv',
    './results/task1_1_efficientnet.csv',

    # Task 1.2 (frozen backbone)
    './results/task1_2_resnet18.csv',
    './results/task1_2_efficientnet.csv',

    # Task 1.3 (full fine-tuning) - LOS MEJORES
    './results/task1_3_resnet18.csv',
    './results/task1_3_efficientnet.csv',
]

for csv_path in csvs_to_download:
    print(f"‚¨áÔ∏è  Descargando: {csv_path}")
    files.download(csv_path)

print("\n‚úÖ Archivos descargados a tu computadora!")
print("üí° Busca los archivos en tu carpeta de Descargas")

üì• DESCARGANDO ARCHIVOS CSV PARA KAGGLE SUBMISSION

‚¨áÔ∏è  Descargando: ./results/task1_1_resnet18.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚¨áÔ∏è  Descargando: ./results/task1_1_efficientnet.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚¨áÔ∏è  Descargando: ./results/task1_2_resnet18.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚¨áÔ∏è  Descargando: ./results/task1_2_efficientnet.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚¨áÔ∏è  Descargando: ./results/task1_3_resnet18.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚¨áÔ∏è  Descargando: ./results/task1_3_efficientnet.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


‚úÖ Archivos descargados a tu computadora!
üí° Busca los archivos en tu carpeta de Descargas


# TASK 2

In [106]:
# ============================================
# ============================================
# TASK 2: LOSS FUNCTIONS
# ============================================
# ============================================

print("\n" + "="*80)
print(" INICIANDO TASK 2: LOSS FUNCTIONS")
print("   Objetivo: Abordar el desbalanceo de clases")
print("="*80 + "\n")

# Verificar distribuci√≥n de clases
train_df = pd.read_csv(f"{BASE_DIR}/train.csv")

class_counts = train_df[['D', 'G', 'A']].sum()
print(" DISTRIBUCI√ìN DE CLASES EN TRAINING SET:")
print("="*60)
print(f"DR (D):   {class_counts['D']:3d} casos ({100*class_counts['D']/len(train_df):.1f}%)")
print(f"G:        {class_counts['G']:3d} casos ({100*class_counts['G']/len(train_df):.1f}%)")
print(f"AMD (A):  {class_counts['A']:3d} casos ({100*class_counts['A']/len(train_df):.1f}%)")
print("="*60)
print(f"\n  Problema: DR tiene {class_counts['D']/class_counts['A']:.1f}x m√°s casos que AMD")
print("   El modelo puede ignorar las clases minoritarias\n")


 INICIANDO TASK 2: LOSS FUNCTIONS
   Objetivo: Abordar el desbalanceo de clases

 DISTRIBUCI√ìN DE CLASES EN TRAINING SET:
DR (D):   517 casos (64.6%)
G:        163 casos (20.4%)
AMD (A):  142 casos (17.8%)

  Problema: DR tiene 3.6x m√°s casos que AMD
   El modelo puede ignorar las clases minoritarias



In [108]:
# ============================================
# TASK 2.1: FOCAL LOSS
# ============================================

class FocalLoss(nn.Module):
    """
    Focal Loss para multi-label classification.

    FL(pt) = -Œ± * (1 - pt)^Œ≥ * log(pt)

    Args:
        alpha: Peso para balancear clases (default: 0.25)
        gamma: Factor de enfoque en ejemplos dif√≠ciles (default: 2.0)
    """
    def __init__(self, alpha=0.25, gamma=2.0):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        # Calcular probabilidades
        BCE_loss = nn.functional.binary_cross_entropy_with_logits(
            inputs, targets, reduction='none'
        )

        # Calcular pt (probabilidad del target)
        pt = torch.exp(-BCE_loss)

        # Aplicar focal loss
        focal_loss = self.alpha * (1 - pt) ** self.gamma * BCE_loss

        return focal_loss.mean()

print(" Focal Loss implementado!")
print(f"   Alpha: 0.25")
print(f"   Gamma: 2.0")
print(f"\n Focal Loss penaliza m√°s los ejemplos dif√≠ciles de clasificar")

 Focal Loss implementado!
   Alpha: 0.25
   Gamma: 2.0

 Focal Loss penaliza m√°s los ejemplos dif√≠ciles de clasificar


In [109]:
# ============================================
# TASK 2.2: CLASS-BALANCED LOSS
# ============================================

class ClassBalancedLoss(nn.Module):
    """
    Class-Balanced Loss para multi-label classification.
    Re-pondera BCE Loss seg√∫n la frecuencia de cada clase.

    Args:
        class_counts: Tensor con el n√∫mero de samples positivos por clase
    """
    def __init__(self, class_counts):
        super(ClassBalancedLoss, self).__init__()

        # Calcular pesos inversamente proporcionales a frecuencia
        total_samples = class_counts.sum()
        self.weights = total_samples / (len(class_counts) * class_counts)

        # Normalizar pesos
        self.weights = self.weights / self.weights.sum() * len(class_counts)

        print(f" Pesos calculados para Class-Balanced Loss:")
        disease_names = ['DR', 'G', 'AMD']
        for i, name in enumerate(disease_names):
            print(f"   {name}: {self.weights[i]:.4f} (freq: {class_counts[i]})")

    def forward(self, inputs, targets):
        # BCE loss sin reducci√≥n
        BCE_loss = nn.functional.binary_cross_entropy_with_logits(
            inputs, targets, reduction='none'
        )

        # Aplicar pesos por clase
        weights = self.weights.to(inputs.device)
        weighted_loss = BCE_loss * weights

        return weighted_loss.mean()

# Calcular class counts del training set
class_counts = torch.tensor([
    train_df['D'].sum(),
    train_df['G'].sum(),
    train_df['A'].sum()
], dtype=torch.float32)

print("\n Class-Balanced Loss implementado!")
print(f" Las clases minoritarias (G, AMD) tendr√°n m√°s peso en el loss")


 Class-Balanced Loss implementado!
 Las clases minoritarias (G, AMD) tendr√°n m√°s peso en el loss


In [110]:
# ============================================
# Seleccionar modelo base para Task 2
# ============================================

print("\n" + "="*80)
print(" SELECCI√ìN DE MODELO PARA TASK 2")
print("="*80)

print("\nResultados Task 1.3 (onsite):")
print(f"  ResNet18:     82.29% ")
print(f"  EfficientNet: 80.24%")

print("\n Decisi√≥n: Usaremos ResNet18 para Task 2")
print("   (Mejor performance en Task 1.3)")

SELECTED_MODEL = "resnet18"
print(f"\n Modelo seleccionado: {SELECTED_MODEL}")
print("="*80)


 SELECCI√ìN DE MODELO PARA TASK 2

Resultados Task 1.3 (onsite):
  ResNet18:     82.29% 
  EfficientNet: 80.24%

 Decisi√≥n: Usaremos ResNet18 para Task 2
   (Mejor performance en Task 1.3)

 Modelo seleccionado: resnet18


In [111]:
# ============================================
# TASK 2.1 - Entrenar ResNet18 con Focal Loss
# ============================================

print("\n" + "="*80)
print(" TASK 2.1 - RESNET18 CON FOCAL LOSS")
print("="*80)

# 1. Cargar modelo pre-entrenado
resnet_focal = build_model("resnet18", num_classes=3, pretrained=False).to(DEVICE)
resnet_focal.load_state_dict(torch.load(
    f"{BASE_DIR}/pretrained_backbone/ckpt_resnet18_ep50.pt",
    map_location=DEVICE
))

print(" Modelo pre-entrenado cargado")

# 2. Descongelar todo (full fine-tuning)
for param in resnet_focal.parameters():
    param.requires_grad = True

print(" Todas las capas desbloqueadas (full fine-tuning)")

# 3. Configurar entrenamiento con FOCAL LOSS
criterion = FocalLoss(alpha=0.25, gamma=2.0)
optimizer = optim.Adam(resnet_focal.parameters(), lr=1e-4)

print(f"\n Loss Function: Focal Loss (alpha=0.25, gamma=2.0)")
print(f" Optimizer: Adam (lr=1e-4)")

# 4. Entrenar
NUM_EPOCHS = 20

resnet_focal_trained, train_losses_focal, val_losses_focal = train_model(
    model=resnet_focal,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=NUM_EPOCHS,
    device=DEVICE,
    model_name="ResNet18 - Focal Loss"
)


 TASK 2.1 - RESNET18 CON FOCAL LOSS




 Modelo pre-entrenado cargado
 Todas las capas desbloqueadas (full fine-tuning)

 Loss Function: Focal Loss (alpha=0.25, gamma=2.0)
 Optimizer: Adam (lr=1e-4)

 Iniciando entrenamiento de ResNet18 - Focal Loss
   √âpocas: 20
   Device: cuda
----------------------------------------------------------------------
Epoch  1/20 | Train Loss: 0.1566 | Val Loss: 0.1100  Best!
Epoch  2/20 | Train Loss: 0.0459 | Val Loss: 0.0502  Best!
Epoch  3/20 | Train Loss: 0.0333 | Val Loss: 0.0423  Best!
Epoch  4/20 | Train Loss: 0.0220 | Val Loss: 0.0390  Best!
Epoch  5/20 | Train Loss: 0.0184 | Val Loss: 0.0377  Best!
Epoch  6/20 | Train Loss: 0.0171 | Val Loss: 0.0359  Best!
Epoch  7/20 | Train Loss: 0.0148 | Val Loss: 0.0368
Epoch  8/20 | Train Loss: 0.0126 | Val Loss: 0.0345  Best!
Epoch  9/20 | Train Loss: 0.0115 | Val Loss: 0.0328  Best!
Epoch 10/20 | Train Loss: 0.0090 | Val Loss: 0.0379
Epoch 11/20 | Train Loss: 0.0089 | Val Loss: 0.0412
Epoch 12/20 | Train Loss: 0.0073 | Val Loss: 0.0412
Epoch 13

In [113]:
# ========================
# Evaluar ResNet18 con Focal Loss
# ========================

print("\n EVALUANDO RESNET18 CON FOCAL LOSS")

resnet_focal_results = evaluate_model(
    resnet_focal_trained,
    offsite_test_loader,
    DEVICE,
    "ResNet18 - Focal Loss - Offsite Test"
)

# Guardar modelo
torch.save(resnet_focal_trained.state_dict(),
           './trained_models/resnet18_task2_1_focal.pt')
print("\n Modelo guardado: resnet18_task2_1_focal.pt")


 EVALUANDO RESNET18 CON FOCAL LOSS

 RESULTADOS - ResNet18 - Focal Loss - Offsite Test

DR:
  Precision: 0.8857
  Recall:    0.8857
  F-score:   0.8857

G:
  Precision: 0.8421
  Recall:    0.6531
  F-score:   0.7356

AMD:
  Precision: 0.5667
  Recall:    0.7727
  F-score:   0.6538

 AVERAGE F-SCORE: 0.7584


 Modelo guardado: resnet18_task2_1_focal.pt


In [114]:
# ============================================
# TASK 2.2 - Entrenar ResNet18 con Class-Balanced Loss
# ============================================

print("\n" + "="*80)
print(" TASK 2.2 - RESNET18 CON CLASS-BALANCED LOSS")
print("="*80)

# 1. Cargar modelo pre-entrenado
resnet_balanced = build_model("resnet18", num_classes=3, pretrained=False).to(DEVICE)
resnet_balanced.load_state_dict(torch.load(
    f"{BASE_DIR}/pretrained_backbone/ckpt_resnet18_ep50.pt",
    map_location=DEVICE
))

print(" Modelo pre-entrenado cargado")

# 2. Descongelar todo
for param in resnet_balanced.parameters():
    param.requires_grad = True

print(" Todas las capas desbloqueadas (full fine-tuning)")

# 3. Configurar entrenamiento con CLASS-BALANCED LOSS
criterion = ClassBalancedLoss(class_counts)  # ‚Üê AQU√ç VER√ÅS LOS PESOS
optimizer = optim.Adam(resnet_balanced.parameters(), lr=1e-4)

print(f"\n Loss Function: Class-Balanced Loss")
print(f" Optimizer: Adam (lr=1e-4)")

# 4. Entrenar
resnet_balanced_trained, train_losses_balanced, val_losses_balanced = train_model(
    model=resnet_balanced,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=NUM_EPOCHS,
    device=DEVICE,
    model_name="ResNet18 - Class-Balanced Loss"
)


 TASK 2.2 - RESNET18 CON CLASS-BALANCED LOSS




 Modelo pre-entrenado cargado
 Todas las capas desbloqueadas (full fine-tuning)
 Pesos calculados para Class-Balanced Loss:
   DR: 0.3840 (freq: 517.0)
   G: 1.2179 (freq: 163.0)
   AMD: 1.3981 (freq: 142.0)

 Loss Function: Class-Balanced Loss
 Optimizer: Adam (lr=1e-4)

 Iniciando entrenamiento de ResNet18 - Class-Balanced Loss
   √âpocas: 20
   Device: cuda
----------------------------------------------------------------------
Epoch  1/20 | Train Loss: 0.6428 | Val Loss: 0.5775  Best!
Epoch  2/20 | Train Loss: 0.3310 | Val Loss: 0.4393  Best!
Epoch  3/20 | Train Loss: 0.2484 | Val Loss: 0.4066  Best!
Epoch  4/20 | Train Loss: 0.2008 | Val Loss: 0.4067
Epoch  5/20 | Train Loss: 0.1671 | Val Loss: 0.3771  Best!
Epoch  6/20 | Train Loss: 0.1471 | Val Loss: 0.3806
Epoch  7/20 | Train Loss: 0.1048 | Val Loss: 0.3933
Epoch  8/20 | Train Loss: 0.0955 | Val Loss: 0.4451
Epoch  9/20 | Train Loss: 0.0744 | Val Loss: 0.4011
Epoch 10/20 | Train Loss: 0.0622 | Val Loss: 0.4104
Epoch 11/20 | Trai

In [115]:
# ========================
# Evaluar ResNet18 con Class-Balanced Loss
# ========================

print("\n EVALUANDO RESNET18 CON CLASS-BALANCED LOSS")

resnet_balanced_results = evaluate_model(
    resnet_balanced_trained,
    offsite_test_loader,
    DEVICE,
    "ResNet18 - Class-Balanced Loss - Offsite Test"
)

# Guardar modelo
torch.save(resnet_balanced_trained.state_dict(),
           './trained_models/resnet18_task2_2_balanced.pt')
print("\n Modelo guardado: resnet18_task2_2_balanced.pt")


 EVALUANDO RESNET18 CON CLASS-BALANCED LOSS

 RESULTADOS - ResNet18 - Class-Balanced Loss - Offsite Test

DR:
  Precision: 0.8323
  Recall:    0.9571
  F-score:   0.8904

G:
  Precision: 0.9032
  Recall:    0.5714
  F-score:   0.7000

AMD:
  Precision: 0.7619
  Recall:    0.7273
  F-score:   0.7442

 AVERAGE F-SCORE: 0.7782


 Modelo guardado: resnet18_task2_2_balanced.pt


In [117]:
# ============================================
# COMPARACI√ìN COMPLETA: TASK 1 vs TASK 2
# ============================================

print("\n" + "="*80)
print("üìä COMPARACI√ìN: TASK 1.3 vs TASK 2 (ResNet18)")
print("="*80)

# Resultados
comparison = {
    'Task 1.3 (BCE Loss)': resnet_full_results['average_f1'] * 100,
    'Task 2.1 (Focal Loss)': resnet_focal_results['average_f1'] * 100,
    'Task 2.2 (Class-Balanced)': resnet_balanced_results['average_f1'] * 100,
}

baseline = comparison['Task 1.3 (BCE Loss)']

print(f"\n{'Loss Function':<25} {'F-score':<12} {'vs Baseline':<15}")
print("-" * 60)

for name, score in comparison.items():
    diff = score - baseline
    status = "‚úÖ" if diff >= 0 else "‚ö†Ô∏è"
    print(f"{name:<25} {score:>6.2f}%     {diff:>+6.2f}%  {status}")

print("\n" + "-" * 60)
print("üéØ REFERENCIAS Task 2 (onsite test):")
print("   Baseline (Task 1.3): 78.8%")
print("   Comparable:          ~78.8% (70% de puntos)")
print("   +0.5% mejor:         >79.3% (85% de puntos)")
print("   +1.0% mejor:         >79.8% (100% de puntos)")

# Determinar mejor modelo
best_task2_name = max(comparison.items(), key=lambda x: x[1])
print(f"\n‚≠ê MEJOR MODELO TASK 2: {best_task2_name[0]} ({best_task2_name[1]:.2f}%)")
print("="*80)

# Guardar el mejor para predicciones onsite
if comparison['Task 2.1 (Focal Loss)'] >= comparison['Task 2.2 (Class-Balanced)']:
    best_task2_model = resnet_focal_trained
    best_task2_name = "focal"
else:
    best_task2_model = resnet_balanced_trained
    best_task2_name = "balanced"

print(f"\n‚úÖ Mejor modelo de Task 2 guardado en memoria para predicciones onsite")


üìä COMPARACI√ìN: TASK 1.3 vs TASK 2 (ResNet18)

Loss Function             F-score      vs Baseline    
------------------------------------------------------------
Task 1.3 (BCE Loss)        78.92%      +0.00%  ‚úÖ
Task 2.1 (Focal Loss)      75.84%      -3.08%  ‚ö†Ô∏è
Task 2.2 (Class-Balanced)  77.82%      -1.10%  ‚ö†Ô∏è

------------------------------------------------------------
üéØ REFERENCIAS Task 2 (onsite test):
   Baseline (Task 1.3): 78.8%
   Comparable:          ~78.8% (70% de puntos)
   +0.5% mejor:         >79.3% (85% de puntos)
   +1.0% mejor:         >79.8% (100% de puntos)

‚≠ê MEJOR MODELO TASK 2: Task 1.3 (BCE Loss) (78.92%)

‚úÖ Mejor modelo de Task 2 guardado en memoria para predicciones onsite


In [118]:
# ============================================
# GENERAR PREDICCIONES TASK 2 PARA KAGGLE
# ============================================

print("\n" + "="*80)
print("üì§ GENERANDO PREDICCIONES TASK 2 PARA ONSITE TEST")
print("="*80 + "\n")

# Ya tenemos el dataset onsite y la funci√≥n generate_predictions

# 1. Predicci√≥n con Focal Loss (gamma=2.0)
print("üîÆ Generando predicciones: Focal Loss (gamma=2.0)...")
pred_focal = generate_predictions(
    resnet_focal_trained,
    onsite_loader,
    DEVICE,
    "task2_1_focal_loss.csv"
)

# 2. Predicci√≥n con Class-Balanced Loss
print("\nüîÆ Generando predicciones: Class-Balanced Loss...")
pred_balanced = generate_predictions(
    resnet_balanced_trained,
    onsite_loader,
    DEVICE,
    "task2_2_class_balanced.csv"
)

print("\n" + "="*80)
print("‚úÖ PREDICCIONES TASK 2 GENERADAS")
print("="*80)

print("\nüìä RESUMEN:")
print(f"   Focal Loss (offsite):       75.84%")
print(f"   Class-Balanced (offsite):   77.82% ‚≠ê MEJOR")
print(f"\nüìÅ Archivos listos para Kaggle:")
print("   ‚úÖ task2_1_focal_loss.csv")
print("   ‚úÖ task2_2_class_balanced.csv")

print("\nüí° RECOMENDACI√ìN:")
print("   Submitir AMBOS a Kaggle para Task 2:")
print("   1. Class-Balanced (esperado: ~77-78%)")
print("   2. Focal Loss (esperado: ~75-76%)")
print("="*80)


üì§ GENERANDO PREDICCIONES TASK 2 PARA ONSITE TEST

üîÆ Generando predicciones: Focal Loss (gamma=2.0)...

üîÆ Generando predicciones con task2_1_focal_loss.csv...
‚úÖ Predicciones guardadas en: ./results/task2_1_focal_loss.csv
   Total predicciones: 250

üìä Distribuci√≥n de predicciones:
   DR (D):  180 positivos
   G:       43 positivos
   AMD (A): 37 positivos

üîÆ Generando predicciones: Class-Balanced Loss...

üîÆ Generando predicciones con task2_2_class_balanced.csv...
‚úÖ Predicciones guardadas en: ./results/task2_2_class_balanced.csv
   Total predicciones: 250

üìä Distribuci√≥n de predicciones:
   DR (D):  204 positivos
   G:       35 positivos
   AMD (A): 22 positivos

‚úÖ PREDICCIONES TASK 2 GENERADAS

üìä RESUMEN:
   Focal Loss (offsite):       75.84%
   Class-Balanced (offsite):   77.82% ‚≠ê MEJOR

üìÅ Archivos listos para Kaggle:
   ‚úÖ task2_1_focal_loss.csv
   ‚úÖ task2_2_class_balanced.csv

üí° RECOMENDACI√ìN:
   Submitir AMBOS a Kaggle para Task 2:
   1. C

In [120]:
# ============================================
# DESCARGAR CSVs TASK 2
# ============================================

from google.colab import files

print(" DESCARGANDO ARCHIVOS CSV - TASK 2\n")

csvs_task2 = [
    './results/task2_1_focal_loss.csv',
    './results/task2_2_class_balanced.csv',
]

for csv_path in csvs_task2:
    print(f"‚¨á  Descargando: {csv_path}")
    files.download(csv_path)

print("\n Archivos descargados!")


 DESCARGANDO ARCHIVOS CSV - TASK 2

‚¨á  Descargando: ./results/task2_1_focal_loss.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

‚¨á  Descargando: ./results/task2_2_class_balanced.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


 Archivos descargados!


In [121]:
# ============================================
# RESULTADOS FINALES TASK 2 - ACTUALIZADOS
# ============================================

print("\n" + "="*80)
print("üìä RESULTADOS FINALES TASK 2")
print("="*80)

task2_final_results = {
    'Focal Loss': {
        'offsite': 75.84,
        'onsite': 79.23  # ‚Üê Actualizado con Kaggle
    },
    'Class-Balanced': {
        'offsite': 77.82,
        'onsite': 79.59  # ‚Üê Actualizado con Kaggle
    }
}

print("\nüìà COMPARACI√ìN OFFSITE vs ONSITE:")
print("-" * 80)
print(f"{'Loss Function':<20} {'Offsite':<12} {'Onsite':<12} {'Diferencia':<12}")
print("-" * 80)

for loss_name, scores in task2_final_results.items():
    diff = scores['onsite'] - scores['offsite']
    print(f"{loss_name:<20} {scores['offsite']:>6.2f}%     {scores['onsite']:>6.2f}%     "
          f"{diff:>+6.2f}%")

print("\n" + "="*80)
print("üéØ COMPARACI√ìN CON REFERENCIAS (ONSITE):")
print("-" * 80)

baseline = 78.8
print(f"{'Loss Function':<20} {'Score':<12} {'vs Baseline':<15} {'Status'}")
print("-" * 80)

for loss_name, scores in task2_final_results.items():
    onsite = scores['onsite']
    diff = onsite - baseline

    if diff >= 1.0:
        status = "üéâ 100% pts"
    elif diff >= 0.5:
        status = "‚≠ê 85% pts"
    elif diff >= 0:
        status = "‚úÖ 70% pts"
    else:
        status = "‚ö†Ô∏è 50% pts"

    print(f"{loss_name:<20} {onsite:>6.2f}%     {diff:>+6.2f}%       {status}")

print("\n" + "="*80)
print("‚≠ê MEJOR MODELO: Class-Balanced Loss (79.59%)")
print(f"   Mejora sobre baseline: +0.79%")
print(f"   Puntos estimados Task 2: ~8/10 puntos")
print("="*80)

print("\n‚úÖ TASK 2 COMPLETADO!")
print("üöÄ Listo para continuar con Task 3 (Attention Mechanisms - 15 puntos)")


üìä RESULTADOS FINALES TASK 2

üìà COMPARACI√ìN OFFSITE vs ONSITE:
--------------------------------------------------------------------------------
Loss Function        Offsite      Onsite       Diferencia  
--------------------------------------------------------------------------------
Focal Loss            75.84%      79.23%      +3.39%
Class-Balanced        77.82%      79.59%      +1.77%

üéØ COMPARACI√ìN CON REFERENCIAS (ONSITE):
--------------------------------------------------------------------------------
Loss Function        Score        vs Baseline     Status
--------------------------------------------------------------------------------
Focal Loss            79.23%      +0.43%       ‚úÖ 70% pts
Class-Balanced        79.59%      +0.79%       ‚≠ê 85% pts

‚≠ê MEJOR MODELO: Class-Balanced Loss (79.59%)
   Mejora sobre baseline: +0.79%
   Puntos estimados Task 2: ~8/10 puntos

‚úÖ TASK 2 COMPLETADO!
üöÄ Listo para continuar con Task 3 (Attention Mechanisms - 15 puntos)


# TASK 3

In [124]:
# ============================================
# ============================================
# TASK 3: ATTENTION MECHANISMS
# ============================================
# ============================================

print("\n" + "="*80)
print(" INICIANDO TASK 3: ATTENTION MECHANISMS")
print("   Objetivo: Mejorar el modelo con mecanismos de atenci√≥n")
print("="*80 + "\n")

print(" Sub-tareas:")
print("   Task 3.1: Squeeze-and-Excitation (SE) - 6 puntos")
print("   Task 3.2: Multi-head Attention (MHA) - 9 puntos")
print("\n Baseline: Task 1.3 ResNet18 - 82.29% (onsite)")
print("="*80)


 INICIANDO TASK 3: ATTENTION MECHANISMS
   Objetivo: Mejorar el modelo con mecanismos de atenci√≥n

 Sub-tareas:
   Task 3.1: Squeeze-and-Excitation (SE) - 6 puntos
   Task 3.2: Multi-head Attention (MHA) - 9 puntos

 Baseline: Task 1.3 ResNet18 - 82.29% (onsite)


In [125]:
# ============================================
# TASK 3.1: SQUEEZE-AND-EXCITATION (SE) MODULE
# ============================================

class SEBlock(nn.Module):
    """
    Squeeze-and-Excitation Block

    Paper: "Squeeze-and-Excitation Networks" (Hu et al., 2018)

    Proceso:
    1. Squeeze: Global Average Pooling ‚Üí vector de tama√±o [C]
    2. Excitation: FC ‚Üí ReLU ‚Üí FC ‚Üí Sigmoid ‚Üí pesos por canal
    3. Scale: Multiplica features por pesos aprendidos

    Args:
        channels: N√∫mero de canales de entrada
        reduction: Factor de reducci√≥n para la capa FC (default: 16)
    """
    def __init__(self, channels, reduction=16):
        super(SEBlock, self).__init__()

        # Squeeze: Global Average Pooling (adaptativo)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)

        # Excitation: 2 capas FC
        self.fc = nn.Sequential(
            nn.Linear(channels, channels // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channels // reduction, channels, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        # x: [batch, channels, height, width]
        batch, channels, _, _ = x.size()

        # Squeeze: [batch, channels, H, W] ‚Üí [batch, channels, 1, 1]
        y = self.avg_pool(x)

        # [batch, channels, 1, 1] ‚Üí [batch, channels]
        y = y.view(batch, channels)

        # Excitation: [batch, channels] ‚Üí [batch, channels]
        y = self.fc(y)

        # [batch, channels] ‚Üí [batch, channels, 1, 1]
        y = y.view(batch, channels, 1, 1)

        # Scale: multiplicar features originales por pesos
        return x * y.expand_as(x)

print(" SEBlock implementado!")
print("\n Arquitectura:")
print("   Input: [B, C, H, W]")
print("   ‚Üí Global Avg Pool: [B, C, 1, 1]")
print("   ‚Üí FC(C, C//16) ‚Üí ReLU ‚Üí FC(C//16, C) ‚Üí Sigmoid")
print("   ‚Üí Multiply: [B, C, H, W] * [B, C, 1, 1]")
print("   Output: [B, C, H, W] (con atenci√≥n por canal)")

 SEBlock implementado!

 Arquitectura:
   Input: [B, C, H, W]
   ‚Üí Global Avg Pool: [B, C, 1, 1]
   ‚Üí FC(C, C//16) ‚Üí ReLU ‚Üí FC(C//16, C) ‚Üí Sigmoid
   ‚Üí Multiply: [B, C, H, W] * [B, C, 1, 1]
   Output: [B, C, H, W] (con atenci√≥n por canal)


In [126]:
# ============================================
# Construir ResNet18 + SE Blocks
# ============================================

class ResNet18_SE(nn.Module):
    """
    ResNet18 con Squeeze-and-Excitation blocks a√±adidos.

    SE blocks se agregan despu√©s de cada bloque residual.
    """
    def __init__(self, num_classes=3, reduction=16):
        super(ResNet18_SE, self).__init__()

        # Cargar ResNet18 pre-entrenado
        resnet = models.resnet18(pretrained=False)

        # Copiar capas iniciales (conv1, bn1, relu, maxpool)
        self.conv1 = resnet.conv1
        self.bn1 = resnet.bn1
        self.relu = resnet.relu
        self.maxpool = resnet.maxpool

        # Layer 1: [64 channels]
        self.layer1 = resnet.layer1
        self.se1 = SEBlock(64, reduction)

        # Layer 2: [128 channels]
        self.layer2 = resnet.layer2
        self.se2 = SEBlock(128, reduction)

        # Layer 3: [256 channels]
        self.layer3 = resnet.layer3
        self.se3 = SEBlock(256, reduction)

        # Layer 4: [512 channels]
        self.layer4 = resnet.layer4
        self.se4 = SEBlock(512, reduction)

        # Global Average Pooling + Classifier
        self.avgpool = resnet.avgpool
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        # Initial layers
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        # Residual blocks + SE
        x = self.layer1(x)
        x = self.se1(x)  # ‚Üê SE after layer 1

        x = self.layer2(x)
        x = self.se2(x)  # ‚Üê SE after layer 2

        x = self.layer3(x)
        x = self.se3(x)  # ‚Üê SE after layer 3

        x = self.layer4(x)
        x = self.se4(x)  # ‚Üê SE after layer 4

        # Classifier
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

print(" ResNet18_SE implementado!")
print("\n  Arquitectura:")
print("   ResNet18 base")
print("   + SE Block despu√©s de layer1 (64 channels)")
print("   + SE Block despu√©s de layer2 (128 channels)")
print("   + SE Block despu√©s de layer3 (256 channels)")
print("   + SE Block despu√©s de layer4 (512 channels)")

 ResNet18_SE implementado!

  Arquitectura:
   ResNet18 base
   + SE Block despu√©s de layer1 (64 channels)
   + SE Block despu√©s de layer2 (128 channels)
   + SE Block despu√©s de layer3 (256 channels)
   + SE Block despu√©s de layer4 (512 channels)


In [127]:
# ============================================
# TASK 3.1 - Entrenar ResNet18 + SE
# ============================================

print("\n" + "="*80)
print("üî¨ TASK 3.1 - RESNET18 + SQUEEZE-AND-EXCITATION")
print("="*80)

# 1. Crear modelo
resnet_se = ResNet18_SE(num_classes=3, reduction=16).to(DEVICE)

print("\nüèóÔ∏è  Modelo creado: ResNet18 + SE Blocks")

# 2. Cargar pesos pre-entrenados del backbone
# (solo las capas compatibles)
pretrained_dict = torch.load(
    f"{BASE_DIR}/pretrained_backbone/ckpt_resnet18_ep50.pt",
    map_location=DEVICE
)

# Filtrar solo las capas que existen en ambos modelos
model_dict = resnet_se.state_dict()
pretrained_dict_filtered = {
    k: v for k, v in pretrained_dict.items()
    if k in model_dict and model_dict[k].shape == v.shape
}

# Cargar pesos compatibles
model_dict.update(pretrained_dict_filtered)
resnet_se.load_state_dict(model_dict)

print(f"‚úÖ Cargados {len(pretrained_dict_filtered)}/{len(pretrained_dict)} pesos pre-entrenados")
print("üí° SE blocks inicializados aleatoriamente (nuevos)")

# 3. Verificar par√°metros
total_params = sum(p.numel() for p in resnet_se.parameters())
trainable_params = sum(p.numel() for p in resnet_se.parameters() if p.requires_grad)

print(f"\nüìä Par√°metros del modelo:")
print(f"   Total:      {total_params:,}")
print(f"   Trainable:  {trainable_params:,}")

# 4. Configurar entrenamiento
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(resnet_se.parameters(), lr=1e-4)

print(f"\nüéØ Loss: BCE with Logits")
print(f"üéØ Optimizer: Adam (lr=1e-4)")

# 5. Entrenar
NUM_EPOCHS = 20

resnet_se_trained, train_losses_se, val_losses_se = train_model(
    model=resnet_se,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=NUM_EPOCHS,
    device=DEVICE,
    model_name="ResNet18 + SE"
)


üî¨ TASK 3.1 - RESNET18 + SQUEEZE-AND-EXCITATION

üèóÔ∏è  Modelo creado: ResNet18 + SE Blocks




‚úÖ Cargados 122/122 pesos pre-entrenados
üí° SE blocks inicializados aleatoriamente (nuevos)

üìä Par√°metros del modelo:
   Total:      11,221,571
   Trainable:  11,221,571

üéØ Loss: BCE with Logits
üéØ Optimizer: Adam (lr=1e-4)

 Iniciando entrenamiento de ResNet18 + SE
   √âpocas: 20
   Device: cuda
----------------------------------------------------------------------
Epoch  1/20 | Train Loss: 0.5467 | Val Loss: 0.4627  Best!
Epoch  2/20 | Train Loss: 0.3360 | Val Loss: 0.4218  Best!
Epoch  3/20 | Train Loss: 0.2732 | Val Loss: 0.3934  Best!
Epoch  4/20 | Train Loss: 0.2285 | Val Loss: 0.4001
Epoch  5/20 | Train Loss: 0.1887 | Val Loss: 0.4149
Epoch  6/20 | Train Loss: 0.1462 | Val Loss: 0.4290
Epoch  7/20 | Train Loss: 0.1322 | Val Loss: 0.4159
Epoch  8/20 | Train Loss: 0.1024 | Val Loss: 0.3960
Epoch  9/20 | Train Loss: 0.0985 | Val Loss: 0.4590
Epoch 10/20 | Train Loss: 0.0815 | Val Loss: 0.4237
Epoch 11/20 | Train Loss: 0.0665 | Val Loss: 0.4447
Epoch 12/20 | Train Loss: 

In [129]:
# ========================
# Evaluar ResNet18 + SE
# ========================

print("\n EVALUANDO RESNET18 + SE")

resnet_se_results = evaluate_model(
    resnet_se_trained,
    offsite_test_loader,
    DEVICE,
    "ResNet18 + SE - Offsite Test"
)

# Guardar modelo
torch.save(resnet_se_trained.state_dict(),
           './trained_models/resnet18_task3_1_se.pt')
print("\n Modelo guardado: resnet18_task3_1_se.pt")

# Comparar con baseline
print("\n" + "="*80)
print(" COMPARACI√ìN CON BASELINE")
print("="*80)

baseline = resnet_full_results['average_f1'] * 100
se_score = resnet_se_results['average_f1'] * 100

print(f"\nTask 1.3 (ResNet18):    {baseline:.2f}%")
print(f"Task 3.1 (ResNet18+SE): {se_score:.2f}%")
print(f"Diferencia:             {se_score - baseline:+.2f}%")

if se_score > baseline:
    print("\n ¬°SE BLOCKS MEJORARON EL MODELO!")
else:
    print("\n  SE Blocks no mejoraron, pero es un resultado v√°lido")

print("="*80)


 EVALUANDO RESNET18 + SE

 RESULTADOS - ResNet18 + SE - Offsite Test

DR:
  Precision: 0.8808
  Recall:    0.9500
  F-score:   0.9141

G:
  Precision: 0.8611
  Recall:    0.6327
  F-score:   0.7294

AMD:
  Precision: 0.7647
  Recall:    0.5909
  F-score:   0.6667

 AVERAGE F-SCORE: 0.7701


 Modelo guardado: resnet18_task3_1_se.pt

 COMPARACI√ìN CON BASELINE

Task 1.3 (ResNet18):    78.92%
Task 3.1 (ResNet18+SE): 77.01%
Diferencia:             -1.92%

  SE Blocks no mejoraron, pero es un resultado v√°lido


In [130]:
# ============================================
# TASK 3.2: MULTI-HEAD ATTENTION (MHA) MODULE
# ============================================

class MultiHeadAttention(nn.Module):
    """
    Multi-head Self-Attention para features convolucionales.

    Paper: "Attention is All You Need" (Vaswani et al., 2017)

    Adaptado para CNNs: convierte [B, C, H, W] a secuencias.

    Args:
        embed_dim: Dimensi√≥n de embedding (n√∫mero de canales)
        num_heads: N√∫mero de cabezas de atenci√≥n
        dropout: Dropout rate (default: 0.1)
    """
    def __init__(self, embed_dim, num_heads=8, dropout=0.1):
        super(MultiHeadAttention, self).__init__()

        assert embed_dim % num_heads == 0, "embed_dim debe ser divisible por num_heads"

        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        self.scale = self.head_dim ** -0.5

        # Proyecciones lineales para Q, K, V
        self.qkv = nn.Linear(embed_dim, embed_dim * 3, bias=False)

        # Proyecci√≥n de salida
        self.proj = nn.Linear(embed_dim, embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # x: [batch, channels, height, width]
        B, C, H, W = x.shape
        N = H * W  # N√∫mero de tokens (pixeles)

        # Reshape: [B, C, H, W] ‚Üí [B, N, C]
        x = x.flatten(2).transpose(1, 2)  # [B, H*W, C]

        # Proyectar a Q, K, V
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, self.head_dim)
        qkv = qkv.permute(2, 0, 3, 1, 4)  # [3, B, num_heads, N, head_dim]
        q, k, v = qkv[0], qkv[1], qkv[2]

        # Attention: Q @ K^T / sqrt(d)
        attn = (q @ k.transpose(-2, -1)) * self.scale  # [B, num_heads, N, N]
        attn = attn.softmax(dim=-1)
        attn = self.dropout(attn)

        # Aplicar atenci√≥n a V
        x = (attn @ v).transpose(1, 2).reshape(B, N, C)  # [B, N, C]

        # Proyecci√≥n de salida
        x = self.proj(x)
        x = self.dropout(x)

        # Reshape back: [B, N, C] ‚Üí [B, C, H, W]
        x = x.transpose(1, 2).reshape(B, C, H, W)

        return x

print(" MultiHeadAttention implementado!")
print("\n Arquitectura:")
print("   Input: [B, C, H, W]")
print("   ‚Üí Flatten: [B, H*W, C]")
print("   ‚Üí Linear(C, 3*C): Q, K, V projections")
print("   ‚Üí Multi-head Attention: [B, num_heads, H*W, H*W]")
print("   ‚Üí Linear(C, C): Output projection")
print("   ‚Üí Reshape: [B, C, H, W]")

 MultiHeadAttention implementado!

 Arquitectura:
   Input: [B, C, H, W]
   ‚Üí Flatten: [B, H*W, C]
   ‚Üí Linear(C, 3*C): Q, K, V projections
   ‚Üí Multi-head Attention: [B, num_heads, H*W, H*W]
   ‚Üí Linear(C, C): Output projection
   ‚Üí Reshape: [B, C, H, W]


In [140]:
# ============================================
# Construir ResNet18 + MHA (OPTIMIZADO)
# ============================================

class ResNet18_MHA_Optimized(nn.Module):
    """
    ResNet18 con Multi-Head Attention SOLO en layers finales.

    Esto reduce uso de memoria significativamente.
    MHA se aplica donde las features son m√°s abstractas y menos pixeles.
    """
    def __init__(self, num_classes=3, num_heads=8):
        super(ResNet18_MHA_Optimized, self).__init__()

        # Cargar ResNet18 pre-entrenado
        resnet = models.resnet18(pretrained=False)

        # Copiar capas iniciales
        self.conv1 = resnet.conv1
        self.bn1 = resnet.bn1
        self.relu = resnet.relu
        self.maxpool = resnet.maxpool

        # Layer 1: [64 channels] - SIN MHA (demasiados tokens)
        self.layer1 = resnet.layer1

        # Layer 2: [128 channels] - SIN MHA (muchos tokens)
        self.layer2 = resnet.layer2

        # Layer 3: [256 channels] - CON MHA
        self.layer3 = resnet.layer3
        self.mha3 = MultiHeadAttention(256, num_heads=num_heads)

        # Layer 4: [512 channels] - CON MHA
        self.layer4 = resnet.layer4
        self.mha4 = MultiHeadAttention(512, num_heads=num_heads)

        # Global Average Pooling + Classifier
        self.avgpool = resnet.avgpool
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        # Initial layers
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        # Layer 1 y 2: SIN MHA
        x = self.layer1(x)
        x = self.layer2(x)

        # Layer 3: CON MHA
        x = self.layer3(x)
        x = x + self.mha3(x)  # ‚Üê MHA aqu√≠ (16√ó16 tokens)

        # Layer 4: CON MHA
        x = self.layer4(x)
        x = x + self.mha4(x)  # ‚Üê MHA aqu√≠ (8√ó8 tokens)

        # Classifier
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

print(" ResNet18_MHA_Optimized implementado!")
print("\n  Arquitectura:")
print("   ResNet18 base")
print("   + layer1 (64 channels) - SIN MHA")
print("   + layer2 (128 channels) - SIN MHA")
print("   + layer3 (256 channels) - CON MHA  (16√ó16 = 256 tokens)")
print("   + layer4 (512 channels) - CON MHA  (8√ó8 = 64 tokens)")
print("\n Esto reduce DR√ÅSTICAMENTE el uso de memoria")

 ResNet18_MHA_Optimized implementado!

  Arquitectura:
   ResNet18 base
   + layer1 (64 channels) - SIN MHA
   + layer2 (128 channels) - SIN MHA
   + layer3 (256 channels) - CON MHA  (16√ó16 = 256 tokens)
   + layer4 (512 channels) - CON MHA  (8√ó8 = 64 tokens)

 Esto reduce DR√ÅSTICAMENTE el uso de memoria


In [142]:
# ============================================
# TASK 3.2 - Entrenar ResNet18 + MHA (OPTIMIZADO)
# ============================================

print("\n" + "="*80)
print("üî¨ TASK 3.2 - RESNET18 + MULTI-HEAD ATTENTION (OPTIMIZADO)")
print("="*80)

# 1. Crear modelo OPTIMIZADO
resnet_mha_opt = ResNet18_MHA_Optimized(num_classes=3, num_heads=8).to(DEVICE)

print("\nüèóÔ∏è  Modelo creado: ResNet18 + MHA (solo en layer3 y layer4)")

# 2. Cargar pesos pre-entrenados
pretrained_dict = torch.load(
    f"{BASE_DIR}/pretrained_backbone/ckpt_resnet18_ep50.pt",
    map_location=DEVICE
)

model_dict = resnet_mha_opt.state_dict()
pretrained_dict_filtered = {
    k: v for k, v in pretrained_dict.items()
    if k in model_dict and model_dict[k].shape == v.shape
}

model_dict.update(pretrained_dict_filtered)
resnet_mha_opt.load_state_dict(model_dict)

print(f"‚úÖ Cargados {len(pretrained_dict_filtered)}/{len(pretrained_dict)} pesos pre-entrenados")

# 3. Par√°metros
total_params = sum(p.numel() for p in resnet_mha_opt.parameters())
print(f"\nüìä Par√°metros: {total_params:,}")

# 4. CREAR DATALOADERS CON BATCH SIZE REDUCIDO
print("\n‚öôÔ∏è  Creando dataloaders con batch_size=16 (reducido de 32)")

train_loader_small = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=2)
val_loader_small = DataLoader(val_ds, batch_size=16, shuffle=False, num_workers=2)

# 5. Configurar entrenamiento
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(resnet_mha_opt.parameters(), lr=1e-4)

print(f"üéØ Loss: BCE with Logits")
print(f"üéØ Optimizer: Adam (lr=1e-4)")
print(f"üéØ Batch size: 16 (para ahorrar memoria)")

# 6. Entrenar
NUM_EPOCHS = 20

resnet_mha_trained, train_losses_mha, val_losses_mha = train_model(
    model=resnet_mha_opt,
    train_loader=train_loader_small,
    val_loader=val_loader_small,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=NUM_EPOCHS,
    device=DEVICE,
    model_name="ResNet18 + MHA (Optimized)"
)


üî¨ TASK 3.2 - RESNET18 + MULTI-HEAD ATTENTION (OPTIMIZADO)

üèóÔ∏è  Modelo creado: ResNet18 + MHA (solo en layer3 y layer4)
‚úÖ Cargados 122/122 pesos pre-entrenados

üìä Par√°metros: 12,489,539

‚öôÔ∏è  Creando dataloaders con batch_size=16 (reducido de 32)
üéØ Loss: BCE with Logits
üéØ Optimizer: Adam (lr=1e-4)
üéØ Batch size: 16 (para ahorrar memoria)

 Iniciando entrenamiento de ResNet18 + MHA (Optimized)
   √âpocas: 20
   Device: cuda
----------------------------------------------------------------------
Epoch  1/20 | Train Loss: 0.6033 | Val Loss: 0.5013  Best!
Epoch  2/20 | Train Loss: 0.3284 | Val Loss: 0.3911  Best!
Epoch  3/20 | Train Loss: 0.2586 | Val Loss: 0.4032
Epoch  4/20 | Train Loss: 0.2195 | Val Loss: 0.4478
Epoch  5/20 | Train Loss: 0.1854 | Val Loss: 0.4225
Epoch  6/20 | Train Loss: 0.1622 | Val Loss: 0.4931
Epoch  7/20 | Train Loss: 0.1356 | Val Loss: 0.4340
Epoch  8/20 | Train Loss: 0.1241 | Val Loss: 0.4924
Epoch  9/20 | Train Loss: 0.1145 | Val Loss: 0.

In [143]:
# ========================
# Evaluar ResNet18 + MHA
# ========================

print("\n EVALUANDO RESNET18 + MHA ")

resnet_mha_results = evaluate_model(
    resnet_mha_trained,
    offsite_test_loader,
    DEVICE,
    "ResNet18 + MHA (Optimized) - Offsite Test"
)

# Guardar modelo
torch.save(resnet_mha_trained.state_dict(),
           './trained_models/resnet18_task3_2_mha.pt')
print("\n Modelo guardado: resnet18_task3_2_mha.pt")


 EVALUANDO RESNET18 + MHA 

 RESULTADOS - ResNet18 + MHA (Optimized) - Offsite Test

DR:
  Precision: 0.8553
  Recall:    0.9714
  F-score:   0.9097

G:
  Precision: 0.9394
  Recall:    0.6327
  F-score:   0.7561

AMD:
  Precision: 0.8333
  Recall:    0.6818
  F-score:   0.7500

 AVERAGE F-SCORE: 0.8053


 Modelo guardado: resnet18_task3_2_mha.pt


In [145]:
# ============================================
# COMPARACI√ìN COMPLETA TASK 3
# ============================================

print("\n" + "="*80)
print(" RESUMEN TASK 3: ATTENTION MECHANISMS")
print("="*80)

task3_results = {
    'Task 1.3 (Baseline)': resnet_full_results['average_f1'] * 100,
    'Task 3.1 (SE)': resnet_se_results['average_f1'] * 100,
    'Task 3.2 (MHA)': resnet_mha_results['average_f1'] * 100,
}

baseline = task3_results['Task 1.3 (Baseline)']

print(f"\n{'Modelo':<25} {'F-score':<12} {'vs Baseline':<15} {'Status'}")
print("-" * 70)

best_task3_model = None
best_task3_score = 0
best_task3_name = ""
best_task3_key = ""

for name, score in task3_results.items():
    diff = score - baseline

    # Guardar el mejor de Task 3
    if 'Task 3' in name and score > best_task3_score:
        best_task3_score = score
        best_task3_name = name
        if 'SE' in name:
            best_task3_key = 'SE'
            best_task3_model = resnet_se_trained
        else:
            best_task3_key = 'MHA'
            best_task3_model = resnet_mha_trained

    # Determinar status
    if diff >= 1.5:
        status = "üéâ 100% pts"
    elif diff >= 1.0:
        status = "‚≠ê 85% pts"
    elif diff >= 0:
        status = "‚úÖ 70% pts"
    elif diff >= -2:
        status = "‚ö†Ô∏è 60% pts"
    else:
        status = "‚ùå 50% pts"

    print(f"{name:<25} {score:>6.2f}%     {diff:>+6.2f}%       {status}")

print("\n" + "-" * 70)
print(" REFERENCIAS Task 3 (onsite test):")
print("   Baseline (Task 1.3): 78.8%")
print("   Comparable:          ~78.8% (70% de puntos)")
print("   +1.0% mejor:         >79.8% (85% de puntos)")
print("   +1.5% mejor:         >80.3% (100% de puntos)")

print(f"\n MEJOR MODELO TASK 3: {best_task3_name} ({best_task3_score:.2f}%)")

if best_task3_score > baseline:
    print(f"    SUPER√ì el baseline (+{best_task3_score - baseline:.2f}%)")
else:
    print(f"     Por debajo del baseline ({best_task3_score - baseline:.2f}%)")

print(f"\n Se usar√° {best_task3_key} para la submission de Task 3")
print("="*80)


 RESUMEN TASK 3: ATTENTION MECHANISMS

Modelo                    F-score      vs Baseline     Status
----------------------------------------------------------------------
Task 1.3 (Baseline)        78.92%      +0.00%       ‚úÖ 70% pts
Task 3.1 (SE)              77.01%      -1.92%       ‚ö†Ô∏è 60% pts
Task 3.2 (MHA)             80.53%      +1.60%       üéâ 100% pts

----------------------------------------------------------------------
 REFERENCIAS Task 3 (onsite test):
   Baseline (Task 1.3): 78.8%
   Comparable:          ~78.8% (70% de puntos)
   +1.0% mejor:         >79.8% (85% de puntos)
   +1.5% mejor:         >80.3% (100% de puntos)

 MEJOR MODELO TASK 3: Task 3.2 (MHA) (80.53%)
    SUPER√ì el baseline (+1.60%)

 Se usar√° MHA para la submission de Task 3


In [146]:
# ============================================
# GENERAR PREDICCI√ìN TASK 3 - MHA
# ============================================

print("\n" + "="*80)
print("üì§ GENERANDO PREDICCI√ìN TASK 3 - MULTI-HEAD ATTENTION")
print("="*80 + "\n")

print(f"üéØ Modelo seleccionado: ResNet18 + MHA")
print(f"   Offsite score: 80.53%")
print(f"   Supera baseline: +1.60%")
print(f"   Supera threshold +1.5%: ‚úÖ (potencial 100% de puntos)\n")

# Generar predicci√≥n onsite
pred_mha = generate_predictions(
    resnet_mha_trained,
    onsite_loader,
    DEVICE,
    "task3_mha.csv"
)

print("\n" + "="*80)
print("‚úÖ PREDICCI√ìN TASK 3 GENERADA")
print("="*80)
print("\nüìÅ Archivo listo para Kaggle:")
print("   ‚úÖ task3_mha.csv")
print("\nüéØ Score esperado en Kaggle:")
print("   Offsite: 80.53%")
print("   Onsite esperado: ~80-82%")
print("="*80)


üì§ GENERANDO PREDICCI√ìN TASK 3 - MULTI-HEAD ATTENTION

üéØ Modelo seleccionado: ResNet18 + MHA
   Offsite score: 80.53%
   Supera baseline: +1.60%
   Supera threshold +1.5%: ‚úÖ (potencial 100% de puntos)


üîÆ Generando predicciones con task3_mha.csv...
‚úÖ Predicciones guardadas en: ./results/task3_mha.csv
   Total predicciones: 250

üìä Distribuci√≥n de predicciones:
   DR (D):  198 positivos
   G:       40 positivos
   AMD (A): 24 positivos

‚úÖ PREDICCI√ìN TASK 3 GENERADA

üìÅ Archivo listo para Kaggle:
   ‚úÖ task3_mha.csv

üéØ Score esperado en Kaggle:
   Offsite: 80.53%
   Onsite esperado: ~80-82%


In [147]:
# ============================================
# DESCARGAR CSV TASK 3
# ============================================

from google.colab import files

print(" DESCARGANDO ARCHIVO CSV - TASK 3\n")

csv_path = './results/task3_mha.csv'
print(f"‚¨á  Descargando: {csv_path}")
files.download(csv_path)

print("\n Archivo descargado!")

 DESCARGANDO ARCHIVO CSV - TASK 3

‚¨á  Descargando: ./results/task3_mha.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


 Archivo descargado!
