## **UNI MAHMOOD**

In [None]:
%pip install -q timm huggingface_hub xgboost scipy

## **2. SETUP AMBIENTE & CONFIGURAZIONE**

In [None]:
import os
import shutil
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
import xgboost as xgb
from scipy import ndimage
from PIL import Image, ImageOps
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score, classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
from torchvision.transforms import v2 as transforms
import matplotlib.pyplot as plt
import seaborn as sns

# Base configuration
isColab = True
SEED = 42

# Image parameters
IMG_RESIZE = (224, 224) # UNI expects 224x224 inputs
NORMALIZATION_MEAN = [0.485, 0.456, 0.406]
NORMALIZATION_STD = [0.229, 0.224, 0.225]

# UNI training parameters
BATCH_SIZE = 32          # Kept small to fit in VRAM
GRAD_ACCUMULATION = 4   # Effective batch = BATCH_SIZE * GRAD_ACCUMULATION = 128
EPOCHS = 30
PATIENCE = 8             # Epochs without validation-F1 improvement before early stopping
LR_HEAD = 1e-3           # Fast learning rate for the classifier head
LR_BACKBONE = 5e-6       # Very slow learning rate for the UNI backbone
WEIGHT_DECAY = 1e-4
DROPOUT = 0.3
MIXUP_ALPHA = 0.2        # Beta(alpha, alpha) parameter used to draw the mixup coefficient

# Paths
if isColab:
    from google.colab import drive
    drive.mount("/gdrive")
    # Adjust this path to match your own Drive layout
    base_dir = "/gdrive/My Drive/[2025-2026] AN2DL/Challenge 2"

    # Copy the dataset to the local runtime disk for faster I/O
    local_dataset_dir = "/content/dataset"
    if not os.path.exists(local_dataset_dir):
        print("Copying dataset to local runtime (fast I/O)...")
        shutil.copytree(os.path.join(base_dir, "dataset"), local_dataset_dir)

    train_dir = os.path.join(local_dataset_dir, "train_data")
    test_dir = os.path.join(local_dataset_dir, "test_data")
    label_file = os.path.join(local_dataset_dir, "train_labels.csv")
else:
    # Local/Kaggle configuration
    # base_dir is read later when the submission file is written, so it must
    # exist in this branch as well.
    base_dir = "."
    train_dir = "./dataset/train_data"
    test_dir = "./dataset/test_data"
    label_file = "./dataset/train_labels.csv"

# Output folders are needed in both environments: checkpoints go to
# "models", submissions to "submission" (locally and under base_dir).
os.makedirs("models", exist_ok=True)
os.makedirs("submission", exist_ok=True)
os.makedirs(os.path.join(base_dir, "submission"), exist_ok=True)

# Compute device: use the GPU when CUDA is usable, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Running on: {device}")

# Reproducibility
def set_seed(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    with ``seed`` and switches cuDNN to deterministic mode.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)
    torch.backends.cudnn.deterministic = True

# Seed every RNG with the configured SEED before the rest of the notebook runs.
set_seed(SEED)

## **3. Hugging Face Login**
Inserisci il token qui sotto.

In [None]:
import os

from huggingface_hub import login

# SECURITY: an access token used to be hard-coded on this line. Secrets must
# never live in the notebook source; the previously committed token should be
# revoked from the Hugging Face account settings.
# The token is now read from the HF_TOKEN environment variable. When it is
# unset, login() receives token=None and falls back to its interactive prompt.
login(token=os.environ.get("HF_TOKEN") or None)


## **4. Dataset & Preprocessing (Fixed Tile Centroids)**
Questa classe estrae crop fissi centrati sui blob di tessuto.

In [None]:
class MacenkoNormalizer:
    """Best-effort stain normalisation of an RGB patch.

    The call converts the image to optical density (OD), estimates two stain
    directions from the two leading eigenvectors of the OD covariance,
    solves for per-pixel stain concentrations by least squares, rescales them
    against ``maxCRef`` and rebuilds the image with the ``HERef`` matrix.

    Any failure (unexpected shape, degenerate linear algebra, ...) returns the
    input converted to ``uint8`` instead of raising.
    """

    def __init__(self):
        # Reference stain matrix (3 OD channels x 2 stains) used to rebuild
        # the image, and the reference maximum concentration of each stain.
        # NOTE(review): presumably the values of the standard Macenko
        # reference implementation -- confirm the source.
        self.HERef = np.array([[0.5626, 0.2159], [0.7201, 0.8012], [0.4062, 0.5581]])
        self.maxCRef = np.array([1.9705, 1.0308])

    def __call__(self, img_arr, Io=240, alpha=1, beta=0.15):
        """Normalise ``img_arr`` and return an ``(h, w, 3)`` uint8 array.

        Args:
            img_arr: image array; the algorithm unpacks its shape as (h, w, c)
                and reshapes it to rows of 3 channels.
            Io: transmitted-light intensity used for the OD conversion.
            alpha: percentile (and 100 - alpha) used to pick the extreme
                stain angles.
            beta: OD threshold; pixels with any channel below it are ignored
                when estimating the stain directions.

        Returns:
            The normalised image, or the unchanged input when fewer than 10
            pixels pass the ``beta`` filter or when any step fails.
        """
        # Keep a handle on the untouched input so every fallback path can
        # return it, even when the shape unpacking itself is what fails
        # (the previous handler referenced h/w/c before they were bound).
        original = np.asarray(img_arr)
        try:
            h, w, c = original.shape
            flat = original.reshape((-1, 3))

            # Optical density; +1 avoids log(0) on black pixels.
            OD = -np.log((flat.astype(np.float64) + 1) / Io)

            # Drop near-transparent pixels before estimating stain directions.
            ODhat = OD[~np.any(OD < beta, axis=1)]
            if ODhat.shape[0] < 10:
                return original.copy()

            # Plane spanned by the two largest-eigenvalue directions.
            _, eigvecs = np.linalg.eigh(np.cov(ODhat.T))
            That = ODhat.dot(eigvecs[:, -2:])

            # Extreme angles inside that plane define the two stain vectors.
            phi = np.arctan2(That[:, 1], That[:, 0])
            minPhi = np.percentile(phi, alpha)
            maxPhi = np.percentile(phi, 100 - alpha)
            vMin = eigvecs[:, -2:].dot(np.array([(np.cos(minPhi), np.sin(minPhi))]).T)
            vMax = eigvecs[:, -2:].dot(np.array([(np.cos(maxPhi), np.sin(maxPhi))]).T)

            # Order the two vectors by their first component.
            if vMin[0] > vMax[0]:
                HE = np.array((vMin[:, 0], vMax[:, 0])).T
            else:
                HE = np.array((vMax[:, 0], vMin[:, 0])).T

            # Per-pixel stain concentrations via least squares.
            Y = np.reshape(OD, (-1, 3)).T
            C = np.linalg.lstsq(HE, Y, rcond=None)[0]

            # Rescale concentrations so their 99th percentile matches maxCRef.
            maxC = np.percentile(C, 99, axis=1)
            tmp = np.divide(maxC, self.maxCRef)
            C2 = np.divide(C, tmp[:, np.newaxis])

            # Rebuild the image with the reference stain matrix.
            Inorm = np.multiply(Io, np.exp(-self.HERef.dot(C2)))
            Inorm[Inorm > 255] = 254  # clamp overflow (value kept from the original code)
            return np.reshape(Inorm.T, (h, w, 3)).astype(np.uint8)
        except Exception:
            # Deliberate best effort: an un-normalised patch is preferable to
            # aborting dataset construction.
            return original.astype(np.uint8)

class MaskedFixedTileDataset(Dataset):
    """In-memory dataset of fixed-size tiles centred on mask blobs.

    For every slide image the matching ``mask_*`` file (when present) is
    split into connected components; one tile of ``target_size`` is cropped
    around the centroid of each component with at least 50 pixels. Tiles are
    optionally stain-normalised and stored as arrays in ``self.samples``.

    Args:
        dataframe: table with ``sample_index`` and ``label_index`` columns, or
            ``None`` to list every non-mask ``.png`` in ``img_dir`` with
            label ``-1``.
        img_dir: folder containing the images and their masks.
        transforms: optional callable applied to the PIL tile in
            ``__getitem__``.
        target_size: tile size as ``(height, width)``.
        normalize: when true, tiles go through ``MacenkoNormalizer``.
        inference_mode: when true, every slide yields at least one sample
            (centre crop, or a blank tile if the image cannot be read).
        debug_max: optional cap on the number of slides processed.
    """

    # Connected components smaller than this many pixels are treated as noise.
    MIN_BLOB_PIXELS = 50
    # Background colour used when a tile sticks out of the image.
    PAD_COLOR = (255, 255, 255)

    def __init__(self, dataframe, img_dir, transforms=None, target_size=(224, 224),
                 normalize=True, inference_mode=False, debug_max=None):
        self.samples = []
        self.transforms = transforms
        self.img_dir = img_dir
        self.target_size = target_size
        self.normalizer = MacenkoNormalizer() if normalize else None
        self.inference_mode = inference_mode

        if dataframe is None:  # Inference: no labels available
            img_names = sorted(
                f for f in os.listdir(img_dir)
                if f.lower().endswith('.png') and not f.startswith("mask_")
            )
            iterator = zip(img_names, [-1] * len(img_names))
            total = len(img_names)
        else:  # Train / validation
            iterator = zip(dataframe["sample_index"], dataframe["label_index"])
            total = len(dataframe)

        print(f"Processing {total} slides (Fixed Tile)...")
        progress = tqdm(iterator, total=total, leave=False)
        for count, (img_name, label) in enumerate(progress):
            if debug_max and count >= debug_max:
                break
            self._process_and_extract(img_name, label)
        print(f"Total patches extracted: {len(self.samples)}")

    def _process_and_extract(self, img_name, label):
        """Load one slide and append one tile per sufficiently large blob."""
        img_path = os.path.join(self.img_dir, img_name)
        mask_path = os.path.join(self.img_dir, img_name.replace("img_", "mask_"))

        try:
            # Context managers close the file handles; convert() returns a
            # fully loaded copy that outlives them.
            with Image.open(img_path) as raw_image:
                image = raw_image.convert("RGB")
            # If the name has no "img_" prefix the "mask" path is the image
            # itself, which must not be used as a mask.
            if mask_path != img_path and os.path.exists(mask_path):
                with Image.open(mask_path) as raw_mask:
                    mask_arr = np.array(raw_mask.convert("L")) > 0
            else:
                mask_arr = np.ones((image.size[1], image.size[0]), dtype=bool)
        except Exception as exc:
            # Best effort: report the unreadable slide instead of silently
            # dropping it; inference still needs one sample per slide.
            print(f"[WARN] Could not load {img_name}: {exc}")
            if self.inference_mode:
                self._add_fallback(label, img_name)
            return

        # Find blobs and crop one tile around each centroid
        labeled_mask, n_components = ndimage.label(mask_arr)
        patches_found = 0
        for cid in range(1, n_components + 1):
            ys, xs = np.where(labeled_mask == cid)
            if len(xs) < self.MIN_BLOB_PIXELS:
                continue  # Skip noise
            cy, cx = int(np.mean(ys)), int(np.mean(xs))
            self._add_sample(self._crop_centered(image, cx, cy), label, img_name)
            patches_found += 1

        if patches_found == 0 and self.inference_mode:
            # Centre-crop fallback so the slide still gets a prediction.
            w, h = image.size
            self._add_sample(self._crop_centered(image, w // 2, h // 2), label, img_name)

    def _crop_centered(self, image, cx, cy):
        """Return a ``target_size`` tile centred on (cx, cy), padded with white."""
        th, tw = self.target_size
        y1, y2 = cy - th // 2, cy + th // 2
        x1, x2 = cx - tw // 2, cx + tw // 2

        # Clip the crop box to the image, then pad back what was clipped.
        img_w, img_h = image.size
        patch = image.crop((max(0, x1), max(0, y1), min(img_w, x2), min(img_h, y2)))
        pad = (max(0, -x1), max(0, -y1), max(0, x2 - img_w), max(0, y2 - img_h))
        if any(pad):
            # An RGB fill must be a tuple: a bare integer 255 is unpacked as
            # (255, 0, 0) and would pad with red instead of white.
            patch = ImageOps.expand(patch, border=pad, fill=self.PAD_COLOR)

        # PIL sizes are (width, height) while target_size is (height, width).
        if patch.size != (tw, th):
            patch = patch.resize((tw, th), Image.BICUBIC)
        return patch

    def _add_sample(self, patch_img, label, parent):
        """Store one tile (stain-normalised when enabled) with its label and slide."""
        arr = np.array(patch_img)
        if self.normalizer:
            arr = self.normalizer(arr)
        self.samples.append({"patch": arr, "label": label, "parent": parent})

    def _add_fallback(self, label, parent):
        """Store an all-black placeholder tile for a slide that could not be read."""
        th, tw = self.target_size
        arr = np.zeros((th, tw, 3), dtype=np.uint8)
        self.samples.append({"patch": arr, "label": label, "parent": parent})

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label, parent_slide_name)`` for tile ``idx``."""
        item = self.samples[idx]
        img = Image.fromarray(item["patch"])
        if self.transforms:
            img = self.transforms(img)
        return img, item["label"], item["parent"]

--- DATA PREPARATION ---

In [None]:
# File names of training samples that are removed from the label table before
# the train/validation split.
# NOTE(review): the exclusion criterion is not documented here -- presumably a
# manually curated list of unusable slides; confirm with the author.
SAMPLES_TO_IGNORE = [
    "img_0001.png", "img_0005.png", "img_0008.png", "img_0012.png", "img_0018.png",
    "img_0020.png", "img_0022.png", "img_0027.png", "img_0028.png", "img_0036.png",
    "img_0044.png", "img_0047.png", "img_0048.png", "img_0052.png", "img_0062.png",
    "img_0078.png", "img_0085.png", "img_0090.png", "img_0094.png", "img_0095.png",
    "img_0126.png", "img_0129.png", "img_0130.png", "img_0133.png", "img_0136.png",
    "img_0138.png", "img_0148.png", "img_0150.png", "img_0155.png", "img_0159.png",
    "img_0161.png", "img_0175.png", "img_0178.png", "img_0179.png", "img_0180.png",
    "img_0184.png", "img_0187.png", "img_0189.png", "img_0193.png", "img_0196.png",
    "img_0222.png", "img_0251.png", "img_0254.png", "img_0263.png", "img_0268.png",
    "img_0286.png", "img_0293.png", "img_0313.png", "img_0319.png", "img_0333.png",
    "img_0342.png", "img_0344.png", "img_0346.png", "img_0355.png", "img_0368.png",
    "img_0371.png", "img_0376.png", "img_0380.png", "img_0390.png", "img_0393.png",
    "img_0407.png", "img_0410.png", "img_0415.png", "img_0424.png", "img_0443.png",
    "img_0453.png", "img_0459.png", "img_0463.png", "img_0486.png", "img_0497.png",
    "img_0498.png", "img_0499.png", "img_0509.png", "img_0521.png", "img_0530.png",
    "img_0531.png", "img_0533.png", "img_0537.png", "img_0540.png", "img_0544.png",
    "img_0547.png", "img_0557.png", "img_0558.png", "img_0560.png", "img_0565.png",
    "img_0567.png", "img_0572.png", "img_0578.png", "img_0580.png", "img_0586.png",
    "img_0602.png", "img_0603.png", "img_0607.png", "img_0609.png", "img_0614.png",
    "img_0620.png", "img_0623.png", "img_0629.png", "img_0635.png", "img_0639.png",
    "img_0643.png", "img_0644.png", "img_0645.png", "img_0646.png", "img_0656.png",
    "img_0657.png", "img_0658.png", "img_0670.png", "img_0673.png", "img_0675.png",
]
# Load the label table and drop every sample listed in SAMPLES_TO_IGNORE.
full_df = pd.read_csv(label_file)
_keep_mask = ~full_df["sample_index"].isin(SAMPLES_TO_IGNORE)
full_df = full_df[_keep_mask].reset_index(drop=True)

# Encode class names as contiguous integer indices, in sorted order.
class_names = sorted(full_df["label"].unique())
label_to_idx = {name: idx for idx, name in enumerate(class_names)}
full_df["label_index"] = full_df["label"].map(label_to_idx)
NUM_CLASSES = len(class_names)

# Stratified 80/20 hold-out split (no K-Fold, to keep UNI training fast).
train_df, val_df = train_test_split(
    full_df,
    test_size=0.2,
    stratify=full_df["label_index"],
    random_state=SEED,
)

# Transforms
# v2.ToTensor() is deprecated in torchvision.transforms.v2. Its documented
# replacement is ToImage() followed by ToDtype(torch.float32, scale=True),
# which produces the same float image scaled to [0, 1].
train_transform = transforms.Compose([
    # Geometric and colour augmentation, applied to the PIL tile.
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(180),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
    transforms.ToImage(),
    transforms.ToDtype(torch.float32, scale=True),
    transforms.Normalize(mean=NORMALIZATION_MEAN, std=NORMALIZATION_STD),
])

# Validation / inference: tensor conversion and normalisation only.
val_transform = transforms.Compose([
    transforms.ToImage(),
    transforms.ToDtype(torch.float32, scale=True),
    transforms.Normalize(mean=NORMALIZATION_MEAN, std=NORMALIZATION_STD),
])

print("Building Datasets (this might take a minute)...")
# Both splits read their tiles from the training folder; only the dataframe
# and the transform pipeline differ.
train_ds = MaskedFixedTileDataset(
    dataframe=train_df,
    img_dir=train_dir,
    transforms=train_transform,
    normalize=True,
)
val_ds = MaskedFixedTileDataset(
    dataframe=val_df,
    img_dir=train_dir,
    transforms=val_transform,
    normalize=True,
)

# Training batches are shuffled and the last incomplete batch is dropped;
# validation keeps the dataset order and every sample.
train_loader = DataLoader(
    train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=2,
    pin_memory=True,
)
val_loader = DataLoader(
    val_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

## **5. Modello UNI**

In [None]:
class UNIClassifier(nn.Module):
    """UNI backbone (loaded through timm) followed by a two-layer MLP head.

    Args:
        num_classes: size of the output layer.
        dropout: dropout probability used before each linear layer of the head.
        freeze_backbone: when true, backbone parameters start with
            ``requires_grad = False``.
    """

    def __init__(self, num_classes, dropout=0.4, freeze_backbone=True):
        super().__init__()
        print("Loading UNI backbone...")
        backbone = timm.create_model(
            "hf_hub:MahmoodLab/uni",
            pretrained=True,
            init_values=1e-5,
            dynamic_img_size=True,
        )
        self.backbone = backbone
        self.embed_dim = backbone.num_features
        # Remove the backbone's own classifier so it outputs features.
        backbone.reset_classifier(0)

        hidden_dim = 512
        head_layers = [
            nn.Dropout(dropout),
            nn.Linear(self.embed_dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, num_classes),
        ]
        self.head = nn.Sequential(*head_layers)

        if freeze_backbone:
            self.freeze()

    def _set_backbone_trainable(self, trainable):
        """Set ``requires_grad`` on every backbone parameter."""
        for param in self.backbone.parameters():
            param.requires_grad = trainable

    def freeze(self):
        """Stop gradients from flowing into the backbone parameters."""
        self._set_backbone_trainable(False)

    def unfreeze(self):
        """Make the backbone parameters trainable again."""
        self._set_backbone_trainable(True)

    def forward(self, x):
        """Return the head output computed on the backbone features of ``x``."""
        features = self.backbone(x)
        return self.head(features)

## **6. Training Loop (Robust)**

In [None]:
# Helpers
class FocalLoss(nn.Module):
    """Multi-class focal loss with optional per-class weights.

    Computes ``alpha[target] * (1 - p_t) ** gamma * (-log p_t)`` per sample
    and returns the batch mean, where ``p_t`` is the softmax probability of
    the target class.

    Args:
        alpha: optional 1-D tensor (or sequence) of per-class weights.
        gamma: focusing exponent; 0 reduces the loss to (weighted) cross-entropy.
    """

    def __init__(self, alpha=None, gamma=2.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        """Return the mean focal loss for logits ``inputs`` and class indices ``targets``."""
        # p_t must come from the UNWEIGHTED cross-entropy. Passing the class
        # weights into cross_entropy (as before) made exp(-ce) equal to
        # p_t ** w instead of p_t, distorting the focal term.
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal = (1 - pt) ** self.gamma * ce_loss

        if self.alpha is not None:
            # Apply the class weight as a separate multiplicative factor.
            alpha = torch.as_tensor(self.alpha, dtype=focal.dtype, device=focal.device)
            focal = alpha[targets] * focal
        return focal.mean()

def mixup_data(x, y, alpha=0.2):
    """Mix each sample of a batch with a randomly chosen partner.

    Args:
        x: input batch; samples are indexed along dimension 0.
        y: labels aligned with ``x``.
        alpha: Beta(alpha, alpha) parameter for the mixing coefficient;
            values <= 0 disable mixing (coefficient fixed to 1).

    Returns:
        ``(mixed_x, y_a, y_b, lam)`` where ``mixed_x = lam * x + (1 - lam) *
        x[perm]``, ``y_a`` is ``y`` and ``y_b`` is ``y[perm]``.
    """
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1
    # Draw the permutation on the CPU generator (same random stream as before)
    # and move it to the input's own device rather than a global one, so the
    # function works wherever the batch lives.
    perm = torch.randperm(x.size(0))
    mixed_x = lam * x + (1 - lam) * x[perm.to(x.device)]
    return mixed_x, y, y[perm.to(y.device)], lam

def mixup_criterion(crit, pred, ya, yb, lam):
    """Blend the loss of ``pred`` against both label sets of a mixed batch.

    Returns ``lam * crit(pred, ya) + (1 - lam) * crit(pred, yb)``.
    """
    loss_a = crit(pred, ya)
    loss_b = crit(pred, yb)
    return lam * loss_a + (1 - lam) * loss_b

def validate_slide(model, loader):
    """Return the slide-level macro F1 of ``model`` on ``loader``.

    Tile probabilities are grouped by parent slide. For each slide and each
    class, the highest 30% of tile probabilities (at least one tile) are
    averaged, and the class with the largest average is the slide prediction.
    """
    model.eval()
    probs_by_slide, label_by_slide = {}, {}

    with torch.no_grad():
        for inputs, targets, pids in loader:
            logits = model(inputs.to(device))
            batch_probs = torch.softmax(logits, dim=1).cpu().numpy()
            batch_labels = targets.cpu().numpy()
            for pid, prob_row, label in zip(pids, batch_probs, batch_labels):
                if pid not in probs_by_slide:
                    probs_by_slide[pid] = []
                probs_by_slide[pid].append(prob_row)
                label_by_slide[pid] = label

    y_true, y_pred = [], []
    for pid, rows in probs_by_slide.items():
        # Top-K pooling: per class, mean of the top 30% tile probabilities.
        p_matrix = np.array(rows)
        k = max(1, int(len(p_matrix) * 0.3))
        descending = np.sort(p_matrix, axis=0)[::-1]
        score = np.mean(descending[:k], axis=0)
        y_true.append(label_by_slide[pid])
        y_pred.append(np.argmax(score))

    return f1_score(y_true, y_pred, average="macro")

def _make_grad_scaler(enabled):
    """Create an AMP gradient scaler, preferring the non-deprecated API when present."""
    if hasattr(torch.amp, "GradScaler"):
        return torch.amp.GradScaler("cuda", enabled=enabled)
    return torch.cuda.amp.GradScaler(enabled=enabled)


def _optimizer_step(model, opt, scaler, clip_norm=None):
    """Apply the accumulated gradients (optionally clipped) and reset them."""
    if clip_norm is not None:
        # Gradients must be unscaled before their norm is measured.
        scaler.unscale_(opt)
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_norm)
    scaler.step(opt)
    scaler.update()
    opt.zero_grad()


def _train_one_epoch(model, loader, crit, opt, scaler, use_amp,
                     mixup_alpha=0.0, clip_norm=None, desc=None):
    """Train for one epoch with gradient accumulation; return the mean batch loss."""
    model.train()
    opt.zero_grad()  # never start an epoch with stale gradients
    total_loss = 0.0
    pending = 0  # batches accumulated since the last optimizer step

    for xb, yb, _ in tqdm(loader, desc=desc, leave=False):
        xb, yb = xb.to(device), yb.to(device)
        use_mixup = mixup_alpha > 0
        if use_mixup:
            xb, ya, yb_mixed, lam = mixup_data(xb, yb, mixup_alpha)

        with torch.amp.autocast("cuda", enabled=use_amp):
            out = model(xb)
            if use_mixup:
                loss = mixup_criterion(crit, out, ya, yb_mixed, lam)
            else:
                loss = crit(out, yb)

        # Divide so the accumulated gradient is an average over the
        # accumulation window rather than a sum.
        scaler.scale(loss / GRAD_ACCUMULATION).backward()
        total_loss += loss.item()
        pending += 1

        if pending == GRAD_ACCUMULATION:
            _optimizer_step(model, opt, scaler, clip_norm)
            pending = 0

    # Flush a trailing, incomplete accumulation window so its gradients are
    # neither lost nor carried into the next epoch.
    if pending:
        _optimizer_step(model, opt, scaler, clip_norm)

    return total_loss / max(1, len(loader))


def run_training():
    """Train the UNI classifier in two phases and return the best model.

    Phase 1 trains only the head for 5 epochs with the backbone frozen.
    Phase 2 unfreezes the backbone and fine-tunes everything with
    differential learning rates, mixup, gradient clipping, cosine LR decay
    and early stopping on the slide-level validation F1. The best checkpoint
    is written to ``models/uni_best.pt`` and reloaded before returning, so the
    returned model is the best one rather than the last epoch's.
    """
    warmup_epochs = 5
    ckpt_path = "models/uni_best.pt"
    os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)

    # Mixed precision only makes sense on CUDA; on CPU run in full precision.
    use_amp = device.type == "cuda"

    model = UNIClassifier(NUM_CLASSES, dropout=DROPOUT).to(device)

    # Class weights
    cls_w = compute_class_weight("balanced", classes=np.unique(train_df["label_index"]), y=train_df["label_index"])
    crit = FocalLoss(alpha=torch.tensor(cls_w, dtype=torch.float32).to(device))

    # 1. Warmup Head
    print("\n--- Phase 1: Head Warmup (Frozen Backbone) ---")
    opt = AdamW(model.head.parameters(), lr=LR_HEAD, weight_decay=WEIGHT_DECAY)
    scaler = _make_grad_scaler(use_amp)

    for ep in range(warmup_epochs):
        mean_loss = _train_one_epoch(model, train_loader, crit, opt, scaler, use_amp)
        val_f1 = validate_slide(model, val_loader)
        print(f"Warmup Ep {ep+1} | Loss: {mean_loss:.4f} | Val Slide F1: {val_f1:.4f}")

    # 2. Full Training
    print("\n--- Phase 2: Full Finetuning (Unfrozen) ---")
    model.unfreeze()

    # Differential LR: tiny steps for the pretrained backbone, larger for the head
    params = [
        {'params': model.backbone.parameters(), 'lr': LR_BACKBONE},
        {'params': model.head.parameters(), 'lr': LR_HEAD * 0.5}
    ]
    opt = AdamW(params, weight_decay=WEIGHT_DECAY)
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=EPOCHS, eta_min=1e-7)
    scaler = _make_grad_scaler(use_amp)

    best_f1 = float("-inf")  # guarantees the first epoch is checkpointed
    patience_counter = 0
    checkpoint_saved = False

    for ep in range(1, EPOCHS+1):
        mean_loss = _train_one_epoch(
            model, train_loader, crit, opt, scaler, use_amp,
            mixup_alpha=MIXUP_ALPHA, clip_norm=1.0, desc=f"Ep {ep}",
        )

        sched.step()
        val_f1 = validate_slide(model, val_loader)

        print(f"Ep {ep} | Loss: {mean_loss:.4f} | Val F1: {val_f1:.4f} | LR: {opt.param_groups[0]['lr']:.2e}")

        if val_f1 > best_f1:
            best_f1 = val_f1
            torch.save(model.state_dict(), ckpt_path)
            checkpoint_saved = True
            print("--> Saved Best Model")
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print("Early Stopping.")
                break

    # Restore the best weights: after early stopping the in-memory model is
    # PATIENCE epochs past its best validation score.
    if checkpoint_saved:
        model.load_state_dict(torch.load(ckpt_path, map_location=device))

    return model

In [None]:
# Start UNI training
if torch.cuda.is_available():
    # Release cached CUDA memory before the training run starts.
    torch.cuda.empty_cache()

uni_model = run_training()

In [None]:
import pandas as pd
import torch
import numpy as np
import os
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader

def generate_uni_submission(model, test_dir, output_file="submission_uni_baseline.csv", output_dir=None):
    """Predict one label per test slide and write the submission CSV.

    Args:
        model: trained classifier; it is switched to eval mode.
        test_dir: folder with the test images (and optional ``mask_*`` files).
        output_file: file name of the CSV to write.
        output_dir: destination folder. Defaults to ``<base_dir>/submission``
            when a global ``base_dir`` exists, else to ``submission``.

    Returns:
        DataFrame with ``sample_index`` and ``label`` columns, sorted by
        ``sample_index``.
    """
    print(f"Generating UNI-only submission from {test_dir}...")

    # 1. Test dataset (inference mode guarantees at least one tile per slide)
    test_ds = MaskedFixedTileDataset(
        dataframe=None,
        img_dir=test_dir,
        transforms=val_transform,
        target_size=IMG_RESIZE,
        normalize=True,
        inference_mode=True
    )
    test_loader = DataLoader(test_ds, batch_size=32, shuffle=False, num_workers=2)

    # 2. Inference loop
    model.eval()
    slide_probs = {}
    use_amp = device.type == "cuda"  # autocast is only meaningful on CUDA

    print("Running Inference (UNI)...")
    with torch.no_grad():
        for inputs, _, parent_ids in tqdm(test_loader):
            inputs = inputs.to(device)

            # TTA: average the original and the horizontally flipped image
            with torch.amp.autocast('cuda', enabled=use_amp):
                probs = torch.softmax(model(inputs), dim=1)
                probs_flip = torch.softmax(model(torch.flip(inputs, dims=[3])), dim=1)

            avg_probs = ((probs + probs_flip) / 2.0).float().cpu().numpy()

            # Group tile probabilities by slide
            for i, pid in enumerate(parent_ids):
                slide_probs.setdefault(pid, []).append(avg_probs[i])

    # 3. Aggregation: per class, mean of the top 30% tile probabilities
    # Invert the training mapping so predicted indices always resolve to the
    # label they were trained with, whatever the ordering of the mapping.
    idx_to_label = {idx: name for name, idx in label_to_idx.items()}
    fallback_label = idx_to_label[min(idx_to_label)]

    final_rows = []
    for img_name, prob_list in slide_probs.items():
        prob_matrix = np.array(prob_list)
        k = max(1, int(len(prob_matrix) * 0.3))
        top_k = np.sort(prob_matrix, axis=0)[-k:, :]
        slide_score = np.mean(top_k, axis=0)
        pred_label = idx_to_label[int(np.argmax(slide_score))]
        final_rows.append({"sample_index": img_name, "label": pred_label})

    # 4. Fill in any test file that produced no prediction, then save.
    # The filter matches the one used by the dataset ("mask_" prefix).
    all_test_files = sorted(
        f for f in os.listdir(test_dir)
        if f.lower().endswith(".png") and not f.startswith("mask_")
    )
    processed_ids = {row["sample_index"] for row in final_rows}
    for f in all_test_files:
        if f not in processed_ids:
            final_rows.append({"sample_index": f, "label": fallback_label})

    if output_dir is None:
        # base_dir only exists in some environments; fall back to a local folder.
        root = globals().get("base_dir")
        output_dir = os.path.join(root, "submission") if root else "submission"
    # Create the folder that is actually written to, and honour output_file.
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, output_file)

    # Explicit columns keep sort_values working even with zero rows.
    sub_df = pd.DataFrame(final_rows, columns=["sample_index", "label"]).sort_values("sample_index")
    sub_df.to_csv(output_path, index=False)

    print(f"✅ Baseline Submission Saved: {output_path}")
    return sub_df

# --- EXECUTION ---
# Make sure uni_model is available; otherwise rebuild it from the best checkpoint.
if 'uni_model' not in locals():
    print("Loading best model...")
    # Same head configuration as in training.
    uni_model = UNIClassifier(NUM_CLASSES, dropout=DROPOUT).to(device)
    # map_location lets a checkpoint saved on GPU load on a CPU-only runtime.
    uni_model.load_state_dict(torch.load("models/uni_best.pt", map_location=device))

# Pass the file name explicitly so the written file and the log message agree.
df_baseline = generate_uni_submission(uni_model, test_dir, output_file="UNI_DATASET_GIUSTO.csv")
df_baseline.head()