# **Soft voting**


In [50]:
# Enviroment
isColab = False
colab_dir = "/gdrive/My Drive/Colab Notebooks/[2025-2026] AN2DL/AN2DL-challenge-2"

isKaggle = False
isWsl = True

# Set seed for reproducibility
SEED = 42

## **Loading Enviroment**


In [51]:
import os

# Directory di default
current_dir = os.getcwd()   

if isColab:
    from google.colab import drive # type: ignore
    drive.mount("/gdrive")
    current_dir = colab_dir
    print("In esecuzione su Colab. Google Drive montato.")
    %cd $current_dir
elif isKaggle:
    kaggle_work_dir = "/kaggle/working/AN2DL-challenge-2"
    os.makedirs(kaggle_work_dir, exist_ok=True)
    current_dir = kaggle_work_dir
    print("In esecuzione su Kaggle. Directory di lavoro impostata.")
    os.chdir(current_dir)
elif isWsl:
    local_pref = r"/mnt/g/Il mio Drive/Colab Notebooks/[2025-2026] AN2DL/AN2DL-challenge-2"
    current_dir = local_pref if os.path.isdir(local_pref) else os.getcwd()
    print(f"Esecuzione su WSL. Directory corrente impostata a: {current_dir}")
    os.chdir(current_dir)
else:
    print("Esecuzione locale. Salto mount Google Drive.")
    local_pref = r"G:\Il mio Drive\Colab Notebooks\[2025-2026] AN2DL\AN2DL-challenge-2"
    current_dir = local_pref if os.path.isdir(local_pref) else os.getcwd()
    print(f"Directory corrente impostata a: {current_dir}")
    os.chdir(current_dir)

print(f"Changed directory to: {current_dir}")

# Define absolute paths
dataset_dir = os.path.join(current_dir, "dataset")
train_set_dir = os.path.join(dataset_dir, "train_data")
test_set_dir = os.path.join(dataset_dir, "test_data")
label_file = os.path.join(dataset_dir, "train_labels.csv")

print(f"Dataset directory: {dataset_dir}")
print(f"Train set directory: {train_set_dir}")
print(f"Test set directory: {test_set_dir}")
print(f"Label file: {label_file}")

Esecuzione su WSL. Directory corrente impostata a: /mnt/g/Il mio Drive/Colab Notebooks/[2025-2026] AN2DL/AN2DL-challenge-2
Changed directory to: /mnt/g/Il mio Drive/Colab Notebooks/[2025-2026] AN2DL/AN2DL-challenge-2
Dataset directory: /mnt/g/Il mio Drive/Colab Notebooks/[2025-2026] AN2DL/AN2DL-challenge-2/dataset
Train set directory: /mnt/g/Il mio Drive/Colab Notebooks/[2025-2026] AN2DL/AN2DL-challenge-2/dataset/train_data
Test set directory: /mnt/g/Il mio Drive/Colab Notebooks/[2025-2026] AN2DL/AN2DL-challenge-2/dataset/test_data
Label file: /mnt/g/Il mio Drive/Colab Notebooks/[2025-2026] AN2DL/AN2DL-challenge-2/dataset/train_labels.csv


## **Import Libraries**


In [52]:
# Set environment variables before importing modules
os.environ['PYTHONHASHSEED'] = str(SEED)
os.environ['MPLCONFIGDIR'] = os.getcwd() + '/configs/'

# Suppress warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

# Import necessary modules
import logging
import random
import numpy as np

# Set seeds for random number generators in NumPy and Python
np.random.seed(SEED)
random.seed(SEED)

# Import PyTorch
import torch
torch.manual_seed(SEED)
from torch import nn
from torchsummary import summary
import torchvision
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import TensorDataset, DataLoader
%pip install torchview
from torchview import draw_graph


# Configurazione di TensorBoard e directory
logs_dir = "tensorboard"
if isColab or isKaggle:
    !pkill -f tensorboard 
    !mkdir -p models
    print("Killed existing TensorBoard instances and created models directory.") 

os.makedirs("models", exist_ok=True)  

%load_ext tensorboard


if torch.cuda.is_available():
    device = torch.device("cuda")
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.benchmark = True
else:
    device = torch.device("cpu")

print(f"PyTorch version: {torch.__version__}")
print(f"Device: {device}")

# Import other libraries
import cv2
import copy
import shutil
from itertools import product
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
from PIL import Image
import matplotlib.gridspec as gridspec
import requests
from io import BytesIO
from torch.utils.data import Dataset, DataLoader
from scipy import ndimage
from torch.optim import AdamW

# Configure plot display settings
sns.set(font_scale=1.4)
sns.set_style('white')
plt.rc('font', size=14)
%matplotlib inline

Note: you may need to restart the kernel to use updated packages.
The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
PyTorch version: 2.9.1+cu130
Device: cuda


### **Preparing Dataset for colab**

In [53]:
if isColab:
    drive_dataset_dir = os.path.join(current_dir, "dataset")
    local_dataset_dir = "/content/dataset"

    if not os.path.exists(local_dataset_dir):
        print(f"Copying dataset from {drive_dataset_dir} to {local_dataset_dir}...")
        try:
            shutil.copytree(drive_dataset_dir, local_dataset_dir)
            print("Copy complete.")
        except Exception as e:
            print(f"Error copying dataset: {e}")
            print("Falling back to Drive dataset (slow).")
            # If copy fails, we stick to the original dataset_dir (which might need cleaning too if it was used directly)
            dataset_dir = drive_dataset_dir
    else:
        print("Dataset already copied to local runtime.")

    # If copy succeeded (or already existed), use local path
    if os.path.exists(local_dataset_dir):
        dataset_dir = local_dataset_dir

## ‚è≥ **Data Loading**


In [54]:
# Loader parameters
APPLY_MASK = False
BATCH_SIZE = 32
LOADER_SHUFFLE = False

NORMALIZATION_MEAN = [0.485, 0.456, 0.406]
NORMALIZATION_STD = [0.229, 0.224, 0.225]
IMG_PADDING = 5

IMG_RESIZE = (224, 224)
INPUT_SHAPE = (3, *IMG_RESIZE)

TEST_SET_SIZE = 0.0
VAL_SET_SIZE = 0.2
TRAIN_SET_SIZE = 1.0 - TEST_SET_SIZE - VAL_SET_SIZE

### **Definitions**


In [55]:
SAMPLES_TO_IGNORE = [
    "img_0001.png",
    "img_0005.png",
    "img_0008.png",
    "img_0012.png",
    "img_0018.png",
    "img_0020.png",
    "img_0022.png",
    "img_0027.png",
    "img_0028.png",
    "img_0036.png",
    "img_0044.png",
    "img_0047.png",
    "img_0048.png",
    "img_0052.png",
    "img_0062.png",
    "img_0078.png",
    "img_0085.png",
    "img_0090.png",
    "img_0094.png",
    "img_0095.png",
    "img_0126.png",
    "img_0129.png",
    "img_0130.png",
    "img_0133.png",
    "img_0136.png",
    "img_0138.png",
    "img_0148.png",
    "img_0150.png",
    "img_0155.png",
    "img_0159.png",
    "img_0161.png",
    "img_0175.png",
    "img_0178.png",
    "img_0179.png",
    "img_0180.png",
    "img_0184.png",
    "img_0187.png",
    "img_0189.png",
    "img_0193.png",
    "img_0196.png",
    "img_0222.png",
    "img_0251.png",
    "img_0254.png",
    "img_0263.png",
    "img_0268.png",
    "img_0286.png",
    "img_0293.png",
    "img_0313.png",
    "img_0319.png",
    "img_0333.png",
    "img_0342.png",
    "img_0344.png",
    "img_0346.png",
    "img_0355.png",
    "img_0368.png",
    "img_0371.png",
    "img_0376.png",
    "img_0380.png",
    "img_0390.png",
    "img_0393.png",
    "img_0407.png",
    "img_0410.png",
    "img_0415.png",
    "img_0424.png",
    "img_0443.png",
    "img_0453.png",
    "img_0459.png",
    "img_0463.png",
    "img_0486.png",
    "img_0497.png",
    "img_0498.png",
    "img_0499.png",
    "img_0509.png",
    "img_0521.png",
    "img_0530.png",
    "img_0531.png",
    "img_0533.png",
    "img_0537.png",
    "img_0540.png",
    "img_0544.png",
    "img_0547.png",
    "img_0557.png",
    "img_0558.png",
    "img_0560.png",
    "img_0565.png",
    "img_0567.png",
    "img_0572.png",
    "img_0578.png",
    "img_0580.png",
    "img_0586.png",
    "img_0602.png",
    "img_0603.png",
    "img_0607.png",
    "img_0609.png",
    "img_0614.png",
    "img_0620.png",
    "img_0623.png",
    "img_0629.png",
    "img_0635.png",
    "img_0639.png",
    "img_0643.png",
    "img_0644.png",
    "img_0645.png",
    "img_0646.png",
    "img_0656.png",
    "img_0657.png",
    "img_0658.png",
    "img_0670.png",
    "img_0673.png",
    "img_0675.png",
]

In [56]:
# Load the full dataframe
full_df = pd.read_csv(label_file)

# Remove cursed images
full_df = full_df[~full_df["sample_index"].isin(SAMPLES_TO_IGNORE)].reset_index(
    drop=True
)

# Label mapping
class_names = sorted(full_df["label"].unique())
label_to_index = {name: idx for idx, name in enumerate(class_names)}
full_df["label_index"] = full_df["label"].map(label_to_index)
num_classes = len(class_names)
print(f"Number of classes: {num_classes}")

Number of classes: 4


In [57]:
def make_loader(ds, batch_size, shuffle, drop_last=False):
    """Create a PyTorch DataLoader with optimized settings."""
    cpu_cores = os.cpu_count() or 2
    num_workers = max(2, min(6, cpu_cores))

    return DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=shuffle,
        drop_last=drop_last,
        num_workers=num_workers,
        pin_memory=True,
        pin_memory_device="cuda" if torch.cuda.is_available() else "",
        prefetch_factor=4,
        persistent_workers=isWsl,
    )

In [None]:
from scipy import ndimage
from PIL import Image, ImageOps
import numpy as np
import torch
from torch.utils.data import Dataset
from tqdm.notebook import tqdm
from torchvision.transforms import v2 as transforms


class MaskedFixedTileDataset(Dataset):
    """
    A Dataset class that extracts fixed-size patches from the center of tissue masks
    to preserve biological scale (magnification), rather than resizing variable crops.
    """

    def __init__(
        self, dataframe, img_dir, transform=None, target_size=(224, 224), debug_max=None
    ):
        self.samples = []
        self.transform = transform
        self.img_dir = img_dir
        self.target_size = target_size

        # Handling inference mode (no labels) vs training mode
        self.is_inference_mode = False
        if dataframe is None or "label_index" not in dataframe.columns:
            self.is_inference_mode = True
            if dataframe is None:
                # If just a directory, list images
                img_names = sorted(
                    [f for f in os.listdir(img_dir) if f.startswith("img_")]
                )
            else:
                img_names = dataframe["sample_index"].tolist()
            iterator = zip(img_names, [-1] * len(img_names))
            total_items = len(img_names)
        else:
            iterator = zip(dataframe["sample_index"], dataframe["label_index"])
            total_items = len(dataframe)

        print(
            f"Processing {total_items} images to extract fixed-size {target_size} tiles..."
        )

        count = 0
        for img_name, label in tqdm(iterator, total=total_items):
            if debug_max and count >= debug_max:
                break
            self._process_and_extract(img_name, label)
            count += 1

        print(f"Extraction complete. Total patches: {len(self.samples)}")

    def _process_and_extract(self, img_name, label):
        img_path = os.path.join(self.img_dir, img_name)
        mask_path = os.path.join(self.img_dir, img_name.replace("img_", "mask_"))

        try:
            image = Image.open(img_path).convert("RGB")
            mask = Image.open(mask_path).convert("L")
        except Exception as e:
            print(f"Warning: Could not load {img_name}: {e}")
            return

        img_w, img_h = image.size
        # Create binary mask for component labeling
        mask_arr = np.array(mask) > 0

        # Label connected components (blobs) in the mask
        labeled_mask, n_components = ndimage.label(mask_arr) # type: ignore

        for cid in range(1, n_components + 1):
            # Extract coordinates of the current blob
            ys, xs = np.where(labeled_mask == cid)

            # Filter out very small noise artifacts (< 50 pixels)
            if len(xs) < 50:
                continue

            # Calculate the centroid (center of mass) of the blob
            cy, cx = int(np.mean(ys)), int(np.mean(xs))

            # Define the fixed-size crop window around the centroid
            th, tw = self.target_size
            half_h, half_w = th // 2, tw // 2

            y1 = cy - half_h
            y2 = cy + half_h
            x1 = cx - half_w
            x2 = cx + half_w

            # Handle Edge Cases: Calculate intersection with the actual image
            img_y1, img_y2 = max(0, y1), min(img_h, y2)
            img_x1, img_x2 = max(0, x1), min(img_w, x2)

            # Extract the valid region from the image
            patch_crop = image.crop((img_x1, img_y1, img_x2, img_y2))

            # Calculate required padding if the crop extended beyond image bounds
            pad_left = max(0, -x1)
            pad_top = max(0, -y1)
            pad_right = max(0, x2 - img_w)
            pad_bottom = max(0, y2 - img_h)

            # If padding is needed, pad with white (255) which is standard background in histology
            if pad_left > 0 or pad_top > 0 or pad_right > 0 or pad_bottom > 0:
                patch = ImageOps.expand(
                    patch_crop,
                    border=(pad_left, pad_top, pad_right, pad_bottom),
                    fill=255,
                )
            else:
                patch = patch_crop

            # Ensure precise size match (e.g., if rounding errors occurred)
            if patch.size != self.target_size:
                patch = patch.resize(self.target_size, Image.BICUBIC) # type: ignore

            # Store in RAM (Efficient for ~2k images yielding ~10k-20k patches)
            self.samples.append(
                {"patch": np.array(patch), "label": label, "parent": img_name}
            )

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        item = self.samples[idx]
        img = Image.fromarray(item["patch"])
        label = item["label"]

        if self.transform:
            img = self.transform(img)

        return img, label, item["parent"]

In [None]:
def compute_dataset_stats(dataset_class, dataframe, img_dir):
    """
    Computes channel-wise Mean and Std on the dataset without any normalization applied.
    """
    print("Computing dataset Mean and Std (this may take a moment)...")

    # define a simple transform that only converts to tensor
    basic_transform = transforms.Compose(
        [transforms.Resize(IMG_RESIZE), transforms.ToTensor()]
    )

    # Instantiate dataset temporarily
    temp_ds = dataset_class(dataframe, img_dir, transform=basic_transform)
    loader = make_loader(temp_ds, batch_size=BATCH_SIZE, shuffle=False)

    mean = 0.0
    std = 0.0
    nb_samples = 0.0

    for data, _, _ in tqdm(loader):
        batch_samples = data.size(0)
        # Flatten H and W to calculate stats per channel
        data = data.view(batch_samples, data.size(1), -1)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples

    mean /= nb_samples
    std /= nb_samples

    print(f"\nDONE. Copy these values into your config:")
    print(f"NEW_MEAN = {mean.tolist()}") # type: ignore
    print(f"NEW_STD = {std.tolist()}") # type: ignore
    return mean.tolist(), std.tolist()  # type: ignore

In [59]:
print("Calculating stats on Training Data...")
    
# We use the class we just defined
custom_mean, custom_std = compute_dataset_stats(
        dataset_class=MaskedFixedTileDataset, 
        dataframe=full_df, 
        img_dir=train_set_dir
    )

NORMALIZATION_MEAN = custom_mean
NORMALIZATION_STD = custom_std

Calculating stats on Training Data...
Computing dataset Mean and Std (this may take a moment)...
Processing 581 images to extract fixed-size (224, 224) tiles...


  0%|          | 0/581 [00:00<?, ?it/s]

Extraction complete. Total patches: 4955


  0%|          | 0/155 [00:00<?, ?it/s]


DONE. Copy these values into your config:
NEW_MEAN = [0.6673933863639832, 0.6174471974372864, 0.6541767120361328]
NEW_STD = [0.08422686159610748, 0.11849816143512726, 0.0835428312420845]


In [60]:
# Define transformations

# ADVICE 3
train_transform_tl = transforms.Compose(
    [
        transforms.Resize(IMG_RESIZE),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORMALIZATION_MEAN, std=NORMALIZATION_STD),
    ]
)

train_transform_ft = transforms.Compose(
    [
        transforms.RandomResizedCrop(
            IMG_RESIZE, scale=(0.7, 1.0), ratio=(0.75, 1.33), antialias=True
        ),
        transforms.RandAugment(num_ops=2, magnitude=7),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomRotation(15),  # type: ignore
        
        transforms.RandomApply([
            transforms.ElasticTransform(alpha=50.0, sigma=5.0)
        ], p=0.25),
        
        
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORMALIZATION_MEAN, std=NORMALIZATION_STD),
    ]
)

data_transforms = transforms.Compose(
    [
        transforms.Resize(IMG_RESIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORMALIZATION_MEAN, std=NORMALIZATION_STD),
    ]
)

In [61]:
# Train/validation split (stratified)
train_df, val_df = train_test_split(
    full_df,
    test_size=(TEST_SET_SIZE + VAL_SET_SIZE),
    stratify=full_df["label"],
    random_state=SEED,
)
print(f"Train samples: {len(train_df)}, Val samples: {len(val_df)}")

Train samples: 464, Val samples: 117


In [None]:
train_dataset = MaskedFixedTileDataset(
    train_df,
    train_set_dir,
    transform=train_transform_tl,
    target_size=IMG_RESIZE,
    debug_max=None,
)
val_dataset = MaskedFixedTileDataset(
    val_df, train_set_dir, transform=data_transforms, target_size=IMG_RESIZE, debug_max=None
)

train_loader = make_loader(train_dataset, BATCH_SIZE, shuffle=True, drop_last=False)
val_loader = make_loader(val_dataset, BATCH_SIZE, shuffle=False, drop_last=False)

# Alias per le fasi di training/fine-tuning
train_tl_loader, val_tl_loader = train_loader, val_loader
train_ft_loader, val_ft_loader = train_loader, val_loader

print(f"Train samples: {len(train_dataset)}, Val samples: {len(val_dataset)}")
print(f"Input tensor shape: {INPUT_SHAPE}")

Processing 464 images to extract fixed-size (224, 224) tiles...


  0%|          | 0/464 [00:00<?, ?it/s]

In [None]:
def unnormalize(img, mean, std):
    img = np.array(img, copy=True)
    for c in range(3):
        img[c] = img[c] * std[c] + mean[c]
    return np.clip(img, 0, 1)


def show_mask_patch_effect(dataset, train_transform, data_transforms, n=3):
    parents = list({s["parent"] for s in dataset.samples})
    parents = random.sample(parents, min(n, len(parents)))

    for img_name in parents:
        img_path = os.path.join(dataset.img_dir, img_name)
        mask_path = os.path.join(dataset.img_dir, img_name.replace("img_", "mask_"))

        image = np.array(Image.open(img_path).convert("RGB"))
        mask = np.array(Image.open(mask_path).convert("L")) > 0

        labeled_mask, n_components = ndimage.label(mask)  # type: ignore

        fig = plt.figure(figsize=(4 * (n_components + 1), 5))
        gs = fig.add_gridspec(1, n_components + 1)

        # Originale + maschera
        ax0 = fig.add_subplot(gs[0])
        ax0.imshow(image)
        ax0.imshow(mask, alpha=0.4, cmap="Reds")
        ax0.set_title("Originale + Maschera")
        ax0.axis("off")

        col = 1
        for cid in range(1, n_components + 1):
            ys, xs = np.where(labeled_mask == cid)
            if len(xs) < 15:
                continue

            x1, x2 = xs.min(), xs.max()
            y1, y2 = ys.min(), ys.max()

            patch = image[y1:y2, x1:x2]
            patch_pil = Image.fromarray(patch)

            train_img = train_transform(patch_pil)
            train_img = unnormalize(train_img.numpy(), NORMALIZATION_MEAN, NORMALIZATION_STD)

            ax = fig.add_subplot(gs[col])
            ax.imshow(np.transpose(train_img, (1, 2, 0)))
            ax.set_title(f"Patch {cid}")
            ax.axis("off")
            col += 1

        plt.suptitle(f"{img_name} ‚Äì Patch estratte dai blob", fontsize=14)
        plt.show()

In [None]:
# TODO: fix the show mask function, beacuse data_transform does nothing

show_mask_patch_effect(
    dataset=train_dataset,
    train_transform=train_transform_tl,
    data_transforms=data_transforms,
    n=5,
)

## üßÆ **Network Parameters**


In [None]:
# Training parameters
LEARNING_RATE = 1e-5
EPOCHS = 200
PATIENCE = 20
DROPOUT_RATE = 0.25
WEIGHT_DECAY = 1e-2

# Regularization
DROPOUT_RATE = 0.3
LABEL_SMOOTHING = 0.05

# Fine tuning parameters
FT_LEARNING_RATE = 5e-6
FT_WEIGHT_DECAY = 1e-3
FT_DROPOUT_RATE = 0.2
N_LAYERS_TO_UNFREEZE = 20

VERBOSE = 5

## üß† **Training Functions**


In [None]:
import numpy as np
import torch


def mixup_data(x, y, alpha=0.2, device="cuda"):
    """Returns mixed inputs, pairs of targets, and lambda"""
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size(0)
    index = torch.randperm(batch_size).to(device)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Calculates weighted loss for mixed targets"""
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

In [None]:
from sklearn.metrics import accuracy_score, f1_score


def train_one_epoch(model, train_loader, criterion, optimizer, scaler, device, mixup_alpha=0.0):
    model.train()
    running_loss = 0.0
    all_predictions = []
    all_targets = []

    for inputs, targets, _ in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad(set_to_none=True)
        if mixup_alpha > 0:
            # Generate mixed inputs
            inputs, targets_a, targets_b, lam = mixup_data(
                inputs, targets, mixup_alpha, device
            )

            with torch.amp.autocast(device_type=device.type, enabled=(device.type == "cuda")): # type: ignore
                logits = model(inputs)
                # Calculate loss mixing both targets
                loss = mixup_criterion(criterion, logits, targets_a, targets_b, lam)
        else:
            # Standard training
            with torch.amp.autocast(device_type=device.type, enabled=(device.type == "cuda")): # type: ignore
                logits = model(inputs)
                loss = criterion(logits, targets)

        # Backpropagation
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item() * inputs.size(0)

        preds = logits.argmax(dim=1)

        # If mixed, use targets_a for rough accuracy estimation
        current_targets = targets_a if mixup_alpha > 0 else targets # type: ignore

        all_predictions.append(preds.cpu().numpy())
        all_targets.append(current_targets.cpu().numpy())

    y_true = np.concatenate(all_targets)
    y_pred = np.concatenate(all_predictions)

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = accuracy_score(y_true, y_pred)
    epoch_f1 = f1_score(y_true, y_pred, average="macro")

    return epoch_loss, epoch_acc, epoch_f1

In [None]:
import torch.nn.functional as F


def validate_one_epoch(model, val_loader, criterion, device):
    """Validate one epoch with test time augmentation

    Args:
        model (torch.nn.Module): model to validate
        val_loader (torch.utils.data.DataLoader): dataset loader
        criterion (torch.nn.modules.loss._Loss): criterion
        device (torch.device): device
    """

    model.eval()
    running_loss = 0.0
    all_predictions = []
    all_targets = []

    with torch.no_grad():
        for inputs, targets, _ in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            with torch.amp.autocast(device_type=device.type, enabled=(device.type == "cuda")): # type: ignore
                # TTA Strategy:
                # 1. Original
                out1 = F.softmax(model(inputs), dim=1)

                # 2. Horizontal Flip
                inputs_hf = torch.flip(inputs, dims=[3])
                out2 = F.softmax(model(inputs_hf), dim=1)

                # 3. Vertical Flip
                inputs_vf = torch.flip(inputs, dims=[2])
                out3 = F.softmax(model(inputs_vf), dim=1)

                # Average predictions
                avg_probs = (out1 + out2 + out3) / 3.0

                # Calculate Loss (using original inputs for approximation)
                # Note: Loss with TTA is tricky, usually we just track accuracy/F1
                # We use out1 for loss logging to be consistent
                loss = criterion(model(inputs), targets)
                running_loss += loss.item() * inputs.size(0)

                preds = avg_probs.argmax(dim=1)

                all_predictions.append(preds.cpu().numpy())
                all_targets.append(targets.cpu().numpy())

    y_true = np.concatenate(all_targets)
    y_pred = np.concatenate(all_predictions)

    epoch_loss = running_loss / len(val_loader.dataset)
    epoch_acc = accuracy_score(y_true, y_pred)
    epoch_f1 = f1_score(y_true, y_pred, average="macro")

    return epoch_loss, epoch_acc, epoch_f1

In [None]:
def fit(
    model,
    train_loader,
    val_loader,
    epochs,
    criterion,
    optimizer,
    scaler,
    device,
    scheduler=None,
    patience=0,
    evaluation_metric="val_f1",
    mode="max",
    restore_best_weights=True,
    writer=None,
    verbose=1,
    experiment_name="",
    mixup_alpha=0.0,
):

    history = {"train_loss": [], "train_f1": [], "val_loss": [], "val_f1": []}

    best_metric = float("-inf") if mode == "max" else float("inf")
    best_epoch = 0
    patience_counter = 0

    print(f"Training {epochs} epochs...")
    for epoch in range(1, epochs + 1):
        train_loss, _, train_f1 = train_one_epoch(model, train_loader, criterion, optimizer, scaler, device, mixup_alpha)
        val_loss, _, val_f1 = validate_one_epoch(model, val_loader, criterion, device)

        if scheduler is not None:
            # If using ReduceLROnPlateau, step based on validation metric
            if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(val_f1)
            else:
                # If using CosineAnnealing, step based on epoch
                scheduler.step()

        history["train_loss"].append(train_loss)
        history["train_f1"].append(train_f1)
        history["val_loss"].append(val_loss)
        history["val_f1"].append(val_f1)

        if writer is not None:
            writer.add_scalar("Loss/Training", train_loss, epoch)
            writer.add_scalar("F1/Training", train_f1, epoch)
            writer.add_scalar("Loss/Validation", val_loss, epoch)
            writer.add_scalar("F1/Validation", val_f1, epoch)

        if verbose > 0 and (epoch % verbose == 0 or epoch == 1):
            print(
                f"Epoch {epoch:3d}/{epochs} | "
                f"Train: Loss={train_loss:.4f}, F1={train_f1:.4f} | "
                f"Val: Loss={val_loss:.4f}, F1={val_f1:.4f} | "
                f"LR: {scheduler.get_last_lr()[0] if scheduler is not None else optimizer.param_groups[0]['lr']:.6f}"
            )

        current_metric = history[evaluation_metric][-1]
        is_improvement = (
            (current_metric > best_metric)
            if mode == "max"
            else (current_metric < best_metric)
        )

        if is_improvement:
            best_metric = current_metric
            best_epoch = epoch
            torch.save(model.state_dict(), "models/" + experiment_name + "_model.pt")
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience and patience > 0:
                print(f"Early stopping triggered after {epoch} epochs.")
                break

    if restore_best_weights and patience > 0:
        model.load_state_dict(torch.load("models/" + experiment_name + "_model.pt"))
        print(
            f"Best model restored from epoch {best_epoch} with {evaluation_metric} {best_metric:.4f}"
        )
    elif patience == 0:
        torch.save(model.state_dict(), "models/" + experiment_name + "_model.pt")

    if writer is not None:
        writer.close()

    return model, history

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd


def analyze_performance(model, loader, device, class_names):
    model.eval()
    all_preds = []
    all_labels = []

    print("Generating predictions for Confusion Matrix...")

    with torch.no_grad():
        for inputs, labels, _ in tqdm(loader, desc="Validating"):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # --- 1. Confusion Matrix ---
    cm = confusion_matrix(all_labels, all_preds)

    # Normalize by row (True Label) to see recall percentages
    cm_normalized = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]

    plt.figure(figsize=(10, 8))
    sns.heatmap(
        cm_normalized,
        annot=True,
        fmt=".2f",
        cmap="Blues",
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.ylabel("True Label (Ground Truth)")
    plt.xlabel("Predicted Label")
    plt.title("Normalized Confusion Matrix")
    plt.show()

    # --- 2. Classification Report ---
    print("\n" + "=" * 60)
    print("DETAILED CLASSIFICATION REPORT")
    print("=" * 60)
    print(
        classification_report(all_labels, all_preds, target_names=class_names, digits=4)
    )

    return cm

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2.0, reduction="mean"):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.reduction = reduction
        # Alpha should be the class weights you already calculated
        # If alpha is None, no class weighting is applied
        self.alpha = alpha

    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, weight=self.alpha, reduction="none")
        pt = torch.exp(-ce_loss)  # prevents nans when probability is 0
        focal_loss = ((1 - pt) ** self.gamma) * ce_loss

        if self.reduction == "mean":
            return focal_loss.mean()
        elif self.reduction == "sum":
            return focal_loss.sum()
        else:
            return focal_loss

## üõ†Ô∏è **Transfer Learning**


In [None]:
class EfficientNetCustom(nn.Module):
    """
    Instantiates EfficientNet-B0 with ImageNet weights.
    Replaces the classifier head with a high-dropout dense layer to prevent overfitting.
    """

    def __init__(self, num_classes, dropout_rate=0.4):
        super().__init__()
        self.dropout_rate = dropout_rate
        self.num_classes = num_classes

        self.weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
        self.backbone = torchvision.models.efficientnet_b0(weights=self.weights)
        
        in_features = self.backbone.classifier[1].in_features
        self.backbone.classifier = nn.Sequential(
            nn.Dropout(self.dropout_rate),
            nn.Linear(in_features, self.num_classes),  # type: ignore
        )
        self.freeze_backbone()
    
    def freeze_backbone(self):
        # Freeze all layers except the classifier head
        for name, param in self.backbone.named_parameters():
            if not name.startswith("classifier"):
                param.requires_grad = False
        # Ensure classifier params are trainable
        for param in self.backbone.classifier.parameters():
            param.requires_grad = True

    def unfreeze_backbone(self, n_layers, all=False):
        if all:
            for param in self.backbone.parameters():
                param.requires_grad = True
            return
        # Unfreeze the last n_layers of the backbone (excluding classifier which is already trainable)
        child_counter = 0
        for child in reversed(list(self.backbone.children())):
            child_counter += 1
            if child_counter <= n_layers:
                for param in child.parameters():
                    param.requires_grad = True

    def forward(self, x):
        return self.backbone(x)

In [None]:
tl_model = EfficientNetCustom(
    num_classes, DROPOUT_RATE
).to(device)

tl_model.freeze_backbone()

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# calcolo class weights dal TRAIN
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.arange(num_classes),
    y=train_df["label_index"].values,
)

class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)

criterion = FocalLoss(alpha=class_weights_tensor, gamma=2.0, reduction="mean")

print("Class weights:", class_weights)

In [None]:
# Setup training
experiment_name = "transfer_learning"
writer = SummaryWriter("./" + logs_dir + "/" + experiment_name)

optimizer = AdamW(tl_model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=EPOCHS, eta_min=1e-6
)

scaler = torch.amp.GradScaler(enabled=(device.type == "cuda"))  # type: ignore

In [None]:
%%time

# Train with transfer learning
tl_model, tl_history = fit(
    model=tl_model,
    train_loader=train_tl_loader,
    val_loader=val_tl_loader,
    epochs=EPOCHS,
    criterion=criterion,
    optimizer=optimizer,
    scaler=scaler,
    device=device,
    scheduler=scheduler,
    writer=writer,
    verbose=VERBOSE,
    experiment_name=experiment_name,
    patience=PATIENCE,
    mixup_alpha=0.0,
)

final_f1_score = max(tl_history['val_f1'])
print(f'Maximum f1 score: {final_f1_score}')

In [None]:
analyze_performance(tl_model, val_loader, device, class_names)

## **Fine-Tuning**


In [None]:
train_dataset.transform = train_transform_ft

train_ft_loader = make_loader(train_dataset, BATCH_SIZE, shuffle=True)
val_ft_loader = make_loader(val_dataset, BATCH_SIZE, shuffle=False)

ft_model = EfficientNetCustom(
    num_classes, FT_DROPOUT_RATE,
).to(device)
ft_model.load_state_dict(torch.load(f"models/{experiment_name}_model.pt"))

# ft_model.unfreeze_backbone(N_LAYERS_TO_UNFREEZE, all=False)
ft_model.unfreeze_backbone(N_LAYERS_TO_UNFREEZE, all=True)

total_params = sum(p.numel() for p in ft_model.parameters())
trainable_params = sum(p.numel() for p in ft_model.parameters() if p.requires_grad)

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Frozen parameters: {total_params - trainable_params:,}")

In [None]:
experiment_name = "fine_tuning"
writer = SummaryWriter("./" + logs_dir + "/" + experiment_name)

optimizer = AdamW(
    ft_model.parameters(), lr=FT_LEARNING_RATE, weight_decay=FT_WEIGHT_DECAY
)
scaler = torch.amp.GradScaler(enabled=(device.type == "cuda"))  # type: ignore

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=EPOCHS, eta_min=1e-6
)

In [None]:
%%time

ft_model, ft_history = fit(
    model=ft_model,
    train_loader=train_ft_loader,
    val_loader=val_ft_loader,
    epochs=EPOCHS,
    criterion=criterion,
    optimizer=optimizer,
    scaler=scaler,
    device=device,
    scheduler=scheduler,
    writer=writer,
    verbose=VERBOSE,
    experiment_name=experiment_name,
    patience=PATIENCE,
    mixup_alpha=0.2,
)

final_f1_score = max(ft_history['val_f1'])
print(f'Maximum f1 score: {final_f1_score}')

In [None]:
analyze_performance(ft_model, val_ft_loader, device, class_names)

## **Evaluation**


In [None]:
# Plot TL vs FT
def plot_history(tl_history, ft_history):
    epochs_tl = len(tl_history["val_f1"])
    epochs_ft = len(ft_history["val_f1"])

    plt.figure(figsize=(14, 5))

    # F1
    plt.subplot(1, 2, 1)
    plt.plot(tl_history["train_f1"], label="Train F1 TL")
    plt.plot(tl_history["val_f1"], label="Val F1 TL")
    plt.plot(
        range(epochs_tl, epochs_tl + epochs_ft),
        ft_history["train_f1"],
        label="Train F1 FT",
    )
    plt.plot(
        range(epochs_tl, epochs_tl + epochs_ft), ft_history["val_f1"], label="Val F1 FT"
    )
    plt.xlabel("Epoch")
    plt.ylabel("F1 macro")
    plt.title("Andamento F1 TL+FT")
    plt.legend()
    plt.grid(True)

    # Loss
    plt.subplot(1, 2, 2)
    plt.plot(tl_history["train_loss"], label="Train Loss TL")
    plt.plot(tl_history["val_loss"], label="Val Loss TL")
    plt.plot(
        range(epochs_tl, epochs_tl + epochs_ft),
        ft_history["train_loss"],
        label="Train Loss FT",
    )
    plt.plot(
        range(epochs_tl, epochs_tl + epochs_ft),
        ft_history["val_loss"],
        label="Val Loss FT",
    )
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Andamento Loss TL+FT")
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.show()

In [None]:
plot_history(tl_history, ft_history)

In [None]:
from sklearn.metrics import f1_score


def evaluate_macro_f1(model, loader, device):
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for images, labels, _ in loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            preds = torch.argmax(outputs, dim=1)

            y_true.extend(labels.cpu().tolist())
            y_pred.extend(preds.cpu().tolist())

    return f1_score(y_true, y_pred, average="macro")

In [None]:
best_model = EfficientNetCustom(
    num_classes, DROPOUT_RATE,
).to(device)
best_model.load_state_dict(torch.load(f"models/{experiment_name}_model.pt"))
best_model.eval()

In [None]:
train_f1 = evaluate_macro_f1(best_model, train_ft_loader, device)
val_f1 = evaluate_macro_f1(best_model, val_ft_loader, device)

print(f"F1 TRAIN (macro): {train_f1:.4f}")
print(f"F1 VAL   (macro): {val_f1:.4f}")

## **Inference on test_data**


In [None]:
import torch.nn.functional as F

test_dataset = MaskedFixedTileDataset(
    dataframe=None, img_dir=test_set_dir, transform=data_transforms, target_size=IMG_RESIZE, debug_max=None
)

test_loader = make_loader(test_dataset, BATCH_SIZE, shuffle=False, drop_last=False)

print(f"Starting Inference on {len(test_dataset)} patches...")

# Dictionary to store probabilities: { 'img_123.png': [ [p0, p1, p2, p3], ... ] }
patch_probs = {}

with torch.no_grad():
    for inputs, _, parent_ids in tqdm(test_loader, desc="Inference"):
        inputs = inputs.to(device)

        # TTA Strategy:
        # 1. Original
        out1 = F.softmax(best_model(inputs), dim=1)

        # 2. Horizontal Flip
        inputs_hf = torch.flip(inputs, dims=[3])
        out2 = F.softmax(best_model(inputs_hf), dim=1)

        # 3. Vertical Flip
        inputs_vf = torch.flip(inputs, dims=[2])
        out3 = F.softmax(best_model(inputs_vf), dim=1)

        # Average predictions
        avg_probs = (out1 + out2 + out3) / 3.0
        avg_probs = avg_probs.cpu().numpy()

        # Group by Parent Image
        for i, pid in enumerate(parent_ids):
            if pid not in patch_probs:
                patch_probs[pid] = []
            patch_probs[pid].append(probs[i])

In [None]:
print("Aggregating predictions (Soft Voting)...")
final_rows = []

# 2. Aggregation Loop
for img_name, prob_list in patch_probs.items():
    # Stack into shape (N_patches, 4)
    prob_matrix = np.array(prob_list)
    
    # SOFT VOTING: Average the probabilities across all patches for this slide
    avg_probs = np.mean(prob_matrix, axis=0)
    
    # The class with the highest average probability is our prediction
    pred_class = np.argmax(avg_probs)
    
    # Format the filename to match sample_index constraints if needed
    # Assuming sample_index is the full filename like "img_0.png" or just "0"
    # Adjusting to match your snippet's likely expectation:
    sample_index = img_name 
    
    final_rows.append({
        "sample_index": sample_index,
        "label": pred_class
    })

In [None]:
# map label index to class name name
for row in final_rows:
    row["label"] = class_names[row["label"]]
 
submission_df = pd.DataFrame(final_rows)
submission_df = submission_df.sort_values(by="sample_index")

os.makedirs(os.path.join(current_dir, "submission"), exist_ok=True)
submission_file_pos = os.path.join(current_dir, "submission", "efficient_net_b0.csv")
submission_df.to_csv(
    submission_file_pos, index=False
)

print("Submission file created: ", submission_file_pos)
submission_df.head(2000)