In [1]:
from google.colab import drive
drive.mount("/gdrive")

import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split


Mounted at /gdrive


In [2]:
# Set seed for reproducibility
SEED = 42

# Import necessary libraries
import os

# Set environment variables before importing modules
os.environ['PYTHONHASHSEED'] = str(SEED)
os.environ['MPLCONFIGDIR'] = os.getcwd() + '/configs/'

# Suppress warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

# Import necessary modules
import logging
import random
import numpy as np

# Set seeds for random number generators in NumPy and Python
np.random.seed(SEED)
random.seed(SEED)

# Import PyTorch
import torch
torch.manual_seed(SEED)
from torch import nn
from torchsummary import summary
from torch.utils.tensorboard import SummaryWriter
import torchvision
from torchvision.transforms import v2 as transforms
from torch.utils.data import TensorDataset, DataLoader
!pip install torchview
from torchview import draw_graph

# Configurazione di TensorBoard e directory
logs_dir = "tensorboard"
!pkill -f tensorboard
%load_ext tensorboard
!mkdir -p models

if torch.cuda.is_available():
    device = torch.device("cuda")
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.benchmark = True
else:
    device = torch.device("cpu")

print(f"PyTorch version: {torch.__version__}")
print(f"Device: {device}")

# Import other libraries
import requests
from io import BytesIO
import cv2
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Configure plot display settings
sns.set(font_scale=1.4)
sns.set_style('white')
plt.rc('font', size=14)
%matplotlib inline


Collecting torchview
  Downloading torchview-0.2.7-py3-none-any.whl.metadata (13 kB)
Downloading torchview-0.2.7-py3-none-any.whl (26 kB)
Installing collected packages: torchview
Successfully installed torchview-0.2.7
PyTorch version: 2.9.0+cu126
Device: cuda


In [3]:
current_dir = "/gdrive/My Drive/[2025-2026] AN2DL/Challenge 2/dataset"
train_data_dir = f"{current_dir}/train_data"
test_data_dir = f"{current_dir}/test_data"
csv_path = f"{current_dir}/train_labels.csv"

import pandas as pd
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split

# ============================================================
# IGNORA MANUALMENTE ALCUNI SAMPLE
# ============================================================

# >>> MODIFICA SOLO QUESTA LISTA <<<
# Usa sempre il nome immagine completo: "img_XXXX.png"
SAMPLES_TO_IGNORE = [
    "img_0102.png", "img_0104.png", "img_0108.png", "img_0109.png", "img_0112.png",
    "img_0130.png", "img_0147.png", "img_0152.png", "img_0153.png", "img_0168.png",
    "img_0173.png", "img_0176.png", "img_0182.png", "img_0189.png", "img_0193.png",
    "img_0203.png", "img_0213.png", "img_0218.png", "img_0223.png", "img_0228.png",
    "img_0232.png", "img_0237.png", "img_0239.png", "img_0249.png", "img_0250.png",
    "img_0256.png", "img_0264.png", "img_0269.png", "img_0270.png", "img_0271.png",
    "img_0276.png", "img_0277.png", "img_0282.png", "img_0290.png", "img_0291.png",
    "img_0304.png", "img_0308.png", "img_0318.png", "img_0322.png", "img_0323.png",
    "img_0328.png", "img_0336.png", "img_0342.png", "img_0348.png", "img_0357.png",
    "img_0365.png", "img_0368.png", "img_0369.png", "img_0370.png", "img_0379.png",
    "img_0384.png", "img_0386.png", "img_0390.png", "img_0391.png", "img_0394.png",
    "img_0404.png", "img_0406.png", "img_0411.png", "img_0413.png", "img_0418.png",
    "img_0422.png", "img_0426.png", "img_0428.png", "img_0430.png", "img_0436.png",
    "img_0438.png", "img_0442.png", "img_0446.png", "img_0447.png", "img_0448.png",
    "img_0451.png", "img_0454.png", "img_0455.png", "img_0456.png", "img_0469.png",
    "img_0471.png", "img_0478.png", "img_0480.png", "img_0481.png", "img_0487.png",
    "img_0489.png", "img_0492.png", "img_0493.png", "img_0495.png", "img_0503.png",
    "img_0505.png", "img_0509.png", "img_0511.png", "img_0512.png", "img_0514.png",
    "img_0516.png", "img_0518.png", "img_0520.png", "img_0521.png", "img_0526.png",
    "img_0527.png", "img_0529.png", "img_0536.png", "img_0554.png", "img_0555.png",
    "img_0559.png", "img_0572.png", "img_0574.png", "img_0586.png", "img_0589.png",
    "img_0592.png", "img_0594.png", "img_0597.png", "img_0600.png", "img_0606.png",
    "img_0608.png", "img_0612.png", "img_0629.png", "img_0631.png", "img_0648.png",
    "img_0650.png", "img_0652.png", "img_0653.png", "img_0655.png", "img_0665.png",
    "img_0673.png", "img_0681.png", "img_0687.png", "img_0703.png", "img_0714.png",
    "img_0731.png", "img_0733.png", "img_0735.png", "img_0748.png", "img_0753.png",
    "img_0755.png", "img_0758.png", "img_0767.png", "img_0771.png", "img_0796.png",
    "img_0800.png", "img_0804.png", "img_0813.png", "img_0817.png", "img_0819.png",
    "img_0822.png", "img_0825.png", "img_0826.png", "img_0829.png", "img_0832.png",
    "img_0866.png", "img_0868.png", "img_0892.png", "img_0893.png", "img_0898.png",
    "img_0903.png", "img_0906.png", "img_0907.png", "img_0910.png", "img_0913.png",
    "img_0918.png", "img_0919.png", "img_0924.png", "img_0930.png", "img_0935.png",
    "img_0936.png", "img_0937.png", "img_0941.png", "img_0944.png", "img_0958.png",
    "img_0960.png", "img_0963.png", "img_0973.png", "img_0982.png", "img_0991.png",
    "img_0992.png", "img_0999.png", "img_1004.png", "img_1005.png", "img_1006.png",
    "img_1009.png", "img_1011.png", "img_1015.png", "img_1017.png", "img_1021.png",
    "img_1023.png", "img_1035.png", "img_1037.png", "img_1039.png", "img_1041.png",
    "img_1046.png", "img_1062.png", "img_1086.png", "img_1108.png", "img_1109.png",
    "img_1114.png", "img_1124.png", "img_1125.png", "img_1133.png", "img_1145.png",
    "img_1146.png", "img_1150.png", "img_1156.png", "img_1177.png", "img_1184.png",
    "img_1186.png", "img_1202.png", "img_1206.png", "img_1212.png", "img_1217.png",
    "img_1218.png", "img_1220.png", "img_1223.png", "img_1229.png", "img_1232.png",
    "img_1236.png", "img_1241.png", "img_1245.png", "img_1258.png", "img_1261.png",
    "img_1263.png", "img_1267.png", "img_1271.png", "img_1274.png", "img_1277.png",
    "img_1280.png", "img_1281.png", "img_1298.png", "img_1300.png", "img_1312.png",
    "img_1317.png", "img_1320.png", "img_1326.png", "img_1327.png", "img_1329.png",
    "img_1332.png", "img_1334.png", "img_1335.png", "img_1338.png", "img_1340.png",
    "img_1360.png", "img_1361.png", "img_1362.png", "img_1367.png", "img_1369.png",
    "img_1375.png", "img_1376.png", "img_1377.png", "img_1381.png", "img_1385.png",
    "img_1388.png", "img_1389.png", "img_1392.png",
]

df = pd.read_csv(csv_path, header=None, names=["sample_index", "label"])
df = df.iloc[1:].reset_index(drop=True)

# Numero iniziale di righe
n_before = len(df)

# Filtro logico del dataframe
df = df[~df["sample_index"].isin(SAMPLES_TO_IGNORE)].reset_index(drop=True)

# Report
n_after = len(df)
print(f"Sample ignorati (logico): {n_before - n_after}")
print(f"Sample rimanenti nel dataset: {n_after}")

# Check di sicurezza
assert not df["sample_index"].isin(SAMPLES_TO_IGNORE).any(), \
    "Errore: alcuni sample ignorati sono ancora presenti nel dataframe"


# Label mapping
class_names = sorted(df["label"].unique())
label_to_index = {name: idx for idx, name in enumerate(class_names)}
df["label_index"] = df["label"].map(label_to_index)
num_classes = len(class_names)
print(f"Number of classes: {num_classes}")

# Train/validation split (stratified)
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df["label"],
    random_state=SEED
)
print(f"Train samples: {len(train_df)}, Val samples: {len(val_df)}")



class MaskedCropDataset(Dataset):
    def __init__(self, dataframe, img_dir, transform=None, padding=10):
        self.df = dataframe.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform
        self.padding = padding

    def _get_bbox_from_mask(self, mask):
        ys, xs = np.where(mask > 0)
        if len(xs) == 0 or len(ys) == 0:
            return None
        x1, x2 = xs.min(), xs.max()
        y1, y2 = ys.min(), ys.max()
        return x1, y1, x2, y2

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_name = row["sample_index"]

        img_path  = os.path.join(self.img_dir, img_name)
        mask_path = os.path.join(self.img_dir, img_name.replace("img_", "mask_"))

        image = np.array(Image.open(img_path).convert("RGB"))
        mask  = np.array(Image.open(mask_path).convert("L"))

        bbox = self._get_bbox_from_mask(mask)

        if bbox is not None:
            x1, y1, x2, y2 = bbox
            h, w = image.shape[:2]
            x1 = max(0, x1 - self.padding)
            y1 = max(0, y1 - self.padding)
            x2 = min(w, x2 + self.padding)
            y2 = min(h, y2 + self.padding)
            image = image[y1:y2, x1:x2]

        image = Image.fromarray(image)

        if self.transform:
            image = self.transform(image)

        label = row["label_index"]
        return image, label

    def __len__(self):
        return len(self.df)


# Transforms
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset = MaskedCropDataset(train_df, train_data_dir, transform=data_transforms)
val_dataset   = MaskedCropDataset(val_df, train_data_dir, transform=data_transforms)


def dataset_to_numpy(ds):
    loader = DataLoader(ds, batch_size=32, shuffle=False)
    n = len(ds)
    X = torch.zeros((n, 3, 224, 224), dtype=torch.float32)
    y = torch.zeros(n, dtype=torch.int64)
    idx_offset = 0
    for images, labels in loader:
        bs = images.shape[0]
        X[idx_offset:idx_offset+bs] = images
        y[idx_offset:idx_offset+bs] = labels
        idx_offset += bs
    return X.numpy(), y.numpy()

print("Loading training data...")
X_train, y_train = dataset_to_numpy(train_dataset)
print("Loading validation data...")
X_val, y_val = dataset_to_numpy(val_dataset)




Sample ignorati (logico): 248
Sample rimanenti nel dataset: 1164
Number of classes: 4
Train samples: 931, Val samples: 233
Loading training data...
Loading validation data...


In [95]:
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")


X_train shape: (931, 3, 224, 224), y_train shape: (931,)
X_val shape: (233, 3, 224, 224), y_val shape: (233,)


In [96]:
# Define the input shape and number of classes
input_shape = (3, 224, 224)
num_classes = len(class_names)

print("Input Shape:", input_shape)
print("Number of Classes:", num_classes)


Input Shape: (3, 224, 224)
Number of Classes: 4


In [97]:
# Define the batch size
BATCH_SIZE = 32


In [98]:
def make_loader(ds, batch_size, shuffle, drop_last):
    cpu_cores = os.cpu_count() or 2
    num_workers = max(2, min(4, cpu_cores))
    return DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=shuffle,
        drop_last=drop_last,
        num_workers=num_workers,
        pin_memory=True,
        pin_memory_device="cuda" if torch.cuda.is_available() else "",
        prefetch_factor=4,
    )


In [99]:
# Training parameters
LEARNING_RATE = 1e-4
EPOCHS = 200
PATIENCE = 20
DROPOUT_RATE = 0.5
criterion = nn.CrossEntropyLoss()

print("Epochs:", EPOCHS)
print("Batch Size:", BATCH_SIZE)
print("Learning Rate:", LEARNING_RATE)
print("Dropout Rate:", DROPOUT_RATE)
print("Patience:", PATIENCE)


Epochs: 200
Batch Size: 32
Learning Rate: 0.0001
Dropout Rate: 0.5
Patience: 20


Training Functions


In [100]:
def train_one_epoch(model, train_loader, criterion, optimizer, scaler, device):
    model.train()
    running_loss = 0.0
    all_predictions = []
    all_targets = []
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast(device_type=device.type, enabled=(device.type == 'cuda')):
            logits = model(inputs)
            loss = criterion(logits, targets)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        running_loss += loss.item() * inputs.size(0)
        predictions = logits.argmax(dim=1)
        all_predictions.append(predictions.cpu().numpy())
        all_targets.append(targets.cpu().numpy())
    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = accuracy_score(np.concatenate(all_targets), np.concatenate(all_predictions))
    return epoch_loss, epoch_acc


def validate_one_epoch(model, val_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    all_predictions = []
    all_targets = []
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            with torch.amp.autocast(device_type=device.type, enabled=(device.type == 'cuda')):
                logits = model(inputs)
                loss = criterion(logits, targets)
            running_loss += loss.item() * inputs.size(0)
            predictions = logits.argmax(dim=1)
            all_predictions.append(predictions.cpu().numpy())
            all_targets.append(targets.cpu().numpy())
    epoch_loss = running_loss / len(val_loader.dataset)
    epoch_acc = accuracy_score(np.concatenate(all_targets), np.concatenate(all_predictions))
    return epoch_loss, epoch_acc


def fit(model, train_loader, val_loader, epochs, criterion, optimizer, scaler, device,
        patience=0, evaluation_metric="val_acc", mode='max',
        restore_best_weights=True, writer=None, verbose=1, experiment_name=""):
    history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []}
    best_metric = float('-inf') if mode == 'max' else float('inf')
    best_epoch = 0
    patience_counter = 0

    print(f"Training {epochs} epochs...")
    for epoch in range(1, epochs + 1):
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, scaler, device)
        val_loss, val_acc = validate_one_epoch(model, val_loader, criterion, device)
        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['train_acc'].append(train_acc)
        history['val_acc'].append(val_acc)

        if writer is not None:
            writer.add_scalar('Loss/Training', train_loss, epoch)
            writer.add_scalar('Loss/Validation', val_loss, epoch)
            writer.add_scalar('Accuracy/Training', train_acc, epoch)
            writer.add_scalar('Accuracy/Validation', val_acc, epoch)

        if verbose > 0 and (epoch % verbose == 0 or epoch == 1):
            print(f"Epoch {epoch:3d}/{epochs} | Train: Loss={train_loss:.4f}, Acc={train_acc:.4f} | Val: Loss={val_loss:.4f}, Acc={val_acc:.4f}")

        current_metric = history[evaluation_metric][-1]
        is_improvement = (current_metric > best_metric) if mode == 'max' else (current_metric < best_metric)
        if is_improvement:
            best_metric = current_metric
            best_epoch = epoch
            torch.save(model.state_dict(), "models/"+experiment_name+'_model.pt')
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience and patience > 0:
                print(f"Early stopping triggered after {epoch} epochs.")
                break

    if restore_best_weights and patience > 0:
        model.load_state_dict(torch.load("models/"+experiment_name+'_model.pt'))
        print(f"Best model restored from epoch {best_epoch} with {evaluation_metric} {best_metric:.4f}")
    elif patience == 0:
        torch.save(model.state_dict(), "models/"+experiment_name+'_model.pt')

    if writer is not None:
        writer.close()

    return model, history


## Transfer Learning


In [101]:
class ResNet18(nn.Module):
    """ResNet18 pretrained su ImageNet, con head custom per classificazione."""
    def __init__(self, num_classes, dropout_rate=0.3, freeze_backbone=True):
        super().__init__()
        self.backbone = torchvision.models.resnet18(
            weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1
        )

        # Congelo tutto il backbone se richiesto
        if freeze_backbone:
            for p in self.backbone.parameters():
                p.requires_grad = False

        in_features = self.backbone.fc.in_features
        # Sostituisco la fully connected finale
        self.backbone.fc = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(in_features, num_classes)
        )

    def forward(self, x):
        return self.backbone(x)


In [102]:
## Transfer Learning con ResNet18

tl_model = ResNet18(
    num_classes=num_classes,
    dropout_rate=DROPOUT_RATE,
    freeze_backbone=True
).to(device)

train_augmentation = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomAffine(degrees=0, translate=(0.2, 0.2)),
    transforms.RandomErasing(p=0.5, scale=(0.02, 0.2))
])

train_tl_ds = MaskedCropDataset(train_df, train_data_dir, transform=data_transforms)
val_tl_ds   = MaskedCropDataset(val_df,   train_data_dir, transform=data_transforms)

train_tl_loader = make_loader(train_tl_ds, BATCH_SIZE, shuffle=True,  drop_last=False)
val_tl_loader   = make_loader(val_tl_ds,   BATCH_SIZE, shuffle=False, drop_last=False)

experiment_name = "resnet18_transfer_learning"
writer = SummaryWriter("./"+logs_dir+"/"+experiment_name)

optimizer = torch.optim.Adam(
    tl_model.parameters(),  # alleniamo SOLO la nuova fc perché il backbone è congelato
    lr=LEARNING_RATE
)
scaler = torch.amp.GradScaler(enabled=(device.type == 'cuda'))


In [103]:
%%time
# Train with transfer learning
tl_model, tl_history = fit(
    model=tl_model,
    train_loader=train_tl_loader,
    val_loader=val_tl_loader,
    epochs=EPOCHS,
    criterion=criterion,
    optimizer=optimizer,
    scaler=scaler,
    device=device,
    writer=writer,
    verbose=5,
    experiment_name=experiment_name,
    patience=PATIENCE
)

final_val_acc = round(max(tl_history['val_acc']) * 100, 2)
print(f'Final validation accuracy: {final_val_acc}%')

Training 200 epochs...
Epoch   1/200 | Train: Loss=1.5135, Acc=0.2975 | Val: Loss=1.4068, Acc=0.3047
Epoch   5/200 | Train: Loss=1.4808, Acc=0.3190 | Val: Loss=1.3882, Acc=0.2833
Epoch  10/200 | Train: Loss=1.4329, Acc=0.3201 | Val: Loss=1.3840, Acc=0.2876
Epoch  15/200 | Train: Loss=1.4136, Acc=0.3340 | Val: Loss=1.3817, Acc=0.2918
Epoch  20/200 | Train: Loss=1.4124, Acc=0.3362 | Val: Loss=1.3763, Acc=0.2575
Early stopping triggered after 21 epochs.
Best model restored from epoch 1 with val_acc 0.3047
Final validation accuracy: 30.47%
CPU times: user 22.4 s, sys: 9.11 s, total: 31.5 s
Wall time: 8min 28s


## Fine-Tuning


In [107]:
# Carico il modello fine-tuning
ft_model = ResNet18(
    num_classes=num_classes,
    dropout_rate=DROPOUT_RATE,
    freeze_backbone=False
).to(device)

# Carico i pesi del transfer learning
ft_model.load_state_dict(
    torch.load("models/resnet18_transfer_learning_model.pt")
)

# 1️⃣ Congelo TUTTO
for param in ft_model.backbone.parameters():
    param.requires_grad = True                                   ###palese fa cagare
"""
2️⃣ Sblocco SOLO layer4
for param in ft_model.backbone.layer3.parameters():
    param.requires_grad = True
for param in ft_model.backbone.layer4.parameters():
    param.requires_grad = True
"""
# 3️⃣ Sblocco SEMPRE la fully connected finale
for param in ft_model.backbone.fc.parameters():
    param.requires_grad = True

# Debug: contiamo i parametri
total_params = sum(p.numel() for p in ft_model.parameters())
trainable_params = sum(p.numel() for p in ft_model.parameters() if p.requires_grad)

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Frozen parameters: {total_params - trainable_params:,}")



Total parameters: 11,178,564
Trainable parameters: 11,178,564
Frozen parameters: 0


In [108]:
## Fine-Tuning con ResNet18 (sblocco parziale corretto)

# 1️⃣ Istanzia il modello
ft_model = ResNet18(
    num_classes=num_classes,
    dropout_rate=DROPOUT_RATE,
    freeze_backbone=False
).to(device)

# 2️⃣ Carica i pesi della fase di transfer learning
ft_model.load_state_dict(
    torch.load("models/resnet18_transfer_learning_model.pt", map_location=device)
)

# 3️⃣ Congela TUTTO il backbone
for param in ft_model.backbone.parameters():
    param.requires_grad = False

# 4️⃣ Sblocca SOLO quello che davvero vuoi fine-tunare
# ⚠️ Visto che stai overfittando, layer3 è rischioso
# Se vuoi provarlo, fallo DOPO aver stabilizzato layer4
for param in ft_model.backbone.layer4.parameters():
    param.requires_grad = True

# Fully connected SEMPRE sbloccata
for param in ft_model.backbone.fc.parameters():
    param.requires_grad = True

# 5️⃣ Report parametri
total_params = sum(p.numel() for p in ft_model.parameters())
trainable_params = sum(p.numel() for p in ft_model.parameters() if p.requires_grad)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Frozen parameters: {total_params - trainable_params:,}")


train_ft_loader = train_tl_loader
val_ft_loader   = val_tl_loader

# 6️⃣ Optimizer (LR diversi)
experiment_name = "resnet18_fine_tuning"
writer = SummaryWriter("./" + logs_dir + "/" + experiment_name)

optimizer = torch.optim.Adam(
    [
        {"params": ft_model.backbone.layer4.parameters(), "lr": 1e-5},
        {"params": ft_model.backbone.fc.parameters(),     "lr": 5e-4},
    ]
)

# 7️⃣ Mixed precision (ok)
scaler = torch.amp.GradScaler(enabled=(device.type == "cuda"))


Total parameters: 11,178,564
Trainable parameters: 8,395,780
Frozen parameters: 2,782,784


In [109]:
%%time
ft_model, ft_history = fit(
    model=ft_model,
    train_loader=train_ft_loader,
    val_loader=val_ft_loader,
    epochs=EPOCHS,
    criterion=criterion,
    optimizer=optimizer,
    scaler=scaler,
    device=device,
    writer=writer,
    verbose=5,
    experiment_name=experiment_name,
    patience=PATIENCE
)

final_val_acc = round(max(ft_history['val_acc']) * 100, 2)
print(f'Final validation accuracy: {final_val_acc}%')


Training 200 epochs...
Epoch   1/200 | Train: Loss=1.5009, Acc=0.2911 | Val: Loss=1.3941, Acc=0.2661
Epoch   5/200 | Train: Loss=1.2909, Acc=0.3942 | Val: Loss=1.4122, Acc=0.2876
Epoch  10/200 | Train: Loss=1.1074, Acc=0.4973 | Val: Loss=1.4502, Acc=0.2618
Epoch  15/200 | Train: Loss=0.9094, Acc=0.6434 | Val: Loss=1.5051, Acc=0.3004
Epoch  20/200 | Train: Loss=0.7037, Acc=0.7411 | Val: Loss=1.6395, Acc=0.3090
Early stopping triggered after 24 epochs.
Best model restored from epoch 4 with val_acc 0.3219
Final validation accuracy: 32.19%
CPU times: user 31.1 s, sys: 10.6 s, total: 41.8 s
Wall time: 9min 45s


Evaluation


In [110]:
from sklearn.metrics import f1_score

def evaluate_macro_f1(model, loader, device):
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            preds = torch.argmax(outputs, dim=1)
            y_true.extend(labels.cpu().tolist())
            y_pred.extend(preds.cpu().tolist())
    return f1_score(y_true, y_pred, average="macro")


In [111]:
best_model = ResNet18(num_classes, DROPOUT_RATE, freeze_backbone=False).to(device)
best_model.load_state_dict(torch.load("models/resnet18_fine_tuning_model.pt"))
best_model.eval()


ResNet18(
  (backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_r

In [112]:
train_f1 = evaluate_macro_f1(best_model, train_ft_loader, device)
val_f1   = evaluate_macro_f1(best_model, val_ft_loader, device)
print(f"F1 TRAIN (macro): {train_f1:.4f}")
print(f"F1 VAL   (macro): {val_f1:.4f}")


F1 TRAIN (macro): 0.3004
F1 VAL   (macro): 0.1755


Inference on test_data


In [113]:
class InferenceImageDataset(Dataset):
    def __init__(self, img_dir, transform=None):
        self.img_dir = img_dir
        self.transform = transform
        files = sorted(os.listdir(img_dir))
        seen = set()
        self.files = []
        for f in files:
            base = os.path.splitext(f)[0]
            if base not in seen:
                seen.add(base)
                self.files.append(f)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.files[idx])
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, self.files[idx]


In [114]:
class MaskedInferenceDataset(Dataset):
    def __init__(self, img_dir, transform=None, padding=10):
        self.img_dir = img_dir
        self.transform = transform
        self.padding = padding

        # Prendo solo le immagini img_xxxx.png
        self.images = sorted([
            f for f in os.listdir(img_dir)
            if f.startswith("img_")
        ])

    def _get_bbox_from_mask(self, mask):
        ys, xs = np.where(mask > 0)
        if len(xs) == 0:
            return None
        return xs.min(), ys.min(), xs.max(), ys.max()

    def __getitem__(self, idx):
        img_name = self.images[idx]
        mask_name = img_name.replace("img_", "mask_")

        img_path  = os.path.join(self.img_dir, img_name)
        mask_path = os.path.join(self.img_dir, mask_name)

        image = np.array(Image.open(img_path).convert("RGB"))
        mask  = np.array(Image.open(mask_path).convert("L"))

        bbox = self._get_bbox_from_mask(mask)
        if bbox is not None:
            x1, y1, x2, y2 = bbox
            h, w = image.shape[:2]
            image = image[
                max(0, y1 - self.padding):min(h, y2 + self.padding),
                max(0, x1 - self.padding):min(w, x2 + self.padding)
            ]

        image = Image.fromarray(image)
        if self.transform:
            image = self.transform(image)

        return image, img_name

    def __len__(self):
        return len(self.images)


In [115]:
test_dataset = MaskedInferenceDataset(
    img_dir=test_data_dir,
    transform=data_transforms
)

test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4
)


best_model = ResNet18(
    num_classes=num_classes,
    dropout_rate=DROPOUT_RATE,
    freeze_backbone=False
).to(device)

best_model.load_state_dict(
    torch.load("models/resnet18_fine_tuning_model.pt")
)

best_model.eval()

predictions = []


with torch.no_grad():
    for images, names in test_loader:
        images = images.to(device)
        outputs = best_model(images)
        preds = torch.argmax(outputs, dim=1)
        predictions.extend(preds.cpu().tolist())


In [116]:
submission_df = pd.DataFrame({
    "sample_index": test_dataset.images,
    "label": [class_names[p] for p in predictions]
})

os.makedirs(os.path.join(current_dir, "submission"), exist_ok=True)
submission_df.to_csv(
    os.path.join(current_dir, "submission", "fixed_input_shape.csv"),
    index=False
)

print(len(submission_df), "righe ? OK")
submission_df.head()


954 righe ? OK


Unnamed: 0,sample_index,label
0,img_0000.png,Luminal B
1,img_0001.png,Luminal B
2,img_0002.png,Luminal B
3,img_0003.png,Luminal B
4,img_0004.png,Luminal B
