In [None]:
# Download the torchvision detection reference helpers that later cells import
# (coco_utils, utils, transforms, coco_eval, engine) into the working directory.
# urlretrieve overwrites an existing file, so re-running this cell never leaves
# stale "*.py.1" copies behind, and it raises on an HTTP/network error, so the
# success message below is only printed when every file was really fetched.
import urllib.request

REFERENCE_BASE_URL = "https://raw.githubusercontent.com/pytorch/vision/main/references/detection"
REFERENCE_FILES = ("coco_utils.py", "utils.py", "transforms.py", "coco_eval.py", "engine.py")

for helper_file in REFERENCE_FILES:
    urllib.request.urlretrieve(f"{REFERENCE_BASE_URL}/{helper_file}", helper_file)

print("File scaricati con successo!")

In [None]:
pip install torch-lr-finder

In [None]:
import torch
import torchvision
from torchvision import transforms
from torchvision.datasets import CocoDetection
from torchvision.ops import box_convert
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import Subset
import os
import json
from PIL import Image
from engine import train_one_epoch, evaluate
from torch_lr_finder import LRFinder
from PIL import Image
from engine import train_one_epoch, evaluate
import utils
from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
import time

In [None]:
# Root folder of the COCO 2017 dataset.
DATASET_PATH = "/kaggle/input/coco-2017-dataset/coco2017"

# Image directories of the train / val splits.
TRAIN_IMAGES, VAL_IMAGES = (
    f"{DATASET_PATH}/{split}2017" for split in ("train", "val")
)

# Instance-annotation JSON files of the train / val splits.
TRAIN_ANN, VAL_ANN = (
    f"{DATASET_PATH}/annotations/instances_{split}2017.json" for split in ("train", "val")
)

In [None]:
# Training-time image pipeline. Only pixel-level augmentations are used, so the
# image geometry is left untouched.
transform_train = transforms.Compose(
    [
        # Colour jitter: simulates different lighting conditions by varying
        # brightness, contrast and saturation by up to ±20%, plus a slight hue shift.
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
        # Random grayscale: the image becomes black-and-white 10% of the time,
        # which helps the model not to rely on specific colours only.
        transforms.RandomGrayscale(p=0.1),
        # Final conversion to a tensor.
        transforms.ToTensor(),
    ]
)

# Validation-time image pipeline: tensor conversion only, no augmentation.
transform_val = transforms.Compose([transforms.ToTensor()])

In [None]:
class CocoToPyTorchFormat:
    """Target transform: raw COCO annotation list -> torchvision detection target.

    Used as `target_transform` of `CocoDetection`, so it only ever sees the list
    of annotation dicts of one image (never the image itself).
    """

    def __call__(self, target):
        """Build the target dict for one image.

        Args:
            target: list of COCO annotation dicts; each one is read for the keys
                'bbox' (x, y, width, height), 'category_id', 'area', 'iscrowd'
                and 'image_id'.

        Returns:
            dict with tensors "boxes" (N, 4) in xyxy format, "labels" (N,),
            "image_id" (1,), "area" (N,) and "iscrowd" (N,). Annotations whose
            box has no positive width and height are dropped, so N can be
            smaller than len(target).

        Note:
            For an empty annotation list the image id cannot be recovered (it is
            only stored inside the annotations), so "image_id" is the placeholder
            -1. Such a sample is a valid background-only training example but must
            not be used for COCO evaluation.
        """
        # Image without annotations: return a well-formed empty target instead of
        # failing on target[0] / on a 1-D box tensor.
        if len(target) == 0:
            return {
                "boxes": torch.zeros((0, 4), dtype=torch.float32),
                "labels": torch.zeros((0,), dtype=torch.int64),
                "image_id": torch.tensor([-1]),
                "area": torch.zeros((0,), dtype=torch.float32),
                "iscrowd": torch.zeros((0,), dtype=torch.int64),
            }

        # 1. Raw boxes (xywh); reshape keeps the (N, 4) layout explicit.
        boxes_xywh = torch.as_tensor(
            [obj['bbox'] for obj in target], dtype=torch.float32
        ).reshape(-1, 4)

        # 2. in_fmt='xywh' -> out_fmt='xyxy'
        boxes_xyxy = box_convert(boxes_xywh, in_fmt='xywh', out_fmt='xyxy')

        # 3. Remaining per-annotation information.
        labels = torch.as_tensor([obj['category_id'] for obj in target], dtype=torch.int64)
        area = torch.as_tensor([obj['area'] for obj in target], dtype=torch.float32)
        iscrowd = torch.as_tensor([obj['iscrowd'] for obj in target], dtype=torch.int64)

        # 4. Drop degenerate boxes (x2 <= x1 or y2 <= y1): torchvision detection
        #    models refuse targets containing boxes without positive width/height.
        keep = (boxes_xyxy[:, 2] > boxes_xyxy[:, 0]) & (boxes_xyxy[:, 3] > boxes_xyxy[:, 1])

        image_id = torch.tensor([target[0]['image_id']])

        return {
            "boxes": boxes_xyxy[keep],
            "labels": labels[keep],
            "image_id": image_id,
            "area": area[keep],
            "iscrowd": iscrowd[keep],
        }

In [None]:
# COCO datasets for both splits: images go through the split-specific image
# pipeline, raw annotations through CocoToPyTorchFormat.
train_dataset = CocoDetection(
    root=TRAIN_IMAGES,
    annFile=TRAIN_ANN,
    transform=transform_train,
    target_transform=CocoToPyTorchFormat(),
)
val_dataset = CocoDetection(
    root=VAL_IMAGES,
    annFile=VAL_ANN,
    transform=transform_val,
    target_transform=CocoToPyTorchFormat(),
)

In [None]:
def carica_indici_da_json(filename="balanced_indices.json"):
    """Load a collection of indices stored as JSON.

    Args:
        filename: path of the UTF-8 encoded JSON file to read.

    Returns:
        The decoded JSON content. Its length is reported on stdout.
    """
    with open(filename, encoding='utf-8') as handle:
        loaded = json.loads(handle.read())
    print(f"✅ Caricati {len(loaded)} indici da {filename} (Formato JSON)")
    return loaded

In [None]:
# Load the pre-computed index lists (train first, then val) that select which
# samples of each dataset are used.
balanced_indices_train, balanced_indices_val = map(
    carica_indici_da_json,
    (
        "/kaggle/input/indici/balanced_indices.json",
        "/kaggle/input/indici/balanced_indices_val.json",
    ),
)

In [None]:
# Restrict both datasets to the loaded index lists and report the sizes.
train_subset, val_subset = (
    Subset(train_dataset, balanced_indices_train),
    Subset(val_dataset, balanced_indices_val),
)
print(f"✅ TRAIN: {len(train_subset)} images")
print(f"✅ VAL:   Using {len(val_subset)} images")

In [None]:
# Batches are collated into (images, targets) tuples rather than stacked tensors.
# Training batches are shuffled; validation batches keep the subset order.
train_loader = DataLoader(
    dataset=train_subset,
    batch_size=4,
    shuffle=True,
    num_workers=0,
    collate_fn=lambda batch: tuple(zip(*batch)),
)

val_loader = DataLoader(
    dataset=val_subset,
    batch_size=4,
    shuffle=False,
    num_workers=0,
    collate_fn=lambda batch: tuple(zip(*batch)),
)

In [None]:
def get_model(num_classes=91):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a new box-prediction head.

    The detector itself is created without pretrained detection weights
    (weights=None); only the ResNet-50 backbone starts from the IMAGENET1K_V1
    weights.

    Args:
        num_classes: number of output classes of the new box predictor
            (default 91, the value used for COCO in this notebook).

    Returns:
        The detection model with its box predictor replaced.
    """
    backbone_weights = torchvision.models.resnet.ResNet50_Weights.IMAGENET1K_V1
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        weights=None,
        weights_backbone=backbone_weights,
    )

    # The new head must accept the same number of input features as the old one.
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector

In [None]:
from torch_lr_finder import LRFinder
import torch

class RetinaNetLRFinder(LRFinder):
    """LR range-test helper for detection models that compute their own loss.

    Despite the name, nothing here is specific to RetinaNet: the only
    requirement is that the model, in training mode, is called as
    `model(images, targets)` and returns a dict of loss tensors.
    """

    def __init__(self, model, optimizer, criterion=None, device=None, memory_cache=True, cache_dir=None):
        # The model computes the loss internally, so no external criterion is
        # needed; it is made optional here and simply forwarded to the base class.
        super().__init__(model, optimizer, criterion, device, memory_cache, cache_dir)

    def _train_batch(self, train_iter, accumulation_steps, non_blocking_transfer=True):
        """Run one optimisation step and return its loss.

        Args:
            train_iter: iterator yielding (images, targets) batches, where
                targets is a sequence of dicts of tensors.
            accumulation_steps: number of batches whose gradients are
                accumulated before the single optimizer step; each batch loss
                is divided by this value.
            non_blocking_transfer: forwarded as `non_blocking` to every
                host-to-device copy.

        Returns:
            The accumulated loss as a float, or None if `train_iter` is
            exhausted before the step could be completed (no optimizer step is
            taken in that case).
        """
        self.model.train()
        self.optimizer.zero_grad()

        total_loss = 0.0
        for _ in range(accumulation_steps):
            try:
                images, targets = next(train_iter)
            except StopIteration:
                return None

            # Images and targets are lists/dicts, so they are moved to the
            # device element by element.
            images = [
                image.to(self.device, non_blocking=non_blocking_transfer)
                for image in images
            ]
            targets = [
                {k: v.to(self.device, non_blocking=non_blocking_transfer) for k, v in t.items()}
                for t in targets
            ]

            # Forward pass: the model returns its loss components as a dict.
            loss_dict = self.model(images, targets)

            # Total loss of this batch, averaged over the accumulation steps.
            loss = sum(loss_dict.values()) / accumulation_steps

            # Backward pass (gradients accumulate across the inner loop).
            loss.backward()
            total_loss += loss.item()

        self.optimizer.step()

        return total_loss

In [None]:
#device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
#model = get_model()
#model.to(device)

#optimizer = torch.optim.SGD(
#    [p for p in model.parameters() if p.requires_grad],
#    lr=1e-4,           # Start un po' più alto
#    momentum=0.9,      # Importante per la stabilità
#    weight_decay=0.0005
#)

#lr_finder = RetinaNetLRFinder(model, optimizer, device=device)

#lr_finder.range_test(
#    train_loader, 
#    start_lr=1e-5,     # Start: abbastanza basso da non esplodere subito
#    end_lr=0.1,        # End: abbastanza alto da divergere sicuramente
#    num_iter=100,      # Rapido: 100 batch sono sufficienti
#    step_mode="exp"
#)

#lr_finder.plot() 

In [None]:
#import matplotlib.pyplot as plt

# 1. Salviamo la funzione originale 'show' in una variabile
#original_show = plt.show

# 2. Sostituiamo 'plt.show' con una funzione che non fa nulla (dummy)
#    In questo modo, quando lr_finder chiama .show(), non succede nulla e il grafico resta in memoria!
#plt.show = lambda: None

#try:
    # 3. Generiamo il grafico
    # La libreria proverà a fare show(), ma noi l'abbiamo disattivata :)
  #  lr_finder.plot(suggest_lr=True)

    # 4. Ora il grafico è ancora "vivo". Salviamolo!
#    plt.savefig('lr_finder_result.png', dpi=300, bbox_inches='tight')
#    print("✅ Grafico salvato correttamente!")

#finally:
    # 5. IMPORTANTE: Ripristiniamo la funzione show originale
    # Così i futuri grafici funzioneranno normalmente
#    plt.show = original_show

# 6. Se vuoi vederlo anche a schermo adesso, chiamiamo la show originale
#plt.show()

# 7. Reset del finder
#lr_finder.reset()

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the detector and move it to the selected device.
model = get_model()
model.to(device)

# SGD with momentum over all model parameters; learning rate 5e-3.
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)

In [None]:
# Number of training epochs and the directory where checkpoints are written.
num_epochs = 26
save_dir = "/kaggle/working/"
# Switch the model to training mode. The trailing ';' keeps the notebook from
# printing the whole model repr as the cell output.
model.train();

In [None]:
def _get_iou_types(model):
    model_without_ddp = model
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model_without_ddp = model.module
    iou_types = ["bbox"]
    if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN):
        iou_types.append("segm")
    if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN):
        iou_types.append("keypoints")
    return iou_types

In [None]:
@torch.inference_mode()
def myevaluate(model, data_loader, device):
    """Run COCO-style evaluation of `model` over `data_loader`.

    The model is switched to eval mode (and left there). Predictions are moved
    to the CPU and fed to a CocoEvaluator keyed by each target's "image_id".

    Args:
        model: detection model; called as `model(images)` and expected to
            return one dict of tensors per image.
        data_loader: yields (images, targets) batches; its `.dataset` is used
            to obtain the COCO ground-truth API object.
        device: device the images are moved to before the forward pass.

    Returns:
        The CocoEvaluator after accumulate() and summarize().
    """
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    try:
        cpu_device = torch.device("cpu")
        model.eval()
        metric_logger = utils.MetricLogger(delimiter="  ")
        header = "Test:"

        coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)

        for images, targets in metric_logger.log_every(data_loader, 100, header):
            images = list(img.to(device) for img in images)

            # Wait for pending GPU work so the timing below measures this batch only.
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            model_time = time.time()
            outputs = model(images)

            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            model_time = time.time() - model_time

            # Map each prediction to the id of the image it belongs to.
            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    finally:
        # Always restore the original thread count, even if evaluation fails;
        # otherwise the rest of the notebook session stays limited to one thread.
        torch.set_num_threads(n_threads)
    return coco_evaluator

In [None]:
import torch
from collections import defaultdict

@torch.no_grad()
def evaluate_loss(model, data_loader, device):
    """Compute the average losses of `model` over `data_loader` without training.

    The model is temporarily put in train mode, because that is the mode in
    which it is called as `model(images, targets)` and returns its loss dict;
    gradients are disabled by the decorator. The mode the model had on entry is
    restored before returning, also when an error occurs.

    Args:
        model: model returning a dict of scalar loss tensors in train mode.
        data_loader: yields (images, targets) batches, where targets is a
            sequence of dicts of tensors.
        device: device images and targets are moved to.

    Returns:
        dict mapping 'total_loss' and every key of the model's loss dict to its
        mean over the batches; an empty dict if the loader yields no batch.
    """
    was_training = model.training
    # IMPORTANT: train mode is required to obtain the losses.
    model.train()

    # defaultdict accumulates whatever loss keys the model produces.
    metric_logger = defaultdict(float)
    num_batches = 0

    try:
        for images, targets in data_loader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Forward pass
            loss_dict = model(images, targets)

            # Total loss of the batch
            metric_logger['total_loss'] += sum(loss_dict.values()).item()

            # Every individual loss component, whatever its name
            for k, v in loss_dict.items():
                metric_logger[k] += v.item()

            num_batches += 1
    finally:
        # Do not leave the caller's model silently switched to train mode.
        model.train(was_training)

    if num_batches == 0:
        return {}

    # Average over the number of batches
    return {k: v / num_batches for k, v in metric_logger.items()}

In [None]:
from engine import train_one_epoch
import time

# Per-epoch history of learning rate and train/validation losses
history = []

# The loop below steps and checkpoints `lr_scheduler`, so it has to exist before
# the loop starts. The LR is divided by 10 after epochs 16 and 22, the step
# schedule the torchvision detection reference pairs with a 26-epoch run.
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[16, 22], gamma=0.1)

print(f"Inizio Training per {num_epochs} epoche (Solo Loss, No mAP)...")
start_total = time.time()

for epoch in range(num_epochs):
    epoch_start = time.time()

    # Learning rate in effect while this epoch is trained. It is read BEFORE the
    # scheduler step so the logged value matches the losses logged next to it.
    current_lr = optimizer.param_groups[0]["lr"]

    # --- 1. TRAINING ---
    # NOTE(review): train_one_epoch receives epoch+1; the reference engine is
    # believed to apply LR warm-up only when its epoch argument is 0, so warm-up
    # would never run here — confirm this is intended.
    metric_logger = train_one_epoch(model, optimizer, train_loader, device, epoch+1, print_freq=500)

    # --- 2. DETAILED VALIDATION LOSS ---
    # evaluate_loss returns a dict with the total and every individual loss
    val_metrics = evaluate_loss(model, val_loader, device)

    # --- 3. SCHEDULER STEP ---
    lr_scheduler.step()

    # --- 4. COLLECT ALL COMPONENTS ---
    epoch_log = {
        'epoch': epoch,
        'lr': current_lr,

        # TRAINING: total loss and its individual components
        'train_loss': metric_logger.meters['loss'].global_avg,
        'train_loss_classifier': metric_logger.meters['loss_classifier'].global_avg,
        'train_loss_box_reg': metric_logger.meters['loss_box_reg'].global_avg,
        'train_loss_objectness': metric_logger.meters['loss_objectness'].global_avg,
        'train_loss_rpn_box_reg': metric_logger.meters['loss_rpn_box_reg'].global_avg,

        # VALIDATION: everything stored separately
        'val_loss': val_metrics['total_loss'],                 # overall mean
        'val_loss_classifier': val_metrics['loss_classifier'], # classification error
        'val_loss_box_reg': val_metrics['loss_box_reg'],       # box position error
        'val_loss_objectness': val_metrics['loss_objectness'], # foreground/background error
        'val_loss_rpn_box_reg': val_metrics['loss_rpn_box_reg'] # RPN box error
    }

    history.append(epoch_log)

    # --- 5. EPOCH RECAP ---
    epoch_duration = time.time() - epoch_start
    print("-" * 50)
    print(f"Epoch [{epoch+1}/{num_epochs}] - {epoch_duration/60:.1f} min")
    print(f"Train Loss: {epoch_log['train_loss']:.4f}")
    print(f"Val Loss:   {epoch_log['val_loss']:.4f}  <-- (Cls: {epoch_log['val_loss_classifier']:.3f} | Box: {epoch_log['val_loss_box_reg']:.3f})")
    print(f"LR:         {current_lr:.6f}")
    print("-" * 50)

    # --- 6. SAVE CHECKPOINT ---
    # One file per epoch (named with the 0-based epoch index), holding everything
    # needed to resume: model, optimizer, scheduler and the history so far.
    checkpoint_path = os.path.join(save_dir, f"checkpoint_epoch_{epoch}.pth")
    torch.save({
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'lr_scheduler': lr_scheduler.state_dict(),
        'epoch': epoch,
        'history': history,
    }, checkpoint_path)

    print(f"Salvato checkpoint: {checkpoint_path}")

total_time = time.time() - start_total
print(f"Training Completato in {total_time/3600:.2f} ore! ✅")