# Library import

### import librerie

In [1]:
import os
import numpy as np

import json
from PIL import Image
from torch.utils.data import Dataset

import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torch.optim import Adam
from tqdm import tqdm

import ast

In [2]:
pip install triton

Collecting triton
  Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)
Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (209.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.5/209.5 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: triton
Successfully installed triton-3.1.0
Note: you may need to restart the kernel to use updated packages.


## Path

In [3]:
# file contenente i path delle immagini del dataset
txt_file = "/kaggle/input/our-xview-dataset/xView_class_map.json"
img_dir = "/kaggle/input/our-xview-dataset/images"

annotation_file = "/kaggle/input/our-xview-dataset/COCO_annotations_new.json"

In [4]:
utils = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_ssd_processing_utils', trust_repo=True)

Downloading: "https://github.com/NVIDIA/DeepLearningExamples/zipball/torchhub" to /root/.cache/torch/hub/torchhub.zip


# Dataloader

In [5]:
class CustomDataset(Dataset):
    def __init__(self, annotations_file, img_dir, utils, aug=False):
        """
        Args:
            annotations_file (str): Path al file JSON delle annotazioni (es. formato COCO).
            img_dir (str): Path alla directory delle immagini.
            utils: Funzioni di utilità per il dataset.
            aug (bool): Flag per attivare le trasformazioni di data augmentation.
        """
        with open(annotations_file, 'r') as f:
            self.annotations = json.load(f)
        self.img_dir = img_dir
        self.utils = utils
        self.aug = aug
        self.transform = transforms.Compose([
            transforms.ToTensor(),  # Converte in formato [C, H, W]
            transforms.Resize((320, 320)),  # Ridimensiona l'immagine
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        return len(self.annotations['images'])

    def __getitem__(self, idx):
        # Leggi i dettagli dell'immagine
        img_info = self.annotations['images'][idx]
        img_path = os.path.join(self.img_dir, img_info['file_name'])

        # Caricamento immagine
        try:
            image = Image.open(img_path).convert("RGB")
        except Exception as e:
            raise ValueError(f"Errore nel caricamento dell'immagine: {img_path}, errore: {e}")

        # Trasforma l'immagine
        if self.aug:
            #image = self.utils.augment(image)  # Applica data augmentation, se definita
            image = self.transform(image)

        # Check sul formato dell'immagine
        if not isinstance(image, torch.Tensor):
            raise ValueError(f"L'immagine non è un tensore: {type(image)}")
        if image.shape[0] != 3:
            raise ValueError(f"L'immagine ha un numero di canali errato: {image.shape[0]} (atteso: 3)")

        # Leggi le annotazioni
        img_id = img_info['id']
        annotations = [ann for ann in self.annotations['annotations'] if ann['image_id'] == img_id]

        # Bounding box e label
        if len(annotations) == 0:
            bboxes = np.zeros((0, 4), dtype=np.float32)
            labels = np.zeros((0,), dtype=np.int64)
        else:
            try:
                bboxes = np.array(
                    [ast.literal_eval(ann['bbox']) if isinstance(ann['bbox'], str) else ann['bbox'] for ann in annotations],
                    dtype=np.float32
                )
                labels = np.array([ann['category_id'] for ann in annotations], dtype=np.int64)
                bboxes[:, 2:] += bboxes[:, :2]  # Converti nel formato (x_min, y_min, x_max, y_max)
            except Exception as e:
                raise ValueError(f"Errore nel parsing delle annotazioni: {annotations}, errore: {e}")

        # Check sui bounding box
        if bboxes.ndim != 2 or bboxes.shape[1] != 4:
            raise ValueError(f"Bounding box in formato errato: {bboxes.shape} (atteso: [N, 4])")

        # Check sulle label
        if labels.ndim != 1:
            raise ValueError(f"Labels in formato errato: {labels.shape} (atteso: [N])")
        if not np.issubdtype(labels.dtype, np.integer):
            raise ValueError(f"Labels non sono interi: {labels.dtype}")

        # Costruzione del target
        target = {
            "image": image,
            'boxes': torch.tensor(bboxes, dtype=torch.float32),
            'labels': torch.tensor(labels, dtype=torch.int64),
        }

        return target


In [6]:
def collate_fn(batch):
    # Separate images and targets from the batch
    images = [item['image'] for item in batch]
    targets = [item for item in batch]
    
    # Stack images into a tensor of shape (batch_size, C, H, W)
    images = torch.stack(images, dim=0)
    
    # Prepare boxes and labels lists
    boxes_list = [target['boxes'] for target in targets]
    labels_list = [target['labels'] for target in targets]
    
    # Return images and targets (without using stack)
    return images, [{'boxes': boxes, 'labels': labels} for boxes, labels in zip(boxes_list, labels_list)]

In [7]:
# Creazione dei dataset
train_dataset = CustomDataset(annotation_file, img_dir, utils, aug=True) 
valid_dataset = CustomDataset(annotation_file, img_dir, utils, aug=False)  
test_dataset = CustomDataset(annotation_file, img_dir, utils, aug=False)  

# Creazione dei DataLoader
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

# Network

In [8]:
class SSDModel(nn.Module):
    def __init__(self, num_classes):
        super(SSDModel, self).__init__()

        ## Model -> per info https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Detection/SSD
        self.ssd_model = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_ssd') # modello pre-addestrato su dataset COCO

    def forward(self, images):

        # Calcola le previsioni con il modello SSD
        predictions = self.ssd_model(images)  # Output grezzo del modello SSD
        
        return predictions

In [9]:
class SSDLoss(nn.Module):
    
    def __init__(self):
        super(SSDLoss, self).__init__()
        self.smooth_l1 = nn.SmoothL1Loss(reduction='sum')  # Per bounding boxes
        self.cross_entropy = nn.CrossEntropyLoss(reduction='sum')  # Per classificazione

    def forward(self, predictions, targets):
        loc_preds, conf_preds = predictions
        loc_targets, conf_targets = targets
        
        # Calcola la perdita di regressione (bounding boxes)
        loc_loss = self.smooth_l1(loc_preds, loc_targets)
        
        # Calcola la perdita di classificazione (confidence scores)
        conf_loss = self.cross_entropy(conf_preds, conf_targets)
        
        # Restituisci la perdita combinata
        return loc_loss + conf_loss


In [10]:
class Trainer:
    def __init__(self, model, train_loader, val_loader, criterion, optimizer=None, device='cuda', checkpoint_dir='checkpoints'):
        """
        Inizializza la classe Trainer.
        
        Args:
            model: Il modello SSD.
            train_loader: DataLoader per il training set.
            val_loader: DataLoader per il validation set.
            criterion: Funzione di perdita.
            optimizer: Ottimizzatore (opzionale, di default Adam).
            device: Dispositivo per il calcolo ('cuda' o 'cpu').
            checkpoint_dir: Directory per salvare i checkpoint.
        """
        self.model = model.to(device)
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.criterion = criterion
        self.optimizer = optimizer if optimizer else Adam(model.parameters(), lr=1e-4)
        self.device = device
        self.checkpoint_dir = checkpoint_dir
        
        if not os.path.exists(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)

    def process_targets(self, raw_targets, anchor_boxes):
        loc_targets = []
        conf_targets = []
    
        for target in raw_targets:
            gt_boxes = target["boxes"]
            gt_labels = target["labels"]
    
            # Effettua il matching
            matched_boxes = match_anchors_to_targets(anchor_boxes, gt_boxes)
            matched_labels = match_labels_to_anchors(anchor_boxes, gt_boxes, gt_labels)
    
            loc_targets.append(matched_boxes)
            conf_targets.append(matched_labels)
    
        # Concatenare per ottenere un tensore compatibile
        loc_targets = torch.stack(loc_targets, dim=0)
        conf_targets = torch.stack(conf_targets, dim=0)
    
        return loc_targets, conf_targets

    def match_anchors_to_targets(anchor_boxes, gt_boxes, iou_threshold=0.5):
        """
        Associa bounding box reali (gt_boxes) agli anchor box predefiniti.
        """
        num_anchors = anchor_boxes.shape[0]
        num_gt_boxes = gt_boxes.shape[0]
    
        # Calcola l'IoU tra ogni anchor e ogni gt_box
        iou_matrix = calculate_iou(anchor_boxes, gt_boxes)
    
        # Trova il miglior match per ogni anchor
        best_gt_idx = iou_matrix.argmax(dim=1)
        best_anchors_idx = iou_matrix.argmax(dim=0)
    
        # Applica la soglia IoU per il matching
        matched_gt_boxes = gt_boxes[best_gt_idx]
        matched_gt_boxes[iou_matrix.max(dim=1).values < iou_threshold] = 0  # Imposta a 0 per match deboli
    
        return matched_gt_boxes


    def train_one_epoch(self, epoch):
        self.model.train()
        epoch_loss = 0
        for images, targets in self.train_loader:
            images = images.to(self.device)
            
            # Processa i targets (in questo caso non vengono 'stackati', sono una lista)
            processed_targets = self.process_targets(targets)
            
            # Esegui il forward pass
            predictions = self.model(images)
            
            # Calcola la perdita (utilizzando il formato richiesto dal modello)
            loss = self.criterion(predictions, processed_targets)
            
            # Ottimizzazione
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            
            epoch_loss += loss.item()
        
        return epoch_loss



    
    def validate_one_epoch(self, epoch):
        self.model.eval()
        running_loss = 0.0
        pbar = tqdm(self.val_loader, desc=f"Validation Epoch {epoch}")
        
        with torch.no_grad():
            for images, targets in pbar:
                images = images.to(self.device)
                processed_targets = self.process_targets(targets)
                
                predictions = self.model(images)
                loss = self.compute_loss(predictions, processed_targets)
                
                running_loss += loss.item()
                pbar.set_postfix({"val_loss": running_loss / len(self.val_loader)})
        
        return running_loss / len(self.val_loader)

    
    def save_checkpoint(self, epoch, train_loss, val_loss, predictions=None, targets=None, best=False):
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'train_loss': train_loss,
            'val_loss': val_loss,
            'predictions': predictions,
            'targets': targets
        }
        filename = f"best_model.pth" if best else f"checkpoint_epoch_{epoch}.pth"
        path = os.path.join(self.checkpoint_dir, filename)
        torch.save(checkpoint, path)
        print(f"Checkpoint salvato: {path}")

    
    def fit(self, epochs):
        """
        Esegue l'addestramento e la validazione per un dato numero di epoche.
        """
        train_losses = []
        val_losses = []
        best_val_loss = float('inf')
        
        for epoch in range(1, epochs + 1):
            train_loss = self.train_one_epoch(epoch)
            val_loss = self.validate_one_epoch(epoch)
            
            train_losses.append(train_loss)
            val_losses.append(val_loss)
            
            # Salvataggio del checkpoint per ogni epoca
            self.save_checkpoint(epoch, train_loss, val_loss)
            
            # Salvataggio del miglior modello
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                self.save_checkpoint(epoch, train_loss, val_loss, best=True)
            
            print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")
        
        return train_losses, val_losses


# Training

In [11]:
num_classes = 12
ssd_model = SSDModel(num_classes)

Using cache found in /root/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub
Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 188MB/s] 
Downloading checkpoint from https://api.ngc.nvidia.com/v2/models/nvidia/ssd_pyt_ckpt_amp/versions/20.06.0/files/nvidia_ssdpyt_amp_200703.pt
  ckpt = torch.load(ckpt_file)


In [12]:
# Addestra il modello, con validazione ad ogni epoca
loss = SSDLoss()

# Inizializza il trainer
trainer = Trainer(
    model=ssd_model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=loss,
    optimizer=Adam(ssd_model.parameters(), lr=1e-4),
    device='cuda'
)

# Avvia il training
train_losses, val_losses = trainer.fit(epochs=10)

TypeError: Trainer.process_targets() missing 1 required positional argument: 'anchor_boxes'

# Testing