# Library import

### import librerie

In [1]:
import os
import numpy as np

import json
from PIL import Image
from torch.utils.data import Dataset

import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torch.optim import Adam
from tqdm import tqdm

import ast

In [2]:
pip install triton

Collecting triton
  Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)
Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (209.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.5/209.5 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: triton
Successfully installed triton-3.1.0
Note: you may need to restart the kernel to use updated packages.


## Path

In [3]:
# file contenente i path delle immagini del dataset
txt_file = "/kaggle/input/our-xview-dataset/xView_class_map.json"
img_dir = "/kaggle/input/our-xview-dataset/images"

annotation_file = "/kaggle/input/our-xview-dataset/COCO_annotations_new.json"

In [4]:
utils = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_ssd_processing_utils', trust_repo=True)

Downloading: "https://github.com/NVIDIA/DeepLearningExamples/zipball/torchhub" to /root/.cache/torch/hub/torchhub.zip


# Dataloader

In [16]:
class CustomDataset(Dataset):
    def __init__(self, annotations_file, img_dir, utils, aug=False):
        """
        Args:
            annotations_file (str): Path al file JSON delle annotazioni (es. formato COCO).
            img_dir (str): Path alla directory delle immagini.
            transform (callable, optional): Trasformazioni da applicare alle immagini.
        """

        with open(annotations_file, 'r') as f:
            self.annotations = json.load(f)
        self.img_dir = img_dir
        self.utils = utils
        self.aug = aug
        self. transform = transforms.Compose([  transforms.ToTensor(),  # Converte in formato [C, H, W]
                                                transforms.Resize((320, 320)),  # Ridimensiona l'immagine
                                                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                                              ])

    def __len__(self):
        return len(self.annotations['images'])

    def __getitem__(self, idx):
        # Leggi i dettagli dell'immagine
        img_info = self.annotations['images'][idx]
        img_path = os.path.join(self.img_dir, img_info['file_name'])
        image = Image.open(img_path).convert("RGB")
        
        #if self.aug:
        image = self.transform(image)
        
        # Leggi le annotazioni
        img_id = img_info['id']
        annotations = [ann for ann in self.annotations['annotations'] if ann['image_id'] == img_id]
        
        #bboxes = np.array([ann['bbox'] for ann in annotations], dtype=np.float32)
        #bboxes = np.array([ast.literal_eval(ann['bbox']) for ann in annotations], dtype=np.float32)
        if len(annotations) == 0:
            bboxes = np.zeros((0, 4), dtype=np.float32)
            labels = np.zeros((0,), dtype=np.int64)
        else:
            bboxes = np.array([ast.literal_eval(ann['bbox']) if isinstance(ann['bbox'], str) else ann['bbox'] for ann in annotations], dtype=np.float32)
            labels = np.array([ann['category_id'] for ann in annotations], dtype=np.int64)
            bboxes[:, 2:] += bboxes[:, :2] # Converti bboxes nel formato richiesto (x_min, y_min, x_max, y_max) 

        target = {
            "image": torch.tensor(np.array(image), dtype=torch.float32).permute(2, 0, 1),  # Converti immagine in tensore
            'boxes': torch.tensor(bboxes, dtype=torch.float32),
            'labels': torch.tensor(labels, dtype=torch.int64),
        }

        #return image, target
        return target


In [17]:
def collate_fn(batch):
    # Separate images and targets from the batch
    images = [item['image'] for item in batch]
    targets = [item for item in batch]
    
    # Stack images into a tensor of shape (batch_size, C, H, W)
    images = torch.stack(images, dim=0)
    
    # List of bounding boxes and labels
    boxes_list = [target['boxes'] for target in targets]
    labels_list = [target['labels'] for target in targets]
    
    # Concatenate the boxes and labels using cat to form the targets for each image
    all_boxes = torch.cat(boxes_list, dim=0)  # This will concatenate all the boxes from the batch
    all_labels = torch.cat(labels_list, dim=0)  # Similarly concatenate all labels
    
    # Return images and targets (list of dictionaries)
    return images, [{'boxes': boxes, 'labels': labels} for boxes, labels in zip(boxes_list, labels_list)]


In [18]:
# Creazione dei dataset
train_dataset = CustomDataset(annotation_file, img_dir, utils, aug=True) 
valid_dataset = CustomDataset(annotation_file, img_dir, utils, aug=False)  
test_dataset = CustomDataset(annotation_file, img_dir, utils, aug=False)  

# Creazione dei DataLoader
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

# Network

In [19]:
class SSDModel(nn.Module):
    def __init__(self, num_classes):
        super(SSDModel, self).__init__()

        ## Model -> per info https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Detection/SSD
        self.ssd_model = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_ssd') # modello pre-addestrato su dataset COCO

    def forward(self, images):

        # Calcola le previsioni con il modello SSD
        predictions = self.ssd_model(images)  # Output grezzo del modello SSD
        
        return predictions

In [20]:
class SSDLoss(nn.Module):
    def __init__(self, alpha=1.0):
        """
        Combina Smooth L1 Loss per la regressione dei bounding box e
        Cross Entropy Loss per la classificazione.

        Args:
            alpha (float): Peso per bilanciare la regressione e la classificazione.
        """
        super(SSDLoss, self).__init__()
        self.alpha = alpha
        self.smooth_l1 = nn.SmoothL1Loss(reduction='none')
        self.cross_entropy = nn.CrossEntropyLoss(reduction='none')

    def forward(self, predictions, targets):
        """
        Calcola la perdita SSD.

        Args:
            predictions (tuple): (loc_preds, conf_preds), dove
                loc_preds: tensor (N, num_boxes, 4) - Predizioni dei bounding box.
                conf_preds: tensor (N, num_boxes, num_classes) - Predizioni delle classi.
            targets (tuple): (loc_targets, conf_targets), dove
                loc_targets: tensor (N, num_boxes, 4) - Bounding box reali.
                conf_targets: tensor (N, num_boxes) - Classi reali.

        Returns:
            torch.Tensor: Valore della perdita.
        """
        loc_preds, conf_preds = predictions
        loc_targets, conf_targets = targets

        # Loss per la regressione del bounding box
        loc_loss = self.smooth_l1(loc_preds, loc_targets)
        loc_loss = loc_loss.sum(dim=-1)  # Somma per box

        # Loss per la classificazione
        conf_loss = self.cross_entropy(conf_preds.transpose(2, 1), conf_targets)
        
        # Filtro per box positivi (label diversi da 0 o -1)
        pos_mask = conf_targets > 0
        neg_mask = conf_targets == 0

        # Calcola la perdita per i positivi
        loc_loss = (loc_loss * pos_mask).sum()  # Loss solo per i positivi
        conf_loss = conf_loss[pos_mask | neg_mask].sum()  # Loss per positivi e negativi

        # Combinazione di loss
        total_loss = loc_loss + self.alpha * conf_loss
        return total_loss


In [21]:
class Trainer:
    def __init__(self, model, train_loader, val_loader, criterion, optimizer=None, device='cuda', checkpoint_dir='checkpoints'):
        """
        Inizializza la classe Trainer.
        
        Args:
            model: Il modello SSD.
            train_loader: DataLoader per il training set.
            val_loader: DataLoader per il validation set.
            criterion: Funzione di perdita.
            optimizer: Ottimizzatore (opzionale, di default Adam).
            device: Dispositivo per il calcolo ('cuda' o 'cpu').
            checkpoint_dir: Directory per salvare i checkpoint.
        """
        self.model = model.to(device)
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.criterion = criterion
        self.optimizer = optimizer if optimizer else Adam(model.parameters(), lr=1e-4)
        self.device = device
        self.checkpoint_dir = checkpoint_dir
        
        if not os.path.exists(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)

    def process_targets(self, targets):
        processed_targets = []
        for target in targets:
            target_dict = {
                'boxes': target['boxes'],  # Mantieni le bounding boxes come sono
                'labels': target['labels'],  # Mantieni le etichette come sono
            }
            processed_targets.append(target_dict)
        return processed_targets


    def train_one_epoch(self, epoch):
        self.model.train()
        epoch_loss = 0
        for images, targets in self.train_loader:
            images = images.to(self.device)
            
            # Processa i targets (in questo caso non vengono 'stackati', sono una lista)
            processed_targets = self.process_targets(targets)
            
            # Esegui il forward pass
            predictions = self.model(images)
            
            # Calcola la perdita (utilizzando il formato richiesto dal modello)
            loss = self.compute_loss(predictions, processed_targets)
            
            # Ottimizzazione
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            
            epoch_loss += loss.item()
        
        return epoch_loss

    
    def validate_one_epoch(self, epoch):
        """
        Esegue un'epoca di validazione.
        """
        self.model.eval()
        running_loss = 0.0
        pbar = tqdm(self.val_loader, desc=f"Validation Epoch {epoch}")
        
        with torch.no_grad():
            for images, targets in pbar:
                # Sposta immagini sul dispositivo
                images = images.to(self.device)
                
                # Elabora i targets
                targets = self.process_targets(targets)
            
                # Forward pass
                predictions = self.model(images)
                
                # Calcolo della perdita
                loss = self.criterion(predictions, (targets['loc_targets'], targets['conf_targets']))
                running_loss += loss.item()
                
                pbar.set_postfix({"val_loss": running_loss / len(self.val_loader)})
        
        return running_loss / len(self.val_loader)
    
    def save_checkpoint(self, epoch, train_loss, val_loss, best=False):
        """
        Salva un checkpoint del modello.
        
        Args:
            epoch: Epoca corrente.
            train_loss: Perdita di training corrente.
            val_loss: Perdita di validazione corrente.
            best: Se True, salva il checkpoint come il migliore.
        """
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'train_loss': train_loss,
            'val_loss': val_loss
        }
        filename = f"best_model.pth" if best else f"checkpoint_epoch_{epoch}.pth"
        path = os.path.join(self.checkpoint_dir, filename)
        torch.save(checkpoint, path)
        print(f"Checkpoint salvato: {path}")
    
    def fit(self, epochs):
        """
        Esegue l'addestramento e la validazione per un dato numero di epoche.
        """
        train_losses = []
        val_losses = []
        best_val_loss = float('inf')
        
        for epoch in range(1, epochs + 1):
            train_loss = self.train_one_epoch(epoch)
            val_loss = self.validate_one_epoch(epoch)
            
            train_losses.append(train_loss)
            val_losses.append(val_loss)
            
            # Salvataggio del checkpoint per ogni epoca
            self.save_checkpoint(epoch, train_loss, val_loss)
            
            # Salvataggio del miglior modello
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                self.save_checkpoint(epoch, train_loss, val_loss, best=True)
            
            print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")
        
        return train_losses, val_losses


# Training

In [22]:
num_classes = 12
ssd_model = SSDModel(num_classes)

Using cache found in /root/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub


In [23]:
# Addestra il modello, con validazione ad ogni epoca
loss = SSDLoss(alpha=1.0)

# Inizializza il trainer
trainer = Trainer(
    model=ssd_model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=loss,
    optimizer=Adam(ssd_model.parameters(), lr=1e-4),
    device='cuda'
)

# Avvia il training
train_losses, val_losses = trainer.fit(epochs=10)

RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[32, 320, 3, 320] to have 3 channels, but got 320 channels instead

# Testing