In [2]:
from google.colab import drive
import os
import os
from glob import glob
from PIL import Image
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch
import torchvision
import torch.nn as nn
import zipfile
import torch.optim as optim  # Optimization algorithms for training the model
import torch.nn.functional as F  # Common loss functions and activation functions
import itertools  # Utility functions for generating combinations
from torch.optim.lr_scheduler import CosineAnnealingLR  # Learning rate scheduler for training
import matplotlib.pyplot as plt  # Plotting library for visualization
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Subset
import numpy as np
from torchsummary import summary
from torchvision import transforms

drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
# Check if GPU (Graphics Processing Unit) is available for training
train_on_gpu = torch.cuda.is_available()

if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print('CUDA is available!  Training on GPU ...')

# Define the device to use for training based on GPU availability
device = torch.device("cuda:0" if train_on_gpu else "cpu")

# Print the chosen device for training
print(device)


CUDA is available!  Training on GPU ...
cuda:0


In [4]:
# Percorso al file ZIP su Google Drive
zip_path = '/content/drive/MyDrive/Colab Notebooks/Enviornment/L3-4/archive.zip'

# Directory di destinazione dove verranno estratti i file
extract_dir = '/content/images'

# Estrazione del file ZIP
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)


In [5]:
# Directory estratta
data_dir = '/content/images/UCMerced_LandUse/Images'

# Ottieni le sottocartelle (che rappresentano le classi)
class_dirs = [f.path for f in os.scandir(data_dir) if f.is_dir()]

# Inizializza una lista per memorizzare i percorsi delle immagini e le rispettive classi
image_paths = []
numeric_labels = []
class_to_idx = {}

# Itera attraverso ciascuna cartella (classe) e assegna un indice numerico alle classi
for idx, class_dir in enumerate(class_dirs):
    class_name = os.path.basename(class_dir)

    # Aggiungi la classe al dizionario se non è già presente
    class_to_idx[class_name] = idx

    # Ottieni tutti i file immagine in formato .tif nella cartella della classe
    images = glob(os.path.join(class_dir, '*.tif'))  # Modificato per cercare immagini .tif

    # Aggiungi i percorsi delle immagini e la classe numerica (label)
    image_paths.extend(images)
    numeric_labels.extend([idx] * len(images))  # Assegna la label numerica direttamente

# Verifica che le immagini siano state caricate correttamente
if image_paths:
    print(f'Trovate {len(image_paths)} immagini.')
    print('Esempio di percorso immagine:', image_paths[0])
    print('Classe associata (numerica):', numeric_labels[0])
else:
    print('Nessuna immagine trovata. Verifica il percorso o il formato dei file.')

print('Mappatura classi:', class_to_idx)

Trovate 2100 immagini.
Esempio di percorso immagine: /content/images/UCMerced_LandUse/Images/beach/beach71.tif
Classe associata (numerica): 0
Mappatura classi: {'beach': 0, 'harbor': 1, 'denseresidential': 2, 'forest': 3, 'runway': 4, 'river': 5, 'sparseresidential': 6, 'overpass': 7, 'intersection': 8, 'airplane': 9, 'agricultural': 10, 'mobilehomepark': 11, 'parkinglot': 12, 'freeway': 13, 'storagetanks': 14, 'baseballdiamond': 15, 'chaparral': 16, 'mediumresidential': 17, 'tenniscourt': 18, 'buildings': 19, 'golfcourse': 20}


In [6]:
# Trasformazioni da applicare alle immagini (es: ridimensionamento, conversione in tensor)
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # Ridimensiona le immagini
    transforms.ToTensor(),  # Converti le immagini in tensor PyTorch
])
# Dataset personalizzato per PyTorch (uguale a quello precedente)
class ImageDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # Carica l'immagine
        image = Image.open(img_path).convert('RGB')  # Converte in RGB se necessario

        # Applica le trasformazioni (se presenti)
        if self.transform:
            image = self.transform(image)

        return image, label

# Funzione per suddividere in modo stratificato per classe
def stratified_split(image_paths, labels, train_size=0.7, val_size=0.2, test_size=0.1):
    assert np.isclose(train_size + val_size + test_size, 1.0), "La somma delle frazioni deve essere 1."

    # Step 1: Suddividi in train e temp (validation + test)
    train_paths, temp_paths, train_labels, temp_labels = train_test_split(
        image_paths, labels, stratify=labels, test_size=(1 - train_size), random_state=42)

    # Step 2: Suddividi il temp in validation e test
    val_paths, test_paths, val_labels, test_labels = train_test_split(
        temp_paths, temp_labels, stratify=temp_labels, test_size=(test_size / (test_size + val_size)), random_state=42)

    return train_paths, val_paths, test_paths, train_labels, val_labels, test_labels

# Esegui lo split stratificato del dataset
train_paths, val_paths, test_paths, train_labels, val_labels, test_labels = stratified_split(
    image_paths, numeric_labels, train_size=0.7, val_size=0.2, test_size=0.1)

# Crea dataset PyTorch per ogni suddivisione
train_dataset = ImageDataset(image_paths=train_paths, labels=train_labels, transform=transform)
val_dataset = ImageDataset(image_paths=val_paths, labels=val_labels, transform=transform)
test_dataset = ImageDataset(image_paths=test_paths, labels=test_labels, transform=transform)

# Crea DataLoader per ogni suddivisione
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Verifica il caricamento dei dati per il training set
train_iter = iter(train_loader)
images, labels = next(train_iter)
print(f"Batch di immagini nel training: {images.shape}, Batch di etichette: {labels}")


Batch di immagini nel training: torch.Size([32, 3, 256, 256]), Batch di etichette: tensor([15,  4,  0, 12, 18,  9, 13,  0, 15, 14, 18,  0, 20, 10, 13, 14,  1,  7,
         9, 14, 16, 14,  7,  9, 19, 15, 19,  3, 17,  0, 13,  5])


In [7]:

# Define the model architecture (MobileNetV2)
# the model is loaded with weights pre-trained on the ImageNet dataset
net = torchvision.models.googlenet(weights='IMAGENET1K_V1')  # Load pre-trained weights
#print(net)
# Adjust the final classification layer
# MobileNetV2 was originally designed for classifying 1000 different classes in ImageNet.
# Here, the code modifies its classifier to make it suitable for age prediction
num_ftrs = net.fc.in_features  # Get the number of input features for the last layer
net.fc = nn.Sequential(
    nn.Linear(num_ftrs, 512),  # First linear layer with 512 units
    nn.GELU(),  # GELU activation function
    nn.Linear(512, 32),  # Second linear layer with 32 units
    nn.GELU(),  # GELU activation function
    nn.Linear(32, 21)   # Output layer with 21 classe
)

# Move the model to the appropriate device (CPU or GPU)
net.to(device)


Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth
100%|██████████| 49.7M/49.7M [00:01<00:00, 40.7MB/s]


GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track

In [8]:
# Print model summary
summary(net, (3, 256, 256))  # Input shape (channels, height, width)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 128, 128]           9,408
       BatchNorm2d-2         [-1, 64, 128, 128]             128
       BasicConv2d-3         [-1, 64, 128, 128]               0
         MaxPool2d-4           [-1, 64, 64, 64]               0
            Conv2d-5           [-1, 64, 64, 64]           4,096
       BatchNorm2d-6           [-1, 64, 64, 64]             128
       BasicConv2d-7           [-1, 64, 64, 64]               0
            Conv2d-8          [-1, 192, 64, 64]         110,592
       BatchNorm2d-9          [-1, 192, 64, 64]             384
      BasicConv2d-10          [-1, 192, 64, 64]               0
        MaxPool2d-11          [-1, 192, 32, 32]               0
           Conv2d-12           [-1, 64, 32, 32]          12,288
      BatchNorm2d-13           [-1, 64, 32, 32]             128
      BasicConv2d-14           [-1, 64,

In [9]:
# Define training parameters (epochs, loss function, optimizer, and scheduler)
epochs = 100  # Number of training epochs
criterion = nn.CrossEntropyLoss()  # Cross-Entropy Loss per problemi multiclasse
optimizer = optim.Adam(net.parameters(), lr=0.001)  # Adam optimizer with learning rate 0.001
scheduler = CosineAnnealingLR(optimizer,
                              T_max=len(train_loader) * epochs,  # Maximum number of iterations for scheduler
                              eta_min=1e-5)  # Minimum learning rate for scheduler


In [10]:
'''
Training and Validation section.
'''

# Uncomment if you need to complete the training with pre-saved model
#net.load_state_dict(torch.load("/kaggle/input/vecchio/net_best.pth"))

# Parameters for the early stopping procedure
patience = 6
best_val_loss = float('inf')
epochs_no_improve = 0

# List to evaluate the final model
all_preds = []
all_labels = []

# Training
for epoch in range(epochs):
    running_loss = []  # List to store training loss for each batch
    net.train()  # Set the model to training mode
    for i, data in enumerate(train_loader):
        # Get inputs and labels from the data loader
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device).long()
        # Zero the parameter gradients
        optimizer.zero_grad()
        # Forward pass
        outputs = net(inputs)
        # Calculate loss
        loss = criterion(outputs.squeeze(), labels)
        # Backward pass and parameter update
        loss.backward()
        optimizer.step()
        scheduler.step()
        # Print statistics (every 10% of the training data)
        running_loss.append(loss.item())
        if (i + 1) % (len(train_loader) // 10) == 0:
            print('%d, [%d, %d] loss: %.4f\tlr: %.6f' %
                  (epoch + 1, i + 1, len(train_loader), np.mean(running_loss), optimizer.param_groups[-1]['lr']))
            running_loss = []

    # Validation
    running_loss = []  # List to store validation loss for each batch
    net.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0
    for i, data in enumerate(val_loader):
        # Get inputs and labels from the data loader
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device).long()

        # Forward pass with gradient suppression
        with torch.no_grad():
            outputs = net(inputs)  # Get model predictions without calculating gradients
            # Finds the class with the highest probability for each image in the batch.
            _, predicted = torch.max(outputs.cpu(), 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            # Update total number of test images
            total += labels.size(0)  # label.size(0) gives the batch size
            # Count correct predictions
            correct += (predicted == labels.cpu()).sum().item()  # Count true positives

        loss = criterion(outputs, labels)  # Calculate Cross Entropy loss
        running_loss.append(loss.item())

    val_loss = np.mean(running_loss)
    print('Validation loss: %.6f' % val_loss)
    # Calculate and print accuracy
    accuracy = 100 * correct / total
    print(f'Validation accuracy: {accuracy:.2f} %')

    # Early stopping if no improvement in validation loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        torch.save(net.state_dict(), "net_best.pth")
        print(f"Epoch {epoch+1}: Validation loss improved to {val_loss:.6f}. Model saved.")
    else:
        epochs_no_improve += 1
        print(f"Epoch {epoch+1}: No improvement in validation loss ({val_loss:.6f}). Epochs without improvement: {epochs_no_improve}/{patience}")

    if epochs_no_improve == patience:
        print(f"Early stopping triggered after {epoch+1} epochs. Best validation loss: {best_val_loss:.6f}")
        break

print('Finished Training')

1, [4, 46] loss: 2.9957	lr: 0.001000
1, [8, 46] loss: 2.6444	lr: 0.001000
1, [12, 46] loss: 2.0511	lr: 0.001000
1, [16, 46] loss: 2.0611	lr: 0.001000
1, [20, 46] loss: 1.7760	lr: 0.001000
1, [24, 46] loss: 1.4455	lr: 0.001000
1, [28, 46] loss: 1.1726	lr: 0.001000
1, [32, 46] loss: 0.9883	lr: 0.001000
1, [36, 46] loss: 0.9786	lr: 0.001000
1, [40, 46] loss: 0.8704	lr: 0.001000
1, [44, 46] loss: 1.0462	lr: 0.001000
Validation loss: 1.811007
Validation accuracy: 50.95 %
Epoch 1: Validation loss improved to 1.811007. Model saved.
2, [4, 46] loss: 1.0156	lr: 0.001000
2, [8, 46] loss: 0.8417	lr: 0.001000
2, [12, 46] loss: 0.7313	lr: 0.001000
2, [16, 46] loss: 0.7771	lr: 0.001000
2, [20, 46] loss: 0.5888	lr: 0.000999
2, [24, 46] loss: 0.5954	lr: 0.000999
2, [28, 46] loss: 0.6326	lr: 0.000999
2, [32, 46] loss: 0.5848	lr: 0.000999
2, [36, 46] loss: 0.4730	lr: 0.000999
2, [40, 46] loss: 0.5396	lr: 0.000999
2, [44, 46] loss: 0.7695	lr: 0.000999
Validation loss: 0.708045
Validation accuracy: 77.62 

In [12]:
# Training and Validation section remains unchanged

# Una volta terminato il training, carica il modello migliore salvato
print("Loading the best saved model...")
net.load_state_dict(torch.load("net_best.pth"))  # Carica il modello migliore salvato
net.to(device)  # Assicurati che il modello sia caricato sul dispositivo corretto (GPU o CPU)

# Inizia la valutazione sul test set
print("Starting evaluation on the test set...")

# Passa il modello in modalità di valutazione (disattiva il dropout, batchnorm, ecc.)
net.eval()

# Variabili per memorizzare le predizioni e i label del test set
test_preds = []
test_labels = []
test_correct = 0
test_total = 0

# Nessuna necessità di calcolare i gradienti durante la fase di test
with torch.no_grad():
    for i, data in enumerate(test_loader):
        # Carica inputs e labels
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device).long()

        # Esegui una forward pass
        outputs = net(inputs)

        # Trova la classe con la probabilità più alta
        _, predicted = torch.max(outputs.cpu(), 1)
        test_preds.extend(predicted.cpu().numpy())
        test_labels.extend(labels.cpu().numpy())

        # Aggiorna il conteggio dei totali
        test_total += labels.size(0)
        # Conta le predizioni corrette
        test_correct += (predicted == labels.cpu()).sum().item()

# Calcola l'accuratezza sul test set
test_accuracy = 100 * test_correct / test_total
print(f"Test accuracy: {test_accuracy:.2f} %")

# Optional: Se vuoi calcolare metriche aggiuntive (ad esempio precision, recall, f1-score)
from sklearn.metrics import classification_report

print("\nClassification Report:")
print(classification_report(test_labels, test_preds))


Loading the best saved model...


  net.load_state_dict(torch.load("net_best.pth"))  # Carica il modello migliore salvato


Starting evaluation on the test set...
Test accuracy: 92.89 %

Classification Report:
              precision    recall  f1-score   support

           0       0.91      1.00      0.95        10
           1       1.00      1.00      1.00        10
           2       0.75      0.90      0.82        10
           3       0.91      1.00      0.95        10
           4       0.91      1.00      0.95        10
           5       1.00      0.80      0.89        10
           6       1.00      1.00      1.00        11
           7       1.00      0.70      0.82        10
           8       0.90      0.90      0.90        10
           9       0.90      0.90      0.90        10
          10       0.91      1.00      0.95        10
          11       0.91      1.00      0.95        10
          12       1.00      0.80      0.89        10
          13       0.69      0.90      0.78        10
          14       1.00      1.00      1.00        10
          15       0.91      1.00      0.95      