In [2]:
import os
from PIL import Image
import torch
from torchvision import transforms

In [3]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset

In [4]:
class CanDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (str): Directory with all the images (can be train or test).
            transform (callable, optional): Optional transform to be applied on a sample.
            is_test (bool, optional): Whether the dataset is for test set (no labels).
        """
        self.root_dir = root_dir
        self.transform = transform
        #self.is_test = is_test  # Flag to indicate if it's the test dataset
        self.image_paths = []
        self.labels = []
        self.classes = sorted(os.listdir(root_dir))
        
        for label, class_name in enumerate(self.classes):
            class_dir = os.path.join(root_dir, class_name)
            for file_name in os.listdir(class_dir):
                if file_name.endswith(('.png', '.jpg', '.jpeg')):
                    self.image_paths.append(os.path.join(class_dir, file_name))
                    self.labels.append(label)  # Add label only for training set
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        label = self.labels[idx]
        
        return image, label

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),  # Flip orizzontale
    transforms.RandomRotation(degrees=15),  # Rotazione
    transforms.RandomResizedCrop((224,224), scale=(0.8, 1.0)),  # Zoom
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [5]:
train_dataset = CanDataset(root_dir='/Users/martinapanini/Library/Mobile Documents/com~apple~CloudDocs/Università/Signal_Image_Video/MonsterProject/Monster_energy_drink/Monster_energy_drink/train', transform=transform)
test_dataset = CanDataset(root_dir='/Users/martinapanini/Library/Mobile Documents/com~apple~CloudDocs/Università/Signal_Image_Video/MonsterProject/Monster_energy_drink/Monster_energy_drink/test', transform=transform)
# Crea i dataloader per il dataset di train e test
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [6]:
import torch.nn as nn
import torchvision.models as models

# Carica il modello pre-addestrato (ResNet in questo caso)
model = models.resnet50(pretrained=True)

# Congela i livelli convolutivi
for param in model.parameters():
    param.requires_grad = False

# Ottieni il numero di caratteristiche dell'ultimo livello convolutivo
num_features = model.fc.in_features

# Rimuovi l'ultimo livello fully connected
model.fc = nn.Identity()

# Aggiungi una testa personalizzata per la classificazione del colore della lattina
class ColorClassificationHead(nn.Module):
    def __init__(self, num_features, num_classes):
        super(ColorClassificationHead, self).__init__()
        self.fc = nn.Linear(num_features, num_classes)
    
    def forward(self, x):
        return self.fc(x)

# Aggiungi una testa personalizzata per la segmentazione dei bordi
class EdgeSegmentationHead(nn.Module):
    def __init__(self, num_features, num_classes):
        super(EdgeSegmentationHead, self).__init__()
        self.conv1 = nn.Conv2d(num_features, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, num_classes, kernel_size=3, padding=1)
    
    def forward(self, x):
        x = self.conv1(x)
        x = nn.ReLU()(x)
        x = self.conv2(x)
        return x

# Definisci il numero di classi per la classificazione del colore e la segmentazione dei bordi
num_color_classes = 3  # Ad esempio: rosso, verde, blu
num_edge_classes = 1  # Segmentazione binaria: bordo o non bordo

# Crea le teste personalizzate
color_classification_head = ColorClassificationHead(num_features, num_color_classes)
edge_segmentation_head = EdgeSegmentationHead(num_features, num_edge_classes)

# Aggiungi le teste personalizzate al modello
class CustomModel(nn.Module):
    def __init__(self, base_model, color_head, edge_head):
        super(CustomModel, self).__init__()
        self.base_model = base_model
        self.color_head = color_head
        self.edge_head = edge_head
    
    def forward(self, x):
        features = self.base_model(x)
        color_output = self.color_head(features)
        edge_output = self.edge_head(features.unsqueeze(2).unsqueeze(3))
        return color_output, edge_output

# Crea il modello finale
custom_model = CustomModel(model, color_classification_head, edge_segmentation_head)

# Stampa il modello per verificare la struttura
print(custom_model)



CustomModel(
  (base_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
   

In [7]:
# Imposta il modello in modalità di valutazione
custom_model.eval()

# Disabilita il calcolo dei gradienti per migliorare le prestazioni
with torch.no_grad():
    for images, labels in train_loader:
        # Passa le immagini attraverso il modello
        color_output, edge_output = custom_model(images)
        
        # Stampa le predizioni per la classificazione del colore e la segmentazione dei bordi
        #print("Color Classification Output:", color_output)
        #print("Edge Segmentation Output:", edge_output)



In [10]:
color_output.type()
color_output.shape
edge_output.shape

torch.Size([29, 1, 1, 1])

In [11]:
class ColorClassificationHead(nn.Module):
    def __init__(self, num_features, num_classes):
        super(ColorClassificationHead, self).__init__()
        self.fc1 = nn.Linear(num_features, 512)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(512, num_classes)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Crea la nuova testa di classificazione del colore
color_classification_head = ColorClassificationHead(num_features, num_color_classes)

# Aggiungi la nuova testa al modello personalizzato
custom_model.color_head = color_classification_head

# Stampa il modello per verificare la struttura
print(custom_model)

CustomModel(
  (base_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
   

In [12]:
import torch.nn.functional as F

class EdgeSegmentationDecoder(nn.Module):
    def __init__(self, num_features, num_classes):
        super(EdgeSegmentationDecoder, self).__init__()
        self.conv1 = nn.Conv2d(num_features, 512, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(512, 256, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(64, num_classes, kernel_size=3, padding=1)
    
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.conv5(x)
        return x

# Crea il decoder per la segmentazione dei bordi
edge_segmentation_decoder = EdgeSegmentationDecoder(num_features, num_edge_classes)

# Aggiungi il decoder al modello personalizzato
custom_model.edge_head = edge_segmentation_decoder

# Stampa il modello per verificare la struttura
print(custom_model)

CustomModel(
  (base_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
   

In [16]:
# Definisci le funzioni di loss
classification_criterion = nn.CrossEntropyLoss()
segmentation_criterion = nn.BCEWithLogitsLoss()

# Definisci l'ottimizzatore
optimizer = torch.optim.Adam(custom_model.parameters(), lr=0.001)

# Numero di epoche
num_epochs = 5

# Ciclo di addestramento
for epoch in range(num_epochs):
    custom_model.train()
    running_loss = 0.0
    
    for images, labels in train_loader:
        # Azzerare i gradienti
        optimizer.zero_grad()
        
        # Passa le immagini attraverso il modello
        color_output, edge_output = custom_model(images)
        
        # Verifica che le etichette siano nel range corretto
        labels = labels % num_color_classes
        
        # Calcola la loss per la classificazione del colore
        classification_loss = classification_criterion(color_output, labels)
        
        # Crea i target per la segmentazione dei bordi (dummy targets in questo esempio)
        edge_targets = torch.zeros_like(edge_output)
        
        # Calcola la loss per la segmentazione dei bordi
        segmentation_loss = segmentation_criterion(edge_output, edge_targets)
        
        # Calcola la loss totale
        total_loss = classification_loss + segmentation_loss
        
        # Backpropagation
        total_loss.backward()
        
        # Aggiorna i pesi
        optimizer.step()
        
        # Aggiorna il running loss
        running_loss += total_loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}")

Epoch [1/5], Loss: 0.9283387023349141
Epoch [2/5], Loss: 0.6991754111855529
Epoch [3/5], Loss: 0.5608517541441806
Epoch [4/5], Loss: 0.4937286758145621
Epoch [5/5], Loss: 0.4704018387683602


In [None]:
# Imposta il modello in modalità di valutazione
custom_model.eval()

# Variabili per tenere traccia delle metriche
total_classification_loss = 0.0
total_segmentation_loss = 0.0
correct_color_predictions = 0
total_images = 0

# Disabilita il calcolo dei gradienti per migliorare le prestazioni
with torch.no_grad():
    for images, labels in test_loader:
        # Passa le immagini attraverso il modello
        color_output, edge_output = custom_model(images)
        
        # Verifica che le etichette siano nel range corretto
        labels = labels % num_color_classes
        
        # Calcola la loss per la classificazione del colore
        classification_loss = classification_criterion(color_output, labels)
        
        # Crea i target per la segmentazione dei bordi (dummy targets in questo esempio)
        edge_targets = torch.zeros_like(edge_output)
        
        # Calcola la loss per la segmentazione dei bordi
        segmentation_loss = segmentation_criterion(edge_output, edge_targets)
        
        # Calcola la loss totale
        total_classification_loss += classification_loss.item()
        total_segmentation_loss += segmentation_loss.item()
        
        # Calcola il numero di predizioni corrette per la classificazione del colore
        _, predicted_colors = torch.max(color_output, 1)
        correct_color_predictions += (predicted_colors == labels).sum().item()
        total_images += labels.size(0)

# Calcola la loss media e l'accuratezza
avg_classification_loss = total_classification_loss / len(test_loader)
avg_segmentation_loss = total_segmentation_loss / len(test_loader)
color_classification_accuracy = correct_color_predictions / total_images

print(f"Average Classification Loss: {avg_classification_loss}")
print(f"Average Segmentation Loss: {avg_segmentation_loss}")
print(f"Color Classification Accuracy: {color_classification_accuracy * 100:.2f}%")