Importo le librerie necessarie per il progetto

In [1]:
import os
import csv
import cv2
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
from PIL import Image,ImageOps
import matplotlib.pyplot as plt
import torchvision.transforms as T
import torchvision.models as models
from torch.utils.data import DataLoader
from torchvision.models import resnet152
from torch.optim.lr_scheduler import ReduceLROnPlateau

Recupero il numero delle classi e verifico se la scheda video è impostata come device

In [2]:
train_path = r'C:\Users\alessio\Desktop\Dimarco_Lomonaco_Salemi\BACKREMOVING\AI\train'

num_classes = len(os.listdir(train_path))
print("n° Classi:", num_classes)
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(torch.cuda.is_available())

n° Classi: 8
True


Questa funzione prende in input un percorso relativo ad un'immagine (image_path), una bounding box (bbox) che specifica la regione di interesse dell'immagine e  l'immagine presa col path viene ritagliata in base ai valori della bounding box e normalizzata. 

In [27]:
def crop(image_path, bbox):
    x_min, y_min, x_max, y_max = bbox
    image_rgb = cv2.imread(image_path, cv2.IMREAD_COLOR)
    image_rgb = image_rgb[y_min:y_max, x_min:x_max]
    res = cv2.resize(image_rgb, (282, 282))
    res =np.array(res)/255.0
    return res

Questo codice legge un file CSV contenente informazioni sulle immagini come:path, bounding box e etichette. Successivamente, manda ciascuna immagine in elaborazione alla funzione precedete, dopo memorizza l'immagine elaborata e l'etichetta associata in liste separate per l'addestramento di un modello di machine learning.

In [4]:
train_images = []
train_labels = []
train_df = pd.read_csv('train.csv')
for i, (imp, x_min, y_min, x_max, y_max, label) in train_df.iterrows():
    image_path = os.path.join('AI/train', imp.replace('/', '\\'))
    label = label
    bbox=[x_min , y_min, x_max, y_max]
    for i in range(len(bbox)):
        if bbox[i] < 0:
            bbox[i] = 0
    image= crop(image_path,bbox)
    image_path = os.path.join('AI/save', imp.replace('/', '\\'))
    train_labels.append(label)
    train_images.append(image)

Questo codice converte le immagini e le etichette del dataset in tensori PyTorch, necessari per l'addestramento di modelli di deep learning. Inoltre, riorganizza le dimensioni delle immagini per adattarle al formato comunemente utilizzato da PyTorch.

In [5]:
train_images = np.array(train_images)
train_labels = np.array(train_labels)
labels = list(set(train_labels))
print("labels:", labels)
# Conversione delle immagini in tensori
train_images_tensor = torch.from_numpy(train_images).permute(0, 3, 1, 2).float()
train_labels_tensor = torch.from_numpy(train_labels).long()

labels: [0, 1, 2, 3, 4, 5, 6, 7]


Imposto la batch size e creo il dataset di addestramento, di validazione e i data loader.

In [6]:
from torch.utils.data import DataLoader, TensorDataset
import torchvision.transforms as T
from sklearn.model_selection import train_test_split
batch_size = 32

# Divisione dei dati in train e validation
X_train, X_val, Y_train, Y_val = train_test_split(train_images_tensor, train_labels_tensor, test_size=0.2, random_state=42)

# Creazione dei dataset
train_dataset = TensorDataset(X_train, Y_train)
val_dataset = TensorDataset(X_val, Y_val)

# Crea i data loader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

Questo modello si sulla trasformazione colorjitter, tale trasformazione aiuta il modello a generalizzare meglio poiche va a cambiare i colori all'interno dell'immagine inoltre viene fatto un crop a 224 per eliminare informazioni inutili dall'immagine e perche resnet lavora meglio con queste dimensioni.
Inoltre i valori di contrasto e luminosità sono stati trovati in maniera manuale giocando con tali valori su software di manipolazioni di immagini, cercando di trovare dei valori che rendessero gli sfondi simili senza coprire l'obiettivo della predizione

In [4]:
model=resnet152(weights="IMAGENET1K_V2")
transf = T.ColorJitter(brightness=(0.1),contrast=(2),saturation=2,hue=(-0.5,.5)),
transform = T.Compose([
    T.RandomApply(transf,0.75),
    T.RandomHorizontalFlip(),
    T.CenterCrop(224)
])
criterion = nn.NLLLoss()
# Modifica il classificatore finale del modello
n_inputs = model.fc.in_features
# Scongelamento dei pesi del modello in modo che possa imparare dai dati di training (vale sia per il classificatore che per tutti gli altri strati)
for param in model.parameters():
    param.requires_grad = True
model.fc = nn.Sequential(
    nn.Dropout(0.2),
    nn.Linear(n_inputs, num_classes),
    nn.LogSoftmax(dim=1)
)
lr=0.00001
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0.0001)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.01)


In [20]:
model=model.to(dev)
best_val_accuracy = 0.0
best_val_loss = 0.0
epoch = 0

#Loop di addestramento(puo essere interrotto)
try:
    while True:
        # Fase di addestramento
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        for batch_images, batch_labels in train_loader:
            batch_images=transform(batch_images)
            batch_images = batch_images.to("cuda")
            batch_labels = batch_labels.to("cuda") 
            optimizer.zero_grad()
            outputs = model(batch_images)
            loss = criterion(outputs, batch_labels)
            loss.backward()
            optimizer.step()
            _, predicted = torch.max(outputs.data, 1)
            train_total += batch_labels.size(0)
            train_correct += (predicted == batch_labels).sum().item()
            train_loss += loss.item() * batch_images.size(0)

        train_accuracy = train_correct / train_total
        train_loss /= len(train_loader.dataset)
        # Fase di convalida
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for batch_images, batch_labels in val_loader:
                batch_images=transform(batch_images)
                batch_images = batch_images.to("cuda")
                batch_labels = batch_labels.to("cuda") 
                outputs = model(batch_images)
                _, predicted = torch.max(outputs.data, 1)
                val_loss += criterion(outputs, batch_labels)
                val_total += batch_labels.size(0)
                val_correct += (predicted == batch_labels).sum().item()

        val_accuracy = val_correct / val_total
        val_loss /= len(val_loader)
        scheduler.step(val_accuracy)
        torch.save(model.state_dict(), "model_4_"+str(epoch)+".pth")
        print(f"Epoch [{epoch+1}] Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2%}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2%}")

        epoch += 1
        torch.cuda.empty_cache()

except KeyboardInterrupt:
    print("Training interrotto manualmente")
    torch.save(model.state_dict(), "model_4_lastSave.pth")

Epoch [1] Training Loss: 2.0452, Training Accuracy: 16.88%, Validation Loss: 1.9824, Validation Accuracy: 26.25%
Epoch [2] Training Loss: 1.9238, Training Accuracy: 32.66%, Validation Loss: 1.8661, Validation Accuracy: 49.69%
Epoch [3] Training Loss: 1.7094, Training Accuracy: 55.55%, Validation Loss: 1.5404, Validation Accuracy: 79.38%
Epoch [4] Training Loss: 1.4278, Training Accuracy: 76.09%, Validation Loss: 1.2589, Validation Accuracy: 85.94%
Epoch [5] Training Loss: 1.0760, Training Accuracy: 86.41%, Validation Loss: 0.8950, Validation Accuracy: 92.50%
Epoch [6] Training Loss: 0.7332, Training Accuracy: 91.25%, Validation Loss: 0.5643, Validation Accuracy: 96.25%
Epoch [7] Training Loss: 0.4562, Training Accuracy: 96.17%, Validation Loss: 0.5322, Validation Accuracy: 94.38%
Epoch [8] Training Loss: 0.3261, Training Accuracy: 97.19%, Validation Loss: 0.2995, Validation Accuracy: 97.19%
Epoch [9] Training Loss: 0.2304, Training Accuracy: 97.50%, Validation Loss: 0.4347, Validation 

Funzione che legge un path da un file csv, prende l'immagine associata e ne predice il risultato e lo scrive su un file submission.csv

In [31]:
def model_ev(model_path):
    data_transforms = T.Compose([
        T.ToTensor(),
        T.CenterCrop(224)
    ])
    
    label_pre=[]
    model.load_state_dict(torch.load(model_path))
    model.to("cuda")
    model.eval()
    test_df = pd.read_csv('test.csv')
    output_file = 'submission.csv'
    with open(output_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['image', 'class']) 
        for i, (image_path, x_min, y_min, x_max, y_max) in test_df.iterrows():
            bbox = x_min, y_min, x_max, y_max
            image_ = os.path.join('AI/test', image_path.replace('/', '\\'))
            image = crop(image_,bbox)
            image = data_transforms(image)
            image = image.float()
            image = image.unsqueeze(0)
            image = image.cuda()
            with torch.no_grad():
                outputs = model(image)
                _, predicted = torch.max(outputs.data, 1)
                predicted_label = predicted.item()
                label_pre.append(predicted_label)
                writer.writerow([image_path, predicted_label])
    return label_pre

Funziione che ritorna le performance del modello sul test

In [7]:
def test(label_pre):
    class_df = pd.read_csv('class.csv')
    i = 0
    correct = 0
    for label in class_df.iterrows():
        if int(label[1].values) == label_pre[i]:
            correct = correct + 1
        else:
            print(i)
            
        i = i + 1    
    return correct/i

L'epoca 18 si distingue perché ha una perdita di validazione molto bassa e un'accuratezza di validazione molto alta e non siamo in zona di overfitting.
Dopo quest'epoca le prestazioni cominciano a peggiorare.
Risultati sui test 99.125%

In [36]:
model_path = 'model_4_17.pth'
model.load_state_dict(torch.load(model_path))

model=model.to(dev)
# Definizione deela funzione di loss e l'ottimizzatore
lr=10e-6
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0)
best_val_accuracy = 0.0
best_val_loss = 0.0
epoch = 0

#Loop di addestramento(puo essere interrotto)
try:
    while True:
        # Fase di addestramento
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        for batch_images, batch_labels in train_loader:
            batch_images=transform(batch_images)
            batch_images = batch_images.to("cuda")
            batch_labels = batch_labels.to("cuda") 
            optimizer.zero_grad()
            outputs = model(batch_images)
            loss = criterion(outputs, batch_labels)
            loss.backward()
            optimizer.step()
            _, predicted = torch.max(outputs.data, 1)
            train_total += batch_labels.size(0)
            train_correct += (predicted == batch_labels).sum().item()
            train_loss += loss.item() * batch_images.size(0)

        train_accuracy = train_correct / train_total
        train_loss /= len(train_loader.dataset)
        # Fase di convalida
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for batch_images, batch_labels in val_loader:
                batch_images=transform(batch_images)
                batch_images = batch_images.to("cuda")
                batch_labels = batch_labels.to("cuda") 
                outputs = model(batch_images)
                _, predicted = torch.max(outputs.data, 1)
                val_loss += criterion(outputs, batch_labels)
                val_total += batch_labels.size(0)
                val_correct += (predicted == batch_labels).sum().item()

        val_accuracy = val_correct / val_total
        val_loss /= len(val_loader)
        scheduler.step(val_accuracy)
        torch.save(model.state_dict(), "model_4.1."+str(epoch)+".pth")
        print(f"Epoch [{epoch+1}] Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2%}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2%}")

        epoch += 1
        torch.cuda.empty_cache()

except KeyboardInterrupt:
    print("Training interrotto manualmente")
    torch.save(model.state_dict(), "model_4.1_lastSave.pth")

Epoch [1] Training Loss: 0.0324, Training Accuracy: 99.69%, Validation Loss: 0.0343, Validation Accuracy: 99.38%
Epoch [2] Training Loss: 0.0199, Training Accuracy: 99.77%, Validation Loss: 0.0193, Validation Accuracy: 99.69%
Epoch [3] Training Loss: 0.0248, Training Accuracy: 99.38%, Validation Loss: 0.0305, Validation Accuracy: 99.06%
Epoch [4] Training Loss: 0.0355, Training Accuracy: 98.98%, Validation Loss: 0.0420, Validation Accuracy: 98.75%
Epoch [5] Training Loss: 0.0138, Training Accuracy: 99.77%, Validation Loss: 0.0324, Validation Accuracy: 99.38%
Training interrotto manualmente


Anche qui è facile quale epoca scegliere, la numero 2, 99.75% sul test.Dopo questa epoca il modello comincia a mostrare segni di overfitting.
Adesso alleniamo solo il classificatore per vedere se possiamo aumentare ancora le performance.

In [48]:
model_path = "model_4.1.1.pth"
model.load_state_dict(torch.load(model_path))
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(n_inputs, num_classes),
    nn.LogSoftmax(dim=1)
)
model=model.to(dev)
# Definizione deela funzione di loss e l'ottimizzatore
lr=10e-4
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0.1)
best_val_accuracy = 0.0
best_val_loss = 0.0
epoch = 0

#Loop di addestramento(puo essere interrotto)
try:
    while True:
        # Fase di addestramento
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        for batch_images, batch_labels in train_loader:
            batch_images=transform(batch_images)
            batch_images = batch_images.to("cuda")
            batch_labels = batch_labels.to("cuda") 
            optimizer.zero_grad()
            outputs = model(batch_images)
            loss = criterion(outputs, batch_labels)
            loss.backward()
            optimizer.step()
            _, predicted = torch.max(outputs.data, 1)
            train_total += batch_labels.size(0)
            train_correct += (predicted == batch_labels).sum().item()
            train_loss += loss.item() * batch_images.size(0)

        train_accuracy = train_correct / train_total
        train_loss /= len(train_loader.dataset)
        # Fase di convalida
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for batch_images, batch_labels in val_loader:
                batch_images=transform(batch_images)
                batch_images = batch_images.to("cuda")
                batch_labels = batch_labels.to("cuda") 
                outputs = model(batch_images)
                _, predicted = torch.max(outputs.data, 1)
                val_loss += criterion(outputs, batch_labels)
                val_total += batch_labels.size(0)
                val_correct += (predicted == batch_labels).sum().item()

        val_accuracy = val_correct / val_total
        val_loss /= len(val_loader)
        scheduler.step(val_accuracy)
        torch.save(model.state_dict(), "model_4.1."+str(epoch)+".pth")
        print(f"Epoch [{epoch+1}] Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2%}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2%}")

        epoch += 1
        torch.cuda.empty_cache()

except KeyboardInterrupt:
    print("Training interrotto manualmente")
    torch.save(model.state_dict(), "model_4.1_lastSave.pth")

Epoch [1] Training Loss: 0.2316, Training Accuracy: 93.98%, Validation Loss: 0.0138, Validation Accuracy: 100.00%
Epoch [2] Training Loss: 0.0230, Training Accuracy: 99.38%, Validation Loss: 0.0190, Validation Accuracy: 99.69%
Epoch [3] Training Loss: 0.0269, Training Accuracy: 99.61%, Validation Loss: 0.0430, Validation Accuracy: 99.06%
Epoch [4] Training Loss: 0.0278, Training Accuracy: 99.61%, Validation Loss: 0.0500, Validation Accuracy: 99.38%
Epoch [5] Training Loss: 0.0567, Training Accuracy: 98.67%, Validation Loss: 0.0209, Validation Accuracy: 99.69%
Epoch [6] Training Loss: 0.0434, Training Accuracy: 98.98%, Validation Loss: 0.0370, Validation Accuracy: 99.06%
Epoch [7] Training Loss: 0.0204, Training Accuracy: 99.84%, Validation Loss: 0.2257, Validation Accuracy: 91.88%
Training interrotto manualmente


Anche qui dopo l'epoca 2 ci sono leggeri segni di overfitting che sono gia chiari dopo l'epoca 5. Risulati sul test 99.875%

In [32]:
model_path = "model_4.1.1.pth"
label_pre=model_ev(model_path)
test(label_pre)

331


0.99875