In [1]:
import os
import pandas as pd
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision import transforms
from torchvision import models
from PIL import Image
from torch import nn, optim
import matplotlib.pyplot as plt
import yaml



In [2]:
class CardDataset(Dataset):
    def __init__(self, images_dir, labels_dir, transform=None, yaml_file="C:/Users/ale03/OneDrive/Desktop/Progetto di AI/archive/data.yaml"):
        """
        Args:
            images_dir (string): Percorso alla directory con le immagini.
            labels_dir (string): Percorso alla directory con le etichette.
            transform (callable, optional): Trasformazioni da applicare alle immagini.
            yaml_file (string): Percorso al file YAML contenente le classi.
        """
        self.images_dir = images_dir
        self.labels_dir = labels_dir
        self.transform = transform

        try:
            with open(yaml_file, "r") as file:
                data = yaml.safe_load(file)
                self.classes = data.get("names", [])  # La lista di classi del file data.yaml
                print(f"Classi caricate: {self.classes}")
        except Exception as e:
            print(f"Errore nel caricamento del file YAML: {e}")
            self.classes = []

    def __len__(self):
        return len(os.listdir(self.images_dir))

    def __getitem__(self, idx):
        # Ottieni il nome dell'immagine e il percorso
        img_name = os.path.join(self.images_dir, os.listdir(self.images_dir)[idx])
        label_name = os.path.join(self.labels_dir, os.listdir(self.labels_dir)[idx].replace(".jpg", ".txt"))
        
        # Leggi le etichette dal file .txt
        with open(label_name, 'r') as file:
            label_info = file.readline().split()
        
        image = Image.open(img_name).convert("RGB")
        label = int(label_info[0])  # La prima colonna è l'etichetta numerica
        xmin, ymin, xmax, ymax = map(float, label_info[1:])

        # Normalizza il bounding box
        image_width, image_height = image.size
        bbox = torch.tensor([xmin / image_width, ymin / image_height, xmax / image_width, ymax / image_height], dtype=torch.float)

        if self.transform:
            image = self.transform(image)
        
        return image, bbox, label


In [3]:
def collate_fn(batch):
    # Filtra gli elementi None
    batch = [item for item in batch if item is not None]
    if len(batch) == 0:  # Nessun elemento valido
        return None
    images, labels = zip(*batch)
    return torch.stack(images, 0), torch.tensor(labels)


# Definizione del dataset personalizzato

In [4]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 56 * 56, 128)  # Adatta al tuo input
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = SimpleCNN(num_classes=52)


# Trasformazioni per le immagini

In [5]:
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Dataset e DataLoader

In [6]:
# Percorsi per le immagini e le etichette
train_images_dir = "C:/Users/ale03/OneDrive/Desktop/Progetto di AI/archive/train/images"
train_labels_dir = "C:/Users/ale03/OneDrive/Desktop/Progetto di AI/archive/train/labels"

# Trasformazioni per il dataset
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Creazione del dataset e del dataloader
train_dataset = CardDataset(images_dir=train_images_dir, labels_dir=train_labels_dir, transform=train_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Test del DataLoader
for images, bboxes, labels in train_loader:
    print(f"Batch di immagini: {images.size()}")
    print(f"Bounding box: {bboxes}")
    print(f"Labels: {labels}")
    break

Classi caricate: ['10c', '10d', '10h', '10s', '2c', '2d', '2h', '2s', '3c', '3d', '3h', '3s', '4c', '4d', '4h', '4s', '5c', '5d', '5h', '5s', '6c', '6d', '6h', '6s', '7c', '7d', '7h', '7s', '8c', '8d', '8h', '8s', '9c', '9d', '9h', '9s', 'Ac', 'Ad', 'Ah', 'As', 'Jc', 'Jd', 'Jh', 'Js', 'Kc', 'Kd', 'Kh', 'Ks', 'Qc', 'Qd', 'Qh', 'Qs']
Batch di immagini: torch.Size([32, 3, 224, 224])
Bounding box: tensor([[4.7095e-04, 9.3322e-04, 1.5891e-04, 7.8009e-05],
        [1.2453e-03, 1.1933e-03, 1.6758e-04, 7.2231e-05],
        [2.0109e-03, 8.7833e-04, 2.3981e-04, 1.0690e-04],
        [6.9631e-04, 1.1095e-03, 2.4270e-04, 1.0112e-04],
        [5.9229e-04, 8.1765e-04, 1.0401e-04, 1.5602e-04],
        [1.2048e-03, 7.0208e-04, 8.0899e-05, 1.6469e-04],
        [5.9229e-04, 7.7432e-04, 1.9936e-04, 1.8491e-04],
        [9.3900e-04, 5.9229e-04, 2.3114e-04, 1.2424e-04],
        [6.7897e-04, 1.7653e-03, 2.0803e-04, 1.9069e-04],
        [1.9791e-03, 8.0032e-04, 2.3981e-04, 9.8234e-05],
        [1.1239e-03, 1.

# Modello AlexNet pre-addestrato

In [7]:
from torchvision import models
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 52)




# Sposta il modello sulla GPU se disponibile

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Funzione di perdita e ottimizzatore

In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Ciclo di addestramento

In [10]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

epochs = 5
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, bboxes, labels in train_loader:  # Adatta al dataset con 3 valori
        label_indices = labels.to(device)  # Le etichette sono già numeriche
        
        images = images.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, label_indices)  # Usa le etichette numeriche come target
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        correct += (preds == label_indices).sum().item()
        total += label_indices.size(0)

    train_accuracy = 100 * correct / total
    print(f"Epoca {epoch + 1}, Loss: {running_loss / len(train_loader):.4f}, Train Accuracy: {train_accuracy:.2f}%")


Epoca 1, Loss: 3.9311, Train Accuracy: 3.78%
Epoca 2, Loss: 3.0591, Train Accuracy: 28.01%
Epoca 3, Loss: 2.1557, Train Accuracy: 62.34%
Epoca 4, Loss: 1.2275, Train Accuracy: 89.47%
Epoca 5, Loss: 0.5203, Train Accuracy: 99.03%


# Valutazione sul test set

In [11]:
# Caricamento del dataset di test
test_images_dir = "C:/Users/ale03/OneDrive/Desktop/Progetto di AI/archive/test/images"
test_labels_dir = "C:/Users/ale03/OneDrive/Desktop/Progetto di AI/archive/test/labels"

test_dataset = CardDataset(images_dir=test_images_dir, labels_dir=test_labels_dir, transform=train_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Valutazione del modello
model.eval()
test_correct = 0
test_total = 0

with torch.no_grad():
    for images, bboxes, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        test_correct += (preds == labels).sum().item()
        test_total += labels.size(0)

test_accuracy = 100 * test_correct / test_total
print(f"Test Accuracy: {test_accuracy:.2f}%")


Classi caricate: ['10c', '10d', '10h', '10s', '2c', '2d', '2h', '2s', '3c', '3d', '3h', '3s', '4c', '4d', '4h', '4s', '5c', '5d', '5h', '5s', '6c', '6d', '6h', '6s', '7c', '7d', '7h', '7s', '8c', '8d', '8h', '8s', '9c', '9d', '9h', '9s', 'Ac', 'Ad', 'Ah', 'As', 'Jc', 'Jd', 'Jh', 'Js', 'Kc', 'Kd', 'Kh', 'Ks', 'Qc', 'Qd', 'Qh', 'Qs']


Test Accuracy: 10.68%


# Visualizzazione delle predizioni

In [12]:
def show_predictions(dataloader, model, class_names, num_images=5):
    model.eval()
    images_shown = 0

    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)

            for i in range(images.size(0)):
                if images_shown >= num_images:
                    return

                img = images[i].cpu().permute(1, 2, 0).numpy()
                img = img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]  # De-normalizza
                img = np.clip(img, 0, 1)

                plt.imshow(img)
                plt.title(f"Predetto: {class_names[preds[i]]}, Reale: {class_names[labels[i]]}")
                plt.axis('off')
                plt.show()
                images_shown += 1


# Mostra alcune predizioni

In [13]:
import os

def show_predictions(dataloader, model, class_names, output_dir="archive", num_images=10):
    model.eval()
    images_shown = 0

    # Creare la cartella per salvare le immagini, se non esiste
    os.makedirs(output_dir, exist_ok=True)

    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)

            for i in range(images.size(0)):
                if images_shown >= num_images:
                    return

                # Processa l'immagine
                img = images[i].cpu().permute(1, 2, 0).numpy()
                img = img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]  # Denormalizza
                img = np.clip(img, 0, 1)

                # Nome file per salvare l'immagine
                filename = os.path.join(
                    output_dir,
                    f"predicted_{class_names[preds[i]]}_actual_{class_names[labels[i]]}_{images_shown}.png"
                )

                # Salva l'immagine
                plt.imsave(filename, img)
                print(f"Immagine salvata: {filename}")

                images_shown += 1
