# Imports

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import random
import pandas as pd
import os
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

from agent import CardRecognizer
from utils.Loader import CardsDataset
from utils.evaluator import Evaluator

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
# Card recognizer under evaluation; it is pinned to the CPU here regardless of DEVICE.
recognizer = CardRecognizer(
    device="cpu",
    csv_file="cards.csv",
)
# Test split of the card dataset, loaded at 0.6 scale with "labels" as the target.
dataset = CardsDataset(
    csv_file="cards.csv",
    split="test",
    target="labels",
    scale=0.6,
)

In [3]:
# Classify every test sample and keep the ground truth and the prediction as strings.
true_labels, pred_labels = [], []

for idx in range(len(dataset)):
    image, true_label = dataset[idx]
    # The recognizer returns the card's category and suit separately.
    category, suit = recognizer.classify_card(image)
    true_labels.append(str(dataset.decode_label(true_label)))
    pred_labels.append(f"{category} of {suit}")

# One row per test sample: decoded ground truth next to the recognizer's answer.
df = pd.DataFrame({"True labels": true_labels, "Pred labels": pred_labels})

In [4]:
# Distinct ground-truth labels, in order of first appearance; a label's position is its integer id.
unique_labels = df["True labels"].unique().tolist()
# Single dict lookup per row instead of an `in` test plus `list.index` (two linear scans).
label_to_index = {label: index for index, label in enumerate(unique_labels)}
# Predictions whose label never occurs in the ground truth are encoded as -1.
df["Pred labels"] = df["Pred labels"].apply(lambda label: label_to_index.get(label, -1))
df["True labels"] = df["True labels"].apply(lambda label: label_to_index.get(label, -1))
df.head()

Unnamed: 0,True labels,Pred labels
0,0,0
1,1,1
2,2,2
3,3,3
4,4,4


In [5]:
def plot_and_save_confusion_matrix(true_labels, pred_labels, save_path, num_parameters):
    """Draw the confusion matrix of a set of predictions as a heatmap and save it.

    Args:
        true_labels: Ground-truth class ids, one per sample.
        pred_labels: Predicted class ids, aligned with ``true_labels``.
        save_path: Destination handed to ``savefig``. When it is a filesystem
            path, missing parent directories are created first.
        num_parameters: Value interpolated into the figure title (the notebook
            passes ``recognizer.size()``).

    Returns:
        None. The figure is written to ``save_path`` and then closed.
    """
    cm = confusion_matrix(true_labels, pred_labels)

    # savefig does not create directories; make sure the target folder exists.
    if isinstance(save_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(save_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
        ax.set_xlabel("Etiquetas Predichas")
        ax.set_ylabel("Etiquetas Verdaderas")
        ax.set_title(f"Matriz de Confusión. Num.Param{num_parameters}")
        fig.savefig(save_path)
    finally:
        # Close the figure even when drawing or saving fails, so it does not stay open in the kernel.
        plt.close(fig)
    
def evaluate_model(y_pred: np.ndarray, y_test: np.ndarray, class_names_str: list, unknown_label=-1):
    """Translate class indices into class names and compute classification metrics.

    Args:
        y_pred: Predicted class indices (array or pandas Series), one per sample.
        y_test: Ground-truth class indices, aligned with ``y_pred``.
        class_names_str: Indexable sequence mapping a class index to its name.
        unknown_label: Name used for any index outside
            ``[0, len(class_names_str))``. Negative indices are handled here
            explicitly because plain sequence indexing would silently map
            ``-1`` to the last class.

    Returns:
        Whatever ``Evaluator.evaluate_classification_metrics`` returns for a
        DataFrame with the columns ``"Prediction"`` and ``"GroundTruth"``.
    """
    num_classes = len(class_names_str)

    def _index_to_name(index):
        # Only in-range indices are real classes; everything else is reported as unknown.
        if 0 <= index < num_classes:
            return class_names_str[index]
        return unknown_label

    df_result = pd.DataFrame({"Prediction": y_pred, "GroundTruth": y_test})
    df_result["Prediction"] = df_result["Prediction"].apply(_index_to_name)
    df_result["GroundTruth"] = df_result["GroundTruth"].apply(_index_to_name)
    return Evaluator.evaluate_classification_metrics(df_result)

In [6]:
# Write the recognizer's confusion matrix to disk; the title carries recognizer.size().
plot_and_save_confusion_matrix(
    true_labels=df["True labels"].tolist(),
    pred_labels=df["Pred labels"].tolist(),
    save_path="result/confusion_matrix.png",
    num_parameters=recognizer.size(),
)

In [7]:
# Metrics are computed on the integer-encoded labels; class "names" here are the ids themselves.
df_metrics = evaluate_model(df["Pred labels"], df["True labels"], range(len(unique_labels)))
# The first len(unique_labels) index entries are treated as class ids and mapped back to
# their readable label; any remaining rows get no "Clase" value.
df_metrics["Clase"] = df_metrics.index.to_series().iloc[0:len(unique_labels)].apply(lambda x: unique_labels[x])
# Move "Clase" to the front, keeping the other columns in their existing order.
df_metrics = df_metrics[["Clase"] + [col for col in df_metrics.columns if col != "Clase"]]
# to_csv does not create directories, so make sure the output folder exists.
os.makedirs("result", exist_ok=True)
df_metrics.to_csv("result/metrics.csv", index=False)

# MobileNetV2

In [None]:
from torchvision import models, transforms
from torch.utils.data import DataLoader

# Seed the RNGs so weight initialisation and batch shuffling repeat across runs.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Load MobileNetV2 with its default pretrained weights
mobilenet = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.DEFAULT)

# Replace the last classifier layer so the network emits one logit per card class.
# NOTE: `unique_labels` is produced by the label-encoding cell, which must have run first.
num_classes = len(unique_labels)
mobilenet.classifier[1] = nn.Linear(mobilenet.last_channel, num_classes)

# Preprocessing: resize to 224x224, convert to tensor, normalise per channel
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Training and validation datasets share the same preprocessing
train_dataset = CardsDataset(scale=1, split="train", csv_file="cards.csv", target="labels", transform=transform, convert="RGB")
valid_dataset = CardsDataset(scale=1, split="valid", csv_file="cards.csv", target="labels", transform=transform, convert="RGB")

# Only the training data is shuffled
train_dataloader = DataLoader(train_dataset, batch_size=12, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=12, shuffle=False)

# Optimizer over all parameters (the whole network is fine-tuned) and loss function
optimizer = optim.Adam(mobilenet.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Fine-tune the model
mobilenet.to(DEVICE)
for epoch in range(5):  # Number of epochs
    # --- training pass ---
    mobilenet.train()
    running_loss = 0.0
    for images, labels in train_dataloader:
        optimizer.zero_grad()
        outputs = mobilenet(images.to(DEVICE))
        # Targets are passed as floats, i.e. as per-class probabilities.
        # Presumably the dataset yields one-hot vectors (argmax is taken below) - TODO confirm.
        loss = criterion(outputs, labels.to(DEVICE).float())
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # --- validation pass (no gradients, eval-mode layers) ---
    mobilenet.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for images, labels in valid_dataloader:
            labels = labels.to(DEVICE)
            outputs = mobilenet(images.to(DEVICE))
            loss = criterion(outputs, labels.float())
            val_loss += loss.item()

            # Predicted class = highest logit; true class = argmax of the target vector.
            predicted = outputs.argmax(dim=1)

            val_total += labels.size(0)
            val_correct += (predicted == labels.argmax(dim=1)).sum().item()
    val_loss_avg = val_loss / len(valid_dataloader)
    # Fraction of correctly classified validation samples (i.e. accuracy, despite the name).
    val_precision = val_correct / val_total if val_total > 0 else 0

    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_dataloader):.4f}, Validation Loss: {val_loss_avg:.4f}, Validation Precision: {val_precision:.4f}")

print("Number of parameters in the model:", sum(p.numel() for p in mobilenet.parameters()))

test_dataset = CardsDataset(scale=1, split="test", csv_file="cards.csv", target="labels", transform=transform, convert="RGB")

mobilenet.eval()

pred_labels = []
true_labels = []
# Inference only: disable autograd so no graph is built for each test sample.
with torch.no_grad():
    for images, labels in test_dataset:
        # The dataset yields single samples; add a batch dimension for the model.
        outputs = mobilenet(images.unsqueeze(0).to(DEVICE))
        pred_labels.append(test_dataset.decode_label(outputs.detach().cpu()))
        true_labels.append(test_dataset.decode_label(labels.detach().cpu()))

print("Pred labels:", len(pred_labels))
print("True labels:", len(true_labels))
df = pd.DataFrame({"True labels": true_labels, "Pred labels": pred_labels})

# Integer-encode labels by first appearance in the ground truth; unseen predictions become -1.
unique_labels = df["True labels"].unique().tolist()
df["Pred labels"] = df["Pred labels"].apply(lambda x: unique_labels.index(x) if x in unique_labels else -1)
df["True labels"] = df["True labels"].apply(lambda x: unique_labels.index(x) if x in unique_labels else -1)

df_metrics = evaluate_model(df["Pred labels"], df["True labels"], range(len(unique_labels)))
# Map the first len(unique_labels) index entries back to their readable label.
df_metrics["Clase"] = df_metrics.index.to_series().iloc[0:len(unique_labels)].apply(lambda x: unique_labels[x])
df_metrics = df_metrics[["Clase"] + [col for col in df_metrics.columns if col != "Clase"]]
# to_csv does not create directories, so make sure the output folder exists.
os.makedirs("result", exist_ok=True)
df_metrics.to_csv("result/mobilnet_metrics.csv", index=False)

Epoch 1, Loss: 1.5005, Validation Loss: 0.5024, Validation Precision: 0.8615
Epoch 2, Loss: 0.5604, Validation Loss: 0.2303, Validation Precision: 0.9269
Epoch 3, Loss: 0.3879, Validation Loss: 0.1712, Validation Precision: 0.9462
Epoch 4, Loss: 0.3020, Validation Loss: 0.2076, Validation Precision: 0.9346
Epoch 5, Loss: 0.2461, Validation Loss: 0.1098, Validation Precision: 0.9654
Epoch 6, Loss: 0.2354, Validation Loss: 0.1256, Validation Precision: 0.9654


KeyboardInterrupt: 

# ViT-B/16

In [None]:
from torchvision import transforms
from torchvision.models import vit_b_16, ViT_B_16_Weights
from torch.utils.data import DataLoader
import pandas as pd

# Device
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the RNGs so weight initialisation and batch shuffling repeat across runs.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# 1. Load a pretrained ViT-B/16 and replace its head
weights = ViT_B_16_Weights.IMAGENET1K_V1
vit = vit_b_16(weights=weights)

# Number of classes
# NOTE: `unique_labels` must already exist from an earlier label-encoding step.
num_classes = len(unique_labels)

# Replace the classification head
in_features = vit.heads.head.in_features
vit.heads.head = nn.Linear(in_features, num_classes)

# 2. Define transforms (224x224 input, hard-coded per-channel mean/std normalisation)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# 3. Prepare datasets & dataloaders
train_ds = CardsDataset(scale=1, split="train", csv_file="cards.csv",
                        target="labels", transform=transform, convert="RGB")
valid_ds = CardsDataset(scale=1, split="valid", csv_file="cards.csv",
                        target="labels", transform=transform, convert="RGB")

# NOTE(review): the recorded run of this cell ended with a CUDA out-of-memory error on a
# 3.69 GiB GPU; lower the batch size if that happens again.
train_loader = DataLoader(train_ds, batch_size=100, shuffle=True)
valid_loader = DataLoader(valid_ds, batch_size=100, shuffle=False)

# 4. Optimizer & loss
optimizer = optim.Adam(vit.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# 5. Training loop
vit.to(DEVICE)
for epoch in range(5):
    # --- training pass ---
    vit.train()
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = vit(images.to(DEVICE))
        # Targets are passed as floats, i.e. as per-class probabilities.
        # Presumably the dataset yields one-hot vectors (argmax is taken below) - TODO confirm.
        loss = criterion(outputs, labels.to(DEVICE).float())
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # --- validation pass on this cell's own loader (no gradients, eval-mode layers) ---
    vit.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for images, labels in valid_loader:
            labels = labels.to(DEVICE)
            outputs = vit(images.to(DEVICE))
            loss = criterion(outputs, labels.float())
            val_loss += loss.item()

            # Predicted class = highest logit; true class = argmax of the target vector.
            predicted = outputs.argmax(dim=1)

            val_total += labels.size(0)
            val_correct += (predicted == labels.argmax(dim=1)).sum().item()
    # Average over the number of batches of the loaders that were actually iterated.
    val_loss_avg = val_loss / len(valid_loader)
    # Fraction of correctly classified validation samples (i.e. accuracy, despite the name).
    val_precision = val_correct / val_total if val_total > 0 else 0

    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}, Validation Loss: {val_loss_avg:.4f}, Validation Precision: {val_precision:.4f}")

print("Number of parameters in the model:", sum(p.numel() for p in vit.parameters()))

test_dataset = CardsDataset(scale=1, split="test", csv_file="cards.csv", target="labels", transform=transform, convert="RGB")

vit.eval()

pred_labels = []
true_labels = []
# Inference only: disable autograd so no graph is built for each test sample.
with torch.no_grad():
    for images, labels in test_dataset:
        # The dataset yields single samples; add a batch dimension for the model.
        outputs = vit(images.unsqueeze(0).to(DEVICE))
        pred_labels.append(test_dataset.decode_label(outputs.detach().cpu()))
        true_labels.append(test_dataset.decode_label(labels.detach().cpu()))

print("Pred labels:", len(pred_labels))
print("True labels:", len(true_labels))
df = pd.DataFrame({"True labels": true_labels, "Pred labels": pred_labels})

# Integer-encode labels by first appearance in the ground truth; unseen predictions become -1.
unique_labels = df["True labels"].unique().tolist()
df["Pred labels"] = df["Pred labels"].apply(lambda x: unique_labels.index(x) if x in unique_labels else -1)
df["True labels"] = df["True labels"].apply(lambda x: unique_labels.index(x) if x in unique_labels else -1)

df_metrics = evaluate_model(df["Pred labels"], df["True labels"], range(len(unique_labels)))
# Map the first len(unique_labels) index entries back to their readable label.
df_metrics["Clase"] = df_metrics.index.to_series().iloc[0:len(unique_labels)].apply(lambda x: unique_labels[x])
df_metrics = df_metrics[["Clase"] + [col for col in df_metrics.columns if col != "Clase"]]
# to_csv does not create directories, so make sure the output folder exists.
os.makedirs("result", exist_ok=True)
df_metrics.to_csv("result/vit_metrics.csv", index=False)

OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 3.69 GiB of which 2.88 MiB is free. Process 92786 has 946.00 MiB memory in use. Including non-PyTorch memory, this process has 2.74 GiB memory in use. Of the allocated memory 2.25 GiB is allocated by PyTorch, and 396.15 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)