In [67]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys
import os
import time
from PIL import Image

import torch
from torchvision import transforms
from tqdm import tqdm

In [68]:
# Training ground truth: the CSV has no header row, so its two columns are
# labelled explicitly right after reading.
gt = pd.read_csv('ground_truth/train_small.csv', header=None).set_axis(
    ['image', 'label'], axis='columns'
)

In [None]:
def augment_image(image_path, num_augmentations=5, size=(244, 244)):
    """
    Generate randomly augmented copies of a single image.

    Args:
        image_path (str): Path of the input image.
        num_augmentations (int): Number of augmented images to generate.
        size (tuple[int, int]): Output (height, width) of every augmented
            image. Defaults to (244, 244), the shape the rest of the
            notebook reshapes to.

    Returns:
        list: Augmented images (PIL format), one per requested augmentation;
        empty when ``num_augmentations`` is zero or negative.
    """
    # Random augmentation pipeline; each application draws fresh parameters.
    augmentation_transforms = transforms.Compose([
        transforms.RandomHorizontalFlip(p=0.5),  # random horizontal flip
        transforms.RandomRotation(degrees=30),   # random rotation within +/-30 degrees
        transforms.RandomResizedCrop(size=size, scale=(0.8, 1.0)),  # random crop, resized to `size`
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # colour variation
    ])

    # Open inside a context manager so the underlying file handle is released
    # deterministically; convert() yields an in-memory image that stays valid
    # after the file is closed.
    with Image.open(image_path) as source:
        original_image = source.convert("RGB")

    return [augmentation_transforms(original_image) for _ in range(num_augmentations)]


In [70]:
# Build the training array with exactly one augmented view per ground-truth
# row. augment_image() defaults to 5 views per image; flattening those into
# x_train would make it five times longer than the label vector built from
# gt['label'], so samples and labels would no longer line up.
train_image = []
for image_name in tqdm(gt['image']):
    train_image.extend(augment_image('train_set/' + image_name, num_augmentations=1))

# Convert the PIL images to arrays and stack them as (N, 244, 244, 3).
x_train = np.array([np.asarray(img) for img in train_image]).reshape(-1, 244, 244, 3)

# Fail early if images and ground-truth rows ever drift apart.
assert len(x_train) == len(gt), (
    f"x_train has {len(x_train)} samples but gt has {len(gt)} rows"
)

100%|██████████| 5020/5020 [02:43<00:00, 30.72it/s]


KeyboardInterrupt: 

In [37]:
def one_hot_encoding(image_labels, num_classes=251):
    """
    Convert integer class labels into one-hot encoded rows.

    Args:
        image_labels: Iterable of class labels; each value is cast to int.
        num_classes (int): Width of every one-hot row. Defaults to 251, the
            value previously hard-coded here.

    Returns:
        numpy.ndarray: Float array of shape (len(image_labels), num_classes)
        with a single 1 per row. Empty input yields shape (0, num_classes).

    Raises:
        IndexError: If any label lies outside ``[0, num_classes)``. Negative
            labels are rejected explicitly because plain indexing would wrap
            them around to the last classes without any error.
    """
    indices = np.asarray(list(image_labels)).astype(int)

    out_of_range = (indices < 0) | (indices >= num_classes)
    if out_of_range.any():
        offending = indices[out_of_range][0]
        raise IndexError(
            f"label {offending} is out of range for {num_classes} classes"
        )

    # Vectorised assignment: row k gets a 1 in column indices[k].
    one_hot_encoded_labels = np.zeros((indices.size, num_classes))
    one_hot_encoded_labels[np.arange(indices.size), indices] = 1
    return one_hot_encoded_labels

# One-hot encode the 'label' column of the training ground truth.
image_labels = gt['label']
y_train = one_hot_encoding(image_labels)

In [38]:
# Shape of a single training sample and the number of distinct labels in gt.
# NOTE(review): num_classes is derived from the data here, while the ResNet18
# cell assigns the literal 251 — confirm the two are meant to agree.
input_shape = x_train[0].shape
num_classes = len(gt['label'].unique())
print(f'input_shape: {input_shape}, num_classes: {num_classes}')

input_shape: (244, 244, 3), num_classes: 251


In [None]:
# Sanity check: the first dimension of x_train and y_train should be equal
# (one label row per image).
print(f'x_train.shape: {x_train.shape}, y_train.shape: {y_train.shape}')

x_train.shape: (5020, 244, 244, 3), y_train.shape: (5020, 251)


### Prova predizione con ResNet18

In [25]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image


# Hyper-parameters for the ResNet18 run.
num_classes = 251
batch_size = 32
learning_rate = 0.001

# Prefer the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained ResNet18 whose final fully connected layer is replaced by a new
# linear layer with `num_classes` outputs; the model is then moved to `device`.
# NOTE(review): recent torchvision releases favour `weights=` over
# `pretrained=` — confirm against the installed version.
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)
model = model.to(device)



In [40]:
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
# Images: cast to float32 and reorder the axes from channels-last to
# channels-first, i.e. (N, H, W, C) -> (N, C, H, W).
# NOTE(review): pixel values are cast without any rescaling or mean/std
# normalisation in this cell — confirm that is what the pretrained weights
# expect, and keep the test-set preprocessing identical if this changes.
x_train = torch.tensor(x_train, dtype=torch.float32).permute(0, 3, 1, 2)

# Labels: collapse each one-hot row to its scalar class index.
y_train = torch.tensor(y_train, dtype=torch.float32).argmax(dim=1)

# Shuffled mini-batches over the (image, label) pairs.
train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

In [41]:
# Cross-entropy loss with Adam over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step, then run the
        # forward pass, back-propagate and update the weights.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean batch loss for this epoch.
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

print("Training completato!")

Epoch [1/5], Loss: 5.7168
Epoch [2/5], Loss: 5.3369
Epoch [3/5], Loss: 5.0363
Epoch [4/5], Loss: 4.7877
Epoch [5/5], Loss: 4.5723
Training completato!


### Prova predizione con modello classico

In [50]:
import matplotlib.pyplot as plt
import seaborn as sns

def plot_loss(history):
  """Plot the training loss per epoch, plus validation loss when recorded.

  Args:
    history: Object exposing a ``history`` dict that holds a ``"loss"`` list
      and, optionally, a ``"val_loss"`` list (one value per epoch).
  """
  metrics = history.history
  x_plot = list(range(1, len(metrics["loss"]) + 1))
  plt.figure()
  plt.title("Loss")
  plt.xlabel('Epochs')
  plt.ylabel('Loss')
  plt.plot(x_plot, metrics['loss'])
  legend_entries = ['Training']
  # Validation loss is only present when the run used validation data.
  if 'val_loss' in metrics:
    plt.plot(x_plot, metrics['val_loss'])
    legend_entries.append('Validation')
  plt.legend(legend_entries)
  # Unlike accuracy, loss has no upper bound of 1 (the losses recorded in this
  # notebook are well above it), so only the lower limit is pinned and the
  # upper limit is left to autoscale.
  plt.ylim(bottom=0)

def plot_accuracy(history):
  """Plot training and validation accuracy per epoch on a new figure.

  Args:
    history: Object exposing a ``history`` dict with ``"accuracy"`` and
      ``"val_accuracy"`` lists (one value per epoch).
  """
  curves = history.history
  n_epochs = len(curves["accuracy"])
  epochs = [epoch for epoch in range(1, n_epochs + 1)]

  plt.figure()
  plt.title("Accuracy")
  plt.xlabel('Epochs')
  plt.ylabel('Accuracy')
  # Training curve first, then validation, matching the legend order.
  for key in ('accuracy', 'val_accuracy'):
    plt.plot(epochs, curves[key])
  plt.legend(['Training', 'Validation'])
  # Accuracy lives in [0, 1]; fix the axis so plots are comparable.
  plt.ylim(0, 1)

def display_error_examples(real_images, true_label, predicted_label, image_shape=(28, 39)):
  """Show up to eight misclassified samples in a 4x2 grid.

  Args:
    real_images: Indexable collection of images aligned with the labels.
    true_label: Indexable collection of per-sample vectors; the true class is
      taken as the argmax of each vector.
    predicted_label: Indexable collection of predicted class indices.
    image_shape: Shape used to unflatten an image whose element count matches
      it. Defaults to (28, 39), the value previously hard-coded here. Images
      with a different number of elements (for example H x W x 3 colour
      images) are displayed with their own shape instead of failing on the
      reshape.
  """
  n_images = 4
  max_examples = 2 * n_images  # the grid has n_images rows and 2 columns
  shown = 0
  plt.figure(figsize=(10, 10))
  for j in range(len(true_label)):
    true_class = true_label[j].argmax(axis=-1)
    if true_class == predicted_label[j]:
      continue  # correctly classified, nothing to show

    shown += 1
    plt.subplot(n_images, 2, shown)
    real_image = np.asarray(real_images[j])
    # Only unflatten when the element count fits the requested shape.
    if real_image.size == int(np.prod(image_shape)):
      real_image = real_image.reshape(image_shape)
    plt.title(f' Real: {true_class} Predicted: {predicted_label[j]}')
    plt.imshow(real_image, cmap='gray')
    plt.axis('off')
    if shown >= max_examples:
      break

  plt.tight_layout()
  plt.show()

### Test dei vari modelli

In [49]:
# Validation ground truth: header-less CSV, columns named explicitly, and only
# the first 502 rows kept as the evaluation subset.
test_gt = (
    pd.read_csv('ground_truth/val_info.csv', header=None)
    .set_axis(['image', 'label'], axis='columns')
    .head(502)
)

In [50]:
# Load every image listed in test_gt from val_set/, pass it through
# transform_image, and stack the results into a single array.
# NOTE(review): transform_image is not defined in the visible part of this
# notebook — confirm it exists elsewhere; otherwise this cell raises a
# NameError on a fresh kernel.
test_image = []
for image in test_gt['image']:
    img = Image.open('val_set/'+image)
    img = transform_image(img)
    test_image.append(img)

x_test = np.array(test_image)
print(x_test.shape)

(502, 244, 244, 3)


In [51]:
# One-hot encode the 'label' column of the validation subset, using the same
# helper as for the training labels.
test_image_labels = test_gt['label']
y_test = one_hot_encoding(test_image_labels)
print(y_test.shape)

(502, 251)


## Accuracy sul modello easy

In [None]:
# NOTE(review): in the visible cells `model` is bound to a torchvision
# ResNet18, which is not shown to provide an `evaluate` method; this call
# presumably targets a different ("easy") model defined in a cell that is not
# part of this notebook view — confirm before re-running.
accuracy = model.evaluate(x_test, y_test)

[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step - accuracy: 0.0062 - loss: 51.4084


## Accuracy su ResNet18

In [47]:
# Persist the model's state_dict to 'model.pth' in the working directory.
torch.save(model.state_dict(), 'model.pth')

In [52]:
# Images: cast to float32 and reorder the axes to channels-first, the same
# conversion applied to the training images.
x_test = torch.tensor(x_test, dtype=torch.float32).permute(0, 3, 1, 2)  # (N, C, H, W)

# Labels: collapse each one-hot row to its scalar class index.
y_test = torch.tensor(y_test, dtype=torch.float32).argmax(dim=1)  # (N,)

# Fixed-order batches over the test pairs.
test_dataset = TensorDataset(x_test, y_test)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Evaluation mode, then count top-1 hits over the whole test set.
model.eval()
correct = 0
total = 0

with torch.no_grad():  # no gradients are needed for scoring
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # Index of the highest score in each row is the predicted class.
        predicted = torch.max(outputs, dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Share of correct predictions, expressed as a percentage.
accuracy = 100 * correct / total
print(f"Accuracy on test set: {accuracy:.2f}%")

Accuracy on test set: 4.38%
