In [1]:
!pip install kaggle
!kaggle datasets download -d guare13/handwritten-letters-recognition
!unzip handwritten-letters-recognition.zip

[1;30;43mSe han truncado las últimas 5000 líneas del flujo de salida.[0m
  inflating: valid_letters/letter_3363.png  
  inflating: valid_letters/letter_33630.png  
  inflating: valid_letters/letter_33631.png  
  inflating: valid_letters/letter_33632.png  
  inflating: valid_letters/letter_33633.png  
  inflating: valid_letters/letter_33634.png  
  inflating: valid_letters/letter_33635.png  
  inflating: valid_letters/letter_33636.png  
  inflating: valid_letters/letter_33637.png  
  inflating: valid_letters/letter_33638.png  
  inflating: valid_letters/letter_33639.png  
  inflating: valid_letters/letter_3364.png  
  inflating: valid_letters/letter_33640.png  
  inflating: valid_letters/letter_33641.png  
  inflating: valid_letters/letter_33642.png  
  inflating: valid_letters/letter_33643.png  
  inflating: valid_letters/letter_33644.png  
  inflating: valid_letters/letter_33645.png  
  inflating: valid_letters/letter_33646.png  
  inflating: valid_letters/letter_33648.png  
  infla

In [2]:
import torch
import tensorflow as tf
import os
import cv2
import imghdr
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from itertools import chain
import random
import shutil
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms
import torch.nn.functional as F

In [5]:
train = pd.read_csv('written_letters_train.csv')
validation = pd.read_csv('written_letters_validation.csv')

In [6]:
characters=set()
train['nombre']=train['nombre'].apply(lambda x: str(x))
for i in train['nombre'].values:
    for j in i :
        if j not in characters :
            characters.add(j)
characters=sorted(characters)

In [7]:
# 2 Dictionaries  :   Turn all ur characters to num and vice versa
char_to_label = {char:label for label,char in enumerate(characters)}
label_to_char = {label:char for label,char in enumerate(characters)}

In [8]:
path_val = "valid_letters"
path_test = "test_letters"
path_train = "train_letters"

In [19]:
class DataGenerator(Dataset):
    def __init__(self, dataframe, path, char_map, batch_size=128, img_size=(14,10), downsample_factor=4):
        self.dataframe = dataframe
        self.path = path
        self.char_map = char_map
        self.batch_size = batch_size
        self.img_size = img_size
        self.downsample_factor = downsample_factor

    def __len__(self):
        return len(self.dataframe)//self.batch_size

    def __getitem__(self, idx):
        img_path = self.dataframe['id'].values[idx]
        img = cv2.imread(self.path + '/' + img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.resize(img, self.img_size)
        img = (img / 255).astype(np.float32)
        img = torch.from_numpy(img).unsqueeze(0)  # Convertim a tensor i afegim dimensió de canal
        text = self.dataframe['nombre'].values[idx]
        text = str(text)
        label = []
        if text in self.char_map:
            label.append(self.char_map[text])
        else:
            label.append(100)
        label = torch.tensor(label)
        return img, label

# Exemple d'ús:
# dataset = CustomDataset(dataframe, path, char_map)
# dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


In [20]:
train_generator=DataGenerator(train,path_train,char_to_label)
validation_generator=DataGenerator(validation,path_val,char_to_label)

In [27]:
# Defineix el model
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        """
        Entren imatges amb 3 canals de color (RGB) i amb aquesta capa convolucional
        s'estreuen característiques loclas de la imatge detectant patrons com bordes,
        textures i altres detalls que poden ser importants.
        """
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        """
        El max pooling redueix la dimensionalitat dels mapas de característiques,
        disminuint la mida de les imatges i retenint les característiques més importants
        Això ajuda a reduir el sobreajustament i millora l'eficàcia computacional.
        """
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        """
        Les següents dues capes s'ecarreguen de la classificació final. Transformen
        les característiques extretes per les capas convolucionals i de pooling a les
        propietats de les diferents classes (lletres).
        """
        self.fc1 = nn.Linear(32 * 7 * 5, 128)
        self.fc2 = nn.Linear(128, 29)  # 29 perquè hi ha 29 caràcters a l'alfabet

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = x.view(-1, 32 * 7 * 5)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [28]:
# Instancia el model, defineix la pèrdua i l'optimitzador
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [32]:
# Entrenament del model
num_epochs = 10
train_losses = []
train_accuracies = []
for epoch in range(num_epochs):
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_generator:
        optimizer.zero_grad()
        outputs = model(images)
        labels = torch.tensor(labels, dtype=torch.long)  # Assegura't que els labels són tensors de tipus long
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Calcular la precisión y la pérdida promedio en cada época
    epoch_loss = running_loss / len(train_generator)
    epoch_accuracy = correct / total

    # Guardar la pérdida y la precisión en las listas
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_accuracy)

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_generator)}")


  labels = torch.tensor(labels, dtype=torch.long)  # Assegura't que els labels són tensors de tipus long


Epoch 1/10, Loss: 26.93720228425084
Epoch 2/10, Loss: 26.951640730776717
Epoch 3/10, Loss: 26.718460141836523
Epoch 4/10, Loss: 25.982473199899147
Epoch 5/10, Loss: 26.297545850393256


KeyboardInterrupt: 

In [None]:
# Entrenar el modelo
num_epochs = 10
train_losses = []
train_accuracies = []
test_losses = []
test_accuracies = []

model = SimpleCNN()  # Instancia el modelo
criterion = nn.CrossEntropyLoss()  # Función de pérdida
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)  # Optimizador

for epoch in range(num_epochs):
    # Entrenamiento
    model.train()  # Pone el modelo en modo de entrenamiento
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_generator:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_generator)
    epoch_accuracy = correct / total
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_accuracy)

    # Evaluación en el conjunto de pruebas
    model.eval()  # Pone el modelo en modo de evaluación
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in validation_generator:
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(validation_generator)
    epoch_accuracy = correct / total
    test_losses.append(epoch_loss)
    test_accuracies.append(epoch_accuracy)

    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_losses[-1]:.4f}, Train Acc: {train_accuracies[-1]:.4f}, Test Loss: {test_losses[-1]:.4f}, Test Acc: {test_accuracies[-1]:.4f}")

# Guardar el modelo entrenado
torch.save(model.state_dict(), 'simple_cnn_model.pth')

# Crear la gráfica de pérdida y precisión
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(range(1, num_epochs+1), train_losses, label='Training Loss')
plt.plot(range(1, num_epochs+1), test_losses, label='Test Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Loss Over Epochs')
plt.legend()
plt.grid(True)

plt.subplot(1, 2, 2)
plt.plot(range(1, num_epochs+1), train_accuracies, label='Training Accuracy')
plt.plot(range(1, num_epochs+1), test_accuracies, label='Test Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Accuracy Over Epochs')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()


Epoch 1/10, Train Loss: 68.0906, Train Acc: 0.8661, Test Loss: 55.5741, Test Acc: 0.8979


In [None]:
def compare_letter_pairs(true_labels, predicted_labels):
    if len(true_labels) != len(predicted_labels):
        raise ValueError("Les llistes han de tenir la mateixa longitud")

    total_pairs = len(true_labels)
    same_letter_count = 0
    total_letters = 0

    for label1, label2 in zip(true_labels, predicted_labels):
        if label1 == label2:
            same_letter_count += 1

        total_letters += 1

    letter_percentage = (same_letter_count / total_letters) * 100

    return same_letter_count, letter_percentage

# Exemple d'ús amb les prediccions del model
true_labels = []
predicted_labels = []

with torch.no_grad():
    for images, labels in validation_generator:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(predicted.cpu().numpy())

same_letter_count, letter_percentage = compare_letter_pairs(true_labels, predicted_labels)
print(f'Nombre de lletres correctes: {same_letter_count}')
print(f'Percentatge de lletres correctes: {letter_percentage}%')

In [None]:
# Crear la gráfica de pérdida
plt.plot(range(1, num_epochs+1), train_losses, label='Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training Loss Over Epochs')
plt.legend()
plt.grid(True)
plt.show()

# Crear la gráfica de precisión
plt.plot(range(1, num_epochs+1), train_accuracies, label='Training Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training Accuracy Over Epochs')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Guarda el model entrenat
torch.save(model.state_dict(), 'cnn_handwritten_letters_model.pth')

# Funció per mostrar les mètriques d'entrenament
def plot_history(history):
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)
    plt.show()

plot_history(history)