In [1]:
import torch
from scripts.NN import LetterRecognitionModel

# from scripts.training import train_model
from scripts.dataset import LetterDataset
import torch.optim as optim
from torch.utils.data import DataLoader
import pandas as pd
import torch.nn as nn
import torch
from tqdm.auto import tqdm

In [32]:
# --- Configuration -----------------------------------------------------------
LR = 3e-4
BATCH_SIZE = 32
NUM_EPOCHS = 50
NUM_EPACHS = NUM_EPOCHS  # misspelled legacy name, kept as an alias for any cell that still reads it
DATASET_PATH = "data/multi_shape_dataset.pkl"
CHECKPOINT_PATH = "letter_recognition_model.pt"

# --- Data --------------------------------------------------------------------
# NOTE(review): pandas.read_pickle unpickles the file, which can execute
# arbitrary code -- only point DATASET_PATH at files you trust.
letters_df = pd.read_pickle(DATASET_PATH)
# check if all images are the same shape
shapes = letters_df.image.apply(lambda x: x.shape)
assert len(set(shapes)) == 1, "All images must be the same shape"

# `dataset` is only ever bound to the LetterDataset (never to the DataFrame),
# so later cells always see one kind of object under this name.
dataset = LetterDataset(letters_df.to_dict("records"))
train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# --- Model -------------------------------------------------------------------
model = LetterRecognitionModel(dataset.total_letters())

# Warm-start from the checkpoint of a previous run when there is one. On a
# fresh checkout no checkpoint exists yet, so fall back to the freshly
# initialised weights instead of aborting the whole cell.
try:
    model.load_state_dict(torch.load(CHECKPOINT_PATH, weights_only=True))
except FileNotFoundError:
    print(f"No checkpoint at {CHECKPOINT_PATH!r}; training from scratch")

optimizer = optim.Adam(model.parameters(), lr=LR)
# NOTE(review): MSELoss against `letter` presumably means the dataset yields
# float target vectors (e.g. one-hot) -- confirm against LetterDataset.
criterion_letter = nn.MSELoss()

# --- Training ----------------------------------------------------------------
# The context manager closes the progress bar even when training raises or is
# interrupted from the keyboard.
with tqdm(total=NUM_EPOCHS * len(train_loader), desc="Training") as pbar:
    for epoch in range(NUM_EPOCHS):
        total_loss = []  # per-batch losses of the current epoch
        for image, letter in train_loader:
            optimizer.zero_grad()
            predicted_letter = model(image)
            loss = criterion_letter(predicted_letter, letter)
            loss.backward()
            optimizer.step()

            loss_value = loss.item()  # read the scalar once per step
            total_loss.append(loss_value)
            pbar.update(1)
            pbar.set_postfix(
                {
                    "Epoch": f"{epoch+1}/{NUM_EPOCHS}",
                    # Running mean of the batch losses seen so far this epoch.
                    "Total Loss": f"{(sum(total_loss)/len(total_loss)):.4f}",
                    "Loss": f"{loss_value:.4f}",
                }
            )

# Persist the trained weights; the next run of this cell resumes from them.
torch.save(model.state_dict(), CHECKPOINT_PATH)

Training:   0%|          | 0/4850 [00:00<?, ?it/s]

In [3]:
# Read the weights file written at the end of the training cell, then copy
# the tensors into the in-memory model. The load result stays the cell's last
# expression so the key-matching report is still displayed.
_saved_weights = torch.load("letter_recognition_model.pt", weights_only=True)
model.load_state_dict(_saved_weights)

<All keys matched successfully>

In [33]:
# import random
# import matplotlib.pyplot as plt
import numpy as np

# Raw sample records (one dict per DataFrame row) that the evaluation code
# indexes by position.
_records_df = pd.read_pickle("data/multi_shape_dataset.pkl")
_dataset = _records_df.to_dict("records")


def predict_letter(model, image):
    """Return the index of the highest-scoring model output for one image.

    Args:
        model: Callable network. It receives ``image`` as a float tensor with
            two singleton dimensions prepended.
        image: Image data accepted by ``torch.tensor`` (the callers in this
            notebook pass a NumPy array).

    Returns:
        int: Position of the maximum value in the flattened model output.
    """
    batch = torch.tensor(image).unsqueeze(0).unsqueeze(0).float()
    # Pure inference: disable autograd so no graph is recorded for each
    # sample. The prediction itself is unchanged.
    with torch.no_grad():
        scores = model(batch)
    return scores.argmax().item()


# Invert the dataset's letter -> index mapping so a predicted index can be
# turned back into its letter.
idx_to_letter = dict(
    (index, letter_char) for letter_char, index in dataset.letter_to_idx.items()
)


def predict_random_letter(model, idx):
    """Tell whether the model labels sample ``idx`` of ``_dataset`` correctly.

    The model is put into eval mode and moved to the CPU before predicting.

    Args:
        model: Network passed on to ``predict_letter``.
        idx: Position of the sample in the module-level ``_dataset`` list.

    Returns:
        The result of comparing the decoded prediction with the sample's
        ``"letter"`` entry (truthy when they agree).
    """
    model.eval()
    cpu_model = model.cpu()

    record = _dataset[idx]
    pixels: np.ndarray = record["image"]
    expected = record["letter"]

    # Map the predicted class index back to a letter before comparing.
    guessed_idx = predict_letter(cpu_model, pixels)
    guessed = idx_to_letter[guessed_idx]
    return guessed == expected


# Score every record in `_dataset`; each entry of `results` says whether that
# sample was predicted correctly.
results = []
for sample_idx in range(len(_dataset)):
    results.append(predict_random_letter(model, sample_idx))
print(f"Accuracy: {sum(results) / len(results)}")

Accuracy: 0.9767741935483871


In [34]:
# Minimum fraction of correctly predicted samples required before the current
# weights are written out as the "best" model.
ACCURACY_THRESHOLD = 0.95

# NOTE(review): `results` was computed on the same pickle the training cell
# reads, so this gate measures training accuracy, not held-out accuracy.
if sum(results) / len(results) > ACCURACY_THRESHOLD:
    # Save model
    torch.save(model.state_dict(), "best_letter_recognition_model.pt")
    print("Model saved")

Model saved
