# English Character Recognition in Natural Images

## Load dataset for the training process

In [1]:
import json
import os

import cv2
import numpy as np
import pandas as pd

from typing import List

### Load the class names mapping

In [2]:
# Root folder of the dataset; every dataset file is resolved relative to it
DATASET_DIR = "dataset"
class_names_path = os.path.join(DATASET_DIR, "class_names.json")
with open(class_names_path, "r") as fp:
    raw_class_names = json.load(fp)
# JSON object keys are always strings, so convert them back to integers
class_names = {int(class_id): name for class_id, name in raw_class_names.items()}

### Create PyTorch dataset with an image preprocessing pipeline

In [3]:
import torch

from chars74k import Chars74kDataset
from torch.utils.data import DataLoader

In [4]:
# Build one dataset per split ("train" first, then "val") from the same root folder
ds_train, ds_val = (
    Chars74kDataset(DATASET_DIR, split_name) for split_name in ("train", "val")
)

### Create PyTorch data loader

In [5]:
BATCH_SIZE = 32
# Settings shared by the training and the validation loader
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=8)
# Training batches are reshuffled every epoch; drop last for stability
loader_train = DataLoader(ds_train, shuffle=True, drop_last=True, **loader_kwargs)
# Validation keeps the dataset order and every sample (no shuffle, no drop_last)
loader_val = DataLoader(ds_val, **loader_kwargs)

## Model training

In [6]:
import models

from datetime import datetime
from torch.utils.tensorboard import SummaryWriter
from tqdm.notebook import tqdm
from sklearn.metrics import f1_score, accuracy_score

In [7]:
# Training hyper-parameters
MAX_EPOCHS = 100  # number of passes over the training set
INIT_LR = 0.001   # initial learning rate handed to the optimizer

### Select the device used for training (CPU/GPU)

In [8]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


### Create the model

In [9]:
# Build the classifier with one output per known class and place it on the
# selected device (moving it is a no-op when the device is the CPU)
model = models.CNN5HiddenLayers(n_classes=len(class_names)).to(device)

### Define the loss function and the optimizer

In [10]:
# Multi-class classification loss computed on the raw model outputs
loss_fn = torch.nn.CrossEntropyLoss()
# Adam over all model parameters, starting from the configured learning rate
optimizer = torch.optim.Adam(params=model.parameters(), lr=INIT_LR)
# Lower the learning rate when the monitored metric stops improving,
# but never below min_lr
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer=optimizer,
    min_lr=1e-9,
)

### Prepare the logger

In [11]:
# Every run gets its own timestamped folder so earlier runs are never overwritten
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
run_name = 'train_{}'.format(timestamp)
save_dir = os.path.join('runs_clf', run_name)
os.makedirs(save_dir, exist_ok=True)
# TensorBoard event files go to the same folder as the model checkpoints
writer = SummaryWriter(log_dir=save_dir)
print(f"Saving model weights and logs to: {save_dir}")

Saving model weights and logs to: runs_clf/train_20221001_060509


### The training and validation process

While training is running, launch TensorBoard to monitor the logged train/val metrics:
```bash
tensorboard --logdir runs_clf
```
Then open the printed link in a web browser.

In [12]:
# Train for MAX_EPOCHS epochs. After every epoch the model is evaluated on the
# validation set, the metrics are logged to TensorBoard and three checkpoints
# are maintained in save_dir:
#   best.pt         - highest weighted F1 on the validation set so far
#   lowest_loss.pt  - lowest validation loss so far
#   last.pt         - weights after the most recent epoch

# Variables to hold some training status
epoch_number = 0
lowest_loss = np.inf
best_f1 = 0.
# Training loop
for epoch in tqdm(range(MAX_EPOCHS)):
    # Make sure gradient tracking is on, and do a pass over the data for the training process
    model.train()
    running_loss = 0.
    n_train_batches = 0
    # Every data instance is an input & label pair
    for inputs, labels in loader_train:
        # We move the data instance from CPU to the device
        inputs = inputs.to(device)
        labels = labels.to(device)
        # Zero your gradients for every batch!
        optimizer.zero_grad()
        # Make predictions for this batch
        outputs = model(inputs)
        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()
        # Adjust learning weights
        optimizer.step()
        # Gather data and report (.item() already returns a detached Python float)
        running_loss += loss.item()
        n_train_batches += 1
    # Count the batches explicitly instead of reusing the loop index: an empty
    # loader (e.g. fewer samples than one batch with drop_last=True) would
    # otherwise surface as a NameError or reuse a stale index.
    if n_train_batches == 0:
        raise ValueError("loader_train produced no batches; check the dataset size and BATCH_SIZE")
    # Calculate the average training loss (all training batches have the same
    # size because loader_train drops the last incomplete batch)
    avg_loss = running_loss / n_train_batches

    # We don't need gradients for the model validation process
    model.eval()
    running_vloss = 0.0
    y_true = []
    y_pred = []
    with torch.no_grad():
        for vinputs, vlabels in loader_val:
            voutputs = model(vinputs.to(device))
            vloss = loss_fn(voutputs, vlabels.to(device))
            # loss_fn returns the mean over the batch; weight it by the batch
            # size so that a smaller final batch is not over-represented
            running_vloss += vloss.item() * vlabels.size(0)
            y_true.extend(vlabels.tolist())
            # the class with the highest energy is what we choose as prediction
            y_pred.extend(voutputs.argmax(dim=1).cpu().tolist())
    if not y_true:
        raise ValueError("loader_val produced no samples; cannot compute validation metrics")

    # Calculate the average validation loss per sample
    avg_vloss = running_vloss / len(y_true)
    # The scheduler monitors the validation loss to decide when to reduce the LR
    scheduler.step(avg_vloss)
    # Calculate our classification metrics
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average="weighted")

    # Log the training loss (averaged per batch) and the validation
    # loss/metrics (averaged per sample); epochs are numbered from 1
    step = epoch + 1
    writer.add_scalar('train/loss', avg_loss, step)
    writer.add_scalar('val/loss', avg_vloss, step)
    writer.add_scalar('val/acc', acc, step)
    writer.add_scalar('val/weighted_f1', f1, step)
    writer.flush()

    # Track best performance, and save the model's state (weights)
    if f1 > best_f1:
        best_f1 = f1
        model_path = os.path.join(save_dir, 'best.pt')
        torch.save(model.state_dict(), model_path)
    if avg_vloss < lowest_loss:
        lowest_loss = avg_vloss
        model_path = os.path.join(save_dir, 'lowest_loss.pt')
        torch.save(model.state_dict(), model_path)
    model_path = os.path.join(save_dir, 'last.pt')
    torch.save(model.state_dict(), model_path)

    # Number of completed epochs
    epoch_number = step

  0%|          | 0/100 [00:00<?, ?it/s]

## References
[1] https://pytorch.org/tutorials/beginner/introyt/trainingyt.html