#### **Recurrent Network Testing Notebook**
---

In [1]:
# imports
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedShuffleSplit

import string
import random

import os
from PIL import Image
import cv2

import torch
from torch import optim
from torch import nn
from torch.utils.data import Dataset, Subset, DataLoader
import torch.nn.functional as F

from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter

#### **Training Dataset import**
----

In [2]:
class Chars74kDataset(Dataset):
    def __init__(self, root_dir, gen_transforms=None, train_transforms=None, train=True):

        self.root_dir = root_dir
        self.gen_transforms = gen_transforms
        self.train_transforms = train_transforms
        self.train = train

        self.samples = []
        for cls in sorted(os.listdir(root_dir)):
            cls_dir = os.path.join(root_dir, cls)
            if not os.path.isdir(cls_dir):
                continue
            for fname in os.listdir(cls_dir):
                if fname.lower().endswith('.png'):
                    self.samples.append((os.path.join(cls_dir, fname), cls))

        self.classes = sorted({label for img, label in self.samples})
        self.cls2idx = {cls: idx + 1 for idx, cls in enumerate(self.classes)}

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, cls = self.samples[idx]
        img = Image.open(path).convert("L")

        if self.gen_transforms:
            img = self.gen_transforms(img)
        if self.train and self.train_transforms:
            img = self.train_transforms(img)

        label = self.cls2idx[cls]
        return img, label

In [3]:
gen_transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: (x < 0.7).float())
])

train_augment = transforms.Compose([
    transforms.RandomVerticalFlip(0.2),
    transforms.RandomRotation(degrees=10),
    transforms.RandomAffine(
        degrees=0,
        translate=(0.1, 0.1),
        scale=(0.9, 1.1)
    ),
])

#### **Séparation des données**

Pour garantir une évaluation fiable et éviter toute fuite d’information (data leakage) :

* **3 ensembles distincts** :

  * `train` (65 %) : apprentissage des paramètres du modèle.
  * `val` (15 %) : réglage des hyperparamètres et choix de la meilleure version du modèle.
  * `test` (20 %) : mesure finale de performance, à n’utiliser qu’une seule fois après la phase de validation.

In [4]:
full_dataset = Chars74kDataset(root_dir='C:/Users/G-PROGNOS-01/Desktop/Slomiany Baptiste/doc_lisibility/data/Chars74k/EnglishImg', gen_transforms=gen_transform,
                               train_transforms=None, train=False)
all_labels = np.array([full_dataset[i][1] for i in range(len(full_dataset))])

sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
temp_idx, test_idx = next(sss1.split(np.zeros(len(all_labels)), all_labels))

temp_labels = all_labels[temp_idx]
val_size = 0.15 / 0.8
sss2 = StratifiedShuffleSplit(n_splits=1, test_size=val_size, random_state=42)
train_subidx, val_subidx = next(sss2.split(np.zeros(len(temp_labels)), temp_labels))

train_idx = temp_idx[train_subidx]
val_idx   = temp_idx[val_subidx]

In [5]:
train_dataset = Chars74kDataset(root_dir='C:/Users/G-PROGNOS-01/Desktop/Slomiany Baptiste/doc_lisibility/data/Chars74k/EnglishImg', gen_transforms=gen_transform,
                                train_transforms=train_augment, train=True)
train_ds = Subset(train_dataset, train_idx)

val_dataset = Chars74kDataset(root_dir='C:/Users/G-PROGNOS-01/Desktop/Slomiany Baptiste/doc_lisibility/data/Chars74k/EnglishImg', gen_transforms=gen_transform,
                              train_transforms=None, train=False)
val_ds = Subset(val_dataset, val_idx)

test_dataset = Chars74kDataset(root_dir='C:/Users/G-PROGNOS-01/Desktop/Slomiany Baptiste/doc_lisibility/data/Chars74k/EnglishImg', gen_transforms=gen_transform,
                               train_transforms=None, train=False)
test_ds = Subset(test_dataset, test_idx)

train_loader = DataLoader(train_ds, batch_size=128, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=128, shuffle=False)
test_loader  = DataLoader(test_ds,  batch_size=128, shuffle=False)

In [6]:
characters = list(string.digits + string.ascii_uppercase + string.ascii_lowercase)
class_names = {i + 1: char for i, char in enumerate(characters)}

#### **Model definition and Training**
---

In [8]:
### Train function
def train_model( model, train_loader, val_loader, num_epochs,
                 criterion, optimizer, writer ):

    for epoch in range(1, num_epochs + 1):
        model.train()
        running_loss, running_corrects = 0.0, 0

        for inputs, labels in train_loader:
            
            optimizer.zero_grad()
            result = model(inputs)
            outputs = result
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            preds = outputs.argmax(dim=1)
            running_corrects += (preds == labels).sum().item()

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects / len(train_loader.dataset)

        model.eval()
        val_loss, val_corrects = 0.0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                result = model(inputs)
                outputs = result
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                preds = outputs.argmax(dim=1)
                val_corrects += (preds == labels).sum().item()

        val_loss = val_loss / len(val_loader.dataset)
        val_acc = val_corrects / len(val_loader.dataset)

        if epoch % 1 == 0:
            print(f"Epoch {epoch}/{num_epochs} | "
                  f"Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.4f} | "
                  f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

        writer.add_scalar("Loss/Train", epoch_loss, epoch)
        writer.add_scalar("Loss/Val", val_loss, epoch)
        writer.add_scalar("Acc/Train", epoch_acc, epoch)
        writer.add_scalar("Acc/Val", val_acc, epoch)

        for name, param in model.named_parameters():
            writer.add_histogram(name, param.cpu(), epoch)
            if param.grad is not None:
                writer.add_histogram(f"{name}_grad", param.grad.cpu(), epoch)

    writer.close()
    return model

#### **SimpleRNN**

In [None]:
class RNNClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, cls_dim):
        super().__init__()
        self.rnn = nn.RNN(input_dim, hidden_dim, 2, bidirectional=True, batch_first=True)
        self.out = nn.Linear(hidden_dim * 4, cls_dim)

    def forward(self, x):
        output_sequence, final_hidden_state = self.rnn(x.squeeze(1))
        y = final_hidden_state.permute(1, 0, 2).flatten(start_dim=1)

        return self.out(y)

In [None]:
### Parametrisation

model = RNNClassifier(28, 128, 63)
optimizer = optim.AdamW(model.parameters(), lr=1e-2, weight_decay=1e-2)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter("runs/rnn_squencerTrain0")
epochs = 5

model = train_model(model, train_loader, val_loader, epochs, criterion, optimizer, writer)
torch.save(model.state_dict(), "rnn_model0.pth")

Epoch 1/5 | Train Loss: 3.7573, Train Acc: 0.0928 | Val Loss: 3.0211, Val Acc: 0.2017
Epoch 2/5 | Train Loss: 3.4831, Train Acc: 0.1523 | Val Loss: 2.5885, Val Acc: 0.3148
Epoch 3/5 | Train Loss: 2.9216, Train Acc: 0.2529 | Val Loss: 2.2016, Val Acc: 0.3977
Epoch 4/5 | Train Loss: 3.1068, Train Acc: 0.2250 | Val Loss: 2.4828, Val Acc: 0.3048
Epoch 5/5 | Train Loss: 3.2655, Train Acc: 0.1950 | Val Loss: 2.7834, Val Acc: 0.2487


#### **GRUModel**

In [23]:
class GRUClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, cls_dim):
        super().__init__()
        self.gru = nn.GRU(input_dim, hidden_dim, 2, bidirectional=True, batch_first=True)
        self.out = nn.Linear(hidden_dim * 4, cls_dim)

    def forward(self, x):
        output_sequence, final_hidden_state = self.gru(x.squeeze(1))
        y = final_hidden_state.permute(1, 0, 2).flatten(start_dim=1)

        return self.out(y)

In [None]:
### Parametrisation

model = GRUClassifier(28, 128, 63)
optimizer = optim.AdamW(model.parameters(), lr=1e-2, weight_decay=1e-2)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter("runs/gru_squencerTrain0")
epochs = 5

model = train_model(model, train_loader, val_loader, epochs, criterion, optimizer, writer)
torch.save(model.state_dict(), "gru_model0.pth")

Epoch 1/5 | Train Loss: 1.7451, Train Acc: 0.5126 | Val Loss: 0.8008, Val Acc: 0.7635
Epoch 2/5 | Train Loss: 0.9568, Train Acc: 0.7082 | Val Loss: 0.6637, Val Acc: 0.7893
Epoch 3/5 | Train Loss: 0.8388, Train Acc: 0.7323 | Val Loss: 0.6366, Val Acc: 0.8039
Epoch 4/5 | Train Loss: 0.7780, Train Acc: 0.7498 | Val Loss: 0.6028, Val Acc: 0.8034
Epoch 5/5 | Train Loss: 0.7462, Train Acc: 0.7571 | Val Loss: 0.6089, Val Acc: 0.7997


#### **LSTM**

In [None]:
class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, cls_dim):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=1, 
                            bidirectional=True, batch_first=True)

        self.out = nn.Linear(hidden_dim * 2, cls_dim)


    def forward(self, x):
        output_sequence, (h_n, c_n) = self.lstm(x.squeeze(1))
        final_layer_h_n = h_n[-2:, :, :]
            
        y = final_layer_h_n.permute(1, 0, 2).flatten(start_dim=1)
        return self.out(y)

In [None]:
### Parametrisation

model = LSTMClassifier(28, 128, 63)
optimizer = optim.AdamW(model.parameters(), lr=1e-2, weight_decay=1e-2)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter("runs/lstm_squencerTrain0")
epochs = 5

model = train_model(model, train_loader, val_loader, epochs, criterion, optimizer, writer)
torch.save(model.state_dict(), "lstm_model0.pth")

Epoch 1/5 | Train Loss: 2.2263, Train Acc: 0.3898 | Val Loss: 1.0229, Val Acc: 0.7010
Epoch 2/5 | Train Loss: 1.1644, Train Acc: 0.6471 | Val Loss: 0.7784, Val Acc: 0.7544
Epoch 3/5 | Train Loss: 0.9424, Train Acc: 0.7081 | Val Loss: 0.6541, Val Acc: 0.7952
Epoch 4/5 | Train Loss: 0.8399, Train Acc: 0.7324 | Val Loss: 0.6336, Val Acc: 0.7950
Epoch 5/5 | Train Loss: 0.7734, Train Acc: 0.7503 | Val Loss: 0.5804, Val Acc: 0.8120


#### **LSTM + Attention**
---

In [29]:
class BiLSTMAttnClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, cls_dim):
        super().__init__()
        self.lstm_cell = nn.LSTM(input_dim, hidden_dim, batch_first=True, bidirectional=True)
        
        self.attn = nn.Parameter(torch.randn(hidden_dim * 2))
        self._out = nn.Linear(hidden_dim * 2, cls_dim)

    def forward(self, inputs):
        outputs, (hn, c_n) = self.lstm_cell(inputs.squeeze(1))

        attn_scores = torch.matmul(outputs, self.attn)
        attn_weights = torch.softmax(attn_scores, dim=1).unsqueeze(-1)

        context = torch.sum(attn_weights * outputs, dim = 1)

        logits = self._out(context)
        return logits

In [None]:
### Parametrisation

model = BiLSTMAttnClassifier(28, 128, 63)
optimizer = optim.AdamW(model.parameters(), lr=1e-2, weight_decay=1e-2)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter("runs/lstmAttn_squencerTrain0")
epochs = 5

model = train_model(model, train_loader, val_loader, epochs, criterion, optimizer, writer)
torch.save(model.state_dict(), "lstmAttn_model0.pth")

Epoch 1/5 | Train Loss: 2.1074, Train Acc: 0.4292 | Val Loss: 1.0081, Val Acc: 0.7093
Epoch 2/5 | Train Loss: 1.1536, Train Acc: 0.6595 | Val Loss: 0.7764, Val Acc: 0.7567
Epoch 3/5 | Train Loss: 0.9630, Train Acc: 0.7092 | Val Loss: 0.6840, Val Acc: 0.7845
Epoch 4/5 | Train Loss: 0.8561, Train Acc: 0.7352 | Val Loss: 0.6153, Val Acc: 0.8051
Epoch 5/5 | Train Loss: 0.8020, Train Acc: 0.7484 | Val Loss: 0.5863, Val Acc: 0.8080


#### **HybridCNNRecurrent**

In [31]:
class VGGBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.block = nn.Sequential(
            nn.Conv2d(in_channels, out_channels//4, 3, 1, 1, bias=False),
            nn.BatchNorm2d(out_channels//4),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels//4, out_channels, 3, 1, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )
    
    def forward(self, x):
        return self.block(x)
    
class HybridNet(nn.Module):
    def __init__(self, hidden_dim, cls_dim):
        super().__init__()
        self.bloc1 = VGGBlock(1, 16)
        self.bloc2 = VGGBlock(16, 64)

        self.lstm = nn.LSTM(64, hidden_dim, 1, batch_first=True, bidirectional=True)

        self.attn = nn.Parameter(torch.randn(hidden_dim*2))
        self._out = nn.Linear(hidden_dim * 2, cls_dim)

    def forward(self, x): # input = (batch, 1, 28, 28)
        x = self.bloc2(self.bloc1(x)) # (batch_size, channels = 64, height = 7, width = 7)
        x = torch.flatten(x, 2).permute(0, 2, 1) # (batch, 49 = seq_length, channels)
        outputs, (h_n, c_n) = self.lstm(x)

        attn_scores = torch.matmul(outputs, self.attn)
        attn_weights = torch.softmax(attn_scores, dim=1).unsqueeze(-1)

        context = torch.sum(attn_weights * outputs, dim = 1)

        logits = self._out(context)
        return logits

In [None]:
### Parametrisation

model = HybridNet(128, 63)
optimizer = optim.AdamW(model.parameters(), lr=1e-2, weight_decay=1e-2)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter("runs/HybridNetTrain0")
epochs = 5

model = train_model(model, train_loader, val_loader, epochs, criterion, optimizer, writer)
torch.save(model.state_dict(), "HybridNet0.pth")

Epoch 1/5 | Train Loss: 2.5633, Train Acc: 0.2871 | Val Loss: 1.4279, Val Acc: 0.5615
Epoch 2/5 | Train Loss: 1.2875, Train Acc: 0.6070 | Val Loss: 0.8332, Val Acc: 0.7440
Epoch 3/5 | Train Loss: 0.9488, Train Acc: 0.7005 | Val Loss: 0.7341, Val Acc: 0.7565
Epoch 4/5 | Train Loss: 0.8241, Train Acc: 0.7337 | Val Loss: 0.6359, Val Acc: 0.7934
Epoch 5/5 | Train Loss: 0.7565, Train Acc: 0.7493 | Val Loss: 0.5801, Val Acc: 0.8063


#### **VisionEncoder**

In [36]:
class SimpleViT(nn.Module):
    def __init__(self, img_size=28, patch_size=4, num_classes=63, dim=8, depth=2, heads=2):
        super().__init__()

        num_patches = (img_size // patch_size) ** 2
        patch_dim = patch_size * patch_size

        self.patch_embed = nn.Linear(patch_dim, dim)

        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.att_token = nn.Parameter(torch.randn(1, 1, dim))

        encoder_layer = nn.TransformerEncoderLayer(d_model=dim, nhead=heads, dim_feedforward=64, dropout=0.1, activation='gelu', batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=depth)

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, num_classes)
        )

        self.patch_size = patch_size
        self.dim = dim

    def forward(self, x):
        batch, canal, height, width = x.shape
        p = self.patch_size

        x = x.unfold(2, p, p).unfold(3, p, p)
        x = x.contiguous().view(batch, canal, -1, p, p)
        x = x.permute(0, 2, 1, 3, 4).flatten(2)

        x = self.patch_embed(x)

        att_tokens = self.att_token.expand(batch, -1, -1)
        x = torch.cat((att_tokens, x), dim=1)

        x = x + self.pos_embedding[:, :x.size(1), :]

        x = self.transformer(x)

        cls_output = x[:, 0]
        out = self.mlp_head(cls_output)

        return out

In [None]:
### Parametrisation

transformer = SimpleViT(63)
optimizer = optim.AdamW(transformer.parameters(), lr=1e-2, weight_decay=1e-2)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter("runs/transformer_fromscratch_train0_")
epochs = 5

transformer = train_model(transformer, train_loader, val_loader, epochs, criterion, optimizer, writer)
torch.save(transformer.state_dict(), "transformer0.pth")

Epoch 1/5 | Train Loss: 3.8410, Train Acc: 0.0438 | Val Loss: 3.3424, Val Acc: 0.1115
Epoch 2/5 | Train Loss: 3.3893, Train Acc: 0.1043 | Val Loss: 2.9069, Val Acc: 0.1882
Epoch 3/5 | Train Loss: 3.1241, Train Acc: 0.1508 | Val Loss: 2.5723, Val Acc: 0.2679
Epoch 4/5 | Train Loss: 2.9850, Train Acc: 0.1744 | Val Loss: 2.5649, Val Acc: 0.2636
Epoch 5/5 | Train Loss: 2.9212, Train Acc: 0.1866 | Val Loss: 2.3329, Val Acc: 0.3298
