#### **Model from Scratch**
---

In [1]:
# imports
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedShuffleSplit

import string
import random

import os
from PIL import Image
import cv2

import torch
from torch import optim
from torch import nn
from torch.utils.data import Dataset, Subset, DataLoader
import torch.nn.functional as F

from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter

#### **Training Dataset import**
----

In [2]:
class Chars74kDataset(Dataset):
    def __init__(self, root_dir, gen_transforms=None, train_transforms=None, train=True):

        self.root_dir = root_dir
        self.gen_transforms = gen_transforms
        self.train_transforms = train_transforms
        self.train = train

        self.samples = []
        for cls in sorted(os.listdir(root_dir)):
            cls_dir = os.path.join(root_dir, cls)
            if not os.path.isdir(cls_dir):
                continue
            for fname in os.listdir(cls_dir):
                if fname.lower().endswith('.png'):
                    self.samples.append((os.path.join(cls_dir, fname), cls))

        self.classes = sorted({label for img, label in self.samples})
        self.cls2idx = {cls: idx + 1 for idx, cls in enumerate(self.classes)}

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, cls = self.samples[idx]
        img = Image.open(path).convert("L")

        if self.gen_transforms:
            img = self.gen_transforms(img)
        if self.train and self.train_transforms:
            img = self.train_transforms(img)

        label = self.cls2idx[cls]
        return img, label

In [3]:
gen_transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: (x < 0.7).float())
])

train_augment = transforms.Compose([
    transforms.RandomVerticalFlip(0.2),
    transforms.RandomRotation(degrees=10),
    transforms.RandomAffine(
        degrees=0,
        translate=(0.1, 0.1),
        scale=(0.9, 1.1)
    ),
])

#### **Séparation des données**

Pour garantir une évaluation fiable et éviter toute fuite d’information (data leakage) :

* **3 ensembles distincts** :

  * `train` (65 %) : apprentissage des paramètres du modèle.
  * `val` (15 %) : réglage des hyperparamètres et choix de la meilleure version du modèle.
  * `test` (20 %) : mesure finale de performance, à n’utiliser qu’une seule fois après la phase de validation.

In [4]:
full_dataset = Chars74kDataset(root_dir='C:/Users/G-PROGNOS-01/Desktop/Slomiany Baptiste/doc_lisibility/data/Chars74k/EnglishImg', gen_transforms=gen_transform,
                               train_transforms=None, train=False)
all_labels = np.array([full_dataset[i][1] for i in range(len(full_dataset))])

sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
temp_idx, test_idx = next(sss1.split(np.zeros(len(all_labels)), all_labels))

temp_labels = all_labels[temp_idx]
val_size = 0.15 / 0.8
sss2 = StratifiedShuffleSplit(n_splits=1, test_size=val_size, random_state=42)
train_subidx, val_subidx = next(sss2.split(np.zeros(len(temp_labels)), temp_labels))

train_idx = temp_idx[train_subidx]
val_idx   = temp_idx[val_subidx]

In [5]:
train_dataset = Chars74kDataset(root_dir='C:/Users/G-PROGNOS-01/Desktop/Slomiany Baptiste/doc_lisibility/data/Chars74k/EnglishImg', gen_transforms=gen_transform,
                                train_transforms=train_augment, train=True)
train_ds = Subset(train_dataset, train_idx)

val_dataset = Chars74kDataset(root_dir='C:/Users/G-PROGNOS-01/Desktop/Slomiany Baptiste/doc_lisibility/data/Chars74k/EnglishImg', gen_transforms=gen_transform,
                              train_transforms=None, train=False)
val_ds = Subset(val_dataset, val_idx)

test_dataset = Chars74kDataset(root_dir='C:/Users/G-PROGNOS-01/Desktop/Slomiany Baptiste/doc_lisibility/data/Chars74k/EnglishImg', gen_transforms=gen_transform,
                               train_transforms=None, train=False)
test_ds = Subset(test_dataset, test_idx)

train_loader = DataLoader(train_ds, batch_size=128, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=128, shuffle=False)
test_loader  = DataLoader(test_ds,  batch_size=128, shuffle=False)

In [6]:
characters = list(string.digits + string.ascii_uppercase + string.ascii_lowercase)
class_names = {i + 1: char for i, char in enumerate(characters)}

#### **Model definition and Training**
---

In [7]:
### Train function
def train_model( model, train_loader, val_loader, num_epochs,
                 criterion, optimizer, writer ):

    for epoch in range(1, num_epochs + 1):
        model.train()
        running_loss, running_corrects = 0.0, 0

        for inputs, labels in train_loader:
            
            optimizer.zero_grad()
            result = model(inputs)
            outputs = result[0] if isinstance(result, tuple) else result ## prise en compte retour activations
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            preds = outputs.argmax(dim=1)
            running_corrects += (preds == labels).sum().item()

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects / len(train_loader.dataset)

        model.eval()
        val_loss, val_corrects = 0.0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                result = model(inputs)
                outputs = result[0] if isinstance(result, tuple) else result
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                preds = outputs.argmax(dim=1)
                val_corrects += (preds == labels).sum().item()

        val_loss = val_loss / len(val_loader.dataset)
        val_acc = val_corrects / len(val_loader.dataset)

        if epoch % 10 == 0:
            print(f"Epoch {epoch}/{num_epochs} | "
                  f"Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.4f} | "
                  f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

        writer.add_scalar("Loss/Train", epoch_loss, epoch)
        writer.add_scalar("Loss/Val", val_loss, epoch)
        writer.add_scalar("Acc/Train", epoch_acc, epoch)
        writer.add_scalar("Acc/Val", val_acc, epoch)

        for name, param in model.named_parameters():
            writer.add_histogram(name, param.cpu(), epoch)
            if param.grad is not None:
                writer.add_histogram(f"{name}_grad", param.grad.cpu(), epoch)

        images, _ = next(iter(val_loader))
        
        with torch.no_grad():
            result = model(images, log_activations=True)
            outputs, activations = result if isinstance(result, tuple) else (result, {})
        for layer, activation in activations.items():
            if activation.ndim == 4:
                for i in range(activation.size(1)):
                    writer.add_image(
                        f"Activations/{layer}/{i}",
                        activation[0, i].unsqueeze(0).cpu(),
                        epoch)

    writer.close()
    return model

#### **1st Expansion -- Width**
---

In [10]:
class VGGBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.block = nn.Sequential(
            nn.Conv2d(in_channels, out_channels//4, 3, 1, 1, bias=False),
            nn.BatchNorm2d(out_channels//4),
            nn.ReLU(inplace=True),

            nn.Conv2d(out_channels//4, out_channels, 3, 1, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),

            nn.MaxPool2d(2, 2)
        )
    
    def forward(self, x):
        return self.block(x)
    
class VGGLikeNN1(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes

        self.block1 = VGGBlock(1, 16)
        self.block2 = VGGBlock(16, 64)

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.LazyLinear(num_classes)
        )

    def forward(self, x, log_activations=False):
        activations = {}

        x = self.block1(x)
        if log_activations:
            activations['block1_out'] = x

        x = self.block2(x)
        if log_activations:
            activations['block2_out'] = x

        logits = self.classifier(x)
        if log_activations:
            activations['logits'] = logits
            return logits, activations
        
        return logits

In [11]:
### Parametrisation

model1 = VGGLikeNN1(63)
optimizer = optim.Adam(model1.parameters(), lr=5e-3)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter("runsFineTune/vgg_training1")
epochs = 30

model1 = train_model(model1, train_loader, val_loader, epochs, criterion, optimizer, writer)

Epoch 10/30 | Train Loss: 0.6768, Train Acc: 0.7845 | Val Loss: 0.5069, Val Acc: 0.8260
Epoch 20/30 | Train Loss: 0.5895, Train Acc: 0.8020 | Val Loss: 0.4590, Val Acc: 0.8438
Epoch 30/30 | Train Loss: 0.5332, Train Acc: 0.8178 | Val Loss: 0.4051, Val Acc: 0.8588


#### **Model 2**

In [12]:
class VGGLikeNN(nn.Module):
    def __init__(self, channels_param, num_classes):
        super().__init__()
        
        self.features = nn.ModuleList()
        for in_chan, out_chan in channels_param:
            self.features.append(VGGBlock(in_chan, out_chan))

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.LazyLinear(num_classes)
        )

    def forward(self, x, log_activations=False):
        activations = {}

        for i, block in enumerate(self.features):
            x = block(x)
            if log_activations:
                activations[f'block{i+1}_out'] = x

        logits = self.classifier(x)
        if log_activations:
            activations['logits'] = logits
            return logits, activations
        
        return logits

In [13]:
### Parametrisation

model1 = VGGLikeNN([(1, 32), (32, 128)], 63)
optimizer = optim.Adam(model1.parameters(), lr=5e-3)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter("runsFineTune/vgg_training2")
epochs = 30

model1 = train_model(model1, train_loader, val_loader, epochs, criterion, optimizer, writer)

Epoch 10/30 | Train Loss: 0.6437, Train Acc: 0.7928 | Val Loss: 0.5073, Val Acc: 0.8341
Epoch 20/30 | Train Loss: 0.5275, Train Acc: 0.8190 | Val Loss: 0.4182, Val Acc: 0.8541
Epoch 30/30 | Train Loss: 0.4773, Train Acc: 0.8313 | Val Loss: 0.3858, Val Acc: 0.8606


#### **Model 3**

In [14]:
### Parametrisation

model = VGGLikeNN([(1, 16), (16, 64), (64, 128)], 63)
optimizer = optim.Adam(model.parameters(), lr=5e-3)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter("runsFineTune/vgg_training3")
epochs = 30

model = train_model(model, train_loader, val_loader, epochs, criterion, optimizer, writer)

Epoch 10/30 | Train Loss: 0.5340, Train Acc: 0.8125 | Val Loss: 0.4346, Val Acc: 0.8427
Epoch 20/30 | Train Loss: 0.4489, Train Acc: 0.8333 | Val Loss: 0.3813, Val Acc: 0.8568
Epoch 30/30 | Train Loss: 0.4164, Train Acc: 0.8423 | Val Loss: 0.3679, Val Acc: 0.8622


#### **Model 4**

In [15]:
### Parametrisation

model = VGGLikeNN([(1, 32), (32, 128), (128, 256)], 63)
optimizer = optim.Adam(model.parameters(), lr=5e-3)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter("runsFineTune/vgg_training4")
epochs = 30

model = train_model(model, train_loader, val_loader, epochs, criterion, optimizer, writer)

Epoch 10/30 | Train Loss: 0.4928, Train Acc: 0.8233 | Val Loss: 0.4136, Val Acc: 0.8515
Epoch 20/30 | Train Loss: 0.4139, Train Acc: 0.8441 | Val Loss: 0.3499, Val Acc: 0.8695
Epoch 30/30 | Train Loss: 0.3647, Train Acc: 0.8600 | Val Loss: 0.3485, Val Acc: 0.8633


#### **Model 5**

In [16]:
### Parametrisation

model = VGGLikeNN([(1, 32), (32, 256), (256, 1024)], 63)
optimizer = optim.Adam(model.parameters(), lr=5e-3)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter("runsFineTune/vgg_training5")
epochs = 30

model = train_model(model, train_loader, val_loader, epochs, criterion, optimizer, writer)

Epoch 10/30 | Train Loss: 0.4827, Train Acc: 0.8250 | Val Loss: 0.4017, Val Acc: 0.8522
Epoch 20/30 | Train Loss: 0.3950, Train Acc: 0.8514 | Val Loss: 0.3779, Val Acc: 0.8617
Epoch 30/30 | Train Loss: 0.3471, Train Acc: 0.8649 | Val Loss: 0.3288, Val Acc: 0.8771


#### **Model 6**

In [17]:
### Parametrisation

model = VGGLikeNN([(1, 32), (32, 128), (128, 256), (256, 512)], 63)
optimizer = optim.Adam(model.parameters(), lr=5e-3)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter("runsFineTune/vgg_training6")
epochs = 30

model = train_model(model, train_loader, val_loader, epochs, criterion, optimizer, writer)

Epoch 10/30 | Train Loss: 0.4580, Train Acc: 0.8277 | Val Loss: 0.3927, Val Acc: 0.8489
Epoch 20/30 | Train Loss: 0.3773, Train Acc: 0.8521 | Val Loss: 0.3416, Val Acc: 0.8677
Epoch 30/30 | Train Loss: 0.3303, Train Acc: 0.8681 | Val Loss: 0.3173, Val Acc: 0.8801


#### **Model 7**

In [18]:
### Parametrisation

model = VGGLikeNN([(1, 32), (32, 256), (256, 512), (512, 1024)], 63)
optimizer = optim.Adam(model.parameters(), lr=5e-3)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter("runsFineTune/vgg_training7")
epochs = 30

model = train_model(model, train_loader, val_loader, epochs, criterion, optimizer, writer)

Epoch 10/30 | Train Loss: 0.4537, Train Acc: 0.8309 | Val Loss: 0.4060, Val Acc: 0.8429
Epoch 20/30 | Train Loss: 0.3709, Train Acc: 0.8542 | Val Loss: 0.3353, Val Acc: 0.8714
Epoch 30/30 | Train Loss: 0.3244, Train Acc: 0.8710 | Val Loss: 0.3067, Val Acc: 0.8814


#### **Performance evaluation**
---

In [19]:
torch.save(model.state_dict(), "fine_tuned_vgg.pth")

In [None]:
model_ = VGGLikeNN([(1, 32), (32, 256), (256, 512), (512, 1024)], 63)
model_.load_state_dict(torch.load("research/models/fine_tuned_vgg.pth"))

<All keys matched successfully>

In [24]:
def evaluate_model(model, data_loader, criterion):
    model.eval()
    test_loss, corr, total = 0.0, 0, 0

    with torch.no_grad():
        for inputs, labels in data_loader:

            result = model(inputs)
            outputs = result[0] if isinstance(result, tuple) else result

            loss = criterion(outputs, labels)
            test_loss += loss.item() * inputs.size(0)

            preds = outputs.argmax(dim=1)
            corrects = (preds == labels)
            corr += corrects.sum().item()
            total += inputs.size(0)

    avg_loss = test_loss / total
    accuracy = corr / total

    msg = f"Loss: {avg_loss:.4f}, Acc: {accuracy:.4f}"
    print(msg)

In [25]:
criterion = nn.CrossEntropyLoss()
evaluate_model(model_, test_loader, criterion)

Loss: 0.3294, Acc: 0.8744
