In [None]:
# ================================================================
# Offline Handwritten Text OCR — Fixed Seq2Seq with Attention (PyTorch Lightning)
# - Preprocess: drop NaNs / 'unreadable', crop top-left to 64x256, pad white
# - Encoder: CNN -> width sequence (+ context via BiLSTM inside EncoderCNN)
# - Decoder: GRU + Bahdanau attention, image-conditioned h0
# - Vocab: <pad>=0, <sos>=1, <eos>=2, then dataset chars
# - Training tweaks to avoid mode collapse:
#     * image-conditioned decoder init (from encoder mean)
#     * scheduled teacher forcing (0.7 * 0.97^epoch, floor 0.1)
#     * label smoothing = 0.1
#     * input normalization (mean=0.5, std=0.5)
# - Metrics: CER, 1-CER, ACC (exact match), WER (each label counts as one word, so WER = 1 - ACC)
# - Saves loss plot: loss_curve_seq2seq_fixed.png
# - Checkpoint: seq2seq_fixed.ckpt
# ================================================================

import os
import math
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

import pytorch_lightning as pl

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

# ---------------------------
# Metrics
# ---------------------------

def levenshtein_distance(s1: str, s2: str) -> int:
    """Return the edit distance (insert / delete / substitute, unit cost) between two strings.

    Uses the two-row dynamic-programming formulation; the shorter string
    indexes the columns so each row holds ``len(shorter) + 1`` entries.
    """
    longer, shorter = (s1, s2) if len(s1) >= len(s2) else (s2, s1)

    # Row 0: distance from the empty prefix of `longer` to each prefix of `shorter`.
    prev_row = list(range(len(shorter) + 1))
    for row_no, ch_long in enumerate(longer, start=1):
        row = [row_no]  # distance from longer[:row_no] to the empty string
        for col, ch_short in enumerate(shorter):
            cost_sub = prev_row[col] + (0 if ch_long == ch_short else 1)
            cost_up = prev_row[col + 1] + 1
            cost_left = row[col] + 1
            row.append(min(cost_up, cost_left, cost_sub))
        prev_row = row
    return prev_row[-1]

def compute_metrics(preds, truths):
    """Aggregate recognition metrics over paired predictions and ground truths.

    Args:
        preds: iterable of predicted strings.
        truths: iterable of ground-truth strings, paired positionally with
            ``preds`` (pairing stops at the shorter of the two).

    Returns:
        Tuple ``(cer, one_minus_cer, acc, wer)``:
        - cer: summed edit distance divided by summed ground-truth length
          (0.0 when there are no ground-truth characters),
        - one_minus_cer: ``1 - cer``,
        - acc: fraction of samples predicted exactly,
        - wer: fraction of samples not predicted exactly (each sample is
          counted as a single word).
    """
    pairs = list(zip(truths, preds))
    n_samples = len(pairs)

    edit_total = sum(levenshtein_distance(gt, pr) for gt, pr in pairs)
    char_total = sum(len(gt) for gt, _ in pairs)
    n_exact = sum(1 for gt, pr in pairs if gt == pr)
    n_wrong = n_samples - n_exact

    cer = edit_total / char_total if char_total > 0 else 0.0
    if n_samples > 0:
        acc = n_exact / n_samples
        wer = n_wrong / n_samples
    else:
        acc = 0.0
        wer = 0.0
    return cer, 1.0 - cer, acc, wer

# ---------------------------
# Dataset (keeps your preprocessing)
# ---------------------------

class HandwritingDataset(Dataset):
    """
    Image/label dataset for offline handwriting recognition.

    - CSV must have 'FILENAME', 'IDENTITY'
    - Rows with NaNs or with label == 'unreadable' (case/whitespace-insensitive) are removed
    - Images are converted to grayscale, cropped from the top-left to
      crop_h x crop_w and padded with white (255) where the image is smaller
    - __getitem__ returns: transformed image, raw text string, filename
      (with the default transform and default crop size the image is a
      tensor of shape [1, 64, 256])
    """
    def __init__(self, csv_path, images_dir, transform=None,
                 crop_h=64, crop_w=256, char2idx=None):
        """
        Args:
            csv_path: path of the CSV with 'FILENAME' and 'IDENTITY' columns.
            images_dir: directory that the 'FILENAME' entries are relative to.
            transform: callable applied to the cropped PIL image; defaults to
                ToTensor followed by Normalize(mean=0.5, std=0.5).
            crop_h, crop_w: height / width of the fixed output canvas.
            char2idx: optional existing vocabulary to reuse (e.g. the training
                vocabulary for validation/test); built from this CSV when None.

        Raises:
            ValueError: if a required column is missing from the CSV.
        """
        df = pd.read_csv(csv_path)
        if "IDENTITY" not in df.columns or "FILENAME" not in df.columns:
            raise ValueError(f"CSV {csv_path} must have columns 'FILENAME' and 'IDENTITY'")
        df = df.dropna(subset=["FILENAME", "IDENTITY"]).copy()
        df["IDENTITY"] = df["IDENTITY"].astype(str)
        df = df[df["IDENTITY"].str.strip().str.lower() != "unreadable"].reset_index(drop=True)

        self.df = df
        self.images_dir = images_dir
        self.transform = transform or transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5]),
        ])
        self.crop_h = crop_h
        self.crop_w = crop_w

        # Vocab: <pad>=0, <sos>=1, <eos>=2, then the dataset characters in sorted order
        if char2idx is None:
            chars = sorted({c for text in self.df["IDENTITY"] for c in text})
            self.char2idx = {"<pad>": 0, "<sos>": 1, "<eos>": 2}
            for i, c in enumerate(chars, start=3):
                self.char2idx[c] = i
        else:
            self.char2idx = char2idx
        self.idx2char = {i: c for c, i in self.char2idx.items()}

    def __len__(self):
        """Number of usable rows after filtering."""
        return len(self.df)

    def _crop_pad_top_left(self, img: Image.Image) -> Image.Image:
        """Return a crop_h x crop_w grayscale image: top-left crop of `img` on a white canvas."""
        arr = np.asarray(img.convert("L"), dtype=np.uint8)
        h, w = arr.shape[:2]
        keep_h, keep_w = min(h, self.crop_h), min(w, self.crop_w)
        out = np.full((self.crop_h, self.crop_w), 255, dtype=np.uint8)
        out[:keep_h, :keep_w] = arr[:keep_h, :keep_w]
        # A 2-D uint8 array is already interpreted as mode "L"; the explicit
        # `mode` argument is redundant and warns on some recent Pillow releases.
        return Image.fromarray(out)

    def __getitem__(self, idx):
        """Load, crop/pad and transform sample `idx`; returns (image, text, filename)."""
        row = self.df.iloc[idx]
        img_path = os.path.join(self.images_dir, row["FILENAME"])
        # Context manager releases the file handle deterministically; the
        # grayscale conversion happens once, inside _crop_pad_top_left.
        with Image.open(img_path) as raw:
            img = self._crop_pad_top_left(raw)
        img_t = self.transform(img)
        text = row["IDENTITY"]
        return img_t, text, row["FILENAME"]

    def encode_text(self, text):
        """Encode `text` as [<sos>, char ids..., <eos>].

        Characters missing from the vocabulary are skipped silently, so the
        encoded target can be shorter than the raw text.
        """
        body = [self.char2idx[c] for c in text if c in self.char2idx]
        return [self.char2idx["<sos>"]] + body + [self.char2idx["<eos>"]]

    def collate_fn(self, batch):
        """Batch samples and build padded decoder input/target tensors.

        Returns:
            imgs: stacked image tensor [B, ...],
            dec_in: [B, L] token ids starting with <sos> (no <eos>), padded with <pad>,
            dec_tg: [B, L] token ids ending with <eos> (no <sos>), padded with <pad>,
            lengths: [B] unpadded target lengths,
            filenames, texts: tuples of the raw per-sample values.
        """
        imgs, texts, filenames = zip(*batch)
        imgs = torch.stack(imgs, dim=0)

        seqs = [self.encode_text(t) for t in texts]
        # Input and target each drop one token from the full sequence.
        padded_len = max(len(s) for s in seqs) - 1
        pad_idx = self.char2idx["<pad>"]

        dec_in, dec_tg, lengths = [], [], []
        for s in seqs:
            inp = s[:-1]  # <sos> ... last char
            tgt = s[1:]   # first char ... <eos>
            lengths.append(len(tgt))
            dec_in.append(inp + [pad_idx] * (padded_len - len(inp)))
            dec_tg.append(tgt + [pad_idx] * (padded_len - len(tgt)))

        dec_in = torch.tensor(dec_in, dtype=torch.long)    # [B,L]
        dec_tg = torch.tensor(dec_tg, dtype=torch.long)    # [B,L]
        lengths = torch.tensor(lengths, dtype=torch.long)  # [B]
        return imgs, dec_in, dec_tg, lengths, filenames, texts

# ---------------------------
# Seq2Seq model (Encoder CNN + Bahdanau Attention GRU Decoder)
# ---------------------------

class EncoderCNN(nn.Module):
    """
    Convolutional feature extractor that turns an image into a sequence over width.

    Two conv + 2x2 max-pool stages reduce H and W by 4. Every remaining image
    column (all channels x reduced height) becomes one time step, which is
    then passed through a BiLSTM (rnn=True) or a linear projection (rnn=False).

    Input:  [B,1,H,W] with H == img_height (e.g. [B,1,64,256])
    Output: [B,T,d_model] with T = W/4 (64 for W = 256)
    """
    def __init__(self, img_height=64, d_model=256, rnn=True):
        super().__init__()
        conv_stack = [
            nn.Conv2d(1, 64, 3, 1, 1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),      # H/2, W/2
            nn.Conv2d(64, 128, 3, 1, 1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),      # H/4, W/4
        ]
        self.cnn = nn.Sequential(*conv_stack)
        self.img_height = img_height
        self.use_rnn = rnn
        self.out_dim = d_model

        # Features per column: 128 channels x reduced height (128 * 16 = 2048 for H = 64)
        column_features = (img_height // 4) * 128
        if not rnn:
            self.rnn = None
            self.proj = nn.Linear(column_features, d_model)
        else:
            # Two directions of d_model//2 concatenate back to d_model.
            self.rnn = nn.LSTM(input_size=column_features, hidden_size=d_model // 2,
                               num_layers=1, bidirectional=True, batch_first=True)
            self.proj = nn.Identity()

    def forward(self, x):
        fmap = self.cnn(x)                                # [B,128,H',W']
        batch, chans, h_red, w_red = fmap.shape
        # Move width to the time axis and merge (channel, height) into one feature axis.
        seq = fmap.permute(0, 3, 1, 2).reshape(batch, w_red, chans * h_red)  # [B,T,feat_dim]
        if not self.use_rnn:
            return self.proj(seq)                         # [B,T,d_model]
        ctx_seq, _ = self.rnn(seq)                        # [B,T,d_model]
        return ctx_seq

class BahdanauAttention(nn.Module):
    """Additive attention: score_t = v^T tanh(W_enc * enc_t + W_dec * dec_state)."""
    def __init__(self, d_enc, d_dec, d_attn=256):
        super().__init__()
        self.W_enc = nn.Linear(d_enc, d_attn, bias=False)
        self.W_dec = nn.Linear(d_dec, d_attn, bias=False)
        self.v = nn.Linear(d_attn, 1, bias=False)

    def forward(self, enc_out, dec_state):
        """
        Args:
            enc_out: encoder sequence [B,T,De].
            dec_state: current decoder state [B,Dd].

        Returns:
            ctx: attention-weighted sum of encoder steps [B,De],
            attn: softmax weights over the T encoder steps [B,T].
        """
        keys = self.W_enc(enc_out)                            # [B,T,A]
        query = self.W_dec(dec_state)[:, None, :]             # [B,1,A], broadcast over T
        energy = self.v(torch.tanh(keys + query))[..., 0]     # [B,T]
        attn = energy.softmax(dim=-1)                         # [B,T]
        ctx = (attn[:, None, :] @ enc_out)[:, 0, :]           # [B,De]
        return ctx, attn

class DecoderRNN(nn.Module):
    """Single-layer GRU decoder that attends over the encoder sequence at every step."""
    def __init__(self, vocab_size, d_enc=256, d_dec=256, attn_dim=256, emb_dim=128, pad_idx=0):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, emb_dim, padding_idx=pad_idx)
        # GRU input is the token embedding concatenated with the attention context.
        self.rnn = nn.GRU(emb_dim + d_enc, d_dec, batch_first=True)
        self.attn = BahdanauAttention(d_enc, d_dec, attn_dim)
        self.out = nn.Linear(d_dec, vocab_size)

    def forward_step(self, y_prev, h, enc_out):
        """Run one decoding step.

        Args:
            y_prev: previous token ids [B].
            h: GRU hidden state [1,B,Dd].
            enc_out: encoder sequence [B,T,De].

        Returns:
            logits [B,V], next hidden state [1,B,Dd], attention weights [B,T].
        """
        # Attention is queried with the hidden state from before this step.
        context, weights = self.attn(enc_out, h.squeeze(0))            # context: [B,De]
        token_vec = self.emb(y_prev)                                    # [B,E]
        step_in = torch.cat((token_vec, context), dim=-1).unsqueeze(1)  # [B,1,E+De]
        step_out, h_next = self.rnn(step_in, h)                         # step_out: [B,1,Dd]
        logits = self.out(step_out[:, 0, :])                            # [B,V]
        return logits, h_next, weights

# ---------------------------
# Lightning Module with fixes
# ---------------------------

class Seq2SeqOCR(pl.LightningModule):
    """
    Lightning module for attention-based seq2seq OCR.

    Pipeline: EncoderCNN -> image-conditioned initial hidden state ->
    DecoderRNN (GRU + Bahdanau attention), trained with label-smoothed
    cross-entropy and scheduled teacher forcing. Validation additionally runs
    greedy decoding and logs CER / 1-CER / ACC / WER.
    """
    def __init__(self, dataset: HandwritingDataset, d_model=256, lr=1e-3,
                 teacher_forcing_start=0.7, teacher_forcing_floor=0.1, tf_decay=0.97,
                 max_decode_len=40):
        """
        Args:
            dataset: provides the vocabulary (char2idx / idx2char); not stored
                in the saved hyperparameters.
            d_model: width of the encoder output and decoder hidden state.
            lr: Adam learning rate.
            teacher_forcing_start, teacher_forcing_floor, tf_decay: schedule
                ``max(floor, start * decay ** epoch)``.
            max_decode_len: upper bound on greedy decoding steps in validation.
        """
        super().__init__()
        self.save_hyperparameters(ignore=['dataset'])
        self.pad_idx = dataset.char2idx["<pad>"]
        self.sos_idx = dataset.char2idx["<sos>"]
        self.eos_idx = dataset.char2idx["<eos>"]
        self.vocab_size = len(dataset.char2idx)
        self.idx2char = dataset.idx2char

        self.encoder = EncoderCNN(img_height=64, d_model=d_model, rnn=True)
        self.decoder = DecoderRNN(self.vocab_size, d_enc=d_model, d_dec=d_model, pad_idx=self.pad_idx)

        # Image-conditioned initial hidden state (projected mean of encoder output)
        self.init_h = nn.Linear(d_model, d_model)

        # Label smoothing helps reduce overconfidence / collapse; padding is ignored
        self.criterion = nn.CrossEntropyLoss(ignore_index=self.pad_idx, label_smoothing=0.1)

        # Per-epoch loss history (for the loss plot) and per-step buffers
        self.train_epoch_losses, self.val_epoch_losses = [], []
        self._train_buf, self._val_buf = [], []

        # Decoded validation strings accumulated across an epoch
        self.val_preds, self.val_truths = [], []

    def current_teacher_forcing(self):
        """Teacher-forcing probability for the current epoch (decayed, with a floor)."""
        tf = self.hparams.teacher_forcing_start * (self.hparams.tf_decay ** max(0, self.current_epoch))
        return max(self.hparams.teacher_forcing_floor, float(tf))

    def _initial_hidden(self, enc_out):
        """Build the decoder's h0 [1,B,D] from the time-averaged encoder output [B,T,D]."""
        return torch.tanh(self.init_h(enc_out.mean(dim=1))).unsqueeze(0)

    def _in_sanity_check(self):
        """True while the attached trainer runs its pre-training sanity validation."""
        try:
            return bool(self.trainer.sanity_checking)
        except (RuntimeError, AttributeError):
            # No trainer attached (e.g. hook called manually).
            return False

    def forward(self, imgs, y_inp, teacher_forcing=0.5):
        """Encode `imgs` and decode along `y_inp`; returns logits [B,L,V]."""
        enc_out = self.encoder(imgs)              # [B,T,D]
        h0 = self._initial_hidden(enc_out)        # [1,B,D]
        return self.decoder_with_h0(enc_out, y_inp, h0, teacher_forcing)

    def decoder_with_h0(self, enc_out, y_inp, h0, teacher_forcing=0.5):
        """Unroll the decoder for L = y_inp.size(1) steps starting from `h0`.

        At each step one random draw (shared by the whole batch) decides
        whether the next input is the ground-truth token (probability
        `teacher_forcing`) or the model's own argmax prediction.

        Returns:
            logits [B,L,V].
        """
        L = y_inp.size(1)
        h = h0
        logits_list = []
        y_t = y_inp[:, 0]  # <sos>
        for t in range(1, L + 1):
            logit, h, _ = self.decoder.forward_step(y_t, h, enc_out)
            logits_list.append(logit)
            if t < L:
                if torch.rand(1).item() < teacher_forcing:
                    y_t = y_inp[:, t]
                else:
                    y_t = logit.argmax(dim=-1)
        return torch.stack(logits_list, dim=1)

    def training_step(self, batch, batch_idx):
        imgs, dec_in, dec_tg, _lengths, _, _ = batch
        batch_size = imgs.size(0)
        tf = self.current_teacher_forcing()
        logits = self(imgs, dec_in, teacher_forcing=tf)
        loss = self.criterion(logits.reshape(-1, logits.size(-1)), dec_tg.reshape(-1))
        self.log("train_loss", loss, prog_bar=True, on_epoch=True, batch_size=batch_size)
        # The batch is a tuple mixing tensors and string tuples, so Lightning
        # cannot infer its size reliably; pass it explicitly for epoch averaging.
        self.log("teacher_forcing", tf, prog_bar=True, on_step=False, on_epoch=True,
                 batch_size=batch_size)
        self._train_buf.append(loss.detach().cpu().item())
        return loss

    def validation_step(self, batch, batch_idx):
        imgs, dec_in, dec_tg, _lengths, filenames, raw_texts = batch
        logits = self(imgs, dec_in, teacher_forcing=0.0)  # eval w/o TF
        loss = self.criterion(logits.reshape(-1, logits.size(-1)), dec_tg.reshape(-1))
        self.log("val_loss", loss, prog_bar=True, on_epoch=True, batch_size=imgs.size(0))
        self._val_buf.append(loss.detach().cpu().item())

        # Greedy decode for metrics
        preds = self.greedy_decode(imgs, max_len=min(self.hparams.max_decode_len, dec_in.size(1) + 5))
        self.val_preds.extend(preds)
        self.val_truths.extend(list(raw_texts))

        if batch_idx == 0:
            for i in range(min(3, len(preds))):
                self.print(f"VAL SAMPLE {filenames[i]} → pred: {preds[i]} | gt: {raw_texts[i]}")

    def on_train_epoch_end(self):
        """Fold this epoch's per-step training losses into the epoch history."""
        if self._train_buf:
            self.train_epoch_losses.append(float(np.mean(self._train_buf)))
            self._train_buf = []

    def on_validation_epoch_end(self):
        """Record the epoch's mean validation loss and log the decoding metrics."""
        if self._val_buf:
            # The sanity-check pass runs before any training; recording it
            # would shift the validation curve by one epoch against training.
            if not self._in_sanity_check():
                self.val_epoch_losses.append(float(np.mean(self._val_buf)))
            self._val_buf = []
        if self.val_preds:
            cer, one_minus_cer, acc, wer = compute_metrics(self.val_preds, self.val_truths)
            self.log('val_CER', cer, prog_bar=True)
            self.log('val_1_minus_CER', one_minus_cer, prog_bar=True)
            self.log('val_ACC', acc, prog_bar=True)
            self.log('val_WER', wer, prog_bar=True)
            self.val_preds, self.val_truths = [], []

    def on_fit_end(self):
        """Save the train/validation loss curves to loss_curve_seq2seq_fixed.png."""
        if self.train_epoch_losses:
            fig = plt.figure(figsize=(6, 4))
            plt.plot(range(1, len(self.train_epoch_losses) + 1), self.train_epoch_losses, label="Train Loss")
            if self.val_epoch_losses:
                plt.plot(range(1, len(self.val_epoch_losses) + 1), self.val_epoch_losses, label="Val Loss")
            plt.xlabel("Epoch"); plt.ylabel("CE Loss"); plt.title("Seq2Seq (fixed) Loss")
            plt.legend(); plt.tight_layout()
            plt.savefig("loss_curve_seq2seq_fixed.png")
            plt.close(fig)  # release the figure once it is on disk
            self.print("[Saved] loss_curve_seq2seq_fixed.png")

    @torch.no_grad()
    def greedy_decode(self, imgs, max_len=40):
        """Greedily decode a batch of images into strings.

        Decoding runs in eval mode (the previous training mode is restored
        afterwards) for at most `max_len` steps, stopping early once every
        sample has produced <eos>. For each sample, characters are kept up to
        (not including) its first <eos>; <pad>/<sos> tokens are dropped.

        Returns:
            list of B decoded strings.
        """
        was_training = self.training
        self.eval()
        try:
            device = imgs.device
            enc_out = self.encoder(imgs)                 # [B,T,D]
            h = self._initial_hidden(enc_out)            # [1,B,D]
            B = imgs.size(0)
            y_t = torch.full((B,), self.sos_idx, dtype=torch.long, device=device)
            finished = torch.zeros(B, dtype=torch.bool, device=device)
            steps = []

            for _ in range(max_len):
                logit, h, _ = self.decoder.forward_step(y_t, h, enc_out)
                y_t = logit.argmax(dim=-1)               # [B]
                steps.append(y_t)
                finished |= y_t == self.eos_idx
                if finished.all():
                    break
        finally:
            if was_training:
                self.train()

        if not steps:
            return [""] * B

        # One device->host transfer for all tokens instead of one per token.
        token_rows = torch.stack(steps, dim=1).cpu().tolist()
        skipped = (self.pad_idx, self.sos_idx)
        out = []
        for row in token_rows:
            chars = []
            for tok in row:
                if tok == self.eos_idx:
                    break
                if tok not in skipped:
                    chars.append(self.idx2char.get(tok, ""))
            out.append("".join(chars))
        return out

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=self.hparams.lr)

# ---------------------------
# Training / Testing harness
# ---------------------------

def make_loaders(data_root="/kaggle/input/handwriting-recognition",
                 batch_size=128, num_workers=3):
    """Build the train / validation / test datasets and their DataLoaders.

    Expects, under `data_root`, ``written_name_<split>_v2.csv`` and the image
    directory ``<split>_v2/<split>`` for each split in train, validation, test.
    The validation and test datasets reuse the training vocabulary.

    Args:
        data_root: dataset root directory (defaults to the Kaggle input path).
        batch_size: batch size used by all three loaders.
        num_workers: worker processes per loader.

    Returns:
        (train_ds, val_ds, test_ds, train_loader, val_loader, test_loader);
        only the training loader shuffles.
    """
    def _dataset(split, char2idx=None):
        return HandwritingDataset(
            os.path.join(data_root, f"written_name_{split}_v2.csv"),
            os.path.join(data_root, f"{split}_v2", split),
            crop_h=64, crop_w=256,
            char2idx=char2idx,
        )

    def _loader(ds, shuffle):
        return DataLoader(ds, batch_size=batch_size, shuffle=shuffle,
                          num_workers=num_workers, pin_memory=True,
                          collate_fn=ds.collate_fn)

    train_ds = _dataset("train")
    val_ds = _dataset("validation", char2idx=train_ds.char2idx)  # keep same vocab
    test_ds = _dataset("test", char2idx=train_ds.char2idx)

    train_loader = _loader(train_ds, shuffle=True)
    val_loader = _loader(val_ds, shuffle=False)
    test_loader = _loader(test_ds, shuffle=False)
    return train_ds, val_ds, test_ds, train_loader, val_loader, test_loader

def train_seq2seq_fixed():
    """Train the seq2seq OCR model for 40 epochs and save seq2seq_fixed.ckpt.

    Returns:
        (model, test_loader, test_ds) so the caller can evaluate on the test split.
    """
    loaders = make_loaders()
    train_ds, _val_ds, test_ds, train_loader, val_loader, test_loader = loaders

    hyper = {
        "d_model": 256,
        "lr": 1e-3,
        "teacher_forcing_start": 0.7,
        "teacher_forcing_floor": 0.1,
        "tf_decay": 0.97,
        "max_decode_len": 40,
    }
    model = Seq2SeqOCR(train_ds, **hyper)

    # Single device; GPU when one is visible to torch.
    if torch.cuda.is_available():
        accelerator = "gpu"
    else:
        accelerator = "cpu"
    trainer = pl.Trainer(max_epochs=40, accelerator=accelerator, devices=1, log_every_n_steps=20)

    trainer.fit(model, train_loader, val_loader)
    trainer.save_checkpoint("seq2seq_fixed.ckpt")
    return model, test_loader, test_ds

@torch.no_grad()
def test_seq2seq(model, test_loader, idx2char):
    """Greedy-decode the whole test loader, print five samples and the aggregate metrics.

    Args:
        model: trained model exposing ``greedy_decode(imgs, max_len)``.
        test_loader: yields (imgs, dec_in, dec_tg, lengths, filenames, raw_texts).
        idx2char: accepted for interface compatibility; not used here.
    """
    model.eval()
    target = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(target)

    predictions, references = [], []
    progress = tqdm(test_loader, desc="Testing (seq2seq fixed)")
    for batch in progress:
        imgs, dec_in, _dec_tg, _lengths, _filenames, raw_texts = batch
        # Decoding budget: padded target length of this batch plus 5 extra steps.
        decoded = model.greedy_decode(imgs.to(target), max_len=dec_in.size(1) + 5)
        predictions.extend(decoded)
        references.extend(list(raw_texts))

    for i, pr in enumerate(predictions[:5]):
        print(f"GT: {references[i]} | PRED: {pr}")

    cer, one_minus_cer, acc, wer = compute_metrics(predictions, references)
    print(f"Test CER: {cer:.6f}")
    print(f"Test 1-CER (Char Acc): {one_minus_cer:.6f}")
    print(f"Test ACC (Exact): {acc:.6f}")
    print(f"Test WER: {wer:.6f}")

# Script entry point: train from scratch, then evaluate on the test split.
if __name__ == "__main__":
    # Trains the model and writes seq2seq_fixed.ckpt; returns what evaluation needs.
    model, test_loader, test_ds = train_seq2seq_fixed()
    # Alternative: reload the saved checkpoint instead of using the in-memory model.
    # `dataset` must be passed again because it is excluded from the saved hyperparameters.
    # model = Seq2SeqOCR.load_from_checkpoint(
    #     "seq2seq_fixed.ckpt", dataset=test_ds, d_model=256, lr=1e-3,
    #     teacher_forcing_start=0.7, teacher_forcing_floor=0.1, tf_decay=0.97, max_decode_len=40
    # )
    # Prints five GT/PRED pairs followed by CER, 1-CER, ACC and WER.
    test_seq2seq(model, test_loader, test_ds.idx2char)


2025-09-26 08:20:38.072039: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758874838.475783      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758874838.586238      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: DtWGDWGDWGDWGDWGDWGDWG | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: DtWGDWGDWGDWGDWGDWGDWG | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: DtWGDWGDWGDWGDWGDWGDWG | gt: LEA


Training: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.11/dist-packages/pytorch_lightning/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 128. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
/usr/local/lib/python3.11/dist-packages/pytorch_lightning/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 54. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONNIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0002.jpg → pred: LAUMONIER | gt: LAUMIONIER
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA


Testing (seq2seq fixed): 100%|██████████| 323/323 [02:17<00:00,  2.34it/s]


GT: KEVIN | PRED: KEVIN
GT: CLOTAIRE | PRED: CIO
GT: LENA | PRED: LENA
GT: JULES | PRED: JULES
GT: CHERPIN | PRED: CHERPIN
Test CER: 0.095422
Test 1-CER (Char Acc): 0.904578
Test ACC (Exact): 0.699775
Test WER: 0.300225
