In [None]:
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import pytorch_lightning as pl
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import numpy as np

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

# ---------------------------
# Utilities: distances & metrics
# ---------------------------

def levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between two strings.

    Uses the two-row dynamic-programming formulation, so only one previous
    row of the distance table is kept in memory at any time.
    """
    # Put the longer string on the outer loop; rows then have len(shorter) + 1 cells.
    if len(s1) < len(s2):
        longer, shorter = s2, s1
    else:
        longer, shorter = s1, s2

    # Row 0: distance from the empty prefix to each prefix of `shorter`.
    prev = list(range(len(shorter) + 1))
    for row_no, ch_long in enumerate(longer, start=1):
        row = [row_no]
        for col, ch_short in enumerate(shorter):
            cost_insert = prev[col + 1] + 1
            cost_delete = row[col] + 1
            # A bool adds 0 when the characters match and 1 otherwise.
            cost_substitute = prev[col] + (ch_long != ch_short)
            row.append(min(cost_insert, cost_delete, cost_substitute))
        prev = row
    return prev[-1]


def compute_metrics(preds, truths):
    """Aggregate OCR quality metrics over paired predictions and ground truths.

    Each sample is treated as a single "word", so the word error rate is the
    fraction of samples whose prediction differs from the ground truth.

    Returns:
        Tuple ``(cer, wer, acc, char_acc)`` where ``cer`` is the summed edit
        distance divided by the summed ground-truth length, ``wer`` is the
        fraction of mismatching samples, ``acc`` is the fraction of exact
        matches and ``char_acc`` is ``1.0 - cer``. Ratios whose denominator
        is zero are reported as ``0.0``.
    """
    char_errors = 0
    char_count = 0
    sample_count = 0
    mismatches = 0
    hits = 0

    for reference, hypothesis in zip(truths, preds):
        # Character level: edit distance against the reference length
        char_errors += levenshtein_distance(reference, hypothesis)
        char_count += len(reference)

        # Sample level: one token per sample
        sample_count += 1
        mismatches += int(reference != hypothesis)
        if reference == hypothesis:
            hits += 1

    if char_count > 0:
        cer = char_errors / char_count
    else:
        cer = 0.0

    if sample_count > 0:
        wer = mismatches / sample_count
        acc = hits / sample_count
    else:
        wer = 0.0
        acc = 0.0

    return cer, wer, acc, 1.0 - cer


# ---------------------------
# Dataset
# ---------------------------

class HandwritingDataset(Dataset):
    """
    Handwritten-name image dataset indexed by a CSV file.

    - The CSV must provide the columns 'FILENAME' and 'IDENTITY'
    - Rows with NaN in either column, or whose label equals 'unreadable'
      (after stripping whitespace, ignoring case), are discarded
    - Images are converted to grayscale, cropped from the top-left corner to
      crop_h x crop_w and padded with white (255) when smaller
    - Each item is (image tensor, label index tensor, filename)
    """
    def __init__(self, csv_path, images_dir, char2idx=None, transform=None,
                 crop_h=64, crop_w=256):
        frame = pd.read_csv(csv_path)

        # Both columns are mandatory; fail early with a clear message
        if any(col not in frame.columns for col in ("IDENTITY", "FILENAME")):
            raise ValueError(f"CSV {csv_path} must have columns 'FILENAME' and 'IDENTITY'")

        # Discard incomplete rows, then normalise labels to str
        frame = frame.dropna(subset=["FILENAME", "IDENTITY"])
        frame["IDENTITY"] = frame["IDENTITY"].astype(str)

        # Filter out samples annotated as 'unreadable'
        is_unreadable = frame["IDENTITY"].str.strip().str.lower() == "unreadable"
        self.df = frame[~is_unreadable].reset_index(drop=True)

        self.images_dir = images_dir
        self.crop_h = crop_h
        self.crop_w = crop_w

        # Derive the vocabulary from the labels unless the caller supplies one
        if char2idx is None:
            seen = set()
            for text in self.df["IDENTITY"]:
                seen.update(text)
            # Index 0 is reserved for the CTC blank, so characters start at 1
            char2idx = {ch: pos for pos, ch in enumerate(sorted(seen), start=1)}
        self.char2idx = char2idx
        self.idx2char = {index: ch for ch, index in char2idx.items()}

        # Fall back to plain ToTensor when no transform is given
        self.transform = transform if transform is not None else transforms.ToTensor()

    def __len__(self):
        return len(self.df)

    def _crop_pad_top_left(self, img: Image.Image) -> Image.Image:
        """Return a crop_h x crop_w grayscale image taken from the top-left corner.

        Regions not covered by the source image are filled with 255 (white).
        """
        pixels = np.array(img, dtype=np.uint8)  # expected to be 2-D (grayscale)
        src_h, src_w = pixels.shape[:2]
        keep_h = min(src_h, self.crop_h)
        keep_w = min(src_w, self.crop_w)

        # Start from an all-white canvas and paste the visible window onto it
        canvas = np.full((self.crop_h, self.crop_w), 255, dtype=np.uint8)
        canvas[:keep_h, :keep_w] = pixels[:keep_h, :keep_w]
        return Image.fromarray(canvas, mode="L")

    def __getitem__(self, idx):
        record = self.df.iloc[idx]
        filename = record["FILENAME"]

        # Load as grayscale and bring to the fixed crop size
        gray = Image.open(os.path.join(self.images_dir, filename)).convert("L")
        fixed = self._crop_pad_top_left(gray)
        img_t = self.transform(fixed)

        # Characters missing from the vocabulary are skipped
        indices = [self.char2idx[ch] for ch in record["IDENTITY"] if ch in self.char2idx]
        label = torch.tensor(indices, dtype=torch.long)

        return img_t, label, filename

    def collate_fn(self, batch):
        """Batch samples for CTC: stacked images, flat labels and per-sample lengths."""
        images, targets, names = zip(*batch)

        # Every image has the same size, so a plain stack is enough
        image_batch = torch.stack(images, dim=0)

        # CTC takes all targets concatenated plus the length of each one
        flat_targets = torch.cat(targets)
        target_lengths = torch.tensor(list(map(len, targets)), dtype=torch.long)

        return image_batch, flat_targets, target_lengths, names


# ---------------------------
# Model (CRNN + CTC)
# ---------------------------

class CRNN(nn.Module):
    """
    Convolutional-recurrent recogniser for CTC training.

    Two conv + max-pool stages halve height and width twice, the feature map
    is read column by column by a 2-layer bidirectional LSTM, and a linear
    layer produces per-timestep class scores (num_classes + 1, blank at 0).
    """
    def __init__(self, num_classes, img_height=64):
        super().__init__()
        self.img_height = img_height

        conv_stack = [
            nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),   # H/2, W/2
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),   # H/4, W/4
        ]
        self.cnn = nn.Sequential(*conv_stack)

        # Each timestep sees one feature-map column: 128 channels x (img_height // 4) rows
        pooled_height = self.img_height // 4
        self.rnn = nn.LSTM(
            input_size=128 * pooled_height,
            hidden_size=256,
            num_layers=2,
            bidirectional=True,
            batch_first=True,
        )
        # 512 = 2 directions x 256 hidden units; one extra output for the CTC blank
        self.fc = nn.Linear(512, num_classes + 1)

    def forward(self, x):
        """
        Args:
            x: image batch of shape [B, 1, H, W]

        Returns:
            logits: [B, T, C] with T = W // 4
            out_lengths: [B] long tensor, every entry equal to T
        """
        batch_size, _in_channels, _in_height, _in_width = x.shape

        fmap = self.cnn(x)                                # [B, 128, H/4, W/4]
        fmap = fmap.permute(0, 3, 1, 2).contiguous()      # [B, W', 128, H']
        steps = fmap.size(1)
        sequence = fmap.view(batch_size, steps, -1)       # [B, W', 128 * H']

        # Recurrent pass along the width axis, then per-step classification
        hidden, _ = self.rnn(sequence)                    # [B, W', 512]
        logits = self.fc(hidden)                          # [B, W', C]

        out_lengths = torch.full((batch_size,), steps, dtype=torch.long, device=x.device)
        return logits, out_lengths


def ctc_greedy_decoder(logits, idx2char):
    """Greedy (best-path) CTC decoding.

    Args:
        logits: tensor of shape [B, T, C]; index 0 on the last axis is the
            CTC blank.
        idx2char: mapping from class index to character. Indices missing from
            the mapping contribute nothing to the output.

    Returns:
        List of B decoded strings: per timestep the arg-max class is taken,
        consecutive repeats are collapsed and blanks are removed.
    """
    # Convert once to nested Python lists: a single transfer instead of a
    # per-element .item() call (each of which synchronises when on GPU).
    best_paths = logits.argmax(2).tolist()  # B lists of T ints

    texts = []
    for path in best_paths:
        chars = []
        prev = -1
        for idx in path:
            if idx != 0 and idx != prev:  # 0 = blank
                chars.append(idx2char.get(idx, ""))
            prev = idx
        # Join once rather than growing a string with repeated +=
        texts.append("".join(chars))
    return texts


# ---------------------------
# Lightning module
# ---------------------------

class OCRModel(pl.LightningModule):
    """
    Lightning module that trains a CRNN with CTC loss and reports validation
    CER / exact-match accuracy at the end of every validation epoch.

    Args:
        dataset: provides the vocabulary (``char2idx`` / ``idx2char``). It is
            excluded from the saved hyperparameters, so it has to be passed
            again when calling ``load_from_checkpoint``.
        lr: learning rate for Adam.
        img_height: input image height forwarded to ``CRNN``.
    """
    def __init__(self, dataset: HandwritingDataset, lr=1e-3, img_height=64):
        super().__init__()
        self.save_hyperparameters(ignore=['dataset'])
        self.model = CRNN(num_classes=len(dataset.char2idx), img_height=img_height)
        self.loss_fn = nn.CTCLoss(blank=0, zero_infinity=True)
        self.idx2char = dataset.idx2char

        # Decoded strings accumulated over one validation epoch
        self.val_preds = []
        self.val_truths = []

        # Per-step loss buffers and the per-epoch means used for plotting
        self._train_loss_buf = []
        self._val_loss_buf = []
        self.train_epoch_losses = []
        self.val_epoch_losses = []

    def _forward_loss(self, imgs, labels, label_lengths):
        """Run the CRNN on one batch and return ``(logits, ctc_loss)``."""
        logits, out_lengths = self.model(imgs)
        log_probs = logits.log_softmax(2).permute(1, 0, 2)  # CTCLoss expects [T, B, C]
        loss = self.loss_fn(log_probs, labels, out_lengths, label_lengths)
        return logits, loss

    def training_step(self, batch, batch_idx):
        imgs, labels, label_lengths, _ = batch
        _, loss = self._forward_loss(imgs, labels, label_lengths)
        # sync_dist=True so the epoch-level value is reduced across devices
        self.log('train_loss', loss, batch_size=imgs.size(0), sync_dist=True,
                 prog_bar=True, on_step=False, on_epoch=True)
        self._train_loss_buf.append(loss.detach().cpu().item())
        return loss

    def validation_step(self, batch, batch_idx):
        imgs, labels, label_lengths, filenames = batch
        logits, loss = self._forward_loss(imgs, labels, label_lengths)
        self.log('val_loss', loss, batch_size=imgs.size(0), sync_dist=True,
                 prog_bar=True, on_step=False, on_epoch=True)
        self._val_loss_buf.append(loss.detach().cpu().item())

        # Decode predictions
        preds = ctc_greedy_decoder(logits, self.idx2char)

        # Decode ground truths from the flat label tensor. Both tensors are
        # converted to Python lists once instead of calling .item() per element.
        flat_labels = labels.tolist()
        truths = []
        offset = 0
        for length in label_lengths.tolist():
            truths.append("".join(self.idx2char[i] for i in flat_labels[offset:offset + length]))
            offset += length

        # Accumulate predictions and truths for epoch-end metrics
        self.val_preds.extend(preds)
        self.val_truths.extend(truths)

        # Show a few samples from the first batch as a quick sanity signal
        if batch_idx == 0:
            for i in range(min(3, len(preds))):
                self.print(f"VAL SAMPLE {filenames[i]} → pred: {preds[i]} | gt: {truths[i]}")

        return {"val_loss": loss}

    def on_train_epoch_end(self):
        if self._train_loss_buf:
            self.train_epoch_losses.append(float(np.mean(self._train_loss_buf)))
            self._train_loss_buf = []

    def on_validation_epoch_end(self):
        if self._val_loss_buf:
            # The pre-training sanity check also runs validation; recording it
            # would shift the validation curve by one epoch against training.
            if not self.trainer.sanity_checking:
                self.val_epoch_losses.append(float(np.mean(self._val_loss_buf)))
            self._val_loss_buf = []

        if len(self.val_preds) > 0:
            cer, wer, acc, char_acc = compute_metrics(self.val_preds, self.val_truths)
            self.log('val_CER', cer, prog_bar=True, sync_dist=True)
            self.log('val_1_minus_CER', char_acc, prog_bar=True, sync_dist=True)
            self.log('val_ACC_exact', acc, prog_bar=True, sync_dist=True)

        # Reset for next epoch
        self.val_preds, self.val_truths = [], []

    def on_fit_end(self):
        """Plot and save loss curves at the end of training."""
        if len(self.train_epoch_losses) == 0:
            return
        # With several devices every process runs this hook; only one should
        # write the file.
        if not self.trainer.is_global_zero:
            return
        fig = plt.figure(figsize=(7, 4))
        plt.plot(range(1, len(self.train_epoch_losses) + 1), self.train_epoch_losses, label="Train Loss")
        if len(self.val_epoch_losses) > 0:
            plt.plot(range(1, len(self.val_epoch_losses) + 1), self.val_epoch_losses, label="Val Loss")
        plt.xlabel("Epoch")
        plt.ylabel("CTC Loss")
        plt.title("Training/Validation Loss")
        plt.legend()
        plt.tight_layout()
        plt.savefig("loss_curve.png")
        # Release the figure so repeated fits do not accumulate open figures
        plt.close(fig)
        self.print("[Saved] Loss curve -> loss_curve.png")

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=self.hparams.lr)


# ---------------------------
# Training & Testing
# ---------------------------

def train_model(data_root="/kaggle/input/handwriting-recognition",
                checkpoint_path="ocr_model.ckpt",
                batch_size=128, max_epochs=40, num_workers=3):
    """Train the OCR model and save a checkpoint.

    Args:
        data_root: directory containing ``written_name_train_v2.csv``,
            ``written_name_validation_v2.csv`` and the ``train_v2/train`` and
            ``validation_v2/validation`` image folders.
        checkpoint_path: where the trained checkpoint is written.
        batch_size: batch size for both the training and validation loaders.
        max_epochs: number of training epochs.
        num_workers: DataLoader worker processes per loader.

    The defaults reproduce the previously hard-coded configuration.
    """
    train_ds = HandwritingDataset(
        os.path.join(data_root, "written_name_train_v2.csv"),
        os.path.join(data_root, "train_v2", "train"),
        crop_h=64, crop_w=256
    )
    # Validation reuses the training vocabulary so label indices agree
    val_ds = HandwritingDataset(
        os.path.join(data_root, "written_name_validation_v2.csv"),
        os.path.join(data_root, "validation_v2", "validation"),
        char2idx=train_ds.char2idx,
        crop_h=64, crop_w=256
    )

    train_loader = DataLoader(
        train_ds, batch_size=batch_size, shuffle=True,
        collate_fn=train_ds.collate_fn, num_workers=num_workers, pin_memory=True
    )
    val_loader = DataLoader(
        val_ds, batch_size=batch_size, shuffle=False,
        collate_fn=val_ds.collate_fn, num_workers=num_workers, pin_memory=True
    )

    model = OCRModel(train_ds, img_height=64)
    trainer = pl.Trainer(max_epochs=max_epochs, accelerator='gpu', devices=-1, log_every_n_steps=20)
    trainer.fit(model, train_loader, val_loader)
    trainer.save_checkpoint(checkpoint_path)
    # loss_curve.png is saved from model.on_fit_end()


def test_model(data_root="/kaggle/input/handwriting-recognition",
               checkpoint_path="ocr_model.ckpt",
               batch_size=128, num_workers=3):
    """Run the trained model over the test split and collect decoded strings.

    Args:
        data_root: directory containing ``written_name_train_v2.csv``,
            ``written_name_test_v2.csv`` and the ``train_v2/train`` and
            ``test_v2/test`` image folders.
        checkpoint_path: checkpoint to load.
        batch_size: batch size for the test loader.
        num_workers: DataLoader worker processes.

    Returns:
        ``(all_preds, all_truths)``: two lists of strings in dataset order.

    The defaults reproduce the previously hard-coded configuration.
    """
    # Load training ds to get vocab
    train_ds = HandwritingDataset(
        os.path.join(data_root, "written_name_train_v2.csv"),
        os.path.join(data_root, "train_v2", "train"),
        crop_h=64, crop_w=256
    )
    test_ds = HandwritingDataset(
        os.path.join(data_root, "written_name_test_v2.csv"),
        os.path.join(data_root, "test_v2", "test"),
        char2idx=train_ds.char2idx,
        crop_h=64, crop_w=256
    )

    test_loader = DataLoader(
        test_ds, batch_size=batch_size, shuffle=False,
        collate_fn=test_ds.collate_fn, num_workers=num_workers, pin_memory=True
    )

    # Load checkpoint with consistent vocab
    model = OCRModel.load_from_checkpoint(checkpoint_path, dataset=test_ds, img_height=64)
    model.eval()
    model = model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    all_preds, all_truths = [], []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Testing", unit="batch"):
            imgs, labels, label_lengths, filenames = batch
            imgs = imgs.to(model.device)
            logits, _ = model.model(imgs)
            all_preds.extend(ctc_greedy_decoder(logits.cpu(), model.idx2char))

            # Decode ground truth from the flat label tensor; convert to
            # Python lists once rather than calling .item() per element.
            # NOTE(review): the dataset drops characters missing from the
            # vocabulary, so these strings can be shorter than the raw CSV
            # labels — confirm this is acceptable for the reported metrics.
            flat_labels = labels.tolist()
            offset = 0
            for length in label_lengths.tolist():
                all_truths.append(
                    "".join(test_ds.idx2char[i] for i in flat_labels[offset:offset + length])
                )
                offset += length

    # Print a few examples
    for gt, pred in zip(all_truths[:5], all_preds[:5]):
        print(f"GT: {gt} | PRED: {pred}")

    return all_preds, all_truths


if __name__ == "__main__":
    # Fit the model first; this also writes the checkpoint used below
    train_model()

    # Evaluate the saved checkpoint on the test split
    predictions, references = test_model()
    cer, wer, acc, char_acc = compute_metrics(predictions, references)

    # Assemble the report, then emit it line by line
    summary = [
        f"Test CER (Char Error Rate): {cer:.6f}",
        f"Test 1-CER (Char Accuracy): {char_acc:.6f}",
        f"Test ACC (Exact Match): {acc:.6f}",
        f"Test 1-ACC (Word Error Rate): {1-acc:.6f}",
    ]
    for line in summary:
        print(line)


2025-09-23 14:39:00.232947: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758638340.478041      47 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758638340.544730      47 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


VAL SAMPLE VALIDATION_0001.jpg → pred: RvUvU | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: RvUvUvU | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RvUzvUvU | gt: RUPP


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: AAE | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: A | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: AA | gt: RUPP


/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:434: It is recommended to use `self.log('train_loss', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: ELE | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: AEY | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPR | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPR | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Validation: |          | 0/? [00:00<?, ?it/s]

VAL SAMPLE VALIDATION_0001.jpg → pred: BILEL | gt: BILEL
VAL SAMPLE VALIDATION_0003.jpg → pred: LEA | gt: LEA
VAL SAMPLE VALIDATION_0005.jpg → pred: RUPP | gt: RUPP


Testing: 100%|██████████| 323/323 [01:23<00:00,  3.88batch/s]


GT: KEVIN | PRED: KEVIN
GT: CLOTAIRE | PRED: CHOTIIAE
GT: LENA | PRED: LENA
GT: JULES | PRED: JULES
GT: CHERPIN | PRED: CHERPIN
Test CER (Char Error Rate): 0.059511
Test 1-CER (Char Accuracy): 0.940489
Test ACC (Exact Match): 0.790404
Test 1-ACC (Word Error Rate): 0.209596
