<a href="https://colab.research.google.com/github/LineIntegralx/CalligraNet/blob/main/Training_Scripts/CNN_Swin_CTC_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
!pip install -q timm

import os
import random
from pathlib import Path
from collections import Counter

import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import torchvision.transforms as T
from PIL import Image
import timm

# ----------------- CONFIG -----------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

ROOT = Path("/content/drive/MyDrive/EECE693_Project")

DATA_ROOTS = {
    "D0_preprocessed": ROOT / "Preprocessed_HICMA",
    "D1_augmented":    ROOT / "Augmented_HICMA",
    "D2_synth":        ROOT / "HICMA_Plus_Synthetic",
}

CKPT_DIR = ROOT / "SwinCTC_Checkpoints"
CKPT_DIR.mkdir(parents=True, exist_ok=True)
print("Checkpoint dir:", CKPT_DIR)

BATCH_SIZE    = 8
NUM_EPOCHS    = 40
FREEZE_EPOCHS = 5          # epochs with Swin frozen
LR_MAIN       = 3e-4       # CNN stem + CTC head
LR_SWIN       = 1e-4       # Swin fine-tuning
WEIGHT_DECAY  = 1e-2
PATIENCE      = 7          # early stopping on val CER

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if device.type == "cuda":
    torch.cuda.manual_seed_all(SEED)


Device: cuda
Checkpoint dir: /content/drive/MyDrive/EECE693_Project/SwinCTC_Checkpoints


In [3]:
def load_split_dfs(base_dir: Path):
    dfs = {}
    for split in ["train", "val", "test"]:
        dfs[split] = pd.read_csv(base_dir / f"{split}_labels.csv")
    return dfs

for name, base in DATA_ROOTS.items():
    print(f"\n=== {name} ===")
    dfs = load_split_dfs(base)
    for split, df in dfs.items():
        df["label"] = df["label"].astype(str)
        print(f"{split}: {len(df)} rows, classes={df['class'].value_counts().to_dict()}")
        print(f"  avg label len = {df['label'].str.len().mean():.1f}")

# ---- build char vocab from richest dataset (D2_synth) ----
def build_vocab_from_dataset(base_dir: Path):
    labels = []
    for split in ["train", "val", "test"]:
        df = pd.read_csv(base_dir / f"{split}_labels.csv")
        labels.extend(df["label"].astype(str).tolist())
    all_text = "".join(labels)
    char_counter = Counter(all_text)
    chars = sorted(list(char_counter.keys()))
    print("\nNum unique chars:", len(chars))
    return chars

chars = build_vocab_from_dataset(DATA_ROOTS["D2_synth"])

BLANK_IDX = 0
stoi = {ch: i + 1 for i, ch in enumerate(chars)}   # chars start at 1
itos = {i + 1: ch for i, ch in enumerate(chars)}
vocab_size = len(chars) + 1  # + blank
print("vocab_size (including blank):", vocab_size)

class TextEncoder:
    def __init__(self, stoi, itos, blank_idx=0):
        self.stoi = stoi
        self.itos = itos
        self.blank_idx = blank_idx

    def encode(self, text: str):
        return [self.stoi[c] for c in text if c in self.stoi]

    def decode(self, ids):
        return "".join(self.itos[i] for i in ids if i in self.itos)

text_encoder = TextEncoder(stoi, itos, BLANK_IDX)



=== D0_preprocessed ===
train: 4020 rows, classes={'Naskh': 2988, 'Thuluth': 808, 'Diwani': 190, 'Kufic': 21, 'Muhaquaq': 13}
  avg label len = 42.5
val: 502 rows, classes={'Naskh': 373, 'Thuluth': 101, 'Diwani': 23, 'Kufic': 3, 'Muhaquaq': 2}
  avg label len = 41.6
test: 503 rows, classes={'Naskh': 374, 'Thuluth': 101, 'Diwani': 24, 'Muhaquaq': 2, 'Kufic': 2}
  avg label len = 44.5

=== D1_augmented ===
train: 20000 rows, classes={'Naskh': 4000, 'Thuluth': 4000, 'Diwani': 4000, 'Muhaquaq': 4000, 'Kufic': 4000}
  avg label len = 34.4
val: 502 rows, classes={'Naskh': 373, 'Thuluth': 101, 'Diwani': 23, 'Kufic': 3, 'Muhaquaq': 2}
  avg label len = 41.6
test: 503 rows, classes={'Naskh': 374, 'Thuluth': 101, 'Diwani': 24, 'Muhaquaq': 2, 'Kufic': 2}
  avg label len = 44.5

=== D2_synth ===
train: 27316 rows, classes={'Kufic': 6923, 'Diwani': 6296, 'Thuluth': 6097, 'Naskh': 4000, 'Muhaquaq': 4000}
  avg label len = 37.6
val: 1416 rows, classes={'Naskh': 373, 'Kufic': 368, 'Thuluth': 363, 'Di

In [30]:
# No geometric transforms – your preprocessing already did letterbox+padding.
train_transform = T.Compose([
    T.ToTensor(),                      # [1, H, W] in [0,1]
    T.Normalize(mean=[0.5], std=[0.5]) # ~[-1,1]
])

eval_transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=[0.5], std=[0.5])
])

class HICMADataset(Dataset):
    def __init__(self, base_dir: Path, split: str, transform, text_encoder: TextEncoder):
        self.base_dir = base_dir
        self.split = split
        self.transform = transform
        self.text_encoder = text_encoder

        df = pd.read_csv(self.base_dir / f"{split}_labels.csv")
        self.df = df[["img_name", "class", "label"]].copy()
        self.df["label"] = self.df["label"].astype(str)

        self.img_dir = self.base_dir / split / "images"

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_name = row["img_name"]
        cls      = row["class"]
        text     = row["label"]

        img_path = self.img_dir / img_name
        image = Image.open(img_path).convert("L")   # grayscale
        image = self.transform(image)               # [1, H, W]

        target = torch.tensor(self.text_encoder.encode(text), dtype=torch.long)

        return {
            "image": image,
            "target": target,
            "text": text,
            "class": cls,
            "img_name": img_name,
        }

def ctc_collate(batch):
    images  = [b["image"] for b in batch]    # each: [1, H_i, W_i]
    targets = [b["target"] for b in batch]

    batch_size = len(images)
    C = images[0].shape[0]
    heights = [img.shape[1] for img in images]
    widths  = [img.shape[2] for img in images]

    max_h = max(heights)
    max_w = max(widths)

    # --- make them divisible by 4 for Swin patch size ---
    def round_up(x, m=4):
        return ((x + m - 1) // m) * m

    max_h = round_up(max_h, 4)
    max_w = round_up(max_w, 4)
    # ----------------------------------------------------

    pad_val = 1.0  # white in normalized space
    padded = torch.full((batch_size, C, max_h, max_w),
                        pad_val, dtype=images[0].dtype)

    for i, img in enumerate(images):
        h, w = img.shape[1], img.shape[2]
        padded[i, :, :h, :w] = img  # top-left placement

    target_lengths = torch.tensor([t.size(0) for t in targets], dtype=torch.long)
    targets_concat = torch.cat(targets, dim=0)

    meta = {
        "texts": [b["text"] for b in batch],
        "img_names": [b["img_name"] for b in batch],
        "classes": [b["class"] for b in batch],
        "widths": widths,
        "heights": heights,
    }

    return padded, targets_concat, target_lengths, meta


# Quick sanity check
for name, base in DATA_ROOTS.items():
    ds = HICMADataset(base, "train", train_transform, text_encoder)
    print(f"{name} train size:", len(ds))
    sample = ds[0]
    print(f"  one image shape: {sample['image'].shape}, text len={len(sample['text'])}")
    break


D0_preprocessed train size: 4020
  one image shape: torch.Size([1, 256, 4000]), text len=50


In [13]:
class CNNSwinCTC(nn.Module):
    def __init__(self, vocab_size, hidden_dim=256):
        super().__init__()

        # CNN stem: downsample width only, keep height
        self.cnn_stem = nn.Sequential(
            # 1 x H x W -> 32 x H x (W/2)
            nn.Conv2d(1, 32, kernel_size=3, stride=(1, 2), padding=1),
            nn.BatchNorm2d(32),
            nn.GELU(),

            # 32 x H x W/2 -> 64 x H x (W/4)
            nn.Conv2d(32, 64, kernel_size=3, stride=(1, 2), padding=1),
            nn.BatchNorm2d(64),
            nn.GELU(),

            # 64 x H x W/4 -> 3 x H x (W/8)
            nn.Conv2d(64, 3, kernel_size=3, stride=(1, 2), padding=1),
            nn.BatchNorm2d(3),
            nn.GELU(),
        )

        # Pretrained Swin-T backbone
        self.swin = timm.create_model(
            "swin_tiny_patch4_window7_224",
            pretrained=True,
            features_only=True,
            out_indices=[-1],
            in_chans=3,
            img_size=256,          # our typical height
            strict_img_size=False  # allow non-224 sizes
        )
        swin_out_ch = self.swin.feature_info[-1]["num_chs"]

        self.proj = nn.Linear(swin_out_ch, hidden_dim)
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x):
        """
        x: [B, 1, H, W]
        returns: log_probs [T, B, C], T (int)
        """
        x = self.cnn_stem(x)          # [B, 3, H, W/8]

        feat = self.swin(x)[0]        # [B, Hs, Ws, Cs]  (channels-last from timm)
        # average over height -> [B, Ws, Cs]
        feat = feat.mean(dim=1)

        # here: last dim = Cs (e.g. 768), matches swin_out_ch
        feat = self.proj(feat)        # [B, T, hidden]
        feat = self.dropout(feat)
        logits = self.classifier(feat)  # [B, T, vocab_size]

        log_probs = F.log_softmax(logits, dim=-1)
        T_len = logits.size(1)         # sequence length along width
        return log_probs.permute(1, 0, 2), T_len  # [T, B, C], T


In [14]:
def levenshtein(a, b):
    """Classic edit distance for CER/WER."""
    dp = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
    for i in range(len(a) + 1):
        dp[i][0] = i
    for j in range(len(b) + 1):
        dp[0][j] = j
    for i in range(1, len(a) + 1):
        for j in range(1, len(b) + 1):
            cost = 0 if a[i - 1] == b[j - 1] else 1
            dp[i][j] = min(
                dp[i-1][j] + 1,
                dp[i][j-1] + 1,
                dp[i-1][j-1] + cost,
            )
    return dp[-1][-1]

def compute_cer(preds, gts):
    total_dist, total_len = 0, 0
    for p, g in zip(preds, gts):
        total_dist += levenshtein(p, g)
        total_len  += len(g)
    return total_dist / max(total_len, 1)

def compute_wer(preds, gts):
    total_dist, total_len = 0, 0
    for p, g in zip(preds, gts):
        p_words = p.split()
        g_words = g.split()
        total_dist += levenshtein(p_words, g_words)
        total_len  += len(g_words)
    return total_dist / max(total_len, 1)

def greedy_decode(log_probs, text_encoder: TextEncoder):
    """
    log_probs: [T, B, C]
    return: list of predicted strings
    """
    max_ids = log_probs.argmax(dim=-1).transpose(0, 1)  # [B, T]
    pred_strs = []
    for seq in max_ids:
        prev = BLANK_IDX
        ids = []
        for i in seq.tolist():
            if i != prev and i != BLANK_IDX:
                ids.append(i)
            prev = i
        pred_strs.append(text_encoder.decode(ids))
    return pred_strs

def evaluate(model, loader, text_encoder: TextEncoder):
    model.eval()
    total_loss = 0.0
    all_preds, all_gts = [], []

    with torch.no_grad():
        for images, targets, target_lengths, meta in loader:
            images  = images.to(device)
            targets = targets.to(device)

            log_probs, T_len = model(images)  # [T, B, C]
            input_lengths = torch.full(
                (images.size(0),), T_len, dtype=torch.long, device=device
            )

            loss = F.ctc_loss(
                log_probs, targets,
                input_lengths, target_lengths,
                blank=BLANK_IDX, zero_infinity=True
            )
            total_loss += loss.item() * images.size(0)

            pred_strs = greedy_decode(log_probs.cpu(), text_encoder)
            all_preds.extend(pred_strs)
            all_gts.extend(meta["texts"])

    avg_loss = total_loss / len(loader.dataset)
    cer = compute_cer(all_preds, all_gts)
    wer = compute_wer(all_preds, all_gts)
    return avg_loss, cer, wer


In [21]:
from torch.utils.data import DataLoader

def make_dataloaders_for_experiment(base_dir: Path, text_encoder: TextEncoder):
    """Create train/val/test datasets + dataloaders for one experiment."""
    train_ds = HICMADataset(base_dir, "train", train_transform, text_encoder)
    val_ds   = HICMADataset(base_dir, "val",   eval_transform,  text_encoder)
    # Fixed test set from *original* HICMA (D0)
    test_ds  = HICMADataset(DATA_ROOTS["D0_preprocessed"], "test", eval_transform, text_encoder)

    # num_workers=0 is safer with Drive in Colab
    train_loader = DataLoader(
        train_ds, batch_size=BATCH_SIZE, shuffle=True,
        num_workers=0, pin_memory=True, collate_fn=ctc_collate
    )
    val_loader = DataLoader(
        val_ds, batch_size=BATCH_SIZE, shuffle=False,
        num_workers=0, pin_memory=True, collate_fn=ctc_collate
    )
    test_loader = DataLoader(
        test_ds, batch_size=BATCH_SIZE, shuffle=False,
        num_workers=0, pin_memory=True, collate_fn=ctc_collate
    )
    return train_ds, val_ds, test_ds, train_loader, val_loader, test_loader


def train_experiment(exp_name: str, base_dir: Path):
    print(f"\n\n########## {exp_name} on {base_dir.name} ##########")

    print("  -> Building dataloaders...")
    train_ds, val_ds, test_ds, train_loader, val_loader, test_loader = \
        make_dataloaders_for_experiment(base_dir, text_encoder)
    print("  -> Dataloaders ready. Building model...")

    model = CNNSwinCTC(vocab_size=vocab_size).to(device)
    print("  -> Model ready. Starting training...")

    # Stage 1: freeze Swin (only stem + head train at first)
    for p in model.swin.parameters():
        p.requires_grad = False

    # Optimizer with param groups: main vs Swin
    main_params = [p for n, p in model.named_parameters() if not n.startswith("swin.")]
    swin_params = [p for n, p in model.named_parameters() if n.startswith("swin.")]

    optimizer = torch.optim.AdamW(
        [
            {"params": main_params, "lr": LR_MAIN},
            {"params": swin_params, "lr": LR_SWIN},
        ],
        weight_decay=WEIGHT_DECAY
    )

    best_val_cer = float("inf")
    best_val_wer = None
    best_epoch   = -1
    best_state   = None
    patience_left = PATIENCE

    for epoch in range(1, NUM_EPOCHS + 1):
        # Unfreeze Swin after FREEZE_EPOCHS
        if epoch == FREEZE_EPOCHS + 1:
            print(">> Unfreezing Swin backbone for fine-tuning.")
            for p in model.swin.parameters():
                p.requires_grad = True

        model.train()
        running_loss = 0.0

        for images, targets, target_lengths, meta in train_loader:
            images  = images.to(device)
            targets = targets.to(device)
            target_lengths = target_lengths.to(device)

            optimizer.zero_grad()
            log_probs, T_len = model(images)
            input_lengths = torch.full(
                (images.size(0),), T_len, dtype=torch.long, device=device
            )

            loss = F.ctc_loss(
                log_probs, targets,
                input_lengths, target_lengths,
                blank=BLANK_IDX, zero_infinity=True
            )
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            running_loss += loss.item() * images.size(0)

        train_loss = running_loss / len(train_ds)
        val_loss, val_cer, val_wer = evaluate(model, val_loader, text_encoder)
        print(f"Epoch {epoch:02d}: "
              f"train_loss={train_loss:.4f}  "
              f"val_loss={val_loss:.4f}  "
              f"val_CER={val_cer:.4f}  "
              f"val_WER={val_wer:.4f}")

        # Early stopping on CER
        if val_cer < best_val_cer:
            best_val_cer = val_cer
            best_val_wer = val_wer
            best_epoch   = epoch
            best_state   = model.state_dict()
            patience_left = PATIENCE

            ckpt_path = CKPT_DIR / f"{exp_name}_best.pt"
            torch.save({
                "model_state": best_state,
                "epoch": best_epoch,
                "val_cer": best_val_cer,
                "val_wer": best_val_wer,
                "vocab_size": vocab_size,
                "chars": chars,
            }, ckpt_path)
            print(f"  >> New best model saved to: {ckpt_path}")
        else:
            patience_left -= 1
            if patience_left <= 0:
                print("Early stopping triggered.")
                break

    # Load best model and evaluate on test set
    if best_state is not None:
        model.load_state_dict(best_state)
    test_loss, test_cer, test_wer = evaluate(model, test_loader, text_encoder)
    print(f"[{exp_name}] BEST epoch={best_epoch}  "
          f"val_CER={best_val_cer:.4f}  val_WER={best_val_wer:.4f}  "
          f"TEST_CER={test_cer:.4f}  TEST_WER={test_wer:.4f}")

    return {
        "exp": exp_name,
        "train_size": len(train_ds),
        "val_size": len(val_ds),
        "test_size": len(test_ds),
        "best_epoch": best_epoch,
        "best_val_cer": best_val_cer,
        "best_val_wer": best_val_wer,
        "test_cer": test_cer,
        "test_wer": test_wer,
    }


In [22]:
results = []
for exp_name, base in DATA_ROOTS.items():
    res = train_experiment(exp_name, base)
    results.append(res)

results_df = pd.DataFrame(results)
results_df




########## D0_preprocessed on Preprocessed_HICMA ##########
  -> Building dataloaders...
  -> Dataloaders ready. Building model...
  -> Model ready. Starting training...
Epoch 01: train_loss=0.1135  val_loss=0.0822  val_CER=0.9148  val_WER=1.0000
  >> New best model saved to: /content/drive/MyDrive/EECE693_Project/SwinCTC_Checkpoints/D0_preprocessed_best.pt
Epoch 02: train_loss=0.0938  val_loss=0.0803  val_CER=0.9226  val_WER=0.9988
Epoch 03: train_loss=0.0910  val_loss=0.0853  val_CER=0.9188  val_WER=1.0000
Epoch 04: train_loss=0.0798  val_loss=0.0774  val_CER=0.8959  val_WER=0.9993
  >> New best model saved to: /content/drive/MyDrive/EECE693_Project/SwinCTC_Checkpoints/D0_preprocessed_best.pt
Epoch 05: train_loss=0.0846  val_loss=0.0831  val_CER=0.9117  val_WER=0.9995
>> Unfreezing Swin backbone for fine-tuning.
Epoch 06: train_loss=0.1016  val_loss=0.1045  val_CER=0.9472  val_WER=0.9979
Epoch 07: train_loss=0.0884  val_loss=0.0768  val_CER=0.9655  val_WER=0.9990
Epoch 08: train_lo

KeyboardInterrupt: 

In [31]:
results = []

exp_name = "D2_synth"
base_dir = DATA_ROOTS["D2_synth"]

res = train_experiment(exp_name, base_dir)
results.append(res)

results_df = pd.DataFrame(results)
results_df




########## D2_synth on HICMA_Plus_Synthetic ##########
  -> Building dataloaders...
  -> Dataloaders ready. Building model...
  -> Model ready. Starting training...


AssertionError: Input width (217) should be divisible by patch size (4).

In [23]:
# Build loaders just for D0_preprocessed
base_dir = DATA_ROOTS["D0_preprocessed"]

train_ds, val_ds, test_ds, train_loader, val_loader, test_loader = \
    make_dataloaders_for_experiment(base_dir, text_encoder)

len(train_ds), len(val_ds), len(test_ds)


(4020, 502, 503)

In [24]:
ckpt_path = CKPT_DIR / "D0_preprocessed_best.pt"
print("Loading checkpoint from:", ckpt_path)

ckpt = torch.load(ckpt_path, map_location=device)
print(ckpt.keys())  # should show: dict_keys(['model_state', 'epoch', 'val_cer', ...])

model = CNNSwinCTC(vocab_size=vocab_size).to(device)
model.load_state_dict(ckpt["model_state"])
model.eval()


Loading checkpoint from: /content/drive/MyDrive/EECE693_Project/SwinCTC_Checkpoints/D0_preprocessed_best.pt
dict_keys(['model_state', 'epoch', 'val_cer', 'val_wer', 'vocab_size', 'chars'])


CNNSwinCTC(
  (cnn_stem): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 2), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): GELU(approximate='none')
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 2), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): GELU(approximate='none')
    (6): Conv2d(64, 3, kernel_size=(3, 3), stride=(1, 2), padding=(1, 1))
    (7): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): GELU(approximate='none')
  )
  (swin): FeatureListNet(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
    )
    (layers_0): SwinTransformerStage(
      (downsample): Identity()
      (blocks): Sequential(
        (0): SwinTransformerBlock(
          (norm1): LayerNorm((96,), eps=1e

In [25]:
val_loss, val_cer, val_wer = evaluate(model, val_loader, text_encoder)
test_loss, test_cer, test_wer = evaluate(model, test_loader, text_encoder)

print(f"VAL  -> loss={val_loss:.4f}, CER={val_cer:.4f}, WER={val_wer:.4f}")
print(f"TEST -> loss={test_loss:.4f}, CER={test_cer:.4f}, WER={test_wer:.4f}")


VAL  -> loss=0.0774, CER=0.8959, WER=0.9993
TEST -> loss=0.0414, CER=0.9000, WER=0.9996


In [26]:
def show_sample_predictions(model, loader, text_encoder, num_batches=2):
    model.eval()
    with torch.no_grad():
        for b_idx, (images, targets, target_lengths, meta) in enumerate(loader):
            images = images.to(device)
            log_probs, T_len = model(images)     # [T, B, C]
            preds = greedy_decode(log_probs.cpu(), text_encoder)

            for gt, pred, img_name in zip(meta["texts"], preds, meta["img_names"]):
                print(f"IMG  : {img_name}")
                print(f"GT   : {gt}")
                print(f"PRED : {pred}")
                print("-" * 50)

            if b_idx + 1 >= num_batches:
                break

# Show a few from validation
show_sample_predictions(model, val_loader, text_encoder, num_batches=1)


IMG  : 177-3.png
GT   : من عبادك و اشرف المنادين لطرق رشادك
PRED : عالله
--------------------------------------------------
IMG  : Al-Anam-691-300x57_segment_no_0.png
GT   : قل الله ثم ذرهم في خوضهم يلعبون
PRED : علقللله
--------------------------------------------------
IMG  : 175-4.png
GT   : افضل قائم بحقك المبغوث بتيسيرك و رفقك
PRED : عمعمسلله
--------------------------------------------------
IMG  : 566-3.png
GT   : ولكن أكثرهم لا يعلمون وما من دابة في الأرض ولا
PRED : يلملململ
--------------------------------------------------
IMG  : Fatiha-2-Round-White_segment_no_0 - Copy (5).png
GT   : اهدنا الصراط المستقيم صراط الذين
PRED : عالله
--------------------------------------------------
IMG  : 105-3.png
GT   : منيع و حرز حصين من جميع خلقك حتى تبلغني
PRED : عيلله
--------------------------------------------------
IMG  : 50-16b_segment_no_0.png
GT   : و نحن أقرب إليه من حبل الوريد
PRED : لقيللله
--------------------------------------------------
IMG  : 253-1.png
GT   : على ابراهيم وبا