In [None]:
!pip install python-Levenshtein

Collecting python-Levenshtein
  Downloading python_levenshtein-0.27.1-py3-none-any.whl.metadata (3.7 kB)
Collecting Levenshtein==0.27.1 (from python-Levenshtein)
  Downloading levenshtein-0.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting rapidfuzz<4.0.0,>=3.9.0 (from Levenshtein==0.27.1->python-Levenshtein)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading python_levenshtein-0.27.1-py3-none-any.whl (9.4 kB)
Downloading levenshtein-0.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.7/161.7 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m103.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected package

**Import libraries**

In [None]:
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from collections import defaultdict
import math
import os
import Levenshtein
from tqdm.notebook import tqdm
from datetime import datetime
import random
from torch.optim.lr_scheduler import ReduceLROnPlateau
import matplotlib.pyplot as plt
from torch.cuda.amp import autocast, GradScaler

**Label encoding**

In [None]:
# Alphabet: lowercase, uppercase, digits. Character labels are 1-based
# (1..len(CHARS)); index 0 is not assigned to any character.
CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
CHAR_TO_LABEL = {char: label for label, char in enumerate(CHARS, start=1)}
LABEL_TO_CHAR = {label: char for char, label in CHAR_TO_LABEL.items()}

# NOTE(review): with 1-based labels the last character ('9') gets label
# len(CHARS) == NUM_CLASSES - 1, which is the same index used as the CTC blank
# below. Changing NUM_CLASSES / BLANK_INDEX alters the size of the model's
# output layer and therefore invalidates saved checkpoints, so it is only
# flagged here -- confirm and fix together with a retrain.
NUM_CLASSES = len(CHARS) + 1
BLANK_INDEX = NUM_CLASSES - 1

# Run on the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

**Dataset**

In [None]:
class CaptchaDataset(Dataset):
    """In-memory dataset pairing image tensors with integer label sequences.

    Args:
        X: 4-D array-like of images; axis 3 is moved to position 1 on load.
        y: array-like of integer labels, one row per image.
    """

    def __init__(self, X, y):
        # permute(0, 3, 1, 2) turns (N, d1, d2, d3) into (N, d3, d1, d2),
        # i.e. the trailing axis becomes the channel axis that Conv2d reads.
        images = torch.tensor(X, dtype=torch.float32)
        self.X = images.permute(0, 3, 1, 2)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return self.X.size(0)

    def __getitem__(self, idx):
        image, target = self.X[idx], self.y[idx]
        return image, target

**Model**

In [None]:
class CaptchaModel(nn.Module):
    """CNN feature extractor -> 2-layer bidirectional LSTM -> per-timestep logits.

    ``forward`` returns a tensor of shape (W', B, NUM_CLASSES), where W' is the
    width of the CNN feature map; each column of the feature map is one step
    of the sequence fed to the LSTM.
    """

    def __init__(self):
        super().__init__()

        # Three identical stages: 3x3 conv -> batch norm -> ReLU -> 2x2 max-pool.
        # Layers are appended in the same order as a hand-written Sequential,
        # so the indices (and therefore state_dict keys) are 0..11.
        stages = []
        for in_ch, out_ch in ((1, 64), (64, 128), (128, 256)):
            stages += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]
        self.cnn = nn.Sequential(*stages)

        # The LSTM input size is channels * height = 256 * 5, so the feature
        # map must have height 5 after the three poolings.
        self.rnn = nn.LSTM(256 * 5, 128, num_layers=2, bidirectional=True)

        # 128 hidden units per direction, concatenated.
        self.fc = nn.Linear(128 * 2, NUM_CLASSES)
        with torch.no_grad():
            # Start the last class (the index used as CTC blank elsewhere in
            # this notebook) with a strongly negative bias.
            self.fc.bias[NUM_CLASSES - 1] = -10.0

    def forward(self, x):
        features = self.cnn(x)
        batch, channels, height, width = features.size()
        # (B, C, H, W) -> (W, B, C*H): width becomes the sequence dimension.
        sequence = features.permute(3, 0, 1, 2).reshape(width, batch, channels * height)
        rnn_out, _ = self.rnn(sequence)
        return self.fc(rnn_out)

**Utility functions**

In [None]:
def get_input_lengths(logits):
    """Return a 1-D long tensor with ``logits.size(1)`` entries, each ``logits.size(0)``.

    The indexing treats ``logits`` as (time, batch, ...): dim 0 supplies the
    fill value and dim 1 the number of entries. The result is created on the CPU.
    """
    steps = logits.size(0)
    batch = logits.size(1)
    return torch.full((batch,), steps, dtype=torch.long)

def get_target_lengths(labels):
    """Return a 1-D long tensor with ``labels.size(0)`` entries, each ``labels.size(1)``.

    The indexing treats ``labels`` as (batch, length), i.e. every target in the
    batch is taken to have the same length. The result is created on the CPU.
    """
    batch = labels.size(0)
    length = labels.size(1)
    return torch.full((batch,), length, dtype=torch.long)

def compute_metrics(predictions, labels):
    """Score decoded strings against label sequences.

    Args:
        predictions: sequence of decoded strings, one per sample.
        labels: sequence of label sequences; every element must expose
            ``.item()`` (e.g. the rows of a tensor) and be a key of
            ``LABEL_TO_CHAR``.

    Returns:
        Tuple ``(exact_acc, char_acc, avg_edit)``:
          * exact_acc - fraction of samples whose prediction equals the target;
          * char_acc  - matching characters at the same position (compared up
            to the shorter of prediction/target) divided by the total number
            of target characters;
          * avg_edit  - mean Levenshtein distance per sample.
        All three are 0.0 when ``labels`` is empty, and ``char_acc`` is 0.0
        when there are no target characters, instead of raising
        ZeroDivisionError.
    """
    num_samples = len(labels)
    if num_samples == 0:
        return 0.0, 0.0, 0.0

    total_exact = 0
    total_char = 0
    correct_char = 0
    total_edit_dist = 0

    for pred, true_label in zip(predictions, labels):
        target = "".join(LABEL_TO_CHAR[c.item()] for c in true_label)
        if pred == target:
            total_exact += 1
        # Position-wise comparison; zip stops at the shorter string.
        correct_char += sum(1 for pc, tc in zip(pred, target) if pc == tc)
        total_char += len(target)
        total_edit_dist += Levenshtein.distance(pred, target)

    exact_acc = total_exact / num_samples
    char_acc = correct_char / total_char if total_char else 0.0
    avg_edit = total_edit_dist / num_samples

    return exact_acc, char_acc, avg_edit

def log_add(a, b):
    """Return log(exp(a) + exp(b)) for two log-domain floats.

    ``-inf`` (log of zero) is handled explicitly so that adding "nothing"
    returns the other operand unchanged.
    """
    if a == -float("inf"): return b
    if b == -float("inf"): return a
    # Factor out the larger term so the exponent is never positive.
    return max(a, b) + math.log1p(math.exp(-abs(a - b)))

def ctc_beam_decode(logits, beam_width=5, blank_index=None, label_to_char=None):
    """Decode a batch of CTC logits with prefix beam search.

    For every prefix the search keeps two log-probabilities: the mass of
    alignments that currently end in a blank and the mass of those that end in
    the prefix's last character. Keeping them apart is what allows a repeated
    character to be emitted when (and only when) a blank separates the two
    occurrences, so strings such as "aa" can be decoded.

    Args:
        logits: tensor indexed as (T, B, C) - time, batch, classes.
        beam_width: number of prefixes kept after every time step.
        blank_index: class index of the CTC blank; defaults to the
            module-level ``BLANK_INDEX``.
        label_to_char: mapping from class index to a single character;
            defaults to the module-level ``LABEL_TO_CHAR``. Class indices that
            are neither the blank nor present in the mapping are ignored. The
            blank check takes precedence over the mapping.

    Returns:
        List of B decoded strings (the highest-scoring prefix per sample).
    """
    if blank_index is None:
        blank_index = BLANK_INDEX
    if label_to_char is None:
        label_to_char = LABEL_TO_CHAR

    neg_inf = -float("inf")
    decoded_batch = []

    for sample in logits.permute(1, 0, 2):  # iterate over the batch: (T, C) each
        # One bulk conversion instead of an .item() call per (beam, step, class).
        step_log_probs = F.log_softmax(sample, dim=1).tolist()

        # prefix -> (log P(ends in blank), log P(ends in last character))
        beams = [("", (0.0, neg_inf))]

        for frame in step_log_probs:
            candidates = defaultdict(lambda: (neg_inf, neg_inf))

            for prefix, (p_blank, p_char) in beams:
                p_prefix = log_add(p_blank, p_char)

                for cls, p in enumerate(frame):
                    if cls == blank_index:
                        # A blank keeps the prefix and moves all mass to "ends in blank".
                        cur_b, cur_c = candidates[prefix]
                        candidates[prefix] = (log_add(cur_b, p_prefix + p), cur_c)
                        continue

                    char = label_to_char.get(cls)
                    if not char:
                        continue  # class without a character: cannot be emitted

                    extended = prefix + char
                    if prefix and char == prefix[-1]:
                        # Same character again without a blank: collapses into the prefix.
                        cur_b, cur_c = candidates[prefix]
                        candidates[prefix] = (cur_b, log_add(cur_c, p_char + p))
                        # Same character after a blank: a genuine repeat.
                        ext_b, ext_c = candidates[extended]
                        candidates[extended] = (ext_b, log_add(ext_c, p_blank + p))
                    else:
                        ext_b, ext_c = candidates[extended]
                        candidates[extended] = (ext_b, log_add(ext_c, p_prefix + p))

            beams = sorted(
                candidates.items(),
                key=lambda item: log_add(item[1][0], item[1][1]),
                reverse=True,
            )[:beam_width]

        decoded_batch.append(beams[0][0] if beams else "")

    return decoded_batch



def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch (CPU + CUDA) RNGs and pin cuDNN to deterministic mode.

    Args:
        seed: integer seed applied to every generator.
    """
    # NOTE(review): setting PYTHONHASHSEED at runtime is not expected to change
    # string hashing in the already-running interpreter -- confirm whether this
    # is only meant for subprocesses.
    os.environ['PYTHONHASHSEED'] = str(seed)

    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    # Disable the cuDNN autotuner and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True



**Data Loading**

In [None]:
# Mount Google Drive so the dataset and checkpoints under /content/drive are reachable.
from google.colab import drive
drive.mount('/content/drive')


#---------SEED CHANGE-----------
# Seeding is currently disabled: weight init and batch shuffling are not
# reproducible across runs (the dataset splits below are, via random_state).
#set_seed(1234)
data_dir = "/content/drive/MyDrive/captcha_np_data"

# Load the pre-processed images and their integer labels from .npy files.
X = np.load(f"{data_dir}/X_data.npy")
# Shift every stored label up by one so labels line up with the 1-based
# LABEL_TO_CHAR keys (presumably the stored labels are 0-based -- TODO confirm).
y = np.load(f"{data_dir}/y_labels.npy")+ 1  # +1 for CTC blank handling

# Hold out 10% for testing, then 10% of the remainder for validation
# (roughly 81% train / 9% val / 10% test); both splits use a fixed random_state.
X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, test_size=0.1, random_state=69)
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.1, random_state=69)

#---------SEED CHANGE-----------
# Disabled placeholder for a seeded shuffle generator; as written,
# manual_seed() has no seed argument and the generator is not passed to any loader.
#g = torch.Generator()
#g.manual_seed()

# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(CaptchaDataset(X_train, y_train), batch_size=64, shuffle=True)
val_loader   = DataLoader(CaptchaDataset(X_val, y_val), batch_size=64)
test_loader  = DataLoader(CaptchaDataset(X_test, y_test), batch_size=64)

Mounted at /content/drive


**Model functions**

In [None]:
def save_model(model, optimizer, epoch, seed, save_dir="/content/drive/MyDrive/captcha_gold_models"):
    """Write a timestamped checkpoint and return its path.

    Args:
        model: module whose ``state_dict()`` is saved.
        optimizer: optimizer whose ``state_dict()`` is saved.
        epoch: epoch number; used in the file name and stored in the checkpoint.
        seed: seed identifier; used in the file name and stored in the checkpoint.
        save_dir: directory to write into (created if missing). Defaults to
            the Drive folder this notebook has always used.

    Returns:
        Full path of the written ``.pt`` file.

    The checkpoint keeps the ``model_state_dict`` / ``optimizer_state_dict``
    keys that ``load_model`` reads and additionally records ``epoch`` and
    ``seed`` so a run can be identified without parsing the file name.
    """
    os.makedirs(save_dir, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"final_model_seed{seed}_epoch{epoch}_{timestamp}.pt"
    path = os.path.join(save_dir, filename)

    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'epoch': epoch,
        'seed': seed,
    }, path)

    print(f"\n Model saved as: {filename}")
    return path



def load_model(model_path):
    """Rebuild a CaptchaModel and its Adam optimizer from a checkpoint file.

    Args:
        model_path: path to a file containing ``model_state_dict`` and
            ``optimizer_state_dict`` entries.

    Returns:
        Tuple ``(model, optimizer)``; the model lives on ``DEVICE`` and the
        optimizer is an Adam instance (lr=0.001) restored from the checkpoint.
    """
    net = CaptchaModel()
    net = net.to(DEVICE)
    opt = optim.Adam(net.parameters(), lr=0.001)

    # NOTE(review): torch.load unpickles the file -- only load checkpoints
    # that come from a trusted source.
    saved = torch.load(model_path, map_location=DEVICE)
    net.load_state_dict(saved['model_state_dict'])
    opt.load_state_dict(saved['optimizer_state_dict'])

    print(f" Loaded model from: {model_path}")
    return net, opt

**Training**

In [None]:
# Resume from a previously saved checkpoint (model weights + optimizer state).
model,optimizer = load_model("/content/drive/MyDrive/captcha_gold_models/model_seed42_epoch20_20250611_015556.pt")

# BLANK_INDEX == NUM_CLASSES - 1: the same blank index the decoder uses.
ctc_loss = nn.CTCLoss(blank=BLANK_INDEX, zero_infinity=True)

# Halve the learning rate after 3 epochs without improvement of the stepped metric.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

# Mixed precision: the scaler multiplies the loss before backward so that
# small half-precision gradients do not underflow.
scaler = GradScaler()


train_losses = []
val_exact_accs = []
val_char_accs = []
val_edit_dists = []

num_epochs = 38

#Training
for epoch in range(num_epochs):

    model.train()
    total_loss = 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=True)

    # Iterate the tqdm wrapper (not the bare loader) so the bar actually advances.
    for images, labels in progress_bar:

        images, labels = images.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad(set_to_none=True)

        with autocast():
            logits     = model(images)
            log_probs  = logits.log_softmax(2)

            # Derive the CTC lengths from the tensors themselves instead of
            # hard-coding the sequence length and the captcha length.
            input_lengths  = get_input_lengths(logits).to(DEVICE)
            target_lengths = get_target_lengths(labels).to(DEVICE)

            loss       = ctc_loss(log_probs, labels.view(-1), input_lengths, target_lengths)

        # ---------- Back-prop ----------
        scaler.scale(loss).backward()
        # Gradients are still multiplied by the loss scale here; unscale them
        # first so max_norm applies to the true gradient norm.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
        scaler.step(optimizer)
        scaler.update()


        total_loss += loss.item()
        progress_bar.set_postfix(loss=loss.item())

    # Summed (not averaged) training loss of the epoch.
    train_losses.append(total_loss)

    #validation every 5 epochs
    # if (epoch + 1) % 5 == 0:

    #   model.eval()
    #   predictions_all = []
    #   labels_all = []
    #   with torch.no_grad():

    #       for images, labels in val_loader:

    #           images, labels = images.to(DEVICE), labels.to(DEVICE)
    #           logits = model(images)
    #           predictions = ctc_beam_decode(logits)
    #           predictions_all.extend(predictions)
    #           labels_all.extend(labels)

    #   exact_acc, char_acc, edit_dist = compute_metrics(predictions_all, labels_all)

    #   print(f"Epoch {epoch+1}: Loss = {total_loss:.4f} | Exact Acc = {exact_acc*100:.2f}% | Char Acc = {char_acc*100:.2f}% | Edit Dist = {edit_dist:.2f}")

    #   val_exact_accs.append(exact_acc)
    #   val_char_accs.append(char_acc)
    #   val_edit_dists.append(edit_dist)


    # The scheduler tracks the epoch's training loss because validation is disabled above.
    scheduler.step(total_loss) #was edit_dist

    # ===================== DIAGNOSTIC ZONE =====================
    # print("\n Diagnostics on 5 validation samples:")
    # with torch.no_grad():
    #     for images, labels in val_loader:
    #         images, labels = images.to(DEVICE), labels.to(DEVICE)
    #         logits = model(images)
    #         log_probs = F.log_softmax(logits, dim=2)
    #         predictions = ctc_beam_decode(logits)

    #         for i in range(min(5, images.size(0))):
    #             target = "".join([LABEL_TO_CHAR[c.item()] for c in labels[i]])
    #             pred = predictions[i]

    #             print("=" * 60)
    #             print(f" True:  {target}")
    #             print(f" Pred:  {pred}")

    #             probs = log_probs[:, i, :].exp()  # shape: (T, C)
    #             top_chars = probs.topk(1, dim=1).indices.squeeze(1).tolist()
    #             top_scores = probs.max(dim=1).values.tolist()

    #             for t, (char_idx, score) in enumerate(zip(top_chars, top_scores)):
    #                 char = LABEL_TO_CHAR.get(char_idx, "␣") if char_idx != BLANK_INDEX else "—"
    #                 print(f"Timestep {t:2d}: {char:2s} ({score:.2f})")
    #         break  # Only 1 batch
    # print("============================================================\n")
    # ============================================================

    #Save the model every 5 epochs
    # if (epoch + 1) % 5 == 0:
    #     save_model(model, optimizer, epoch + 1, seed=42)

# plot_training_metrics(train_losses, val_exact_accs, val_char_accs, val_edit_dists)



 Loaded model from: /content/drive/MyDrive/captcha_gold_models/model_seed42_epoch20_20250611_015556.pt


  scaler = GradScaler()


Epoch 1/38:   0%|          | 0/1431 [00:00<?, ?it/s]

  with autocast():


Epoch 2/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 3/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 4/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 5/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 6/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 7/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 8/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 9/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 10/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 11/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 12/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 13/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 14/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 15/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 16/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 17/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 18/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 19/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 20/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 21/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 22/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 23/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 24/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 25/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 26/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 27/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 28/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 29/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 30/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 31/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 32/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 33/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 34/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 35/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 36/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 37/38:   0%|          | 0/1431 [00:00<?, ?it/s]

Epoch 38/38:   0%|          | 0/1431 [00:00<?, ?it/s]

**Testing**

In [None]:

# Evaluate the trained model on the held-out test split.
model.eval()
predictions_all = []
labels_all = []

with torch.no_grad():
    for images, labels in test_loader:
        # Only the images are needed on the device. The labels are consumed
        # element by element on the host when metrics are computed, so they
        # stay on the CPU (avoids one device sync per character).
        images = images.to(DEVICE)
        logits = model(images)
        predictions = ctc_beam_decode(logits)

        predictions_all.extend(predictions)
        labels_all.extend(labels)

exact_acc, char_acc, edit_dist = compute_metrics(predictions_all, labels_all)
print(f"\n FINAL TEST RESULTS:\nExact Accuracy: {exact_acc*100:.2f}%\nChar Accuracy: {char_acc*100:.2f}%\nAvg. Edit Distance: {edit_dist:.2f}")



**Save the final model**

In [None]:
# Epoch and seed are only used to label the checkpoint.
# NOTE(review): the training cell resumes an "epoch20" checkpoint and runs 38
# more epochs (58 in total) -- confirm that 60 is the intended label.
save_model(model, optimizer, epoch=60, seed=42)


 Model saved as: final_model_seed42_epoch60_20250611_232143.pt
