In [2]:
# Breaking-CAPTCHAS-Pytorch.py
# PyTorch version of EE 467 Lab 2 CAPTCHA CNN

import os
import pickle
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from imutils import paths

# =========================
# Utilities (same as TF lab)
# =========================

def resize_to_fit(image, width, height):
    """Resize a 2-D image to exactly ``width`` x ``height`` pixels.

    The longer side is scaled to its target size first (keeping the aspect
    ratio), the remaining space is filled with a constant border of value
    255, and a final resize forces the exact output size.

    Args:
        image: 2-D array; ``image.shape`` is unpacked as ``(height, width)``.
        width: Target width in pixels.
        height: Target height in pixels.

    Returns:
        The image returned by the final ``cv2.resize(image, (width, height))``.
    """
    h, w = image.shape
    if w > h:
        # Clamp to one pixel: for very wide inputs the scaled height would
        # truncate to 0, and cv2.resize rejects a zero-sized target.
        scaled_h = max(1, int(h * width / w))
        image = cv2.resize(image, (width, scaled_h))
    else:
        # Same clamp for very tall inputs.
        scaled_w = max(1, int(w * height / h))
        image = cv2.resize(image, (scaled_w, height))

    # Split the missing pixels evenly between the two opposite borders.
    pad_w = (width - image.shape[1]) // 2
    pad_h = (height - image.shape[0]) // 2

    image = cv2.copyMakeBorder(
        image, pad_h, pad_h, pad_w, pad_w,
        cv2.BORDER_CONSTANT, value=255
    )

    # The floor division above can leave the padded image one pixel short,
    # so resize once more to guarantee the exact requested size.
    return cv2.resize(image, (width, height))


def group_every(iterable, n):
    """Yield consecutive slices of ``iterable``, each holding up to ``n`` items.

    ``iterable`` must support ``len()`` and slicing. The last slice is
    shorter when the length is not a multiple of ``n``.
    """
    starts = range(0, len(iterable), n)
    yield from (iterable[start:start + n] for start in starts)


def make_feature(image):
    """Fit ``image`` into 20x20 pixels and append a trailing length-1 axis."""
    fitted = resize_to_fit(image, 20, 20)
    # Indexing with None adds a new axis of size 1 at the end.
    return fitted[..., None]


def make_feature_label(image_path):
    """Load one character image and return its ``(feature, label)`` pair.

    Args:
        image_path: Path to an image file. The name of its parent directory
            (the second-to-last path component) is used as the label.

    Returns:
        A tuple ``(feature, label)`` where ``feature`` is the result of
        ``make_feature`` on the grayscale image and ``label`` is the parent
        directory name.

    Raises:
        OSError: If OpenCV could not read or decode the file.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than deeper in the
        # pipeline with an unrelated-looking error.
        raise OSError(f"Could not read image: {image_path!r}")

    feature = make_feature(image)
    label = image_path.split(os.path.sep)[-2]
    return feature, label


# =========================
# PyTorch CNN Model
# =========================

class CaptchaCNN(nn.Module):
    """Small convolutional classifier for single-channel character images.

    Two stages of 5x5 convolution (padding 2) + ReLU + 2x2 max pooling are
    followed by a 500-unit hidden layer and a linear output layer that
    returns ``n_classes`` raw scores (no softmax is applied).

    ``fc1`` is sized for 50 * 5 * 5 flattened features, which matches the
    20x20 images produced by ``make_feature``.
    """

    def __init__(self, n_classes):
        super().__init__()

        # Stage 1: 1 -> 20 channels; padding keeps the spatial size, pooling halves it.
        self.conv1 = nn.Conv2d(
            in_channels=1, out_channels=20, kernel_size=5, stride=1, padding=2
        )
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 20 -> 50 channels, same layout as stage 1.
        self.conv2 = nn.Conv2d(
            in_channels=20, out_channels=50, kernel_size=5, stride=1, padding=2
        )
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head on the flattened feature maps.
        flat_features = 50 * 5 * 5
        self.fc1 = nn.Linear(in_features=flat_features, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=n_classes)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, n_classes)`` for ``x``."""
        stage1 = self.pool1(F.relu(self.conv1(x)))
        stage2 = self.pool2(F.relu(self.conv2(stage1)))

        # Flatten everything except the batch dimension.
        flat = stage2.view(stage2.shape[0], -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)


# =========================
# Load Character Dataset
# =========================

# Root folder of the character images. make_feature_label takes each
# image's label from the name of its parent directory, so the folder is
# expected to contain one sub-directory per character.
CHAR_IMAGE_FOLDER = "./char-images-31528476"
# Destination of the pickled, fitted LabelBinarizer.
LABELS_PATH = "./labels.pkl"

# Build one (feature, label) pair per image and unzip them into two tuples.
# Note: if no images are found, zip() yields nothing and the unpacking
# below raises ValueError.
image_paths = list(paths.list_images(CHAR_IMAGE_FOLDER))
features, labels = zip(*(make_feature_label(p) for p in image_paths))

# Stack the features into one float32 array and divide pixel values by 255.
X = np.array(features, dtype="float32") / 255.0
y = np.array(labels)

# Encode the string labels; lb.classes_ holds the distinct labels, so its
# length is the number of output classes for the model.
lb = LabelBinarizer()
y_onehot = lb.fit_transform(y)
n_classes = len(lb.classes_)

# Persist the fitted binarizer (presumably so predictions can be mapped
# back to characters at inference time - confirm against the consumer).
with open(LABELS_PATH, "wb") as f:
    pickle.dump(lb, f)

# Hold out 25% of the samples for validation; the fixed random_state makes
# the split reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X, y_onehot, test_size=0.25, random_state=955996
)

# Convert to PyTorch tensors. make_feature appends the channel axis last,
# so permute (N, H, W, C) -> (N, C, H, W), the layout nn.Conv2d expects.
X_train = torch.tensor(X_train).permute(0, 3, 1, 2)
X_val   = torch.tensor(X_val).permute(0, 3, 1, 2)

# Collapse each encoded label row to an integer class index (dtype long),
# the target format passed to nn.CrossEntropyLoss in the training loop.
# NOTE(review): per scikit-learn's docs LabelBinarizer emits a single column
# for two-class problems, in which case argmax over axis 1 would always be
# 0 - confirm the dataset always has more than two classes.
y_train = torch.tensor(np.argmax(y_train, axis=1), dtype=torch.long)
y_val   = torch.tensor(np.argmax(y_val, axis=1), dtype=torch.long)

# Training batches of 32 samples, drawn with shuffle=True.
train_loader = DataLoader(
    TensorDataset(X_train, y_train),
    batch_size=32,
    shuffle=True
)

# Validation batches of 32 samples; shuffle is left at its default.
val_loader = DataLoader(
    TensorDataset(X_val, y_val),
    batch_size=32
)

# =========================
# Training
# =========================

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CaptchaCNN(n_classes).to(device)

# The model returns raw scores (no softmax) and the targets are integer
# class indices, which is the input pairing nn.CrossEntropyLoss takes.
criterion = nn.CrossEntropyLoss()
# Adam with its default hyperparameters (none are overridden here).
optimizer = torch.optim.Adam(model.parameters())

# Number of full passes over the training set.
EPOCHS = 10

for epoch in range(EPOCHS):
    # ---- Training pass ----
    model.train()
    train_correct = 0
    total = 0

    for xb, yb in train_loader:
        # Move the batch to the same device as the model.
        xb, yb = xb.to(device), yb.to(device)

        # Standard step: clear old gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        outputs = model(xb)
        loss = criterion(outputs, yb)
        loss.backward()
        optimizer.step()

        # Count correct predictions using the outputs computed before this
        # batch's weight update, so the training accuracy is a running
        # figure accumulated while the weights are still changing.
        preds = outputs.argmax(dim=1)
        train_correct += (preds == yb).sum().item()
        total += yb.size(0)

    train_acc = train_correct / total

    # ---- Validation pass ----
    model.eval()
    val_correct = 0
    val_total = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            outputs = model(xb)
            preds = outputs.argmax(dim=1)
            val_correct += (preds == yb).sum().item()
            val_total += yb.size(0)

    val_acc = val_correct / val_total

    # One summary line per epoch with both accuracies as fractions in [0, 1].
    print(
        f"Epoch [{epoch+1}/{EPOCHS}] "
        f"Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}"
    )

# =========================
# Save Model
# =========================

# Output file for the trained weights.
MODEL_PATH = "captcha_model_pytorch.pth"
# Only the state_dict (parameters and buffers) is written, not the model
# class itself; loading it back requires constructing CaptchaCNN with the
# same n_classes first.
torch.save(model.state_dict(), MODEL_PATH)

print("Model saved to", MODEL_PATH)


Epoch [1/10] Train Acc: 0.4499 | Val Acc: 0.8436
Epoch [2/10] Train Acc: 0.9464 | Val Acc: 0.9542
Epoch [3/10] Train Acc: 0.9784 | Val Acc: 0.9553
Epoch [4/10] Train Acc: 0.9896 | Val Acc: 0.9743
Epoch [5/10] Train Acc: 0.9963 | Val Acc: 0.9765
Epoch [6/10] Train Acc: 0.9944 | Val Acc: 0.9765
Epoch [7/10] Train Acc: 0.9985 | Val Acc: 0.9821
Epoch [8/10] Train Acc: 1.0000 | Val Acc: 0.9821
Epoch [9/10] Train Acc: 1.0000 | Val Acc: 0.9821
Epoch [10/10] Train Acc: 1.0000 | Val Acc: 0.9821
Model saved to captcha_model_pytorch.pth
