In [22]:
import os
from PIL import Image
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Directory that holds the register-number images
data_dir = 'Register Numbers/'

# Pair every PNG in the directory with the part of its file name that
# precedes the first '.'; that prefix is used as the raw label.
image_pairs = [
    (os.path.join(data_dir, filename), filename.partition('.')[0])
    for filename in os.listdir(data_dir)
    if filename.endswith('.png')
]

# Validate labels
def validate_label(label):
    """Return ``label`` unchanged if it is a well-formed register number.

    A valid register number consists of exactly 12 ASCII digits (``0``-``9``).

    ``str.isdigit`` on its own also accepts non-ASCII digit characters such as
    the superscript ``'²'`` or digits from other scripts. Those would either
    crash a later ``int(digit)`` conversion or be accepted as an unintended
    label, so the string is additionally required to be pure ASCII.

    Args:
        label: candidate register number as a string.

    Returns:
        The same string, when it is valid.

    Raises:
        ValueError: if ``label`` is not exactly 12 ASCII digits.
    """
    is_ascii_digits = label.isascii() and label.isdigit()
    if len(label) != 12 or not is_ascii_digits:
        raise ValueError(f"Invalid register number: {label}")
    return label

# Keep only the first 12 characters of each raw label and make sure they form
# a valid register number (validate_label raises ValueError otherwise).
cleaned_pairs = [(path, validate_label(label[:12])) for path, label in image_pairs]

# Split the data 80% / 10% / 10% into train / val / test.
# os.listdir() yields entries in arbitrary order and the global `random`
# module is never seeded, so an unseeded shuffle produces a different split on
# every kernel restart. Sorting first and shuffling with a dedicated seeded
# generator makes the split reproducible.
SPLIT_SEED = 42
cleaned_pairs.sort()
random.Random(SPLIT_SEED).shuffle(cleaned_pairs)

total = len(cleaned_pairs)
train_end = int(0.8 * total)
val_end = int(0.9 * total)
train = cleaned_pairs[:train_end]
val = cleaned_pairs[train_end:val_end]
test = cleaned_pairs[val_end:]
print(f"Train: {len(train)}, Val: {len(val)}, Test: {len(test)}")

Train: 4276, Val: 535, Test: 535


In [23]:
class RegisterNumberDataset(Dataset):
    """Dataset of register-number images paired with their digit-string labels.

    Indexing returns ``(image, target)``:

    * ``image`` is the file at ``image_paths[idx]`` opened with PIL and
      converted to mode ``'L'`` (single-channel grayscale), then passed through
      ``transform`` when one was supplied.
    * ``target`` is a ``torch.long`` tensor holding one entry per character of
      the label, each digit shifted up by one (``'0'`` -> 1, ..., ``'9'`` -> 10).
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths, self.labels, self.transform = image_paths, labels, transform

    def __len__(self):
        """Number of samples, taken from the list of image paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        picture = Image.open(self.image_paths[idx]).convert('L')
        digit_string = self.labels[idx]
        if self.transform:
            picture = self.transform(picture)
        # Digits are shifted by one so that class index 0 is never produced here.
        shifted_digits = list(map(lambda ch: int(ch) + 1, digit_string))
        target = torch.tensor(shifted_digits, dtype=torch.long)
        return picture, target

# Custom transform for random brightness
class RandomBrightness(object):
    """Transform that shifts image brightness by a random additive offset.

    On every call an offset is drawn uniformly from ``[-delta, delta]`` and
    added to the pixel values after scaling them to ``[0, 1]``. The result is
    clipped back to ``[0, 1]``, rescaled to 0-255 and returned as a PIL image
    built from a ``uint8`` array.
    """

    def __init__(self, delta=0.3):
        self.delta = delta

    def __call__(self, img):
        offset = random.uniform(-self.delta, self.delta)
        # Work in float on the [0, 1] scale so the offset is resolution independent
        scaled = torch.tensor(np.array(img)).float() / 255.0
        shifted = torch.clamp(scaled + offset, 0, 1)
        # The uint8 cast truncates fractional values
        as_bytes = (shifted.numpy() * 255).astype(np.uint8)
        return Image.fromarray(as_bytes)

# Settings shared by the training and the validation/test pipelines
_TARGET_SIZE = (32, 256)  # size handed to transforms.Resize
_NORM_MEAN, _NORM_STD = (0.5,), (0.5,)  # Normalize arguments (single channel)


def _add_gaussian_noise(x):
    """Add Gaussian noise scaled by 0.05 to the tensor."""
    return x + 0.05 * torch.randn_like(x)


def _clamp_to_normalized_range(x):
    """Clip the tensor back to [-1, 1] after noise has been added."""
    return torch.clamp(x, -1, 1)


# Training transform with augmentation
train_transform = transforms.Compose([
    transforms.RandomRotation(15),  # rotate between -15 and 15 degrees
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.8, 1.2)),  # shift and scale
    RandomBrightness(0.3),  # random brightness offset
    transforms.GaussianBlur(kernel_size=3),  # slight blur
    transforms.Resize(_TARGET_SIZE),  # consistent size
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
    transforms.Lambda(_add_gaussian_noise),  # noise is added after normalisation
    transforms.Lambda(_clamp_to_normalized_range),
])

# Validation/test transform (no augmentation): resize, tensor conversion, normalisation
val_test_transform = transforms.Compose([
    transforms.Resize(_TARGET_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

def _make_dataset(pairs, transform):
    """Split ``(path, label)`` pairs into two parallel lists and wrap them in a dataset."""
    paths = [path for path, _ in pairs]
    labels = [label for _, label in pairs]
    return RegisterNumberDataset(paths, labels, transform=transform)


# Create datasets: only the training split is augmented
train_dataset = _make_dataset(train, train_transform)
val_dataset = _make_dataset(val, val_test_transform)
test_dataset = _make_dataset(test, val_test_transform)

# Data loaders: only the training loader shuffles
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [24]:
import torch
import torch.nn as nn

class CRNN(nn.Module):
    """Convolutional-recurrent network producing one class-score vector per image column.

    Pipeline: a CNN collapses the image height to 1 while reducing the width
    by a factor of 4, a 2-layer bidirectional LSTM runs over the remaining
    width axis, and a linear layer maps every time step to ``num_classes``
    scores.

    Args:
        num_classes: number of output scores per time step.

    Input:  ``(N, 1, H, W)``, where ``H`` must be reduced to exactly 1 by the
            CNN (true for 32-pixel-high images).
    Output: ``(W/4, N, num_classes)`` raw scores (no softmax applied).
    """

    def __init__(self, num_classes):
        super().__init__()
        # NOTE: the order of the layers (and therefore their indices inside the
        # nn.Sequential) is kept unchanged so existing state_dict keys still match.
        # Shape comments assume a 32-pixel-high input.
        self.cnn = nn.Sequential(
            *self._conv_block(1, 64),
            nn.MaxPool2d(2, 2),  # (N, 64, 16, W/2)
            *self._conv_block(64, 128),
            nn.MaxPool2d(2, 2),  # (N, 128, 8, W/4)
            nn.Dropout2d(0.3),  # dropout after second maxpool
            *self._conv_block(128, 256),
            *self._conv_block(256, 256),
            nn.MaxPool2d((2, 1), (2, 1)),  # halves the height only: (N, 256, 4, W/4)
            *self._conv_block(256, 512),
            *self._conv_block(512, 512),
            nn.MaxPool2d((2, 1), (2, 1)),  # (N, 512, 2, W/4)
            nn.Dropout2d(0.3),  # dropout after fourth maxpool
            *self._conv_block(512, 512, kernel_size=(2, 1), padding=0),  # (N, 512, 1, W/4)
        )
        # LSTM with dropout between its two layers
        self.rnn = nn.LSTM(512, 256, num_layers=2, bidirectional=True, dropout=0.3)
        # Dropout before the fully connected layer
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(512, num_classes)  # 512 because bidirectional (256 * 2)

    @staticmethod
    def _conv_block(in_channels, out_channels, kernel_size=3, padding=1):
        """Return the ``[Conv2d, BatchNorm2d, ReLU]`` triple used throughout the CNN."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]

    def forward(self, x):
        """Map images ``(N, 1, H, W)`` to per-time-step scores ``(W/4, N, num_classes)``.

        Raises:
            ValueError: if the CNN does not reduce the height to 1. Previously
                ``squeeze(2)`` silently did nothing in that case and the
                following ``permute`` failed with an unrelated-looking error.
        """
        features = self.cnn(x)  # (N, 512, 1, W/4) for a valid input height
        if features.size(2) != 1:
            raise ValueError(
                "CRNN needs an input height that the CNN reduces to 1 "
                f"(e.g. 32); got input height {x.size(2)}, "
                f"feature height {features.size(2)}"
            )
        features = features.squeeze(2)  # (N, 512, W/4)
        sequence = features.permute(2, 0, 1)  # (W/4, N, 512): time-major for the LSTM
        sequence, _ = self.rnn(sequence)  # (W/4, N, 512)
        sequence = self.dropout(sequence)
        return self.fc(sequence)  # (W/4, N, num_classes)

# Build the network with 11 output classes (assumed: blank + digits 0-9) and
# place it on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CRNN(num_classes=11).to(device)
model

CRNN(
  (cnn): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Dropout2d(p=0.3, inplace=False)
    (9): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14): ReLU()
    (15): MaxPool2d(kernel_size=

In [25]:
import torch
from torch.utils.data import DataLoader

# CTC loss with class index 0 as the blank symbol, optimised with Adam
criterion = nn.CTCLoss(blank=0)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Per-epoch history of losses and accuracies
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

# Decoding function (used in both training and evaluation)
def decode_predictions(outputs):
    """Greedy CTC decoding of a batch of network outputs into digit strings.

    For every sample the highest-scoring class is taken at each time step,
    consecutive repeats are merged, blanks (class 0) are dropped and the
    remaining classes are mapped back to digits (class ``k`` -> digit ``k - 1``).

    Args:
        outputs: tensor of shape ``(T, N, num_classes)`` with per-class scores.

    Returns:
        A list of ``N`` strings, one decoded digit sequence per sample.
    """
    # softmax is monotonic, so the argmax can be taken on the raw scores directly.
    # One tolist() call replaces the per-element numpy iteration.
    best_paths = outputs.argmax(2).t().tolist()  # N lists of T class indices
    decoded = []
    for path in best_paths:
        prev = -1
        digits = []
        for cls in path:
            if cls != 0 and cls != prev:
                digits.append(str(cls - 1))
            prev = cls  # blanks also reset `prev`, so "1, 0, 1" yields two digits
        decoded.append(''.join(digits))
    return decoded

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    When ``optimizer`` is given the model is put in train mode and updated
    after every batch; otherwise it is put in eval mode and gradients are
    disabled. Accuracy is the percentage of samples whose greedy-decoded
    sequence equals the full target sequence.

    The mean loss is the average of the per-batch losses (sum divided by the
    number of batches).
    """
    is_training = optimizer is not None
    model.train(is_training)
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.set_grad_enabled(is_training):
        for images, targets in loader:
            images, targets = images.to(device), targets.to(device)
            if is_training:
                optimizer.zero_grad()
            outputs = model(images)  # (T, N, num_classes)
            log_probs = outputs.log_softmax(2)
            # A local name is used on purpose: rebinding the notebook-level
            # `batch_size` would leave it set to the size of the last batch.
            n_samples = images.size(0)
            # Every sample uses all T time steps of the network output
            input_lengths = torch.full((n_samples,), outputs.size(0), dtype=torch.long, device=device)
            # Target length is read from the batch instead of being hard-coded to 12
            target_lengths = torch.full((n_samples,), targets.size(1), dtype=torch.long, device=device)
            loss = criterion(log_probs, targets.view(-1), input_lengths, target_lengths)
            if is_training:
                loss.backward()
                optimizer.step()
            loss_sum += loss.item()

            # Exact-match accuracy on this batch (targets store digit + 1)
            predictions = decode_predictions(outputs)
            target_labels = [''.join(str(d - 1) for d in t.tolist()) for t in targets]
            hits = [pred == target for pred, target in zip(predictions, target_labels)]
            n_correct += sum(hits)
            n_seen += len(hits)
    return loss_sum / len(loader), n_correct / n_seen * 100


# Training loop with validation
num_epochs = 120
for epoch in range(num_epochs):
    # Training phase. Accuracy here is measured in train mode (dropout active)
    # on the augmented training images, so it is not directly comparable to
    # the validation accuracy.
    train_loss, train_accuracy = _run_epoch(model, train_loader, criterion, device, optimizer)
    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)

    # Validation phase (eval mode, no gradient tracking)
    val_loss, val_accuracy = _run_epoch(model, val_loader, criterion, device)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)

    # Print metrics
    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, "
          f"Train Acc: {train_accuracy:.2f}%, Val Acc: {val_accuracy:.2f}%")

Epoch 1/120, Train Loss: 1.5511, Val Loss: 1.2096, Train Acc: 0.00%, Val Acc: 0.00%
Epoch 2/120, Train Loss: 1.0808, Val Loss: 1.0542, Train Acc: 0.00%, Val Acc: 0.00%
Epoch 3/120, Train Loss: 1.0141, Val Loss: 1.0166, Train Acc: 0.00%, Val Acc: 0.00%
Epoch 4/120, Train Loss: 1.0013, Val Loss: 1.0188, Train Acc: 0.00%, Val Acc: 0.00%
Epoch 5/120, Train Loss: 0.9872, Val Loss: 0.9689, Train Acc: 0.00%, Val Acc: 0.00%
Epoch 6/120, Train Loss: 0.9493, Val Loss: 0.9418, Train Acc: 0.00%, Val Acc: 0.00%
Epoch 7/120, Train Loss: 0.9322, Val Loss: 0.9417, Train Acc: 0.00%, Val Acc: 0.00%
Epoch 8/120, Train Loss: 0.9240, Val Loss: 0.9376, Train Acc: 0.00%, Val Acc: 0.00%
Epoch 9/120, Train Loss: 0.9235, Val Loss: 0.9365, Train Acc: 0.00%, Val Acc: 0.00%
Epoch 10/120, Train Loss: 0.9154, Val Loss: 0.9248, Train Acc: 0.00%, Val Acc: 0.00%
Epoch 11/120, Train Loss: 0.9238, Val Loss: 0.9363, Train Acc: 0.00%, Val Acc: 0.00%
Epoch 12/120, Train Loss: 0.9142, Val Loss: 0.9150, Train Acc: 0.00%, Val 

Epoch 97/120, Train Loss: 0.1671, Val Loss: 0.0815, Train Acc: 50.80%, Val Acc: 83.93%
Epoch 98/120, Train Loss: 0.1655, Val Loss: 0.0645, Train Acc: 51.08%, Val Acc: 87.10%
Epoch 99/120, Train Loss: 0.1783, Val Loss: 0.0757, Train Acc: 50.14%, Val Acc: 84.67%
Epoch 100/120, Train Loss: 0.1691, Val Loss: 0.0582, Train Acc: 51.40%, Val Acc: 87.10%
Epoch 101/120, Train Loss: 0.1578, Val Loss: 0.0581, Train Acc: 53.93%, Val Acc: 88.97%
Epoch 102/120, Train Loss: 0.1557, Val Loss: 0.0540, Train Acc: 54.30%, Val Acc: 89.53%
Epoch 103/120, Train Loss: 0.1484, Val Loss: 0.0561, Train Acc: 56.15%, Val Acc: 88.04%
Epoch 104/120, Train Loss: 0.1554, Val Loss: 0.0581, Train Acc: 53.79%, Val Acc: 89.16%
Epoch 105/120, Train Loss: 0.1498, Val Loss: 0.0584, Train Acc: 55.26%, Val Acc: 90.84%
Epoch 106/120, Train Loss: 0.1481, Val Loss: 0.0616, Train Acc: 57.20%, Val Acc: 88.04%
Epoch 107/120, Train Loss: 0.1517, Val Loss: 0.0588, Train Acc: 56.17%, Val Acc: 90.09%
Epoch 108/120, Train Loss: 0.1533, 

In [26]:
def decode_predictions(outputs):
    """Greedy CTC decoding: turn network scores into one digit string per sample.

    NOTE(review): this redefines (and shadows) the ``decode_predictions``
    declared in the training cell; the two should be kept in sync or this
    copy removed.

    Args:
        outputs: tensor of shape ``(T, N, num_classes)`` with per-class scores.

    Returns:
        List of ``N`` strings. For each sample the per-step argmax is taken,
        consecutive repeats are collapsed, blanks (class 0) are removed and
        class ``k`` is written as digit ``k - 1``.
    """
    # argmax over raw scores equals argmax over softmax(scores), so the extra
    # softmax is skipped; tolist() yields plain Python ints in a single transfer.
    class_ids = outputs.argmax(2).transpose(0, 1).tolist()  # shape (N, T)
    decoded = []
    for sample_ids in class_ids:
        previous = -1
        kept = []
        for current in sample_ids:
            if current != 0 and current != previous:
                kept.append(str(current - 1))
            previous = current
        decoded.append(''.join(kept))
    return decoded

# Exact-match (whole-sequence) accuracy on the held-out test split
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, targets in test_loader:
        outputs = model(images.to(device))  # (T, N, num_classes)
        predictions = decode_predictions(outputs)
        # Targets store digit + 1, so subtract one to get the digit string back
        target_labels = [''.join(str(d - 1) for d in t.tolist()) for t in targets]
        hits = [pred == target for pred, target in zip(predictions, target_labels)]
        correct += sum(hits)
        total += len(hits)
print(f"Test Accuracy: {correct/total*100:.2f}%")

Test Accuracy: 88.41%


In [None]:
# Note: a previous run trained for 70 epochs reached a test accuracy of 92.15%.

In [27]:
def predict_register_number(model, image_path, device, transform):
    """Predict the register number shown in a single image file.

    The image is opened with PIL, converted to grayscale (mode ``'L'``),
    passed through ``transform`` and fed to ``model`` as a batch of one. The
    output is decoded with the shared greedy CTC decoder
    ``decode_predictions`` instead of a third inline copy of that loop.

    Args:
        model: network returning scores of shape ``(T, N, num_classes)``.
        image_path: path of the image to read.
        device: device the input tensor is moved to before the forward pass.
        transform: callable turning the PIL image into a tensor; the result is
            given a leading batch dimension here.

    Returns:
        The decoded digit string.

    Side effects:
        Switches ``model`` to eval mode and leaves it there.
    """
    image = transform(Image.open(image_path).convert('L'))
    batch = image.unsqueeze(0).to(device)  # add the batch dimension
    model.eval()
    with torch.no_grad():
        output = model(batch)  # (T, 1, num_classes)
    # decode_predictions returns one string per sample; the batch holds one sample
    return decode_predictions(output)[0]

# Run the predictor on a single image (replace the path with your own file);
# the un-augmented validation/test transform is used for preprocessing.
image_path = 'my_reg.png'
predicted_number = predict_register_number(
    model=model,
    image_path=image_path,
    device=device,
    transform=val_test_transform,
)
print(f"Predicted Register Number: {predicted_number}")

Predicted Register Number: 212223240005


In [28]:
# For comparison: the earlier model (92.15% test accuracy) predicted
# 212223240068 for my_reg.png.