In [1]:
import os
from PIL import Image
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Set up the data directory
data_dir = 'subject codes/'

# Load image paths and labels
image_pairs = [
    (os.path.join(data_dir, f), f[:7].upper()) 
    for f in os.listdir(data_dir) 
    if f.lower().endswith(('.png', '.jpg', '.jpeg')) and len(f[:7]) == 7 and all(c.isalnum() for c in f[:7])
]

# Validate labels
def validate_label(label):
    if len(label) != 7 or not all(c.isalnum() for c in label):
        raise ValueError(f"Invalid subject code: {label}")
    return label

cleaned_pairs = [(path, validate_label(label)) for path, label in image_pairs]

In [2]:
cleaned_pairs

[('subject codes/19AI303.jpg', '19AI303'),
 ('subject codes/19AI303 (2).jpg', '19AI303'),
 ('subject codes/19AI303 (3).jpg', '19AI303'),
 ('subject codes/19AI303 (4).jpg', '19AI303'),
 ('subject codes/19AI303 (5).jpg', '19AI303'),
 ('subject codes/19AI303 (6).jpg', '19AI303'),
 ('subject codes/19AI303 (7).jpg', '19AI303'),
 ('subject codes/19AI303 (8).jpg', '19AI303'),
 ('subject codes/19AI303 (9).jpg', '19AI303'),
 ('subject codes/19AI303 (10).jpg', '19AI303'),
 ('subject codes/19AI401.jpg', '19AI401'),
 ('subject codes/19AI403.jpg', '19AI403'),
 ('subject codes/19AI403 (2).jpg', '19AI403'),
 ('subject codes/19AI403 (3).jpg', '19AI403'),
 ('subject codes/19AI403 (4).jpg', '19AI403'),
 ('subject codes/19AI403 (5).jpg', '19AI403'),
 ('subject codes/19AI403 (6).jpg', '19AI403'),
 ('subject codes/19AI403 (7).jpg', '19AI403'),
 ('subject codes/19AI403 (8).jpg', '19AI403'),
 ('subject codes/19AI403 (9).jpg', '19AI403'),
 ('subject codes/19AI403 (10).jpg', '19AI403'),
 ('subject codes/19AI40

In [3]:
# Split the data
random.shuffle(cleaned_pairs)
total = len(cleaned_pairs)
train = cleaned_pairs[:int(0.8 * total)]
val = cleaned_pairs[int(0.8 * total):int(0.9 * total)]
test = cleaned_pairs[int(0.9 * total):]
print(f"Train: {len(train)}, Val: {len(val)}, Test: {len(test)}")

class SubjectCodeDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert('L')
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        label_seq = [int(c) + 1 if c.isdigit() else ord(c) - ord('A') + 11 for c in label]
        return image, torch.tensor(label_seq, dtype=torch.long)


Train: 886, Val: 111, Test: 111


In [4]:
# Custom transform for random brightness
class RandomBrightness(object):
    def __init__(self, delta=0.3):
        self.delta = delta

    def __call__(self, img):
        brightness = random.uniform(-self.delta, self.delta)
        img = torch.tensor(np.array(img)).float() / 255.0  # Convert to tensor [0,1]
        img = img + brightness
        img = torch.clamp(img, 0, 1)  # Keep values between 0 and 1
        return Image.fromarray((img.numpy() * 255).astype(np.uint8))

# Training transform with augmentation
train_transform = transforms.Compose([
    transforms.RandomRotation(15),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.8, 1.2)),
    RandomBrightness(0.3),
    transforms.GaussianBlur(kernel_size=3),
    transforms.Resize((32, 128)),  # Adjusted for smaller subject code images
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
    transforms.Lambda(lambda x: x + 0.05 * torch.randn_like(x)),
    transforms.Lambda(lambda x: torch.clamp(x, -1, 1)),
])

# Validation/test transform (no augmentation)
val_test_transform = transforms.Compose([
    transforms.Resize((32, 128)),  # Adjusted for smaller subject code images
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Create datasets
train_dataset = SubjectCodeDataset([p for p, _ in train], [l for _, l in train], transform=train_transform)
val_dataset = SubjectCodeDataset([p for p, _ in val], [l for _, l in val], transform=val_test_transform)
test_dataset = SubjectCodeDataset([p for p, _ in test], [l for _, l in test], transform=val_test_transform)

# Data loaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [5]:
# Define the CRNN model
class CRNN(nn.Module):
    def __init__(self, num_classes):
        super(CRNN, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # (N, 64, 16, W/2)
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # (N, 128, 8, W/4)
            nn.Dropout2d(0.3),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d((2,1), (2,1)),  # (N, 256, 4, W/4)
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d((2,1), (2,1)),  # (N, 512, 2, W/4)
            nn.Dropout2d(0.3),
            nn.Conv2d(512, 512, kernel_size=(2,1)),  # (N, 512, 1, W/4)
            nn.BatchNorm2d(512),
            nn.ReLU(),
        )
        self.rnn = nn.LSTM(512, 256, num_layers=2, bidirectional=True, dropout=0.3)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(512, num_classes)  # 512 because bidirectional (256 * 2)

    def forward(self, x):
        x = self.cnn(x)  # (N, 512, 1, W/4)
        x = x.squeeze(2)  # (N, 512, W/4)
        x = x.permute(2, 0, 1)  # (W/4, N, 512) for LSTM
        x, _ = self.rnn(x)  # (W/4, N, 512)
        x = self.dropout(x)
        x = self.fc(x)  # (W/4, N, num_classes)
        return x

# Initialize the model for 37 classes (blank + 0-9 + A-Z)
model = CRNN(num_classes=37)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Define loss function and optimizer
criterion = nn.CTCLoss(blank=0)
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [6]:
model

CRNN(
  (cnn): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Dropout2d(p=0.3, inplace=False)
    (9): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14): ReLU()
    (15): MaxPool2d(kernel_size=

In [7]:
# Character mapping for decoding
char_map = {i: str(i-1) for i in range(1, 11)}  # 1-10 -> '0'-'9'
char_map.update({i: chr(i - 11 + ord('A')) for i in range(11, 37)})  # 11-36 -> 'A'-'Z'

# Decoding function
def decode_predictions(outputs):
    outputs = outputs.softmax(2).argmax(2)  # (T, N)
    batch_size = outputs.size(1)
    decoded = []
    for b in range(batch_size):
        seq = outputs[:, b].cpu().numpy()
        prev = 0
        result = []
        for s in seq:
            if s != 0 and s != prev:
                result.append(char_map.get(s, ''))
            prev = s
        decoded.append(''.join(result))
    return decoded

In [9]:
# Training loop with validation
num_epochs = 200
for epoch in range(num_epochs):
    # Training phase
    model.train()
    train_loss = 0
    train_correct = 0
    train_total = 0
    for images, targets in train_loader:
        images, targets = images.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(images)  # (W/4, N, num_classes)
        outputs_log = outputs.log_softmax(2)
        batch_size = images.size(0)
        input_lengths = torch.full((batch_size,), outputs.size(0), dtype=torch.long, device=device)
        target_lengths = torch.full((batch_size,), 7, dtype=torch.long, device=device)
        targets_flat = targets.view(-1)
        loss = criterion(outputs_log, targets_flat, input_lengths, target_lengths)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

        # Compute training accuracy
        predictions = decode_predictions(outputs)
        target_labels = [''.join([char_map[d.item()] for d in t]) for t in targets]
        for pred, target in zip(predictions, target_labels):
            if pred == target:
                train_correct += 1
            train_total += 1

    train_loss = train_loss / len(train_loader)
    train_accuracy = train_correct / train_total * 100

    # Validation phase
    model.eval()
    val_loss = 0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for images, targets in val_loader:
            images, targets = images.to(device), targets.to(device)
            outputs = model(images)
            outputs_log = outputs.log_softmax(2)
            batch_size = images.size(0)
            input_lengths = torch.full((batch_size,), outputs.size(0), dtype=torch.long, device=device)
            target_lengths = torch.full((batch_size,), 7, dtype=torch.long, device=device)
            targets_flat = targets.view(-1)
            loss = criterion(outputs_log, targets_flat, input_lengths, target_lengths)
            val_loss += loss.item()

            # Compute validation accuracy
            predictions = decode_predictions(outputs)
            target_labels = [''.join([char_map[d.item()] for d in t]) for t in targets]
            for pred, target in zip(predictions, target_labels):
                if pred == target:
                    val_correct += 1
                val_total += 1

    val_loss = val_loss / len(val_loader)
    val_accuracy = val_correct / val_total * 100

    # Print metrics
    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, "
          f"Train Acc: {train_accuracy:.2f}%, Val Acc: {val_accuracy:.2f}%")

Epoch 1/200, Train Loss: 1.0675, Val Loss: 1.0457, Train Acc: 1.58%, Val Acc: 3.60%
Epoch 2/200, Train Loss: 1.0665, Val Loss: 1.0450, Train Acc: 1.92%, Val Acc: 4.50%
Epoch 3/200, Train Loss: 1.0647, Val Loss: 1.0473, Train Acc: 1.58%, Val Acc: 0.00%
Epoch 4/200, Train Loss: 1.0666, Val Loss: 1.0468, Train Acc: 1.13%, Val Acc: 3.60%
Epoch 5/200, Train Loss: 1.0645, Val Loss: 1.0430, Train Acc: 1.58%, Val Acc: 4.50%
Epoch 6/200, Train Loss: 1.0680, Val Loss: 1.0462, Train Acc: 1.35%, Val Acc: 0.00%
Epoch 7/200, Train Loss: 1.0642, Val Loss: 1.0509, Train Acc: 1.02%, Val Acc: 0.00%
Epoch 8/200, Train Loss: 1.0650, Val Loss: 1.0495, Train Acc: 1.69%, Val Acc: 0.00%
Epoch 9/200, Train Loss: 1.0639, Val Loss: 1.0436, Train Acc: 2.03%, Val Acc: 0.00%
Epoch 10/200, Train Loss: 1.0615, Val Loss: 1.0465, Train Acc: 1.24%, Val Acc: 0.00%
Epoch 11/200, Train Loss: 1.0646, Val Loss: 1.0550, Train Acc: 1.69%, Val Acc: 2.70%
Epoch 12/200, Train Loss: 1.0667, Val Loss: 1.0441, Train Acc: 0.56%, Val 

Epoch 98/200, Train Loss: 0.6821, Val Loss: 0.5135, Train Acc: 18.85%, Val Acc: 27.93%
Epoch 99/200, Train Loss: 0.6789, Val Loss: 0.5284, Train Acc: 20.54%, Val Acc: 29.73%
Epoch 100/200, Train Loss: 0.6628, Val Loss: 0.4721, Train Acc: 22.46%, Val Acc: 32.43%
Epoch 101/200, Train Loss: 0.6198, Val Loss: 0.4567, Train Acc: 24.15%, Val Acc: 28.83%
Epoch 102/200, Train Loss: 0.6389, Val Loss: 0.4554, Train Acc: 20.99%, Val Acc: 30.63%
Epoch 103/200, Train Loss: 0.6300, Val Loss: 0.4347, Train Acc: 21.90%, Val Acc: 31.53%
Epoch 104/200, Train Loss: 0.6377, Val Loss: 0.4318, Train Acc: 21.11%, Val Acc: 35.14%
Epoch 105/200, Train Loss: 0.6028, Val Loss: 0.4184, Train Acc: 23.81%, Val Acc: 34.23%
Epoch 106/200, Train Loss: 0.6028, Val Loss: 0.4473, Train Acc: 25.85%, Val Acc: 32.43%
Epoch 107/200, Train Loss: 0.5983, Val Loss: 0.3852, Train Acc: 24.60%, Val Acc: 33.33%
Epoch 108/200, Train Loss: 0.6156, Val Loss: 0.3961, Train Acc: 24.27%, Val Acc: 31.53%
Epoch 109/200, Train Loss: 0.5881,

Epoch 192/200, Train Loss: 0.2596, Val Loss: 0.1679, Train Acc: 54.51%, Val Acc: 71.17%
Epoch 193/200, Train Loss: 0.2575, Val Loss: 0.1765, Train Acc: 56.77%, Val Acc: 69.37%
Epoch 194/200, Train Loss: 0.2755, Val Loss: 0.1990, Train Acc: 55.08%, Val Acc: 70.27%
Epoch 195/200, Train Loss: 0.2634, Val Loss: 0.1606, Train Acc: 57.56%, Val Acc: 71.17%
Epoch 196/200, Train Loss: 0.2622, Val Loss: 0.1542, Train Acc: 53.95%, Val Acc: 72.07%
Epoch 197/200, Train Loss: 0.2619, Val Loss: 0.1662, Train Acc: 55.76%, Val Acc: 71.17%
Epoch 198/200, Train Loss: 0.2579, Val Loss: 0.1520, Train Acc: 58.24%, Val Acc: 74.77%
Epoch 199/200, Train Loss: 0.2409, Val Loss: 0.1601, Train Acc: 57.45%, Val Acc: 72.97%
Epoch 200/200, Train Loss: 0.2541, Val Loss: 0.1606, Train Acc: 56.66%, Val Acc: 70.27%


In [12]:
import torch
from PIL import Image
from torchvision import transforms

# Define character mapping for subject codes
char_map = {i: str(i-1) for i in range(1, 11)}  # 1-10 -> '0'-'9'
char_map.update({i: chr(i - 11 + ord('A')) for i in range(11, 37)})  # 11-36 -> 'A'-'Z'

# Assuming val_test_transform is defined as in training (update if different)
val_test_transform = transforms.Compose([
    transforms.Resize((32, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

def decode_predictions(outputs):
    outputs = outputs.softmax(2).argmax(2)  # (T, N) -> e.g., (32, N)
    batch_size = outputs.size(1)
    decoded = []
    for b in range(batch_size):
        seq = outputs[:, b].cpu().numpy()  # (T,)
        prev = -1
        result = []
        for s in seq:
            if s != 0 and s != prev:
                char = char_map.get(s, '')
                if char:
                    result.append(char)
            prev = s
        decoded.append(''.join(result))
    return decoded

# Test accuracy calculation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, targets in test_loader:
        images = images.to(device)
        outputs = model(images)  # (T, N, 37)
        predictions = decode_predictions(outputs)
        target_labels = [''.join([char_map[d.item()] for d in t]) for t in targets]
        for pred, target in zip(predictions, target_labels):
            if pred == target:
                correct += 1
            total += 1
print(f"Test Accuracy: {correct/total*100:.2f}%")

def predict_subject_code(model, image_path, device, transform):
    image = Image.open(image_path).convert('L')
    image = transform(image)
    image = image.unsqueeze(0)  # (1, 1, 32, 128)
    model.eval()
    with torch.no_grad():
        image = image.to(device)
        output = model(image)  # (T, 1, 37), e.g., (32, 1, 37)
        output = output.squeeze(1)  # (T, 37)
        output = output.softmax(1).argmax(1)  # (T,)
        seq = output.cpu().numpy()
        prev = -1
        result = []
        for s in seq:
            if s != 0 and s != prev:
                char = char_map.get(s, '')
                if char:
                    result.append(char)
            prev = s
    return ''.join(result)

# Test with your image
image_path = 'subject_code_2.jpg'  # Replace with your image path
predicted_code = predict_subject_code(model, image_path, device, val_test_transform)
print(f"Predicted Subject Code: {predicted_code}")

Test Accuracy: 78.38%
Predicted Subject Code: 19CS404
