In [12]:
import torch
import torch.nn as nn
from PIL import Image
from torchvision import transforms
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Define the CRNN class (must match the training code exactly)
class CRNN(nn.Module):
    """Convolutional + bidirectional-LSTM recogniser emitting per-timestep class scores.

    The position of every module inside ``self.cnn`` determines the numeric
    state_dict keys (``cnn.0.weight``, ``cnn.4.weight``, ...), so the layer
    order below must stay aligned with the checkpoint loaded into this class.
    """

    def __init__(self, num_classes):
        """Build the network; ``num_classes`` is the output width of the final linear layer."""
        super().__init__()

        def conv_bn_relu(in_ch, out_ch, kernel_size=3, padding=1):
            # One convolution stage: conv -> batch norm -> ReLU.
            return [
                nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=padding),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
            ]

        layers = [
            *conv_bn_relu(1, 64),
            nn.MaxPool2d(2, 2),                # halves height and width
            *conv_bn_relu(64, 128),
            nn.MaxPool2d(2, 2),                # halves height and width again
            nn.Dropout2d(0.3),
            *conv_bn_relu(128, 256),
            *conv_bn_relu(256, 256),
            nn.MaxPool2d((2, 1), (2, 1)),      # halves height only
            *conv_bn_relu(256, 512),
            *conv_bn_relu(512, 512),
            nn.MaxPool2d((2, 1), (2, 1)),      # halves height only
            nn.Dropout2d(0.3),
            # The (2, 1) kernel without padding removes one row, so a
            # 32-pixel-high input ends up with height 1 here.
            *conv_bn_relu(512, 512, kernel_size=(2, 1), padding=0),
        ]
        self.cnn = nn.Sequential(*layers)

        self.rnn = nn.LSTM(512, 256, num_layers=2, bidirectional=True, dropout=0.3)
        self.dropout = nn.Dropout(0.5)
        # Input width is 512 because the two LSTM directions (256 each) are concatenated.
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        """Map an image batch to scores laid out as (timesteps, batch, num_classes).

        The feature map is expected to have height 1 after the CNN so that
        ``squeeze(2)`` can drop the height axis.
        """
        features = self.cnn(x)
        # (N, C, 1, W') -> (N, C, W') -> (W', N, C): the LSTM is sequence-first.
        sequence = features.squeeze(2).permute(2, 0, 1)
        recurrent, _ = self.rnn(sequence)
        return self.fc(self.dropout(recurrent))

# Device setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model with num_classes=37 (blank + 0-9 + A-Z)
model = CRNN(num_classes=37)
model.to(device)

# Load the saved weights. The file is used as a plain state_dict, so restrict
# unpickling to tensors/containers with weights_only=True: a bare torch.load
# runs arbitrary pickle code from the file (that is what the FutureWarning
# silenced at the top of this cell is about).
best_model_path = 'best_subject_model_final.pth'
checkpoint = torch.load(best_model_path, map_location=device, weights_only=True)
model.load_state_dict(checkpoint)  # Load directly as state_dict
model.eval()  # Disable dropout and use BatchNorm running statistics

# Define the transform (matches val_test_transform from training)
val_test_transform = transforms.Compose([
    transforms.Resize((32, 128)),  # Matches subject code training
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Define the character mapping (matches training code); index 0 is the CTC blank
char_map = {i: str(i-1) for i in range(1, 11)}  # 1-10 -> '0'-'9'
char_map.update({i: chr(i - 11 + ord('A')) for i in range(11, 37)})  # 11-36 -> 'A'-'Z'

# Prediction function for subject code
def predict_subject_code(model, image_path, device, transform, char_map, blank_index=0):
    """Run the model on one image file and greedily CTC-decode the prediction.

    Args:
        model: Callable that maps a (1, 1, H, W) tensor to class scores laid
            out as (timesteps, 1, num_classes).
        image_path: Path of the image file to read.
        device: Device the input tensor is moved to before the forward pass.
        transform: Callable that turns the grayscale PIL image into a tensor
            without a batch dimension.
        char_map: Mapping from class index to character. Indices that are
            missing from the mapping contribute nothing to the result.
        blank_index: Class index of the CTC blank symbol (default 0).

    Returns:
        The decoded string with blanks and consecutive repeats removed.
    """
    # The context manager closes the file handle deterministically; convert()
    # hands back an independent image that stays valid after the file closes.
    with Image.open(image_path) as img:
        gray = img.convert('L')
    batch = transform(gray).unsqueeze(0)  # add the batch dimension

    with torch.no_grad():
        scores = model(batch.to(device))

    # softmax is monotonic, so the argmax of the raw scores is already the most
    # likely class per timestep. tolist() yields plain Python ints.
    indices = scores.squeeze(1).argmax(1).tolist()

    decoded = []
    prev = blank_index  # start as if the previous timestep was a blank
    for idx in indices:
        # Emit a symbol only when it is not blank and not a repeat of the
        # immediately preceding timestep (a blank in between permits doubles).
        if idx != blank_index and idx != prev:
            decoded.append(char_map.get(idx, ''))
        prev = idx
    return ''.join(decoded)

# Smoke test: decode one sample image and show the result
image_path = '19AI44.jpg'  # Replace with your image path
predicted_code = predict_subject_code(
    model=model,
    image_path=image_path,
    device=device,
    transform=val_test_transform,
    char_map=char_map,
)
print(f"Predicted Subject Code: {predicted_code}")

Predicted Subject Code: 19AI44


In [None]:
########  IMPROVED MODEL #######

In [11]:
import torch
import torch.nn as nn
from PIL import Image
from torchvision import transforms
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
from typing import Mapping

# Define the CRNN class (must match the training code exactly)
# This is the updated class provided by the user
class CRNN(nn.Module):
    """Configurable CRNN: seven conv stages, a bidirectional RNN and a linear classifier.

    Module names inside ``self.cnn`` (``conv0``, ``batchnorm0``, ``relu0``,
    ``pooling0``, ...) and their order define the state_dict keys, so they
    must stay aligned with the checkpoint loaded into this class.
    """

    def __init__(self, img_h, nc, nclass, nh, n_rnn=2, leaky_relu=False, rnn_type='LSTM'):
        """
        img_h: Input image height; only checked to be a multiple of 16
        nc: Number of input channels (1 for grayscale)
        nclass: Number of output classes (e.g., 37: 26 letters + 10 digits + 1 blank)
        nh: Size of the RNN hidden state
        n_rnn: Number of RNN layers
        leaky_relu: Whether to use LeakyReLU(0.2) instead of ReLU
        rnn_type: 'LSTM' or 'GRU'; anything else raises ValueError
        """
        super().__init__()
        assert img_h % 16 == 0, 'img_h has to be a multiple of 16'
        self.num_classes = nclass

        # (out_channels, kernel, stride, padding) for conv0 .. conv6
        conv_specs = [
            (64, 3, 1, 1),
            (128, 3, 1, 1),
            (256, 3, 1, 1),
            (256, 3, 1, 1),
            (512, 3, 1, 1),
            (512, 3, 1, 1),
            (512, 2, 1, 0),
        ]
        # Pooling module appended right after the conv stage with the given index.
        # pooling2/pooling3 halve the height but use stride 1 with padding 1 on
        # the width, so each of them makes the feature map one column wider.
        pool_after = {
            0: ('pooling0', nn.MaxPool2d(2, 2)),
            1: ('pooling1', nn.MaxPool2d(2, 2)),
            3: ('pooling2', nn.MaxPool2d((2, 2), (2, 1), (0, 1))),
            5: ('pooling3', nn.MaxPool2d((2, 2), (2, 1), (0, 1))),
        }

        backbone = nn.Sequential()
        in_ch = nc
        for idx, (out_ch, kernel, stride, pad) in enumerate(conv_specs):
            backbone.add_module(f'conv{idx}', nn.Conv2d(in_ch, out_ch, kernel, stride, pad))
            backbone.add_module(f'batchnorm{idx}', nn.BatchNorm2d(out_ch))
            if leaky_relu:
                act = nn.LeakyReLU(0.2, inplace=True)
            else:
                act = nn.ReLU(True)
            backbone.add_module(f'relu{idx}', act)
            if idx in pool_after:
                pool_name, pool_layer = pool_after[idx]
                backbone.add_module(pool_name, pool_layer)
            in_ch = out_ch
        self.cnn = backbone

        # Recurrent part: inter-layer dropout only makes sense with more than one layer.
        feature_size = conv_specs[-1][0]  # 512
        inter_layer_dropout = 0.3 if n_rnn > 1 else 0
        if rnn_type == 'LSTM':
            rnn_cls = nn.LSTM
        elif rnn_type == 'GRU':
            rnn_cls = nn.GRU
        else:
            raise ValueError(f"Unsupported RNN type: {rnn_type}")
        self.rnn = rnn_cls(feature_size, nh, num_layers=n_rnn,
                           bidirectional=True, dropout=inter_layer_dropout,
                           batch_first=False)

        # Classifier: both RNN directions are concatenated, hence nh * 2 inputs.
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(nh * 2, nclass)

    def forward(self, x):
        """Map an image batch (N, nc, H, W) to scores of shape (seq_len, N, nclass).

        The CNN output must have height 1 (asserted). For a 32-pixel-high
        input whose width is a multiple of 4, seq_len is W/4 + 1 because of
        the width padding in pooling2/pooling3.
        """
        features = self.cnn(x)
        _batch, _channels, height, _width = features.size(); assert height == 1
        # (N, C, 1, W') -> (W', N, C): the RNN is sequence-first.
        sequence = features.squeeze(2).permute(2, 0, 1)
        recurrent, _ = self.rnn(sequence)
        recurrent = self.dropout(recurrent)
        return self.fc(recurrent)


# Device setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the saved model (either a full pickled model instance or a state_dict)
best_model_path = 'best_full_subject_code_crnn_model.pth'

# SECURITY NOTE: a full model instance can only be restored through pickle,
# which executes code stored in the file -- load checkpoints from trusted
# sources only. weights_only=False is passed explicitly because newer PyTorch
# releases default to weights_only=True, which rejects a pickled nn.Module.
# The CRNN class above must be defined before this call so the pickled
# instance can be resolved.
checkpoint = torch.load(best_model_path, map_location=device, weights_only=False)

# Check if the loaded object is a state_dict or the full model
if isinstance(checkpoint, Mapping):
    # If it's a state_dict, initialize the model first and then load the state_dict
    # Initialize the model with parameters matching training (assuming img_h=32, nc=1, nclass=37, nh=256, n_rnn=2, leaky_relu=False, rnn_type='LSTM')
    # You need to adjust these parameters if your training used different values
    model = CRNN(img_h=32, nc=1, nclass=37, nh=256, n_rnn=2, leaky_relu=False, rnn_type='LSTM')
    model.load_state_dict(checkpoint)
    print("Loaded model state_dict.")
elif isinstance(checkpoint, nn.Module):
    # If it's the full model instance, use it directly
    model = checkpoint
    print("Loaded full model instance.")
else:
    raise TypeError(f"Expected model checkpoint to be a state_dict or a Module, but got {type(checkpoint)}")


model.to(device)
model.eval()  # Disable dropout and use BatchNorm running statistics

# Define the transform (matches val_test_transform from training, assuming size is 32x128)
val_test_transform = transforms.Compose([
    transforms.Resize((32, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Define the character mapping (matches training code, assuming 0 is blank)
# 0: blank
# 1-10: '0'-'9'
# 11-36: 'A'-'Z'
char_map = {i: str(i-1) for i in range(1, 11)}  # 1-10 -> '0'-'9'
char_map.update({i: chr(i - 11 + ord('A')) for i in range(11, 37)})  # 11-36 -> 'A'-'Z'
# Adding the blank character mapping explicitly for completeness, though typically skipped
char_map[0] = '' # Represents the blank character

# Prediction function for subject code
def predict_subject_code(model, image_path, device, transform, char_map, blank_index=0):
    """Predict the subject code shown in an image using greedy CTC decoding.

    Args:
        model: Callable that maps a (1, 1, H, W) tensor to class scores laid
            out as (seq_len, 1, num_classes).
        image_path: Path of the image file to read.
        device: Device the input tensor is moved to before the forward pass.
        transform: Callable that turns the grayscale PIL image into a tensor
            without a batch dimension.
        char_map: Mapping from class index to character. Indices absent from
            the mapping are skipped.
        blank_index: Class index of the CTC blank symbol (default 0).

    Returns:
        The decoded string with blanks and consecutive duplicates removed.
    """
    # Close the file handle deterministically; convert() produces an
    # independent image that remains usable after the file is closed.
    with Image.open(image_path) as source:
        grayscale = source.convert('L')
    inputs = transform(grayscale).unsqueeze(0)  # Add batch dimension

    with torch.no_grad():
        logits = model(inputs.to(device))  # (seq_len, 1, num_classes)

    # Best class per timestep. softmax is monotonic, so taking argmax of the
    # raw scores gives the same winner without the extra pass.
    best_path = logits.squeeze(1).argmax(1).tolist()  # plain Python ints

    # Collapse the best path: drop blanks and consecutive duplicates. A blank
    # between two identical symbols keeps both of them.
    pieces = []
    last = blank_index
    for label in best_path:
        if label != blank_index and label != last and label in char_map:
            pieces.append(char_map[label])
        last = label

    return ''.join(pieces)

# Smoke test: decode one sample image and report either the result or the failure
image_path = '19AI44.jpg'  # Replace with your image path
try:
    predicted_code = predict_subject_code(
        model=model,
        image_path=image_path,
        device=device,
        transform=val_test_transform,
        char_map=char_map,
    )
    print(f"Predicted Subject Code: {predicted_code}")
except FileNotFoundError:
    # Missing input file gets its own, more specific message.
    print(f"Error: Image file not found at {image_path}")
except Exception as e:
    # Any other failure is reported by message only.
    print(f"An error occurred during prediction: {e}")

Loaded full model instance.
Predicted Subject Code: 19AI414
