In [2]:
# Mount Google Drive (Colab only) so paths under /content/drive/MyDrive,
# such as the dataset root used below, become readable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import os
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms



In [4]:
# Characters the recogniser can emit: the ten digits followed by A-Z.
ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# Class indices are 1-based; index 0 is left free for the CTC blank.
char2idx = dict(zip(ALPHABET, range(1, len(ALPHABET) + 1)))
idx2char = dict(enumerate(ALPHABET, start=1))


In [9]:
class OCRDataset(Dataset):
    """OCR dataset read from ``<root>/images`` and ``<root>/labels.txt``.

    Every non-blank line of ``labels.txt`` must hold an image file name and
    its text separated by a single TAB. Items are returned as
    ``(image_tensor, target_indices, target_length)``.
    """

    def __init__(self, root):
        """Index the label file and build the preprocessing pipeline.

        Args:
            root: Dataset directory containing ``images/`` and ``labels.txt``.

        Raises:
            ValueError: If a non-blank label line does not consist of exactly
                two TAB-separated fields.
        """
        self.img_dir = os.path.join(root, "images")
        self.labels = self._read_labels(os.path.join(root, "labels.txt"))

        # Resize to 32x100 (H, W), collapse to one channel, scale to [-1, 1].
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((32, 100)),
            transforms.Grayscale(),
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,))
        ])

    @staticmethod
    def _read_labels(path):
        """Parse the label file at ``path`` into a list of ``(name, text)`` pairs."""
        pairs = []
        with open(path) as f:
            for lineno, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    # A blank line (e.g. a trailing newline at end of file)
                    # previously crashed the two-value unpacking.
                    continue
                fields = line.split("\t")
                if len(fields) != 2:
                    raise ValueError(
                        f"{path}:{lineno}: expected 2 tab-separated fields, "
                        f"got {len(fields)}"
                    )
                pairs.append((fields[0], fields[1]))
        return pairs

    def __len__(self):
        """Number of labelled samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            Tuple of the transformed image tensor, a ``torch.long`` tensor of
            class indices looked up in ``char2idx``, and the target length.

        Raises:
            FileNotFoundError: If the image file cannot be read.
            KeyError: If the label contains a character missing from ``char2idx``.
        """
        name, text = self.labels[idx]
        path = os.path.join(self.img_dir, name)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports a missing or unreadable file by returning
            # None instead of raising, so fail here with the offending path.
            raise FileNotFoundError(f"Could not read image: {path}")
        img = self.transform(img)

        target = torch.tensor([char2idx[c] for c in text], dtype=torch.long)
        return img, target, len(target)
# Use the GPU when the runtime provides one.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

dataset = OCRDataset("/content/drive/MyDrive/ANPR_OCR_Dataset")

# NOTE: collate_fn is defined in a separate cell (In [6]) that appears further
# down in this export; that cell has to be run before this one.
loader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)

print("Dataset size:", len(dataset))



Dataset size: 1337


In [6]:
def collate_fn(batch):
    """Combine dataset items into one batch for CTC training.

    Args:
        batch: Sequence of ``(image, target, length)`` items.

    Returns:
        Tuple ``(imgs, targets, lengths)``: the images stacked along a new
        leading dimension, all targets concatenated along dimension 0, and
        the per-item lengths as a tensor.
    """
    # Transpose the list of triples into three parallel lists.
    images, labels, sizes = map(list, zip(*batch))
    return torch.stack(images), torch.cat(labels), torch.tensor(sizes)


In [14]:
# Work from /content so the clone in the next cell lands next to, not inside, the Drive mount.
%cd /content

/content


In [15]:
# NOTE(review): none of the cells shown in this notebook import from this checkout
# (the CRNN is defined inline), so this ~158 MiB clone may be unnecessary — confirm.
!git clone https://github.com/JaidedAI/EasyOCR.git


Cloning into 'EasyOCR'...
remote: Enumerating objects: 2753, done.[K
remote: Counting objects: 100% (2/2), done.[K
remote: Compressing objects: 100% (2/2), done.[K
remote: Total 2753 (delta 0), reused 0 (delta 0), pack-reused 2751 (from 2)[K
Receiving objects: 100% (2753/2753), 157.84 MiB | 21.51 MiB/s, done.
Resolving deltas: 100% (1672/1672), done.
Updating files: 100% (313/313), done.


In [16]:
# Changes the working directory to the checkout; the later cells shown here use absolute paths only.
%cd /content/EasyOCR


/content/EasyOCR


In [17]:

class CRNN(nn.Module):
    """Two-layer CNN followed by a bidirectional LSTM, emitting log-probabilities for CTC.

    Args:
        nclass: Number of output classes, including the CTC blank.
        img_height: Height in pixels of the input images. The two 2x2
            max-pools shrink it by a factor of four, which determines the
            LSTM input width. Defaults to 32, the value that used to be
            hard-coded as ``128*8``.

    Raises:
        ValueError: If ``img_height`` is below 4, which would leave no rows
            after pooling.
    """

    def __init__(self, nclass, img_height=32):
        super().__init__()
        if img_height < 4:
            raise ValueError(f"img_height must be >= 4, got {img_height}")

        self.cnn = nn.Sequential(
            nn.Conv2d(1, 64, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Conv2d(64, 128, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2)
        )
        # The 3x3 convolutions (stride 1, padding 1) keep the spatial size;
        # each pool floors height/2, so the final height is img_height // 4.
        feat_height = img_height // 4
        self.rnn = nn.LSTM(128 * feat_height, 256, bidirectional=True, batch_first=True)
        # Bidirectional output concatenates both directions: 2 * 256 features.
        self.fc = nn.Linear(512, nclass)

    def forward(self, x):
        """Run the recogniser.

        Args:
            x: Image batch of shape (B, 1, H, W) with H equal to ``img_height``.

        Returns:
            Log-probabilities of shape (B, W // 4, nclass).
        """
        x = self.cnn(x)
        b, c, h, w = x.size()
        # Each feature-map column becomes one time step: (B, C, H, W) -> (B, W, C*H).
        x = x.permute(0,3,1,2).contiguous()
        x = x.view(b, w, c*h)
        x,_ = self.rnn(x)
        x = self.fc(x)
        return x.log_softmax(2)


In [19]:
import os
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

# -----------------------------
# Alphabet
# -----------------------------
ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Character -> 1-based class index; index 0 stays unused here.
char2idx = {ch: pos for pos, ch in enumerate(ALPHABET, start=1)}

# -----------------------------
# Dataset
# -----------------------------
class OCRDataset(Dataset):
    """OCR dataset read from ``<root>/images`` and ``<root>/labels.txt``.

    Every non-blank line of ``labels.txt`` must hold an image file name and
    its text separated by a single TAB. The parsed pairs are kept in
    ``self.samples``; items are returned as
    ``(image_tensor, target_indices, target_length)``.
    """

    def __init__(self, root):
        """Index the label file and build the preprocessing pipeline.

        Args:
            root: Dataset directory containing ``images/`` and ``labels.txt``.

        Raises:
            ValueError: If a non-blank label line does not consist of exactly
                two TAB-separated fields.
        """
        self.img_dir = os.path.join(root, "images")
        self.samples = self._read_samples(os.path.join(root, "labels.txt"))

        # Resize to 32x100 (H, W), collapse to one channel, scale to [-1, 1].
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((32, 100)),
            transforms.Grayscale(),
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,))
        ])

    @staticmethod
    def _read_samples(path):
        """Parse the label file at ``path`` into a list of ``(name, text)`` pairs."""
        samples = []
        with open(path) as f:
            for lineno, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    # A blank line (e.g. a trailing newline at end of file)
                    # previously crashed the two-value unpacking.
                    continue
                fields = line.split("\t")
                if len(fields) != 2:
                    raise ValueError(
                        f"{path}:{lineno}: expected 2 tab-separated fields, "
                        f"got {len(fields)}"
                    )
                samples.append((fields[0], fields[1]))
        return samples

    def __len__(self):
        """Number of labelled samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            Tuple of the transformed image tensor, a ``torch.long`` tensor of
            class indices looked up in ``char2idx``, and the target length.

        Raises:
            FileNotFoundError: If the image file cannot be read.
            KeyError: If the label contains a character missing from ``char2idx``.
        """
        name, text = self.samples[idx]
        path = os.path.join(self.img_dir, name)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports a missing or unreadable file by returning
            # None instead of raising, so fail here with the offending path.
            raise FileNotFoundError(f"Could not read image: {path}")
        img = self.transform(img)
        target = torch.tensor([char2idx[c] for c in text], dtype=torch.long)
        return img, target, len(target)

def collate_fn(batch):
    """Turn a list of ``(image, target, length)`` items into batch tensors.

    Returns:
        ``(imgs, targets, lengths)`` where ``imgs`` stacks the images along a
        new leading dimension, ``targets`` is every target concatenated along
        dimension 0 (the flat layout accepted by ``nn.CTCLoss``), and
        ``lengths`` holds the per-item target lengths.
    """
    columns = tuple(zip(*batch))
    image_col, target_col, length_col = columns
    stacked = torch.stack(image_col)
    flat_targets = torch.cat(target_col)
    return stacked, flat_targets, torch.tensor(length_col)

# -----------------------------
# Model
# -----------------------------
class CRNN(nn.Module):
    """Two-layer CNN followed by a bidirectional LSTM, emitting log-probabilities for CTC.

    Args:
        nclass: Number of output classes, including the CTC blank.
        img_height: Height in pixels of the input images. The two 2x2
            max-pools shrink it by a factor of four, which determines the
            LSTM input width. Defaults to 32, the value that used to be
            hard-coded as ``128*8``.

    Raises:
        ValueError: If ``img_height`` is below 4, which would leave no rows
            after pooling.
    """

    def __init__(self, nclass, img_height=32):
        super().__init__()
        if img_height < 4:
            raise ValueError(f"img_height must be >= 4, got {img_height}")

        self.cnn = nn.Sequential(
            nn.Conv2d(1, 64, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Conv2d(64, 128, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2)
        )
        # The 3x3 convolutions (stride 1, padding 1) keep the spatial size;
        # each pool floors height/2, so the final height is img_height // 4.
        feat_height = img_height // 4
        self.rnn = nn.LSTM(128 * feat_height, 256, bidirectional=True, batch_first=True)
        # Bidirectional output concatenates both directions: 2 * 256 features.
        self.fc = nn.Linear(512, nclass)

    def forward(self, x):
        """Run the recogniser.

        Args:
            x: Image batch of shape (B, 1, H, W) with H equal to ``img_height``.

        Returns:
            Log-probabilities of shape (B, W // 4, nclass).
        """
        x = self.cnn(x)
        b, c, h, w = x.size()
        # Each feature-map column becomes one time step: (B, C, H, W) -> (B, W, C*H).
        x = x.permute(0,3,1,2).contiguous()
        x = x.view(b, w, c*h)
        x,_ = self.rnn(x)
        x = self.fc(x)
        return x.log_softmax(2)

# -----------------------------
# Setup
# -----------------------------
# Use the GPU when the runtime provides one.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

dataset = OCRDataset("/content/drive/MyDrive/ANPR_OCR_Dataset")
loader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    collate_fn=collate_fn,
)

# One output class per alphabet character plus the CTC blank at index 0.
model = CRNN(len(ALPHABET) + 1)
model = model.to(device)
criterion = nn.CTCLoss(blank=0)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

print("Dataset size:", len(dataset))
print("Model initialized on:", device)


Dataset size: 1337
Model initialized on: cuda


In [22]:
# First training pass: epochs 1-5, with a progress line every 20 batches.
for epoch in range(5):
    total_loss = 0.0

    for i, (imgs, targets, lengths) in enumerate(loader):
        if not i % 20:
            print(f"Epoch {epoch+1} | Batch {i}/{len(loader)}")

        imgs, targets = imgs.to(device), targets.to(device)

        optimizer.zero_grad()

        preds = model(imgs)          # (B, T, C) log-probabilities
        T = preds.shape[1]

        # Every sample in the batch uses the full T time steps as its input length.
        pred_lengths = torch.full(
            (imgs.shape[0],), T, dtype=torch.long, device=device
        )

        # CTCLoss expects time-major input (T, B, C).
        loss = criterion(preds.transpose(0, 1), targets, pred_lengths, lengths)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(loader)
    print(f"Epoch {epoch+1} Loss: {avg_loss:.4f}")



Epoch 1 | Batch 0/42
Epoch 1 | Batch 20/42
Epoch 1 | Batch 40/42
Epoch 1 Loss: 3.2395
Epoch 2 | Batch 0/42
Epoch 2 | Batch 20/42
Epoch 2 | Batch 40/42
Epoch 2 Loss: 2.9739
Epoch 3 | Batch 0/42
Epoch 3 | Batch 20/42
Epoch 3 | Batch 40/42
Epoch 3 Loss: 2.8177
Epoch 4 | Batch 0/42
Epoch 4 | Batch 20/42
Epoch 4 | Batch 40/42
Epoch 4 Loss: 2.6642
Epoch 5 | Batch 0/42
Epoch 5 | Batch 20/42
Epoch 5 | Batch 40/42
Epoch 5 Loss: 2.5254


In [23]:
# Second training pass: continues with the same model and optimizer for epochs 6-20.
for epoch in range(5, 20):
    total_loss = 0.0
    for imgs, targets, lengths in loader:
        imgs = imgs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        preds = model(imgs)          # (B, T, C) log-probabilities
        T = preds.shape[1]
        # Every sample in the batch uses the full T time steps as its input length.
        pred_lengths = torch.full((imgs.shape[0],), T, dtype=torch.long, device=device)

        # CTCLoss expects time-major input (T, B, C).
        time_major = preds.permute(1, 0, 2)
        loss = criterion(time_major, targets, pred_lengths, lengths)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1} Loss: {total_loss/len(loader):.4f}")



Epoch 6 Loss: 2.4225
Epoch 7 Loss: 2.3222
Epoch 8 Loss: 2.2016
Epoch 9 Loss: 2.0136
Epoch 10 Loss: 1.7705
Epoch 11 Loss: 1.5401
Epoch 12 Loss: 1.3602
Epoch 13 Loss: 1.1942
Epoch 14 Loss: 1.0454
Epoch 15 Loss: 0.9116
Epoch 16 Loss: 0.7917
Epoch 17 Loss: 0.6753
Epoch 18 Loss: 0.5740
Epoch 19 Loss: 0.4739
Epoch 20 Loss: 0.3932


In [24]:
# Persist only the learned parameters (state_dict), not the module object itself.
# NOTE(review): /content is presumably runtime-local storage — copy the file to
# Drive if it must outlive the session; confirm.
torch.save(model.state_dict(), "/content/crnn_plate_ocr.pth")
print("Model saved")


Model saved


In [25]:
import torch

# NOTE(review): this repeats the save done in the previous cell, writing the
# same state_dict to the same path; one of the two cells is redundant.
MODEL_PATH = "/content/crnn_plate_ocr.pth"
torch.save(model.state_dict(), MODEL_PATH)
print("✅ Model saved at:", MODEL_PATH)


✅ Model saved at: /content/crnn_plate_ocr.pth


In [26]:
def ctc_greedy_decode(preds, idx2char):
    """Best-path CTC decoding: argmax per step, merge repeats, drop blanks.

    Args:
        preds: Tensor of shape (T, B, C) holding log-probabilities.
        idx2char: Mapping from non-blank class index to character; index 0
            is treated as the blank.

    Returns:
        List with one decoded string per batch element.
    """
    # (T, B) argmax -> B rows of T class indices, converted to Python ints once.
    best_paths = preds.argmax(2).t().tolist()
    decoded = []
    for path in best_paths:
        chars = []
        last = 0
        for cls in path:
            # Emit a character only when it is not blank and differs from the previous step.
            if cls != 0 and cls != last:
                chars.append(idx2char[cls])
            last = cls
        decoded.append("".join(chars))
    return decoded


In [28]:
import cv2
import torchvision.transforms as transforms

# Inference-time preprocessing; the steps are the same as OCRDataset.transform
# so that test images are prepared exactly like the training images.
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize((32, 100)),
        transforms.Grayscale(),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)


def crnn_predict(image_bgr):
    """Recognise the text in one image.

    Args:
        image_bgr: Image array as returned by ``cv2.imread``.

    Returns:
        The greedy-decoded string for that image.
    """
    batch = transform(image_bgr)[None].to(device)      # add batch dimension
    with torch.no_grad():
        log_probs = model(batch).transpose(0, 1)        # (B, T, C) -> (T, B, C)
    decoded = ctc_greedy_decode(log_probs, idx2char)
    return decoded[0]

# Try one image from your OCR dataset
test_img_path = "/content/drive/MyDrive/ANPR_OCR_Dataset/images/000020.jpg"
img = cv2.imread(test_img_path)
if img is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail here with the path rather than inside the transform.
    raise FileNotFoundError(f"Could not read test image: {test_img_path}")
print("Pred:", crnn_predict(img))


Pred: TR01N0481


In [29]:
import numpy as np

def cer(gt, pred):
    """Character error rate: Levenshtein distance from ``gt`` to ``pred`` over ``len(gt)``.

    When ``gt`` is empty the distance is divided by 1 instead of 0, so the
    result is the raw edit distance.
    """
    rows, cols = len(gt) + 1, len(pred) + 1
    # dist[i, j] = edit distance between gt[:i] and pred[:j]
    dist = np.zeros((rows, cols), dtype=int)
    dist[:, 0] = np.arange(rows)
    dist[0, :] = np.arange(cols)
    for i, g in enumerate(gt, start=1):
        for j, p in enumerate(pred, start=1):
            substitution = dist[i - 1, j - 1] + (g != p)
            deletion = dist[i - 1, j] + 1
            insertion = dist[i, j - 1] + 1
            dist[i, j] = min(deletion, insertion, substitution)
    return dist[-1, -1] / max(1, len(gt))

# NOTE: `dataset` is the same object the model was trained on, so these numbers
# describe fit to the training data rather than held-out accuracy.
exact = 0
cers = []

# crnn_predict already runs under no_grad; the outer context is kept as well.
with torch.no_grad():
    for name, gt in dataset.samples:
        img = cv2.imread(os.path.join(dataset.img_dir, name))
        pred = crnn_predict(img)
        exact += int(pred == gt)
        cers.append(cer(gt, pred))

print("Exact Match:", exact / len(dataset))
print("Mean CER   :", np.mean(cers))


Exact Match: 0.6379955123410621
Mean CER   : 0.05572714910785217
