<a href="https://colab.research.google.com/github/Shaik-vydyam-mohammad-khaleel/Campus-Placement-predictor/blob/main/License_Plate_Detection_and_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import zipfile

zip_path = "/content/drive/MyDrive/datasets/Licplatesdetection_train.zip"
extract_path = "/content/plates_unzipped"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print("Done! Files extracted to:", extract_path)



Done! Files extracted to: /content/plates_unzipped


In [3]:
import os
import pandas as pd
import cv2
import shutil
from sklearn.model_selection import train_test_split

# Paths
csv_path = "/content/drive/MyDrive/datasets/Licplatesdetection_train.csv"
images_dir = "/content/plates_unzipped/license_plates_detection_train"
output_dir = "/content/dataset"

# Create YOLO folders
for folder in ["images/train", "images/val", "labels/train", "labels/val"]:
    os.makedirs(os.path.join(output_dir, folder), exist_ok=True)

# Load CSV
df = pd.read_csv(csv_path)

# Get unique images
unique_images = df['img_id'].unique()

# Train/val split (80/20)
train_imgs, val_imgs = train_test_split(unique_images, test_size=0.2, random_state=42)

def convert_to_yolo(row, img_w, img_h):
    """Convert VOC bbox format to YOLO format"""
    xmin, ymin, xmax, ymax = row['xmin'], row['ymin'], row['xmax'], row['ymax']
    x_center = ((xmin + xmax) / 2) / img_w
    y_center = ((ymin + ymax) / 2) / img_h
    width = (xmax - xmin) / img_w
    height = (ymax - ymin) / img_h
    return [0, x_center, y_center, width, height]  # class 0 = license_plate

def process_images(img_list, split):
    for img_name in img_list:
        img_path = os.path.join(images_dir, img_name)
        if not os.path.exists(img_path):
            print(f" Missing image: {img_path}")
            continue

        # Copy image
        shutil.copy(img_path, os.path.join(output_dir, f"images/{split}", img_name))

        # Load image size
        img = cv2.imread(img_path)
        h, w = img.shape[:2]

        # Get all bboxes for this image
        labels = []
        for _, row in df[df['img_id'] == img_name].iterrows():
            yolo_box = convert_to_yolo(row, w, h)
            labels.append(" ".join(map(str, yolo_box)))

        # Save label file
        label_path = os.path.join(output_dir, f"labels/{split}", img_name.replace(".jpg", ".txt").replace(".png", ".txt"))
        with open(label_path, "w") as f:
            f.write("\n".join(labels))

# Process train and val sets
process_images(train_imgs, "train")
process_images(val_imgs, "val")

print(" YOLO dataset created at:", output_dir)


 YOLO dataset created at: /content/dataset


In [4]:
import yaml
import os

# Path where dataset is stored
dataset_path = "/content/dataset"
yaml_path = os.path.join(dataset_path, "data.yaml")

# Define YAML content
data = {
    "train": os.path.join(dataset_path, "images/train"),
    "val": os.path.join(dataset_path, "images/val"),
    "nc": 1,
    "names": ["license_plate"]
}

# Save YAML file
with open(yaml_path, "w") as f:
    yaml.dump(data, f, default_flow_style=False)

print(f"data.yaml created at: {yaml_path}")


data.yaml created at: /content/dataset/data.yaml


In [6]:
!pip install ultralytics

from ultralytics import YOLO

# Load a pre-trained YOLOv8 model (nano version is fastest)
model = YOLO("yolov8n.pt")

# Train on your dataset
model.train(
    data="/content/dataset/data.yaml",  # path to data.yaml
    epochs=30,                          # increase if needed
    imgsz=640,
    batch=16
)


Ultralytics 8.3.194 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/dataset/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train2, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, pose=12.0

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7e06a3473320>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.048048, 

In [7]:
from ultralytics import YOLO

# Load trained model
trained_model = YOLO("runs/detect/train/weights/best.pt")

# Run predictions
results = trained_model.predict(
    source="/content/dataset/images/val",
    conf=0.25,
    save=True
)

# Get the folder where predictions are saved
print(" Predictions saved in:", trained_model.predictor.save_dir)



image 1/180 /content/dataset/images/val/103.jpg: 480x640 1 license_plate, 53.5ms
image 2/180 /content/dataset/images/val/12.jpg: 384x640 1 license_plate, 43.1ms
image 3/180 /content/dataset/images/val/121.jpg: 384x640 1 license_plate, 6.3ms
image 4/180 /content/dataset/images/val/126.jpg: 640x480 1 license_plate, 43.2ms
image 5/180 /content/dataset/images/val/127.jpg: 480x640 1 license_plate, 7.7ms
image 6/180 /content/dataset/images/val/129.jpg: 640x480 1 license_plate, 6.8ms
image 7/180 /content/dataset/images/val/134.jpg: 384x640 1 license_plate, 6.6ms
image 8/180 /content/dataset/images/val/139.jpg: 640x480 1 license_plate, 6.9ms
image 9/180 /content/dataset/images/val/143.jpg: 480x640 1 license_plate, 7.2ms
image 10/180 /content/dataset/images/val/148.jpg: 352x640 1 license_plate, 44.5ms
image 11/180 /content/dataset/images/val/153.jpg: 640x480 1 license_plate, 7.6ms
image 12/180 /content/dataset/images/val/156.jpg: 384x640 1 license_plate, 6.7ms
image 13/180 /content/dataset/ima

In [8]:
import cv2
import os

output_crop_dir = "/content/cropped_predicted"
os.makedirs(output_crop_dir, exist_ok=True)

for r in results:
    img = cv2.imread(r.path)
    for i, box in enumerate(r.boxes.xyxy.tolist()):  # xyxy = [xmin, ymin, xmax, ymax]
        xmin, ymin, xmax, ymax = map(int, box)
        crop = img[ymin:ymax, xmin:xmax]
        out_path = os.path.join(output_crop_dir, f"{os.path.basename(r.path)}_plate{i}.jpg")
        cv2.imwrite(out_path, crop)

print(" Cropped plates saved in:", output_crop_dir)


 Cropped plates saved in: /content/cropped_predicted


In [9]:
import zipfile

zip_path = "/content/drive/MyDrive/datasets/Licplatesrecognition_train.zip"
extract_path = "/content/plates_recognition_unzipped"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print("Done! Files extracted to:", extract_path)

Done! Files extracted to: /content/plates_recognition_unzipped


In [10]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split

# Paths
csv_path = "/content/drive/MyDrive/datasets/Licplatesrecognition_train1.csv"   # your CSV with (img_id, text)
images_dir = "/content/plates_recognition_unzipped/license_plates_recognition_train"          # folder with cropped license plate images
output_dir = "/content/ocr_annotations"
os.makedirs(output_dir, exist_ok=True)

# Load CSV
df = pd.read_csv(csv_path)

# Train/val split (80/20)
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

def save_txt(df, out_file):
    with open(out_file, "w", encoding="utf-8") as f:
        for _, row in df.iterrows():
            img_path = os.path.join(images_dir, str(row["img_id"]))
            text = str(row["text"]).strip()
            f.write(f"{img_path}\t{text}\n")

# Save files
save_txt(train_df, os.path.join(output_dir, "train.txt"))
save_txt(val_df, os.path.join(output_dir, "val.txt"))

print("Train/Val annotation files created in:", output_dir)


Train/Val annotation files created in: /content/ocr_annotations


In [33]:
%%bash
cat > /content/train.py <<'PY'
# train.py  — CRNN+CTC training for license plates (digits + 'T')
# Works with a CSV that has columns: img_id, text
# Example call:
#   python3 train.py \
#     --img_dir "/content/plates_recognition_unzipped/license_plates_recognition_train" \
#     --ann "/content/drive/MyDrive/datasets/Licplatesrecognition_train1.csv" \
#     --charset "0123456789T" \
#     --epochs 20 --batch 16 --out "/content/ocr_model.pth"

import os
import sys
import math
import argparse
import random
from typing import List, Tuple

import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms


# ----------------------------
# Utils
# ----------------------------
def seed_everything(seed: int = 2024):
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


def robust_read_annotations(path: str) -> pd.DataFrame:
    """
    Tries to read CSV with headers (img_id,text). If that fails,
    tries tab-separated without header and renames to (img_id,text).
    """
    try:
        df = pd.read_csv(path)
        if not {"img_id", "text"}.issubset(df.columns):
            raise ValueError("Columns not found")
        return df[["img_id", "text"]]
    except Exception:
        df = pd.read_csv(path, sep="\t", header=None, engine="python")
        if df.shape[1] < 2:
            raise ValueError("Annotation file must have at least two columns")
        df = df.iloc[:, :2]
        df.columns = ["img_id", "text"]
        return df


# ----------------------------
# Dataset
# ----------------------------
class OCRDataset(Dataset):
    def __init__(self, img_dir: str, ann_path: str, charset: str, transform=None):
        """
        img_dir: base folder with images (ignored if img_id is absolute path)
        ann_path: CSV/TSV with columns [img_id, text]
        charset: string of characters to recognize (blank handled separately)
        """
        self.img_dir = img_dir
        self.df = robust_read_annotations(ann_path)
        self.transform = transform

        self.charset = charset
        self.blank_idx = 0
        self.char2idx = {c: i + 1 for i, c in enumerate(self.charset)}  # 0 reserved for CTC blank
        self.idx2char = {i + 1: c for i, c in enumerate(self.charset)}

        # Filter rows with empty text (CTC needs label length > 0)
        before = len(self.df)
        self.df = self.df[self.df["text"].astype(str).str.len() > 0].reset_index(drop=True)
        after = len(self.df)
        if after < before:
            print(f"[WARN] Dropped {before - after} samples with empty labels.")

    def __len__(self):
        return len(self.df)

    def _resolve_path(self, img_id: str) -> str:
        if os.path.isabs(img_id) and os.path.exists(img_id):
            return img_id
        p = os.path.join(self.img_dir, img_id)
        return p

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        row = self.df.iloc[idx]
        img_id, label_str = str(row["img_id"]), str(row["text"])
        img_path = self._resolve_path(img_id)
        if not os.path.exists(img_path):
            raise FileNotFoundError(f"Image not found: {img_path}")

        img = Image.open(img_path).convert("L")  # grayscale
        if self.transform:
            img = self.transform(img)

        # Encode label -> indices (skip chars not in charset)
        label_idx = [self.char2idx[c] for c in label_str if c in self.char2idx]
        if len(label_idx) == 0:
            # Ensure non-empty for CTC; if empty, put a dummy (will be ignored statistically)
            # but better to have no empty labels in data.
            label_idx = [self.char2idx[self.charset[0]]]
        return img, torch.tensor(label_idx, dtype=torch.long)


def ctc_collate_fn(batch):
    imgs, labels = zip(*batch)  # lists
    imgs = torch.stack(imgs, dim=0)  # (N, 1, H, W)

    label_lengths = torch.tensor([len(l) for l in labels], dtype=torch.long)
    labels = torch.cat(labels, dim=0)  # 1D concat of all labels
    return imgs, labels, label_lengths


# ----------------------------
# CRNN Model (VGG-like CNN + 2xBiLSTM + Linear)
# ----------------------------
class CRNN(nn.Module):
    def __init__(self, num_classes: int, img_h: int = 32):
        super().__init__()
        self.num_classes = num_classes  # includes CTC blank at index 0

        def conv_block(in_c, out_c, k=3, s=1, p=1, bn=True):
            layers = [nn.Conv2d(in_c, out_c, k, s, p), nn.ReLU(True)]
            if bn:
                layers.insert(1, nn.BatchNorm2d(out_c))
            return nn.Sequential(*layers)

        # CNN: reduce H to 1; preserve width as "time"
        self.cnn = nn.Sequential(
            conv_block(1, 64, bn=False),          # -> (64, H, W)
            nn.MaxPool2d(2, 2),                   # -> (64, H/2, W/2)

            conv_block(64, 128, bn=False),        # -> (128, H/2, W/2)
            nn.MaxPool2d(2, 2),                   # -> (128, H/4, W/4)

            conv_block(128, 256),                 # -> (256, H/4, W/4)
            conv_block(256, 256),                 # -> (256, H/4, W/4)
            nn.MaxPool2d(kernel_size=(2, 1), stride=(2, 1)),  # -> (256, H/8, W/4)

            conv_block(256, 512),                 # -> (512, H/8, W/4)
            conv_block(512, 512),                 # -> (512, H/8, W/4)
            nn.MaxPool2d(kernel_size=(2, 1), stride=(2, 1)),  # -> (512, H/16, W/4)

            # Reduce height to 1
            nn.Conv2d(512, 512, kernel_size=(img_h // 16, 3), stride=1, padding=(0, 1)),
            nn.ReLU(True),
        )
        # After CNN: (N, 512, 1, W') -> sequence len = W'
        self.rnn = nn.LSTM(
            input_size=512,
            hidden_size=256,
            num_layers=2,
            bidirectional=True,
            batch_first=False,  # we feed (T, N, C)
        )
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        # x: (N, 1, H, W)
        feats = self.cnn(x)            # (N, 512, 1, W')
        feats = feats.squeeze(2)       # (N, 512, W')
        feats = feats.permute(2, 0, 1) # (T=W', N, 512)
        y, _ = self.rnn(feats)         # (T, N, 512)
        y = self.fc(y)                 # (T, N, num_classes)
        return y


# ----------------------------
# Greedy CTC decoder (for quick sanity checks)
# ----------------------------
def ctc_greedy_decode(logits: torch.Tensor, idx2char: dict, blank_idx: int = 0) -> List[str]:
    """
    logits: (T, N, C) – raw (not softmaxed) or log-probs
    """
    probs = logits.detach().cpu().softmax(2)
    best = probs.argmax(2)  # (T, N)
    T, N = best.shape
    texts = []
    for n in range(N):
        prev = None
        s = []
        for t in range(T):
            k = best[t, n].item()
            if k != blank_idx and k != prev:
                if k in idx2char:
                    s.append(idx2char[k])
            prev = k
        texts.append("".join(s))
    return texts


# ----------------------------
# Train
# ----------------------------
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--img_dir", type=str, required=True,
                        help="Folder containing plate images (ignored if img_id has absolute paths)")
    parser.add_argument("--ann", type=str, required=True,
                        help="CSV/TSV with columns img_id,text (or tab-separated two columns)")
    parser.add_argument("--charset", type=str, default="0123456789T",
                        help="Characters to recognize (CTC blank is auto-added)")
    parser.add_argument("--epochs", type=int, default=20)
    parser.add_argument("--batch", type=int, default=16)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--height", type=int, default=32)
    parser.add_argument("--width", type=int, default=128)
    parser.add_argument("--workers", type=int, default=2)
    parser.add_argument("--out", type=str, default="ocr_model.pth")
    parser.add_argument("--seed", type=int, default=2024)
    args = parser.parse_args()

    print("Args:", vars(args))
    seed_everything(args.seed)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Device: {device}")

    transform = transforms.Compose([
        transforms.Resize((args.height, args.width)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ])

    dataset = OCRDataset(args.img_dir, args.ann, args.charset, transform=transform)
    print(f"Dataset size: {len(dataset)} images")
    print(f"Charset: {args.charset} (len={len(args.charset)})")

    loader = DataLoader(
        dataset,
        batch_size=args.batch,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=(device == "cuda"),
        collate_fn=ctc_collate_fn,
        drop_last=False,
    )

    num_classes = len(args.charset) + 1  # +1 for CTC blank at index 0
    model = CRNN(num_classes=num_classes, img_h=args.height).to(device)

    criterion = nn.CTCLoss(blank=0, zero_infinity=True)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    torch.backends.cudnn.benchmark = True

    for epoch in range(1, args.epochs + 1):
        model.train()
        total_loss = 0.0

        for step, (imgs, labels, label_lengths) in enumerate(loader, start=1):
            imgs = imgs.to(device)
            labels = labels.to(device)

            logits = model(imgs)  # (T, N, C)
            log_probs = F.log_softmax(logits, dim=2)

            T, N, C = log_probs.shape
            pred_lengths = torch.full(size=(N,), fill_value=T, dtype=torch.long, device=log_probs.device)

            loss = criterion(log_probs, labels, pred_lengths, label_lengths)

            optimizer.zero_grad(set_to_none=True)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()

            total_loss += loss.item()

            if step % 50 == 0 or step == 1:
                # quick decode preview for first few samples
                with torch.no_grad():
                    sample_txt = ctc_greedy_decode(logits[:, : min(2, N), :], dataset.idx2char, blank_idx=0)
                print(f"Epoch {epoch}/{args.epochs} | Step {step}/{len(loader)} | "
                      f"Loss {loss.item():.4f} | Preview: {sample_txt}")

        avg = total_loss / max(1, len(loader))
        print(f"==> Epoch {epoch} done. Avg Loss: {avg:.4f}")

    # Save model + charset used
    torch.save({
        "state_dict": model.state_dict(),
        "charset": args.charset,
        "img_h": args.height,
        "img_w": args.width,
    }, args.out)
    with open(os.path.splitext(args.out)[0] + "_charset.txt", "w", encoding="utf-8") as f:
        f.write(args.charset)

    print(f"✅ Training finished. Model saved to: {args.out}")


if __name__ == "__main__":
    main()

PY


In [35]:
!python3 /content/train.py \
  --img_dir "/content/plates_recognition_unzipped/license_plates_recognition_train" \
  --ann "/content/drive/MyDrive/datasets/Licplatesrecognition_train1.csv" \
  --charset "0123456789T" \
  --epochs 20 \
  --batch 16 \
  --out "/content/ocr_model.pth"


Args: {'img_dir': '/content/plates_recognition_unzipped/license_plates_recognition_train', 'ann': '/content/drive/MyDrive/datasets/Licplatesrecognition_train1.csv', 'charset': '0123456789T', 'epochs': 20, 'batch': 16, 'lr': 0.001, 'height': 32, 'width': 128, 'workers': 2, 'out': '/content/ocr_model.pth', 'seed': 2024}
Device: cuda
Dataset size: 900 images
Charset: 0123456789T (len=11)
Epoch 1/20 | Step 1/57 | Loss 7.1445 | Preview: ['681565757', '16561651']
Epoch 1/20 | Step 50/57 | Loss 2.4899 | Preview: ['', '']
==> Epoch 1 done. Avg Loss: 2.6745
Epoch 2/20 | Step 1/57 | Loss 2.5051 | Preview: ['1', '1']
Epoch 2/20 | Step 50/57 | Loss 2.4164 | Preview: ['1', '1']
==> Epoch 2 done. Avg Loss: 2.4086
Epoch 3/20 | Step 1/57 | Loss 2.4078 | Preview: ['1', '1']
Epoch 3/20 | Step 50/57 | Loss 2.3755 | Preview: ['1', '1']
==> Epoch 3 done. Avg Loss: 2.3512
Epoch 4/20 | Step 1/57 | Loss 2.3308 | Preview: ['1', '1']
Epoch 4/20 | Step 50/57 | Loss 2.2766 | Preview: ['1', '1']
==> Epoch 4 done. 

In [36]:
import zipfile

zip_path = "/content/drive/MyDrive/datasets/test.zip"
extract_path = "/content/test_unzipped"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print("Done! Files extracted to:", extract_path)

Done! Files extracted to: /content/test_unzipped


In [44]:
import torch
from PIL import Image, ImageEnhance, ImageFilter
from torchvision import transforms
from pathlib import Path
import pandas as pd
import torch.nn as nn

# ------------------------
# CRNN model (same as train.py)
# ------------------------
class CRNN(nn.Module):
    def __init__(self, num_classes: int, img_h: int = 32):
        super().__init__()
        self.num_classes = num_classes
        def conv_block(in_c, out_c, k=3, s=1, p=1, bn=True):
            layers = [nn.Conv2d(in_c, out_c, k, s, p), nn.ReLU(True)]
            if bn: layers.insert(1, nn.BatchNorm2d(out_c))
            return nn.Sequential(*layers)

        self.cnn = nn.Sequential(
            conv_block(1, 64, bn=False), nn.MaxPool2d(2, 2),
            conv_block(64, 128, bn=False), nn.MaxPool2d(2, 2),
            conv_block(128, 256), conv_block(256, 256),
            nn.MaxPool2d(kernel_size=(2, 1), stride=(2, 1)),
            conv_block(256, 512), conv_block(512, 512),
            nn.MaxPool2d(kernel_size=(2, 1), stride=(2, 1)),
            nn.Conv2d(512, 512, kernel_size=(img_h // 16, 3), stride=1, padding=(0, 1)),
            nn.ReLU(True),
        )
        self.rnn = nn.LSTM(input_size=512, hidden_size=256, num_layers=2,
                           bidirectional=True, batch_first=False)
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        feats = self.cnn(x)
        feats = feats.squeeze(2)
        feats = feats.permute(2, 0, 1)
        y, _ = self.rnn(feats)
        y = self.fc(y)
        return y

# ------------------------
# Load model checkpoint
# ------------------------
checkpoint_path = "/content/ocr_model.pth"
checkpoint = torch.load(checkpoint_path, map_location='cpu')
charset = checkpoint['charset']
img_h = checkpoint['img_h']
img_w = checkpoint['img_w']

num_classes = len(charset) + 1
model = CRNN(num_classes=num_classes, img_h=img_h)
model.load_state_dict(checkpoint['state_dict'])
model.eval()

# ------------------------
# Preprocessing function
# ------------------------
def preprocess_image(img_path, img_h, img_w, enhance_contrast=True, sharpen=False):
    img = Image.open(img_path).convert("L")

    # Contrast enhancement
    if enhance_contrast:
        img = ImageEnhance.Contrast(img).enhance(2.0)

    # Optional sharpening
    if sharpen:
        img = img.filter(ImageFilter.SHARPEN)

    # Resize and normalize
    transform = transforms.Compose([
        transforms.Resize((img_h, img_w)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])
    return transform(img).unsqueeze(0)  # add batch dimension

# ------------------------
# Greedy CTC decoding
# ------------------------
def ctc_greedy_decode(logits, idx2char, blank_idx=0):
    probs = logits.detach().cpu().softmax(2)
    best = probs.argmax(2)  # (T, N)
    T, N = best.shape
    texts = []
    for n in range(N):
        prev = None
        s = []
        for t in range(T):
            k = best[t, n].item()
            if k != blank_idx and k != prev:
                if k in idx2char:
                    s.append(idx2char[k])
            prev = k
        texts.append("".join(s))
    return texts

# ------------------------
# Run inference on a folder
# ------------------------
test_folder = "/content/test_unzipped/test/test/"
test_images = list(Path(test_folder).glob("*.jpg"))
idx2char = {i+1: c for i, c in enumerate(charset)}

results_dict = {}
for img_path in test_images:
    img_tensor = preprocess_image(img_path, img_h, img_w, enhance_contrast=True, sharpen=True)
    with torch.no_grad():
        logits = model(img_tensor)
    pred_text = ctc_greedy_decode(logits, idx2char)[0]
    results_dict[img_path.name] = pred_text
    print(f"{img_path.name}: {pred_text}")

# Save predictions
df = pd.DataFrame([(k, v) for k, v in results_dict.items()],
                  columns=['image', 'predicted_text'])
df.to_csv("/content/ocr_predictions_enhanced.csv", index=False)
print("✅ Enhanced predictions saved to /content/ocr_predictions_enhanced.csv")


926.jpg: 1T57
1036.jpg: 6T05
1033.jpg: 7T73
911.jpg: 4T1
933.jpg: T0
977.jpg: T7
1068.jpg: 14T7
1046.jpg: T08
992.jpg: 8T
1084.jpg: 1T44
1038.jpg: 1T
1019.jpg: 2T57
981.jpg: T4
1051.jpg: 7T
934.jpg: 1T45
1008.jpg: 0T0
989.jpg: 70T254
906.jpg: 7T8
1034.jpg: 72T8
930.jpg: 7T
990.jpg: 76T7
996.jpg: 3T9
952.jpg: T69
943.jpg: 4T971
963.jpg: 5T3
1077.jpg: T57
1035.jpg: 14T132
994.jpg: 0T
1111.jpg: 2T0
1040.jpg: 1T9
965.jpg: 1T2
1003.jpg: 39T1
958.jpg: 7T47
1009.jpg: 17T9
932.jpg: 9T
973.jpg: 7T
901.jpg: T57
954.jpg: 1T7
936.jpg: 4T1
1062.jpg: 10T34
924.jpg: 2T
914.jpg: 18T7
1099.jpg: 14T754
1018.jpg: 1T977
993.jpg: 1T
1102.jpg: 70T5
1014.jpg: 50T84
971.jpg: 7T
902.jpg: 54T67
1030.jpg: 1T805
1069.jpg: T
982.jpg: 1T07
1098.jpg: 5T3
927.jpg: 172T2
940.jpg: T74
1057.jpg: 8T9
950.jpg: 3T447
1082.jpg: 7T7
1101.jpg: 7T7
1002.jpg: 8T7
918.jpg: 13T12
1022.jpg: T42
1024.jpg: 5T
960.jpg: T75
1086.jpg: 0T
1011.jpg: 0T7
1005.jpg: 1T4
1037.jpg: 141T7
1083.jpg: 1T
1110.jpg: 8T9
986.jpg: T1
957.jpg: T7
917.

In [45]:
import pandas as pd
import re

# Load raw predictions
df = pd.read_csv("/content/ocr_predictions.csv")

# Define allowed charset
allowed_chars = "0123456789T"

def clean_plate(text):
    # Keep only allowed characters
    cleaned = "".join([c for c in text if c in allowed_chars])

    # Optional: ensure at most one 'T' (common in your dataset)
    if cleaned.count('T') > 1:
        # Keep first 'T', remove others
        first_T = cleaned.index('T')
        cleaned = cleaned[:first_T+1] + cleaned[first_T+1:].replace('T','')

    # Optional: basic pattern enforcement
    # Example: at least 2 characters, max 7
    if len(cleaned) < 2:
        cleaned = cleaned + "0"*(2-len(cleaned))
    elif len(cleaned) > 7:
        cleaned = cleaned[:7]

    return cleaned

# Apply cleaning
df['predicted_text_clean'] = df['predicted_text'].apply(clean_plate)

# Save cleaned predictions
df.to_csv("/content/ocr_predictions_clean.csv", index=False)
print("✅ Cleaned predictions saved to /content/ocr_predictions_clean.csv")
print(df[['image','predicted_text','predicted_text_clean']].head(15))


✅ Cleaned predictions saved to /content/ocr_predictions_clean.csv
       image predicted_text predicted_text_clean
0    926.jpg            T57                  T57
1   1036.jpg            1T9                  1T9
2   1033.jpg            2T9                  2T9
3    911.jpg            1T1                  1T1
4    933.jpg             T0                   T0
5    977.jpg             T2                   T2
6   1068.jpg         17T777               17T777
7   1046.jpg            1T9                  1T9
8    992.jpg              T                   T0
9   1084.jpg             1T                   1T
10  1038.jpg             1T                   1T
11  1019.jpg            2T2                  2T2
12   981.jpg            T42                  T42
13  1051.jpg            7T4                  7T4
14   934.jpg          14T45                14T45
