In [125]:
import os
import xml.etree.ElementTree as ET
import pandas as pd
import cv2
from pathlib import Path
import torch
from torch.utils.data import TensorDataset, DataLoader
import torchvision.transforms as transforms
import xml.etree.ElementTree as ET

In [126]:
class NumberPlateDataset(Dataset):
    def __init__(self, images_dir, annotations_dir, transform=None, max_images=None):
        self.images_dir = images_dir
        self.annotations_dir = annotations_dir
        self.transform = transform
        
        # Filter images that have a corresponding XML with <object>
        self.image_files = []
        for f in os.listdir(images_dir):
            if not f.endswith('.jpg'):
                continue
            xml_path = os.path.join(annotations_dir, f.replace('.jpg', '.xml'))
            if not os.path.exists(xml_path):
                continue
            try:
                tree = ET.parse(xml_path)
                root = tree.getroot()
                if root.find('object') is not None:  # include only if object exists
                    self.image_files.append(f)
            except ET.ParseError:
                # skip XML files that are malformed
                continue
        
        # Limit the number of images
        if max_images:
            self.image_files = self.image_files[:max_images]

    

    def __len__(self):
        return len(self.image_files)
    
    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.images_dir, img_name)
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        height, width, _ = image.shape
        
        # Parse XML
        xml_path = os.path.join(self.annotations_dir, img_name.replace('.jpg','.xml'))
        tree = ET.parse(xml_path)
        root = tree.getroot()
        obj = root.find('object')
        bbox = obj.find('bndbox')
        xmin = int(bbox.find('xmin').text) / width    # Normalize
        ymin = int(bbox.find('ymin').text) / height
        xmax = int(bbox.find('xmax').text) / width
        ymax = int(bbox.find('ymax').text) / height
        target = torch.tensor([xmin, ymin, xmax, ymax], dtype=torch.float32)
        
        if self.transform:
            image = self.transform(image)
        
        return image, target


In [127]:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((128,128)),
    transforms.ToTensor()
])


In [128]:
dataset = NumberPlateDataset(
    images_dir='C:/Users/thili/OneDrive/Desktop/NumberPlateDetection&Blur/train', 
    annotations_dir='C:/Users/thili/OneDrive/Desktop/NumberPlateDetection&Blur/train', 
    transform=transform,
    max_images=1000
)

dataloader = DataLoader(dataset, batch_size=16, shuffle=True)


In [129]:
import torch.nn as nn

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3,16,3,padding=1)
        self.pool = nn.MaxPool2d(2,2)
        self.conv2 = nn.Conv2d(16,32,3,padding=1)
        self.fc1 = nn.Linear(32*32*32, 128)
        self.fc2 = nn.Linear(128, 4)  # output: [xmin, ymin, xmax, ymax]
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 32*32*32)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = SimpleCNN()


In [130]:
criterion = nn.SmoothL1Loss()  # Predict bbox coordinates
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)


In [131]:
for epoch in range(20):  # example epochs
    for images, targets in dataloader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")


Epoch 1, Loss: 0.0147
Epoch 2, Loss: 0.0132
Epoch 3, Loss: 0.0155
Epoch 4, Loss: 0.0124
Epoch 5, Loss: 0.0101
Epoch 6, Loss: 0.0050
Epoch 7, Loss: 0.0092
Epoch 8, Loss: 0.0050
Epoch 9, Loss: 0.0071
Epoch 10, Loss: 0.0030
Epoch 11, Loss: 0.0032
Epoch 12, Loss: 0.0061
Epoch 13, Loss: 0.0048
Epoch 14, Loss: 0.0009
Epoch 15, Loss: 0.0053
Epoch 16, Loss: 0.0019
Epoch 17, Loss: 0.0020
Epoch 18, Loss: 0.0014
Epoch 19, Loss: 0.0011
Epoch 20, Loss: 0.0010


In [132]:
model.eval()

SimpleCNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=32768, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=4, bias=True)
)

In [139]:
import cv2
import torch
import os

# Path to test images folder
test_folder = "C:/Users/thili/OneDrive/Desktop/NumberPlateDetection&Blur/test"
test_images = [f for f in os.listdir(test_folder) if f.endswith('.jpg')][:20]  # first 20 images

max_size = 1000
min_size = 500

for img_file in test_images:
    img_path = os.path.join(test_folder, img_file)
    image = cv2.imread(img_path)
    height, width, _ = image.shape

    # Resize if smaller than 500x500, but not larger than 1000x1000
    scale = 1.0
    if width < min_size or height < min_size:
        scale = max(min_size / width, min_size / height)
    elif width > max_size or height > max_size:
        scale = min(max_size / width, max_size / height)

    if scale != 1.0:
        new_width = int(width * scale)
        new_height = int(height * scale)
        image = cv2.resize(image, (new_width, new_height))
        height, width = new_height, new_width

    # Convert to RGB
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Convert to tensor and normalize
    image_tensor = transform(image_rgb)  # Apply your training transforms
    image_tensor = image_tensor.unsqueeze(0)  # Add batch dimension

    with torch.no_grad():
        pred_bbox = model(image_tensor)

    # Convert predicted bbox to pixel coordinates
    pred_bbox = pred_bbox.squeeze(0).numpy()
    xmin = int(pred_bbox[0] * width)
    ymin = int(pred_bbox[1] * height)
    xmax = int(pred_bbox[2] * width)
    ymax = int(pred_bbox[3] * height)

    # Draw rectangle
    cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
    cv2.imshow("Prediction", image)
    cv2.waitKey(0)

cv2.destroyAllWindows()
