# 1. Data Preprocessing Code

In [57]:
import os
import cv2
import torch
from torch.utils.data import Dataset
import xml.etree.ElementTree as ET
from torchvision import transforms
import matplotlib.pyplot as plt
import numpy as np

class CustomALPRDataset(Dataset):
    """Dataset pairing images with a bounding box read from XML annotations.

    Every file name found in ``image_dir`` is one sample. Its annotation is the
    file with the same base name and an ``.xml`` extension in
    ``annotation_dir``. Annotations are read as ``object/bndbox`` elements with
    integer ``xmin``/``ymin``/``xmax``/``ymax`` children.

    Args:
        image_dir: Directory containing the image files.
        annotation_dir: Directory containing the matching ``.xml`` files.
        transform: Optional callable applied to the loaded RGB ``numpy`` image.
            The bounding box is rescaled to the transformed image size, which
            assumes the transform only resizes (no crop/flip) -- verify if the
            pipeline changes.
        show_preview: When true (the default, preserving existing behaviour)
            every ``__getitem__`` call draws the box on the image and shows it
            with ``cv2.imshow``; ``cv2.waitKey(0)`` then blocks until a key is
            pressed. Pass ``False`` for training loops, ``DataLoader`` use or
            headless runs.
    """

    def __init__(self, image_dir, annotation_dir, transform=None, show_preview=True):
        self.image_dir = image_dir
        self.annotation_dir = annotation_dir
        self.transform = transform
        self.show_preview = show_preview
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> image mapping reproducible between runs.
        self.image_list = sorted(os.listdir(image_dir))

    def __len__(self):
        """Return the number of entries found in ``image_dir``."""
        return len(self.image_list)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            dict with ``"image"`` (float32 tensor of the possibly transformed
            image) and ``"bbox"`` (float32 tensor ``[xmin, ymin, xmax, ymax]``
            expressed in the coordinates of the returned image).

        Raises:
            IndexError: ``idx`` is outside the dataset.
            OSError: OpenCV could not load the image file.
            ValueError: the annotation contains no ``<object>`` element.
        """
        image_name = self.image_list[idx]
        image_path = os.path.join(self.image_dir, image_name)

        # Load as grayscale, then expand to three identical channels. The
        # grayscale result is single-channel, so it must be expanded with
        # GRAY2RGB; BGR2RGB only accepts 3/4-channel input.
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"cv2.imread could not load image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        # Remember the original size so the bounding box can be rescaled.
        og_height, og_width = image.shape[:2]

        if self.transform:
            image = self.transform(image)

        xmin, ymin, xmax, ymax = self._read_bbox(self._annotation_path(image_name))

        # Scale the box by the actual output size rather than a fixed 224, so
        # it stays correct without a transform or with another target size.
        new_height, new_width = self._spatial_size(image)
        x_scale = new_width / og_width
        y_scale = new_height / og_height
        bounding_box_coordinates = (xmin * x_scale, ymin * y_scale, xmax * x_scale, ymax * y_scale)

        target = {
            # as_tensor accepts both tensors and arrays and avoids the
            # copy-construct UserWarning torch.tensor() emits for tensors.
            "image": torch.as_tensor(image, dtype=torch.float32),
            "bbox": torch.tensor(bounding_box_coordinates, dtype=torch.float32),
        }

        if self.show_preview:
            self._preview(image, bounding_box_coordinates)

        return target

    def _annotation_path(self, image_name):
        """Return the ``.xml`` path for ``image_name``, whatever its extension."""
        stem = os.path.splitext(image_name)[0]
        return os.path.join(self.annotation_dir, stem + '.xml')

    @staticmethod
    def _read_bbox(annotation_path):
        """Parse ``(xmin, ymin, xmax, ymax)`` from an XML annotation file.

        If several objects are annotated, the last one is used.
        """
        root = ET.parse(annotation_path).getroot()

        box = None
        for obj in root.findall('.//object'):
            bndbox = obj.find('bndbox')
            box = (
                int(bndbox.find('xmin').text),
                int(bndbox.find('ymin').text),
                int(bndbox.find('xmax').text),
                int(bndbox.find('ymax').text),
            )

        if box is None:
            raise ValueError(f"No <object> element found in annotation: {annotation_path}")
        return box

    @staticmethod
    def _spatial_size(image):
        """Return ``(height, width)`` of ``image``.

        Tensors are assumed to be channel-first (``[..., H, W]``, the layout
        torchvision's ``ToTensor`` produces); anything else is treated as a
        height-first array such as an OpenCV image.
        """
        if isinstance(image, torch.Tensor):
            return image.shape[-2], image.shape[-1]
        return np.asarray(image).shape[:2]

    @staticmethod
    def _preview(image, bbox):
        """Show ``image`` with ``bbox`` drawn on it; blocks until a key press."""
        to_pil = transforms.ToPILImage()
        image_np = np.array(to_pil(image))

        if image_np.dtype != np.uint8:
            image_np = image_np.astype(np.uint8)

        x, y, x1, y1 = bbox
        cv2.rectangle(image_np, (int(x), int(y)), (int(x1), int(y1)), (255, 255, 255), 2)
        cv2.imshow("image", image_np)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

# Preprocessing applied to every image before it is returned by the dataset.
transform = transforms.Compose([
    transforms.ToPILImage(),  # The dataset passes in a numpy image; Resize works on PIL images
    transforms.Resize((224, 224)),  # Resize the image to the desired size
    transforms.ToTensor(),  # Convert to PyTorch tensor
   # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize
])

# Create an instance of the custom dataset
lpr_training_dataset = CustomALPRDataset(
    image_dir='data/kaggle-dataset-433/train/images',
    annotation_dir='data/kaggle-dataset-433/train/annotations',
    transform=transform,
)

# Pull the first few samples through the dataset as a quick check. The count is
# capped at the dataset size so a small dataset does not raise IndexError.
for i in range(min(10, len(lpr_training_dataset))):
    image = lpr_training_dataset[i].get("image")


Create a training & test dataset

The training set is the dataset we just built; the test set will be Declan's photos.

In [None]:
# The data is too large to push through the model in one go, so it is fed in
# batches. batch_size is how many samples are passed to the model at a time;
# 8-64 is the usual range. Larger batches train faster, but accuracy might
# drop if the batch is too big.
train_set = torch.utils.data.DataLoader(
    lpr_training_dataset,
    batch_size=10,
    shuffle=True,
)
#test_set = torch.utils.data.DataLoader(test_data, batch_size=10, shuffle=False)

# 2. Model Definition

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Model skeleton: an ``nn.Module`` subclass with no layers defined yet."""

    def __init__(self):
        """Initialise the ``nn.Module`` base class; no layers are created."""
        super().__init__()

        # TODO: define layers

# 3. Training

# 4. Testing

# 5. Validating / Predictions