## Training Dataset Preprocessing

In [86]:
import os
import cv2
import torch
import numpy as np
from tqdm import tqdm
import xml.etree.ElementTree as ET

class LPR_Training_Dataset_Processed():
    IMAGE_PATH = "data/kaggle-dataset-433/train/images"
    ANNOTATION_PATH = "data/kaggle-dataset-433/train/annotations"
    TARGET_IMAGE_SIZE = 224

    training_data = []

    def create_training_data(self):
        self.img_list = os.listdir(self.IMAGE_PATH)
        for img in tqdm(self.img_list):
            img_path = os.path.join(self.IMAGE_PATH, img) #get image file path so we can load it with opencv
            img_path = os.path.join(self.IMAGE_PATH, img) #get image file path so we can load it with opencv
            annotation_path = os.path.join(self.ANNOTATION_PATH, img.replace('.png', '.xml')) # get required image annotations

            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) # read image as grayscale
            og_img_height, og_img_width = img.shape # store original shape of image so we can resize boudning box later

            img = cv2.resize(img, (self.TARGET_IMAGE_SIZE, self.TARGET_IMAGE_SIZE)) # resize image so they're all the same width and height

            # Parse the XML annotation file to extract bounding box coordinates
            root = ET.parse(annotation_path).getroot()
            
            # Iterate through the XML and extract bounding box coordinates
            for obj in root.findall('.//object'):
                bndbox = obj.find('bndbox')
                xmin = int(bndbox.find('xmin').text)
                ymin = int(bndbox.find('ymin').text)
                xmax = int(bndbox.find('xmax').text)
                ymax = int(bndbox.find('ymax').text)
            
            # calculate new scale ratio
            x_scale = self.TARGET_IMAGE_SIZE / og_img_width 
            y_scale = self.TARGET_IMAGE_SIZE / og_img_height
            bounding_box_coordinates = (xmin * x_scale, ymin * y_scale, xmax * x_scale, ymax * y_scale) # resize bounding box to fit resized image

            target = {
                "image": torch.tensor(img, dtype=torch.float32),
                "bbox": torch.tensor(bounding_box_coordinates, dtype=torch.float32),  # Replace with actual bounding box coordinates
            }
            
            self.training_data.append([np.array(img), bounding_box_coordinates])
            #self.training_data.append(target)

        np.random.shuffle(self.training_data)

training_dataset = LPR_Training_Dataset_Processed()
training_dataset.create_training_data()

100%|██████████| 433/433 [00:09<00:00, 44.77it/s]


In [87]:
# preview first few images so we can make sure our data was processed correctly
for i in range(0, 3):
    img = training_dataset.training_data[i][0]
    x,y,x1,y1 = training_dataset.training_data[i][1]
    cv2.rectangle(img, (int(x), int(y)), (int(x1), int(y1)), (255, 255, 255), 2)
    cv2.imshow(f"{i}", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

## Model Definition

In [96]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class LPR_Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv3 = nn.Conv2d(64, 128, 5)

        x = torch.randn(224, 224).view(-1, 1, 224, 224)
        self._to_linear = None
        self.convs(x)

        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, 4)
    
    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))

        if self._to_linear is None: # used to flatten it since pytorch doesn't have tensorflow's flatten function
            self._to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]

        return x
    
    def forward(self, x):
        x = self.convs(x) # pass through all convulutional layers
        x = x.view(-1, self._to_linear) # flatten it
        x = F.relu(self.fc1(x)) # pass through fully connected (dense) layer
        x = self.fc2(x)
        # return F.softmax(x, dim = 1) # renable this when we move to the gpu
        return x
    
net = LPR_Net()

## Get data and split between test and training data

In [114]:
import torch.optim as optim

optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()

X = torch.Tensor([i[0] for i in training_dataset.training_data]).view(-1, 224, 224) # image values
X = X / 255.0
y = torch.Tensor([i[1] for i in training_dataset.training_data]) # bounding box values

VAL_PCT = 0.2 # percent of data we want to use for testing vs training
val_size = int(len(X) * VAL_PCT)

# create test and training splits
train_X = X[:-val_size]
train_y = y[:-val_size]

test_X = X[-val_size:]
test_y = y[-val_size:]

print(len(train_X))
print(len(test_X))

347
86


## Train!

In [115]:
BATCH_SIZE = 200 # reduce if memory errors
EPOCHS = 1

for epoch in range(EPOCHS):
    for i in tqdm(range(0, len(train_X), BATCH_SIZE)):
        batch_X = train_X[i:i + BATCH_SIZE].view(-1, 1, 224, 224)
        batch_y = train_y[i:i + BATCH_SIZE]
        
        optimizer.zero_grad()
        outputs = net(batch_X)

        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()

print(loss)


 50%|█████     | 1/2 [00:09<00:09,  9.99s/it]

In [113]:
correct = 0
total = 0

with torch.no_grad():
    for i in tqdm(range(len(test_X))):
        real_bbox = torch.argmax(test_y[i])
        net_out = net(test_X[i].view(-1, 1, 224, 224))[0]
        predicted_bbox = torch.argmax(net_out)
        if predicted_bbox == real_bbox:
            correct+= 1
        total += 1
        #print(real_bbox, net_out)

print("Accuracy:", round((correct / total) * 100, 3), "%")

  5%|▍         | 4/86 [00:00<00:02, 34.58it/s]

tensor(16073) tensor([ 97.4372, 128.0085, 141.3612, 152.1040])
tensor(1908) tensor([ 93.9569, 121.5841, 133.6109, 143.9028])
tensor(16) tensor([117.3144, 155.3068, 172.8628, 184.4475])
tensor(18801) tensor([ 92.3751, 121.8094, 133.6267, 144.7334])
tensor(32833) tensor([ 97.8014, 128.7879, 139.5008, 152.5691])
tensor(25361) tensor([ 88.3401, 117.8878, 127.7101, 140.4465])
tensor(4138) tensor([ 94.9349, 123.3897, 135.2036, 147.4751])
tensor(317) tensor([ 95.3929, 124.0430, 139.6458, 148.0752])


 14%|█▍        | 12/86 [00:00<00:02, 34.85it/s]

tensor(17797) tensor([ 93.1445, 125.5879, 139.1537, 150.6273])
tensor(9893) tensor([ 93.0103, 121.2170, 134.5473, 144.0713])
tensor(5050) tensor([103.0174, 136.3095, 149.0741, 162.7751])
tensor(253) tensor([108.1301, 143.5415, 153.0275, 169.9879])
tensor(25577) tensor([ 95.8196, 126.7485, 139.0870, 150.3461])
tensor(60) tensor([ 97.2283, 127.8736, 141.5655, 152.1610])
tensor(29638) tensor([ 95.3721, 128.0282, 141.6579, 153.8211])
tensor(36833) tensor([ 85.1366, 115.7788, 123.9361, 138.8779])


 23%|██▎       | 20/86 [00:00<00:01, 34.84it/s]

tensor(1) tensor([ 98.1077, 126.6055, 140.1721, 150.0782])
tensor(15339) tensor([101.1724, 133.2067, 146.9442, 159.7884])
tensor(18801) tensor([ 92.3751, 121.8094, 133.6267, 144.7334])
tensor(22230) tensor([ 95.8672, 126.2827, 136.7087, 150.0280])
tensor(18321) tensor([ 96.6528, 129.4136, 142.5584, 154.3954])
tensor(0) tensor([ 95.5313, 123.0348, 137.6064, 146.1949])
tensor(0) tensor([ 88.1194, 115.2480, 124.8194, 136.4398])
tensor(37513) tensor([ 93.0694, 126.3182, 136.1596, 150.5154])


 33%|███▎      | 28/86 [00:00<00:01, 35.90it/s]

tensor(2759) tensor([105.5791, 137.2527, 151.2492, 162.4810])
tensor(15053) tensor([ 98.2353, 130.8247, 142.2071, 156.3328])
tensor(16047) tensor([102.3482, 138.6501, 151.9633, 166.5702])
tensor(4071) tensor([ 86.9932, 112.5907, 125.7916, 134.1803])
tensor(20673) tensor([ 98.0258, 129.6974, 144.6016, 155.5723])
tensor(1197) tensor([109.2874, 144.4073, 161.8487, 174.7138])
tensor(25680) tensor([ 98.0312, 128.7420, 142.8754, 153.9659])


 42%|████▏     | 36/86 [00:01<00:01, 36.45it/s]

tensor(9338) tensor([ 91.4903, 116.7402, 130.1763, 139.2974])
tensor(6293) tensor([ 97.6923, 126.0343, 138.9505, 149.2017])
tensor(6) tensor([111.0835, 145.0697, 161.5834, 172.4886])
tensor(9731) tensor([ 93.4820, 122.7396, 135.4609, 145.8431])
tensor(25386) tensor([ 98.3274, 133.7108, 144.0914, 159.7731])
tensor(615) tensor([100.3376, 129.5964, 145.1265, 154.4207])
tensor(6047) tensor([104.8359, 140.1143, 151.6188, 168.0882])
tensor(15671) tensor([ 92.4558, 121.4620, 135.7469, 144.8132])


 51%|█████     | 44/86 [00:01<00:01, 36.67it/s]

tensor(4016) tensor([ 92.4730, 126.2923, 139.1069, 152.1047])
tensor(16047) tensor([102.3482, 138.6501, 151.9633, 166.5702])
tensor(10774) tensor([109.5835, 142.1909, 158.7019, 169.0581])
tensor(213) tensor([105.7385, 140.2079, 157.0114, 168.2103])
tensor(15003) tensor([ 96.5297, 131.0375, 143.7894, 157.2905])
tensor(40201) tensor([ 85.3851, 114.8348, 124.2730, 136.9311])
tensor(1111) tensor([ 93.1914, 124.5922, 134.7867, 147.7395])
tensor(7068) tensor([ 89.2671, 117.1594, 131.5582, 140.6028])


 60%|██████    | 52/86 [00:01<00:00, 36.09it/s]

tensor(813) tensor([ 98.2098, 127.9973, 140.6530, 152.0562])
tensor(30102) tensor([104.7642, 139.0849, 149.0810, 164.5651])
tensor(34003) tensor([ 99.9776, 132.8760, 144.5134, 158.6061])
tensor(25680) tensor([ 98.0312, 128.7420, 142.8754, 153.9659])
tensor(4) tensor([102.4706, 131.6171, 145.1293, 155.6521])
tensor(0) tensor([106.2045, 138.3496, 149.8191, 163.0529])
tensor(4819) tensor([ 90.1949, 115.1445, 130.8206, 138.1930])
tensor(4138) tensor([ 94.9349, 123.3897, 135.2036, 147.4751])


 70%|██████▉   | 60/86 [00:01<00:00, 36.22it/s]

tensor(4) tensor([102.4706, 131.6171, 145.1293, 155.6521])
tensor(169) tensor([ 94.9581, 124.6580, 137.3971, 147.9001])
tensor(6293) tensor([ 97.6923, 126.0343, 138.9505, 149.2017])
tensor(23499) tensor([107.2231, 141.6014, 152.3703, 166.7341])
tensor(12297) tensor([ 94.9508, 125.1794, 140.0413, 149.8656])
tensor(18475) tensor([ 91.5642, 123.5812, 135.8500, 148.5611])
tensor(1873) tensor([ 97.9181, 128.8577, 142.7215, 153.8973])
tensor(10158) tensor([ 99.1023, 128.8680, 142.1537, 152.9218])


 79%|███████▉  | 68/86 [00:01<00:00, 36.83it/s]

tensor(6051) tensor([100.0227, 130.8968, 141.6629, 154.4566])
tensor(4491) tensor([106.1635, 134.8085, 153.1910, 161.4216])
tensor(0) tensor([ 94.4917, 120.6218, 133.6364, 142.3935])
tensor(183) tensor([112.1805, 144.8567, 161.7956, 172.1727])
tensor(34205) tensor([102.6337, 136.7654, 149.4545, 163.2578])
tensor(9463) tensor([ 88.7242, 118.2353, 128.5640, 140.4069])
tensor(828) tensor([ 93.3626, 122.7237, 138.4367, 147.7518])
tensor(10855) tensor([ 95.5941, 126.6404, 141.8030, 151.6874])
tensor(6665) tensor([ 95.3010, 126.2136, 137.3140, 149.9783])


 93%|█████████▎| 80/86 [00:02<00:00, 37.68it/s]

tensor(28748) tensor([ 98.3017, 130.5105, 144.3410, 156.3541])
tensor(0) tensor([107.9059, 140.3332, 156.3988, 167.0840])
tensor(196) tensor([ 96.3481, 125.0222, 140.1848, 148.7571])
tensor(133) tensor([ 98.5281, 127.6897, 142.9275, 152.2394])
tensor(1427) tensor([106.2618, 140.9383, 152.3415, 167.1160])
tensor(7310) tensor([ 94.2236, 126.1532, 139.2620, 152.4997])
tensor(32672) tensor([ 96.5761, 128.0743, 136.8374, 151.1428])
tensor(6594) tensor([103.5564, 136.0187, 151.4154, 162.8703])


100%|██████████| 86/86 [00:02<00:00, 36.52it/s]

tensor(12297) tensor([ 94.9508, 125.1794, 140.0413, 149.8656])
tensor(183) tensor([112.1805, 144.8567, 161.7956, 172.1727])
tensor(0) tensor([ 97.7727, 125.5524, 139.4683, 148.1193])
tensor(17805) tensor([100.1403, 128.1829, 142.8204, 152.5555])
tensor(150) tensor([ 98.4621, 128.7306, 142.6078, 153.3545])
tensor(6372) tensor([ 99.0179, 131.3127, 142.2078, 156.9124])
Accuracy: 0.0 %



