## Data Augementation

In [41]:
import os
import cv2
import glob
import random
from PIL import Image
from tqdm import tqdm
import numpy as np
import torchvision
import torchvision.transforms as transforms
from PIL import Image
import xml.etree.ElementTree as ET

class Data_Augmentor():
    IMAGE_PATH = "data/kaggle-dataset-433/train/images"
    IMAGE_OUTPUT_PATH = "data/kaggle-dataset-433/train/images-processed/"
    ANNOTATION_PATH = "data/kaggle-dataset-433/train/annotations"
    ANNOTATION_OUTPUT_PATH = "data/kaggle-dataset-433/train/annotations-processed/"
    def __init__ (self, IMAGE_PATH, ANNOTATION_PATH, OUTPUT_PATH):
        self.IMAGE_PATH = IMAGE_PATH
        self.IMAGE_OUTPUT_PATH = OUTPUT_PATH
        self.ANNOTATION_PATH = ANNOTATION_PATH
    
    def augment(self):
        transform = transforms.ColorJitter(brightness=(0.5,1.5),contrast=(1),saturation=(0.5,1.5),hue=(-0.1,0.1))
        transform1 = transforms.GaussianBlur(15)
        transform2 = transforms.RandomPerspective(.65)

        self.new_images = []
        self.new_annotations = []
        self.img_list = os.listdir(self.IMAGE_PATH)

        # delete all existing processed images
       # files = glob.glob(f'{self.IMAGE_OUTPUT_PATH}*')
       # print("Deleting Existing Files:")
       # for f in tqdm(files):
       #     os.remove(f)

        print("Augmenting Data:")
        for img in tqdm(self.img_list):
            img_path = os.path.join(self.IMAGE_PATH, img) #get image file path so we can load it with opencv
            annotation_path = os.path.join(self.ANNOTATION_PATH, img.replace('.png', '.xml')) # get required image annotations
            tree = ET.parse(annotation_path)
            
            img = Image.open(img_path)
            self.new_images.append(img) # add the existing image without modifications
            self.new_annotations.append(tree)

            img1 = transform(img)
            self.new_images.append(img1)
            self.new_annotations.append(tree)

            img2 = transform1(img)
            self.new_images.append(img2)
            self.new_annotations.append(tree)

            img3 = transform2(img)
            self.new_images.append(img3)
            self.new_annotations.append(tree)
        
        print(f"New Training Data Size: {len(self.new_images)}")
       # index = 0
       # print("Saving Files:")
       # for n in tqdm(self.new_images):
       #     # other things you need to do snipped
       #     n.save(f'{self.IMAGE_OUTPUT_PATH}Cars{index}.png')
       #     index += 1
        

data_aug = Data_Augmentor(IMAGE_PATH="data/kaggle-dataset-433/train/images", ANNOTATION_PATH="data/kaggle-dataset-433/train/annotations", OUTPUT_PATH="data/kaggle-dataset-433/train/images-processed/")      
data_aug.augment()  

Augmenting Data:


100%|██████████| 433/433 [00:20<00:00, 20.92it/s]

New Training Data Size: 1732





In [42]:
# preview first few images so we can make sure our data was processed correctly
for i in range(0, 8):
    img = data_aug.new_images[i]
    #x,y,x1,y1 = data_aug.new_annotations
    #cv2.rectangle(img, (int(x), int(y)), (int(x1), int(y1)), (255, 255, 255), 2)
    cv2.imshow(f"{i}", np.asarray(img))
    cv2.waitKey(0)
    cv2.destroyAllWindows()

## Training Dataset Preprocessing

In [30]:
import os
import cv2
import torch
import numpy as np
from tqdm import tqdm
import xml.etree.ElementTree as ET

class LPR_Training_Dataset_Processed():
    IMAGE_PATH = "data/kaggle-dataset-433/train/images-processed"
    ANNOTATION_PATH = "data/kaggle-dataset-433/train/annotations"
    TARGET_IMAGE_SIZE = 224

    training_data = []

    def create_training_data(self):
        self.img_list = os.listdir(self.IMAGE_PATH)
        for img in tqdm(self.img_list):
            img_path = os.path.join(self.IMAGE_PATH, img) #get image file path so we can load it with opencv
            annotation_path = os.path.join(self.ANNOTATION_PATH, img.replace('.png', '.xml')) # get required image annotations

            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) # read image as grayscale
            og_img_height, og_img_width = img.shape # store original shape of image so we can resize boudning box later

            img = cv2.resize(img, (self.TARGET_IMAGE_SIZE, self.TARGET_IMAGE_SIZE)) # resize image so they're all the same width and height

            # Parse the XML annotation file to extract bounding box coordinates
            root = ET.parse(annotation_path).getroot()
            
            # Iterate through the XML and extract bounding box coordinates
            for obj in root.findall('.//object'):
                bndbox = obj.find('bndbox')
                xmin = int(bndbox.find('xmin').text)
                ymin = int(bndbox.find('ymin').text)
                xmax = int(bndbox.find('xmax').text)
                ymax = int(bndbox.find('ymax').text)
            
            # calculate new scale ratio
            x_scale = self.TARGET_IMAGE_SIZE / og_img_width 
            y_scale = self.TARGET_IMAGE_SIZE / og_img_height
            bounding_box_coordinates = (xmin * x_scale, ymin * y_scale, xmax * x_scale, ymax * y_scale) # resize bounding box to fit resized image
            
            self.training_data.append([np.array(img), bounding_box_coordinates])
            #self.training_data.append(target)

        np.random.shuffle(self.training_data)

training_dataset = LPR_Training_Dataset_Processed()
training_dataset.create_training_data()

0it [00:00, ?it/s]


In [2]:
# preview first few images so we can make sure our data was processed correctly
for i in range(0, 3):
    img = training_dataset.training_data[i][0]
    x,y,x1,y1 = training_dataset.training_data[i][1]
    cv2.rectangle(img, (int(x), int(y)), (int(x1), int(y1)), (255, 255, 255), 2)
    cv2.imshow(f"{i}", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

## Model Definition

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class LPR_Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv3 = nn.Conv2d(64, 128, 5)

        x = torch.randn(224, 224).view(-1, 1, 224, 224)
        self._to_linear = None
        self.convs(x)

        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, 4)
    
    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))

        if self._to_linear is None: # used to flatten it since pytorch doesn't have tensorflow's flatten function
            self._to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]

        return x
    
    def forward(self, x):
        x = self.convs(x) # pass through all convulutional layers
        x = x.view(-1, self._to_linear) # flatten it
        x = F.relu(self.fc1(x)) # pass through fully connected (dense) layer
        x = self.fc2(x)
        # return F.softmax(x, dim = 1) # renable this when we move to the gpu
        return x
    
net = LPR_Net()

## Get data and split between test and training data

In [4]:
import torch.optim as optim

optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()

X = torch.Tensor([i[0] for i in training_dataset.training_data]).view(-1, 224, 224) # image values
X = X / 255.0
y = torch.Tensor([i[1] for i in training_dataset.training_data]) # bounding box values

VAL_PCT = 0.2 # percent of data we want to use for testing vs training
val_size = int(len(X) * VAL_PCT)

# create test and training splits
train_X = X[:-val_size]
train_y = y[:-val_size]

test_X = X[-val_size:]
test_y = y[-val_size:]

print(len(train_X))
print(len(test_X))

347
86


  X = torch.Tensor([i[0] for i in training_dataset.training_data]).view(-1, 224, 224) # image values


## Train!

In [5]:
BATCH_SIZE = 100 # reduce if memory errors
EPOCHS = 1

for epoch in range(EPOCHS):
    for i in tqdm(range(0, len(train_X), BATCH_SIZE)):
        batch_X = train_X[i:i + BATCH_SIZE].view(-1, 1, 224, 224)
        batch_y = train_y[i:i + BATCH_SIZE]
        
        optimizer.zero_grad()
        outputs = net(batch_X)

        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()

print(loss)


100%|██████████| 4/4 [00:22<00:00,  5.63s/it]

tensor(9973.1279, grad_fn=<MseLossBackward0>)





In [6]:
correct = 0
total = 0

with torch.no_grad():
    for i in tqdm(range(len(test_X))):
        real_bbox = torch.argmax(test_y[i])
        net_out = net(test_X[i].view(-1, 1, 224, 224))[0]
        predicted_bbox = torch.argmax(net_out)
        if predicted_bbox == real_bbox:
            correct+= 1
        total += 1
        #print(real_bbox, net_out)

print("Accuracy:", round((correct / total) * 100, 3), "%")

100%|██████████| 86/86 [00:02<00:00, 38.49it/s]

Accuracy: 54.651 %



