In [None]:
# Mount Google Drive into the Colab filesystem; the dataset and checkpoint
# paths used in this notebook all live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import copy
import glob
import os

import torch
import torch.optim as optim
import torchvision
import torchvision.models as models
from bs4 import BeautifulSoup
from PIL import Image
from torch.utils.data import Dataset

In [None]:
# Dataset locations on the mounted Drive. The dataset class and customEval
# build file names of the form Cars{idx}.png / Cars{idx}.xml under these folders.
images_path = "/content/drive/MyDrive/Colab Notebooks/2022/Car License Plate Detector/images"
annots_path = "/content/drive/MyDrive/Colab Notebooks/2022/Car License Plate Detector/annotations"

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [None]:
def createLabelFromXml(annot_path):
    """Read one annotation XML file and return a normalized bounding box.

    The first <width>, <height>, <xmin>, <xmax>, <ymin> and <ymax> elements
    found in the document are used, so only the first annotated box is
    returned if the file contains several.

    Args:
        annot_path: Path to the annotation XML file.

    Returns:
        A list ``[xmin, xmax, ymin, ymax]`` where the x values are divided by
        the image width and the y values by the image height.
    """
    # Parse inside a context manager so the file handle is always closed
    # (the dataset calls this once per sample, which would otherwise leak
    # one open handle per item).
    with open(annot_path, "r") as fp:
        soup = BeautifulSoup(fp, "xml")

    width = int(soup.find('width').text)
    height = int(soup.find('height').text)

    xmin = int(soup.find('xmin').text)
    xmax = int(soup.find('xmax').text)
    ymin = int(soup.find('ymin').text)
    ymax = int(soup.find('ymax').text)

    # Note the order: both x values first, then both y values. The model's
    # four outputs are interpreted in this same order elsewhere in the notebook.
    box = [xmin / width, xmax / width, ymin / height, ymax / height]

    return box

In [None]:
class LicensePlateDataset(Dataset):
    """Image / bounding-box dataset backed by Cars{idx}.png and Cars{idx}.xml files.

    The PNG files under ``images_path`` are globbed only to count them; samples
    are addressed by building the file name from the integer index. The first
    ``int(length * split_ratio)`` indices form the 'train' part, and every
    other mode reads the indices after that.

    Args:
        mode: 'train' selects the leading part of the index range; any other
            value selects the trailing part.
        split_ratio: Fraction of the images assigned to the 'train' part.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, mode='train', split_ratio=0.8, transform=None):
        self.mode = mode
        self.transform = transform
        self.split_ratio = split_ratio

        self.images = glob.glob(f"{images_path}/*.png")
        self.length = len(self.images)
        self.len_train = int(self.length * self.split_ratio)
        self.len_test = self.length - self.len_train

    def __len__(self):
        return self.len_train if self.mode == 'train' else self.len_test

    def __getitem__(self, idx):
        # Non-train samples live after the training range, so shift the index.
        if self.mode != 'train':
            idx = self.len_train + idx

        image_path = f"{images_path}/Cars{idx}.png"
        annot_path = f"{annots_path}/Cars{idx}.xml"

        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        # The label tensor is created directly on the module-level `device`.
        tLabel = torch.as_tensor(createLabelFromXml(annot_path), device=device)

        return image, tLabel

In [None]:
# Resize every image to the 224x224 input used by the model and convert it to a tensor.
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((224, 224)),
    torchvision.transforms.ToTensor(),
])

# Both datasets must be built with the SAME split ratio: the dataset uses it to
# compute where the training range ends, and the non-train mode starts reading
# right after that point. Passing a different ratio for validation (previously
# 0.1) makes the validation indices start at 10% of the data and overlap the
# training indices.
train_split_ratio = 0.9

train_dataset = LicensePlateDataset(mode='train', split_ratio=train_split_ratio, transform=transform)
valid_dataset = LicensePlateDataset(mode='valid', split_ratio=train_split_ratio, transform=transform)

In [None]:
import torch.nn as nn

# ResNet-101 (pretrained) followed by a small fully connected regressor that
# maps its 1000 outputs to 4 values squashed into (0, 1) by a sigmoid.
# Module order is kept as-is so the indices in saved state dicts still match.
model = nn.Sequential(
    models.resnet101(pretrained=True),
    nn.Linear(1000, 500),
    nn.ReLU(),
    nn.Linear(500, 250),
    nn.ReLU(),
    nn.Linear(250, 4),
    nn.Sigmoid(),
)
model.to(device)

Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth


  0%|          | 0.00/171M [00:00<?, ?B/s]

Sequential(
  (0): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0)

In [None]:
learning_rate = 0.001
num_epochs = 1000

# Plain SGD over every parameter of the model (backbone and regressor alike).
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Multiplies the base learning rate by 0.95 ** epoch each time step() is called.
# The deprecated `verbose` argument is no longer passed; its old value (False)
# was the default behaviour anyway.
# NOTE(review): the training loop in this notebook never calls scheduler.step(),
# so this decay is currently inactive — confirm whether that is intended.
scheduler = optim.lr_scheduler.LambdaLR(optimizer=optimizer,
                                        lr_lambda=lambda epoch: 0.95 ** epoch,
                                        last_epoch=-1)

In [None]:
batch_size = 4

# Both loaders shuffle and drop the final incomplete batch, so every batch
# they yield holds exactly `batch_size` samples.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)
valid_loader = torch.utils.data.DataLoader(
    dataset=valid_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

In [None]:
filepath = "/content/drive/MyDrive/Colab Notebooks/2022/Car License Plate Detector"
modelpath = "model"

model_ckpt_path = f"{filepath}/{modelpath}/model_state_dict.pt"
optim_ckpt_path = f"{filepath}/{modelpath}/optim_state_dict.pt"

# Resume from a previous run only when both checkpoint files exist, so a fresh
# "Restart & Run All" without saved weights does not crash here.
# map_location lets a checkpoint written on GPU be loaded on a CPU-only runtime.
if os.path.exists(model_ckpt_path) and os.path.exists(optim_ckpt_path):
    model.load_state_dict(torch.load(model_ckpt_path, map_location=device))
    optimizer.load_state_dict(torch.load(optim_ckpt_path, map_location=device))
    print("Checkpoint loaded.")
else:
    print("No checkpoint found; starting from the current model weights.")

In [None]:


import time

start = time.time()
count = 0  # cumulative number of training batches processed
error = nn.MSELoss()

globalMinLoss = float('inf')
# Defined before the loop so the cell that re-saves the best checkpoint can
# safely test them for None even if validation never ran.
best_model_state = None
best_optim_state = None

model.train()
print("-------Running-------")
for epoch in range(num_epochs):

    for (images, labels) in train_loader:
        images = images.to(device)
        labels = labels.to(device)  # no-op if the dataset already put them there

        optimizer.zero_grad()
        # Batches already arrive as (batch, 3, 224, 224) from the transform,
        # so no reshape is needed.
        outputs = model(images)

        loss = error(outputs, labels)

        loss.backward()

        optimizer.step()

        count += 1

    # NOTE(review): `count` is the running batch total, so validation only
    # happens on epochs where that total is a multiple of 10 — confirm this is
    # intended rather than a check on `epoch`.
    if count % 10 == 0:
        # Switch to eval mode so BatchNorm uses its running statistics and is
        # not updated by validation data; no_grad alone does not do that.
        model.eval()
        with torch.no_grad():
            valid_loss = 0
            valid_count = 0
            for images, labels in valid_loader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                valid_loss += error(outputs, labels)
                valid_count += 1

            valid_loss /= valid_count
        model.train()

        if valid_loss < globalMinLoss:
            # Previously a typo (`globelMinLoss`) left the best loss at +inf,
            # so every validation pass was treated as a new best.
            globalMinLoss = valid_loss
            # state_dict() returns references to the live tensors; deep-copy so
            # the snapshot is not silently overwritten by later training steps.
            best_model_state = copy.deepcopy(model.state_dict())
            best_optim_state = copy.deepcopy(optimizer.state_dict())
            torch.save(best_model_state, f"{filepath}/{modelpath}/model_state_dict.pt")
            torch.save(best_optim_state, f"{filepath}/{modelpath}/optim_state_dict.pt")
            print("Successfully saved.")

        print(f"""{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} || [{epoch}/{num_epochs}], train_loss = {loss.data}, valid_loss = {valid_loss.data}""")
print("-------End------")
elapsed = time.time() - start
print(f"End of training, elapsed time : {elapsed // 60} min {elapsed % 60} sec.")

In [None]:
modelpath = "model"
filepath = "/content/drive/MyDrive/Colab Notebooks/2022/Car License Plate Detector"

# Write the best model / optimizer snapshots to Drive, but only when both exist.
if not (best_model_state is None or best_optim_state is None):
    torch.save(best_model_state, f"{filepath}/{modelpath}/model_state_dict.pt")
    torch.save(best_optim_state, f"{filepath}/{modelpath}/optim_state_dict.pt")
    print("Successfully saved.")

Successfully saved.


In [None]:
def customEval(idx):
    """Run the model on image ``Cars{idx}.png`` and return its predicted box.

    The image is loaded from the module-level ``images_path``, converted to
    RGB, passed through the module-level ``transform`` and evaluated as a
    batch of one. The model is left in eval mode afterwards.

    Args:
        idx: Integer used to build the Cars{idx}.png / Cars{idx}.xml file names.

    Returns:
        A tuple ``(prediction, label, image_path)`` where ``prediction`` is the
        model's four outputs as a list, ``label`` is the normalized
        ``[xmin, xmax, ymin, ymax]`` box from the annotation file, and
        ``image_path`` is the path of the image that was evaluated.
    """
    image_path = f"{images_path}/Cars{idx}.png"
    annot_path = f"{annots_path}/Cars{idx}.xml"

    image = Image.open(image_path).convert("RGB")
    imageTensor = transform(image).to(device)

    label = createLabelFromXml(annot_path)

    model.eval()

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        output = model(imageTensor.unsqueeze(0))  # add the batch dimension
    return output.tolist()[0], label, image_path

In [None]:
import numpy as np
import cv2
from google.colab.patches import cv2_imshow



# Keep the path returned by customEval: it is needed to reload the image with
# OpenCV below (discarding it caused a NameError on `image_path`).
[xmin_pred, xmax_pred, ymin_pred, ymax_pred], label, image_path = customEval(6)

img_org = cv2.imread(image_path)
img = img_org.copy()
# OpenCV images are indexed (row, column, channel), i.e. (height, width, channels).
height, width, _ = img.shape

print(f"Image shape is {height, width}")

# Scale the normalized prediction back to pixel coordinates.
xmin_pred = int(width * xmin_pred)
xmax_pred = int(width * xmax_pred)
ymin_pred = int(height * ymin_pred)
ymax_pred = int(height * ymax_pred)

# cv2.rectangle clips to the image, so a coordinate equal to the image size no
# longer raises an IndexError the way direct array indexing did.
cv2.rectangle(img, (xmin_pred, ymin_pred), (xmax_pred, ymax_pred), (255, 255, 0), 1)
cv2_imshow(img)

NameError: ignored

In [None]:
!sudo apt install tesseract-ocr
!pip install pytesseract

In [None]:
import pytesseract as pt

In [None]:
idx = 4
output, label, image_path = customEval(idx)
image = cv2.imread(image_path)

# image.shape is (height, width, channels): `col` receives the height and
# `row` the width, which is why x values are scaled by `row` and y by `col`.
col, row, _ = image.shape
xmin, xmax, ymin, ymax = (
    int(fraction * extent)
    for fraction, extent in zip(output, (row, row, col, col))
)

# Cut the predicted plate region out of the full image and show it.
plateImage = image[ymin:ymax, xmin:xmax, :]
cv2_imshow(plateImage)

In [None]:
# Run Tesseract OCR on the cropped plate region and display the recognized text.
text = pt.image_to_string(plateImage)
text

NameError: ignored

In [None]:
def get_grayscale(image):
    """Return a grayscale version of ``image`` using OpenCV's BGR-to-gray conversion."""
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray_image

def thresholding(image):
    """Binarize ``image`` with ``cv2.threshold`` using the BINARY + OTSU flags.

    Only the thresholded image is returned; the threshold value that
    ``cv2.threshold`` also reports is discarded.
    """
    _, binary_image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary_image

def opening(image):
    """Apply a morphological opening to ``image`` with a 5x5 all-ones kernel."""
    structuring_element = np.ones((5, 5), np.uint8)
    opened_image = cv2.morphologyEx(image, cv2.MORPH_OPEN, structuring_element)
    return opened_image

def canny(image):
    """Return the Canny edge map of ``image`` using thresholds 100 and 200."""
    lower_threshold, upper_threshold = 100, 200
    return cv2.Canny(image, lower_threshold, upper_threshold)

In [None]:
# Build several preprocessed variants of the cropped plate and show the edge map.
# NOTE(review): the last two assignments rebind `opening` and `canny` from the
# helper functions to their result images, so this cell fails if it is run a
# second time without re-running the helper definitions. A later cell reads
# `canny` as the image, so renaming needs both cells changed together.
gray = get_grayscale(plateImage)
thresh = thresholding(gray)
opening = opening(gray)
canny = canny(gray)
cv2_imshow(canny)

NameError: ignored

In [None]:
# Run OCR on the edge map; at this point `canny` is the image produced by the
# preprocessing cell, not the helper function of the same name.
text = pt.image_to_string(canny)
text

NameError: ignored