In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import glob
from PIL import Image
from bs4 import BeautifulSoup
import torch
import torchvision.models as models
import torch.optim as optim
import torchvision
from torch.utils.data import Dataset

In [3]:
images_path = "/content/drive/MyDrive/Colab Notebooks/2022/Car License Plate Detector/images"
annots_path = "/content/drive/MyDrive/Colab Notebooks/2022/Car License Plate Detector/annotations"

In [4]:
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [5]:
def createLabelFromXml(annot_path):
    fp = open(annot_path, "r")
    soup = BeautifulSoup(fp, "xml")
    
    width = int(soup.find('width').text)
    height = int(soup.find('height').text)

    xmin = int(soup.find('xmin').text)
    xmax = int(soup.find('xmax').text)
    ymin = int(soup.find('ymin').text)
    ymax = int(soup.find('ymax').text)

    xmin /= width
    xmax /= width
    ymin /= height
    ymax /= height
    
    box =  [xmin, xmax, ymin, ymax] # used as an input of the model
    
    return box

In [6]:
class LicensePlateDataset(Dataset):
    def __init__(self, mode='train', split_ratio=0.8, transform=None):
        self.mode = mode
        self.transform = transform
        self.images = glob.glob(f"{images_path}/*.png")
        self.length = len(self.images)
        self.split_ratio = split_ratio
        self.len_train = int(self.length * self.split_ratio)
        self.len_test = self.length - self.len_train
        
    def __getitem__(self, idx):
        if self.mode == 'train':
          image_path = f"{images_path}/Cars{idx}.png"
          annot_path = f"{annots_path}/Cars{idx}.xml"
        else:
          idx = self.len_train + idx
          image_path = f"{images_path}/Cars{idx}.png"
          annot_path = f"{annots_path}/Cars{idx}.xml"

        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        label = createLabelFromXml(annot_path)
        tLabel = torch.as_tensor(label, device=device)
        

        return image, tLabel

    def __len__(self):
        if self.mode == 'train':
          return self.len_train
        return self.len_test

In [7]:
transform = torchvision.transforms.Compose([
                  torchvision.transforms.Resize((224, 224)),
                  torchvision.transforms.ToTensor()
])

train_dataset = LicensePlateDataset(mode='train', split_ratio=0.8, transform=transform)
valid_dataset = LicensePlateDataset(mode='valid', split_ratio=0.2, transform=transform)

In [8]:
import torch.nn as nn
model = models.resnet101(pretrained=True)
model = nn.Sequential(
    model,
    nn.Linear(1000, 500),
    nn.ReLU(),
    nn.Linear(500, 250),
    nn.ReLU(),
    nn.Linear(250, 4),
    nn.Sigmoid(),
)
model.to(device)



Sequential(
  (0): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0)

In [9]:
learning_rate = 0.001
num_epochs = 100

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.LambdaLR(optimizer=optimizer,
                                        lr_lambda=lambda epoch: 0.95 ** epoch,
                                        last_epoch=-1,
                                        verbose=False)

In [10]:
model.train()

Sequential(
  (0): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0)

In [20]:
batch_size = 4

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

In [25]:
filepath = "/content/drive/MyDrive/Colab Notebooks/2022/Car License Plate Detector"
modelpath = "model"

model.load_state_dict(torch.load(f"{filepath}/{modelpath}/model_state_dict.pt"))
optimizer.load_state_dict(torch.load(f"{filepath}/{modelpath}/optim_state_dict.pt"))

In [33]:
import time

start = time.time()
count = 0
error = nn.MSELoss()

globalMinLoss = float('inf')
print("-------Running-------")
for epoch in range(num_epochs):
    for (images, labels) in train_loader:
        #print(f"({count}/{len(train_loader)})")
        images = images.to(device)

        optimizer.zero_grad()
        outputs = model(images.view(batch_size, 3, 224, 224))
        
        loss = error(outputs, labels)
        
        loss.backward()
        
        optimizer.step()

        count += 1
  
    with torch.no_grad():
          # predicted = torch.max(outputs.data, 1)[1]
          # valid_loss = error(outputs, labels)

          valid_loss = 0
          valid_count = 0
          # Iterate through test dataset
          for images, labels in valid_loader:
              images = images.to(device)
              # Forward propagation
              outputs = model(images.view(batch_size, 3, 224, 224))
              valid_loss += error(outputs, labels)
              valid_count += 1
              
          valid_loss /= valid_count
          if valid_loss < globalMinLoss:
              globelMinLoss = valid_loss
              best_model_state = model.state_dict()
              best_optim_state = optimizer.state_dict()
    print(f"""{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} || [{epoch}/{num_epochs}], train_loss = {loss.data}, valid_loss = {valid_loss.data}""")
print("-------End------")
elapsed = time.time() - start
print(f"End of training, elapsed time : {elapsed // 60} min {elapsed % 60} sec.")

-------Running-------
2022-12-27 07:35:30 || [0/100], train_loss = 0.053440824151039124, valid_loss = 0.024240726605057716
2022-12-27 07:35:50 || [1/100], train_loss = 0.008265111595392227, valid_loss = 0.02377413772046566
2022-12-27 07:36:09 || [2/100], train_loss = 0.029508762061595917, valid_loss = 0.02327939309179783
2022-12-27 07:36:28 || [3/100], train_loss = 0.035046063363552094, valid_loss = 0.022633709013462067
2022-12-27 07:36:47 || [4/100], train_loss = 0.03529733419418335, valid_loss = 0.022419214248657227
2022-12-27 07:37:07 || [5/100], train_loss = 0.010951386764645576, valid_loss = 0.021453382447361946
2022-12-27 07:37:26 || [6/100], train_loss = 0.010484341531991959, valid_loss = 0.02102936990559101
2022-12-27 07:37:46 || [7/100], train_loss = 0.01588531769812107, valid_loss = 0.020458292216062546
2022-12-27 07:38:06 || [8/100], train_loss = 0.02943352796137333, valid_loss = 0.019967271015048027
2022-12-27 07:38:26 || [9/100], train_loss = 0.016529332846403122, valid_lo

KeyboardInterrupt: ignored

In [34]:
filepath = "/content/drive/MyDrive/Colab Notebooks/2022/Car License Plate Detector"
modelpath = "model"
if best_model_state is not None and best_optim_state is not None:
    torch.save(best_model_state, f"{filepath}/{modelpath}/model_state_dict.pt")
    torch.save(best_optim_state, f"{filepath}/{modelpath}/optim_state_dict.pt")
    # model.load_state_dict(best_model_state)
    # torch.save(model, f"{filepath}/{experiment}/best_model.pt")
    print("Successfully saved.")

Successfully saved.


In [36]:
images_path = "/content/drive/MyDrive/Colab Notebooks/2022/Car License Plate Detector/images"
annots_path = "/content/drive/MyDrive/Colab Notebooks/2022/Car License Plate Detector/annotations"

idx = 100

image_path = f"{images_path}/Cars{idx}.png"
annot_path = f"{annots_path}/Cars{idx}.xml"

image = Image.open(image_path).convert("RGB")
image = transform(image).to(device)

label = createLabelFromXml(annot_path)

model.eval()

output = model(image.view(1, 3, 224, 224))
print(f"Predicted = {output}")
print(f"Label = {label}")

Predicted = tensor([[0.4078, 0.5211, 0.5940, 0.7356]], device='cuda:0',
       grad_fn=<SigmoidBackward0>)
Label = [0.4375, 0.535, 0.42696629213483145, 0.49063670411985016]
