## Training YOLOv5 on dataset of 440 images containing license plates.

#### We will get the best and last weights after training the cloned model on the dataset over 100 epochs and store them in the working directory for further use.

### 1. Importing Required Libraries

In [1]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
import os

# Dealing with XML files containing anchor points for respective images
import xml.etree.ElementTree as ET

# Copying,Pasting and Opening files
import shutil
from glob import glob

# For progress bar in loops
from tqdm import tqdm

# Dealing with images
import matplotlib.pyplot as plt
import cv2

# For training and importing dataset as well as model
import yaml
import torch

In [2]:
# Creating arrays for storing file names/paths (strings) and dictionaries to store size and bounding box proportions.
filenames = []

size_props = {
    'height':[],
    'width':[]
}

bounding_box_props = {
    'xmin':[],
    'ymin':[],
    'xmax':[],
    'ymax':[]
}

### 2. Extracting Bounding Box and Size parameters from XML file

In [3]:
annotations_path = '../input/car-plate-detection/annotations'

for file in tqdm(os.listdir(annotations_path)):
    # Parsing the XML files from annotations folder
    annotation = ET.parse(os.path.join(annotations_path, file))
    
    # Appending specific filenames to the array
    filenames.append(os.path.join(annotations_path, file))
    size = annotation.find('size')
    
    # Appending bounding box proportion values to the arrays in dictionary
    for name, prop_list in size_props.items():
        prop_value = size.find(name).text
        size_props[name].append(int(prop_value))
    bounding_box = annotation.find('object').find('bndbox')
    for name, prop_list in bounding_box_props.items():
        prop_value = bounding_box.find(name).text
        bounding_box_props[name].append(int(prop_value))

100%|██████████| 433/433 [00:01<00:00, 285.95it/s]


### 3. Creating original dataset with extracted values 

In [4]:
df = pd.DataFrame({
    'file':filenames,
    'width':size_props['width'],
    'height':size_props['height'],
    'xmin':bounding_box_props['xmin'],
    'ymin':bounding_box_props['ymin'],
    'xmax':bounding_box_props['xmax'],
    'ymax':bounding_box_props['ymax']
})

In [5]:
df.head()

Unnamed: 0,file,width,height,xmin,ymin,xmax,ymax
0,../input/car-plate-detection/annotations/Cars3...,500,300,209,135,283,169
1,../input/car-plate-detection/annotations/Cars1...,400,268,191,147,242,169
2,../input/car-plate-detection/annotations/Cars7...,400,267,115,115,277,153
3,../input/car-plate-detection/annotations/Cars1...,400,221,36,175,62,186
4,../input/car-plate-detection/annotations/Cars2...,517,303,71,205,215,246


### 4. Creating new parameters 'Center X and Y' and 'BoundingBox Height and Width'

#### Center X and Y : Centre coordinates of the true bounding boxes/anchors of the images.
#### BB Height and Width : Height and width of bounding boxes.

In [6]:
df['center_x'] = (df['xmax'] + df['xmin'])/(2*df['width'])
df['center_y'] = (df['ymax'] + df['ymin'])/(2*df['height'])

df['bb_width'] = (df['xmax'] - df['xmin'])/df['width']
df['bb_height'] = (df['ymax'] - df['ymin'])/df['height']

In [7]:
df.head()

Unnamed: 0,file,width,height,xmin,ymin,xmax,ymax,center_x,center_y,bb_width,bb_height
0,../input/car-plate-detection/annotations/Cars3...,500,300,209,135,283,169,0.492,0.506667,0.148,0.113333
1,../input/car-plate-detection/annotations/Cars1...,400,268,191,147,242,169,0.54125,0.589552,0.1275,0.08209
2,../input/car-plate-detection/annotations/Cars7...,400,267,115,115,277,153,0.49,0.501873,0.405,0.142322
3,../input/car-plate-detection/annotations/Cars1...,400,221,36,175,62,186,0.1225,0.816742,0.065,0.049774
4,../input/car-plate-detection/annotations/Cars2...,517,303,71,205,215,246,0.276596,0.744224,0.27853,0.135314


### 5. Creating Training and Test datasets from df

In [8]:
yolo_df = df[['file', 'center_x', 'center_y', 'bb_width', 'bb_height']]
df_train, df_test = train_test_split(yolo_df, test_size=0.2)

In [9]:
train_path = os.path.join('Images', 'train')
test_path = os.path.join('Images', 'test')
images_path = '../input/car-plate-detection/images'

if not os.path.exists(train_path):
    os.makedirs(train_path)
    print('Made folder for train set')
if not os.path.exists(test_path):
    os.makedirs(test_path)
    print('Made folder for test set')

Made folder for train set
Made folder for test set


### 6. Copying and Moving required files from dataset.

#### Moving images from folder in input directory to the working directory alongwith text files containing the annotations for the images.

In [10]:
for _, row  in tqdm(df_train.iterrows()):
    annotation_path = row['file']
    image_name = os.path.split(annotation_path)[-1].replace('.xml','')
    image_src = os.path.join(images_path, f'{image_name}.png')
    image_dst = os.path.join(train_path, f'{image_name}.png')
    shutil.copy2(image_src, image_dst)
    label_text = f"0 {row['center_x']} {row['center_y']} {row['bb_width']} {row['bb_height']}"
    with open(os.path.join(train_path, f'{image_name}.txt'), 'w') as f:
        f.write(label_text)

346it [00:04, 74.88it/s]


In [11]:
for _, row  in tqdm(df_test.iterrows()):
    annotation_path = row['file']
    image_name = os.path.split(annotation_path)[-1].replace('.xml','')
    image_src = os.path.join(images_path, f'{image_name}.png')
    image_dst = os.path.join(test_path, f'{image_name}.png')
    shutil.copy2(image_src, image_dst)
    label_text = f"0 {row['center_x']} {row['center_y']} {row['bb_width']} {row['bb_height']}"
    with open(os.path.join(test_path, f'{image_name}.txt'), 'w') as f:
        f.write(label_text)

87it [00:00, 96.20it/s]


### 7. Cloning YOLOv5 repo from Ultralytics using shell command

In [12]:
! git clone https://github.com/ultralytics/yolov5.git

Cloning into 'yolov5'...
remote: Enumerating objects: 13071, done.[K
remote: Counting objects: 100% (246/246), done.[K
remote: Compressing objects: 100% (123/123), done.[K
remote: Total 13071 (delta 154), reused 205 (delta 123), pack-reused 12825[K
Receiving objects: 100% (13071/13071), 12.41 MiB | 16.40 MiB/s, done.
Resolving deltas: 100% (8982/8982), done.


### 8. Installing requirements from requirements.txt

In [13]:
! pip install -r yolov5/requirements.txt

Collecting thop>=0.1.1
  Downloading thop-0.1.1.post2207130030-py3-none-any.whl (15 kB)
Installing collected packages: thop
Successfully installed thop-0.1.1.post2207130030
[0m

### 9. Creating dictionary from Training dataset and writing it to yaml format.

In [14]:
data = {
    'names':['License Plate'],
    'nc':1,
    'train':os.path.abspath(train_path),
    'val':os.path.abspath(test_path)
}

with open('data.yaml', 'w') as f:
    yaml.dump(data, f)

### 10. Training pretrained model on dataset suing BGD for 100 epochs

In [15]:
!python ./yolov5/train.py --data ./data.yaml  --batch-size 8  --epochs 100 --weights yolov5/yolov5s.pt

[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice: (30 second timeout) 
[34m[1mwandb[0m: W&B disabled due to login timeout.
[34m[1mtrain: [0mweights=yolov5/yolov5s.pt, cfg=, data=./data.yaml, hyp=yolov5/data/hyps/hyp.scratch-low.yaml, epochs=100, batch_size=8, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=yolov5/runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v6.1-316-g916bdb1 Pytho

### 11. Conclusion

#### Final Precision = 0.916
#### Final Recall = 0.92
#### Final MAP = 0.918

#### Best MAP = 0.936 (Epoch 41)
#### Best Precision = 0.931 (Epoch 41)
#### Best Recall = 0.909 (Epoch 28)