In [1]:
import cv2
import os
import matplotlib.pyplot as plt
%matplotlib inline
import torch
import torchvision
from PIL import Image
import pandas as pd
import numpy as np

In [2]:
# Set base image directory
os.chdir("../")
root = os.getcwd()
base_imdir = os.path.join(root,"pipeline-check")
print("Root directory: {}".format(root))

Root directory: /home/noone/smartcatalonia-2021/lector-matricules


In [3]:
os.chdir(root)
!git clone https://github.com/ultralytics/yolov5  # clone repo
!git pull https://github.com/ultralytics/yolov5 # update repo
%cd yolov5
%pip install -qr requirements.txt  # install dependencies

fatal: destination path 'yolov5' already exists and is not an empty directory.
From https://github.com/ultralytics/yolov5
 * branch            HEAD       -> FETCH_HEAD
fatal: refusing to merge unrelated histories
/home/noone/smartcatalonia-2021/lector-matricules/yolov5
Note: you may need to restart the kernel to use updated packages.


In [4]:
# Install kaggle package for dataset downloading
!pip install kaggle --upgrade --quiet

In [5]:
os.chdir(root)

if not os.path.isdir("datasets"):
    os.mkdir("datasets")

os.chdir("datasets")

# Download kaggle dataset for license plate detection
if not os.path.isdir("car-plate-detection"):
    os.mkdir("car-plate-detection")
    os.chdir("car-plate-detection")
    !kaggle datasets download -d andrewmvd/car-plate-detection
    # Extract the files
    !unzip -q car-plate-detection.zip
    !rm car-plate-detection.zip

In [6]:
# Read xml file into a dictionary

# Skip if annotations.csv exists
if not os.path.isfile("annotations.csv"):

    # Set the working paths
    data_path = os.path.join(root,"datasets","car-plate-detection")
    img_path = os.path.join(data_path,"images")
    ann_path = os.path.join(data_path,"annotations")

    # Do a swipe and get all the xml files with annotations
    (_,_,xmlfiles) = next(os.walk(ann_path))

    os.chdir(ann_path)
    ann_dict = {"filename": [],
                "filepath": [],
                "xmin": [],
                "xmax": [],
                "ymin": [],
                "ymax": [],
                "height": [],
                "width":[],
                "x": [],
                "y": []
               }

    # Enter each xml file discovered, get some data and store it in the dictionary
    for xmlfile in xmlfiles:
        with open(xmlfile) as file:
            data = file.read()

            # String extraction of the important data
            filename = data.split("<filename>")[-1].split("</filename>")[0].split(".png")[0]
            filepath = os.path.join(img_path,filename)+".png"
            xmin = int( data.split("<xmin>")[-1].split("</xmin>")[0] )
            xmax = int( data.split("<xmax>")[-1].split("</xmax>")[0] )
            ymin = int( data.split("<ymin>")[-1].split("</ymin>")[0] )
            ymax = int( data.split("<ymax>")[-1].split("</ymax>")[0] ) 
            im_width = int( data.split("<width>")[-1].split("</width>")[0] )
            im_height = int( data.split("<height>")[-1].split("</height>")[0] )


            # Normalized center of the bounding box in both axis
            x = ((xmax-xmin)/2 + xmin) / im_width
            y = ((ymax-ymin)/2 + ymin) / im_height

            # Normalized height and width
            width = (xmax-xmin) / im_width
            height = (ymax-ymin) / im_height

            # Writing into dictionary
            ann_dict["filename"].append(filename)
            ann_dict["filepath"].append(filepath)
            ann_dict["xmin"].append(xmin)
            ann_dict["xmax"].append(xmax)
            ann_dict["ymin"].append(ymin)
            ann_dict["ymax"].append(ymax)
            ann_dict["x"].append(x)
            ann_dict["y"].append(y)
            ann_dict["width"].append(width)
            ann_dict["height"].append(height)
            
    # Convert dictionary to DF and export as CSV
    os.chdir(data_path)
    ann_df = pd.DataFrame.from_dict(ann_dict)
    ann_df.to_csv("annotations.csv",index=False)

In [7]:
label_df = pd.read_csv("annotations.csv",usecols=["filename","x","y","width","height"])

# Add the label column, not really useful in this 1 class detection.
label_df["label"] = "license plate"

# Encoding the classes as target numbers
label_df["class"] = label_df["label"]                       # Copy the class column into a new one called target
label_df["class"] = label_df["class"].astype("category")   # Change target column data type to categorical data
labels_mapping = dict( enumerate(label_df['class'].cat.categories)) # Make a dictionary of the assigned labels to each target
label_df["class"] = label_df["class"].cat.codes            # Assign a number to each category (one for each unique class)
label_df["class"] = label_df["class"].astype("object")     # Change target column data type to object (original type)

# Reorder columns
label_df = label_df[["filename","label","class","x","y","width","height"]]

label_df

Unnamed: 0,filename,label,class,x,y,width,height
0,Cars310,license plate,0,0.490833,0.740113,0.178333,0.090395
1,Cars378,license plate,0,0.465000,0.372964,0.340000,0.146580
2,Cars258,license plate,0,0.485000,0.781955,0.180000,0.180451
3,Cars33,license plate,0,0.500000,0.500000,0.530000,0.502222
4,Cars152,license plate,0,0.311563,0.903333,0.229122,0.086667
...,...,...,...,...,...,...,...
428,Cars269,license plate,0,0.321250,0.693333,0.092500,0.060000
429,Cars57,license plate,0,0.685615,0.737603,0.118329,0.136364
430,Cars249,license plate,0,0.817500,0.878543,0.135000,0.072874
431,Cars128,license plate,0,0.522500,0.528889,0.485000,0.480000


In [8]:
# Label creation
label_path = os.path.join(data_path,"labels")

if not os.path.isdir(label_path):
    os.mkdir(label_path)
    
os.chdir(label_path)

# We now have to create the label files in a yolo format, which is
# a txt file named as the image, containing these columns
# label x_center_normalized y_center_normalized width_normalized height_normalized


for i in range(len(label_df)):
    
    # Get the filename without the image extension and add the txt extension
    txt_file_name = str(label_df["filename"][i])+".txt"
    
    with open(txt_file_name,"w") as txtfile:
        txtfile.write(str(label_df["class"][i])+" "+str(label_df["x"][i])+" "+str(label_df["y"][i])+" "+str(label_df["width"][i])+" "+str(label_df["height"][i]))
        

In [9]:
# Split the data into train/val

os.chdir(data_path)

msk = np.random.rand(len(label_df)) < 0.8
train = label_df[msk].reset_index(drop = True)
train.to_csv(os.path.join(data_path, 'train_df.csv'),index = False)

val = label_df[~msk].reset_index(drop = True)
val.to_csv(os.path.join(data_path, 'val_df.csv'),index = False)


# Define and create folders for image training data
train_img_path = os.path.join(img_path,"train")
val_img_path = os.path.join(img_path,"val")

if not os.path.isdir(train_img_path):
    os.mkdir(train_img_path)
    
    # Move files from image folder to each training or validation folder
    for i in range(len(train)):
        filename = train["filename"][i]+".png"
        filepath0 = os.path.join(img_path,filename)
        filepath1 = os.path.join(train_img_path,filename)
        os.replace(filepath0,filepath1)
    
if not os.path.isdir(val_img_path):
    os.mkdir(val_img_path)
    
    # Move files from image folder to each training or validation folder
    for i in range(len(val)):
        filename = val["filename"][i]+".png"
        filepath0 = os.path.join(img_path,filename)
        filepath1 = os.path.join(val_img_path,filename)
        os.replace(filepath0,filepath1)
    
# Define and create folders for label training data
train_label_path = os.path.join(label_path,"train")
val_label_path = os.path.join(label_path,"val")

if not os.path.isdir(train_label_path):
    os.mkdir(train_label_path)
    
    # Move files from image folder to each training or validation folder
    for i in range(len(train)):
        filename = train["filename"][i]+".txt"
        filepath0 = os.path.join(label_path,filename)
        filepath1 = os.path.join(train_label_path,filename)
        os.replace(filepath0,filepath1)
    
if not os.path.isdir(val_label_path):
    os.mkdir(val_label_path)
    
    # Move files from image folder to each training or validation folder
    for i in range(len(val)):
        filename = val["filename"][i]+".txt"
        filepath0 = os.path.join(label_path,filename)
        filepath1 = os.path.join(val_label_path,filename)
        os.replace(filepath0,filepath1)

In [10]:
yolo_dir = os.path.join(root,"yolov5")
os.chdir(yolo_dir)

# Create dataset configuration YAML file
if not os.path.isfile("dataset.yaml"):
    with open("dataset.yaml","w") as file:
        file.write("train: "+train_img_path+"\n")
        file.write("val: "+val_img_path+"\n")
        file.write("nc: 1\n")
        file.write("names: [""license_plate""]")

In [11]:
# Start tensorboard (optional)
%load_ext tensorboard
%tensorboard --logdir runs/

!python train.py --img 640 --batch 20 --epochs 500 --data dataset.yaml --cfg models/yolov5s.yaml --name v2_simple_training --workers 16

YOLOv5 🚀 v5.0-14-g238583b torch 1.8.1+cu111 CUDA:0 (GeForce RTX 2070, 7982.3125MB)

Namespace(weights='yolov5s.pt', cfg='models/yolov5s.yaml', data='dataset.yaml', hyp='data/hyp.scratch.yaml', epochs=100, batch_size=4, img_size=[1024, 1024], rect=False, resume=False, nosave=False, notest=False, noautoanchor=False, evolve=False, bucket='', cache_images=False, image_weights=False, device='', multi_scale=False, single_cls=False, adam=False, sync_bn=False, local_rank=-1, workers=16, project='runs/train', entity=None, name='v1_pipeline_check', exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, upload_dataset=False, bbox_interval=-1, save_period=-1, artifact_alias='latest', save_dir='runs/train/v1_pipeline_check8', world_size=1, global_rank=-1, total_batch_size=4)
[34m[1mtensorboard: [0mStart with 'tensorboard --logdir runs/train', view at http://localhost:6006/
[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.2, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_m

In [18]:
!python detect.py --weights runs/train/v1_pipeline_check8/weights/last.pt --img 640 --source /home/noone/smartcatalonia-2021/lector-matricules/pipeline-check/cars.mp4

Namespace(weights=['runs/train/v1_pipeline_check8/weights/last.pt'], source='/home/noone/smartcatalonia-2021/lector-matricules/pipeline-check/cars.mp4', img_size=640, conf_thres=0.25, iou_thres=0.45, device='', view_img=False, save_txt=False, save_conf=False, nosave=False, classes=None, agnostic_nms=False, augment=False, update=False, project='runs/detect', name='exp', exist_ok=False)
YOLOv5 🚀 v5.0-14-g238583b torch 1.8.1+cu111 CUDA:0 (GeForce RTX 2070, 7982.3125MB)

Fusing layers... 
Model Summary: 224 layers, 7053910 parameters, 0 gradients, 16.3 GFLOPS
video 1/1 (1/3000) /home/noone/smartcatalonia-2021/lector-matricules/pipeline-check/cars.mp4: 384x640 Done. (0.017s)
video 1/1 (2/3000) /home/noone/smartcatalonia-2021/lector-matricules/pipeline-check/cars.mp4: 384x640 Done. (0.010s)
video 1/1 (3/3000) /home/noone/smartcatalonia-2021/lector-matricules/pipeline-check/cars.mp4: 384x640 Done. (0.010s)
video 1/1 (4/3000) /home/noone/smartcatalonia-2021/lector-matricules/pipeline-check/car