# Custom Yolo Object Detector

This notebook allows you to create a custom YOLOv5 object detector. A virtual environment is used to simplify the setup.

## Virtual environment
Create a virtual environment and install the packages needed to use it from Jupyter Notebook:
```console
python -m venv env
source ./env/bin/activate
python -m pip install --upgrade pip
pip install ipykernel
python -m ipykernel install --user --name=env
```

Then, in the Jupyter notebook, select the correct kernel (`env`).


## Create Folders

In [1]:
import os
# Root directory for every path built below: the kernel's current working
# directory at the time this cell runs.
DIR_PATH = os.getcwd()

In [2]:
# Folder layout of the project, expressed relative to DIR_PATH.
# Each entry maps a short identifier to the path components of one folder.
_relative_paths = {
    'DATA_PATH' : ('data',),
    'DOWNLOADED_IMAGES' : ('data', 'images', 'download'),
    'IMAGES_PATH' : ('data', 'images'),
    'LABELIMG_PATH' : ('labelimg',),
    'MODELS_PATHS' : ('data', 'models'),
    'TRAIN_PATH' : ('data', 'train'),
    'TRAIN_IMAGES_PATH' : ('data', 'train', 'images'),
    'TRAIN_LABELS_PATH' : ('data', 'train', 'labels'),
    'TEST_PATH' : ('data', 'test'),
    'TEST_IMAGES_PATH' : ('data', 'test', 'images'),
    'TEST_LABELS_PATH' : ('data', 'test', 'labels'),
    'YOLO5_PATH' : ('yolov5',),
}

# Absolute folder paths used throughout the notebook, in the same order as above.
paths = {
    key: os.path.join(DIR_PATH, *parts)
    for key, parts in _relative_paths.items()
}

In [3]:
# Individual files referenced by the notebook, keyed by a short identifier.
# Every entry is built from the folders declared in `paths` (or DIR_PATH).
files = {
    'CASCADE_TXT' : os.path.join(paths['DATA_PATH'],'cascade.txt'),
    'CLASSES_TXT' : os.path.join(paths['IMAGES_PATH'],'classes.txt'),
    'DATASET_YAML' : os.path.join(paths['DATA_PATH'],'dataset.yaml'),
    'EXPORT_PY' : os.path.join(paths['YOLO5_PATH'],'export.py'),
    'FILTER_PY' : os.path.join(DIR_PATH,'filter.py'),
    'LABELIMG_PY' : os.path.join(paths['LABELIMG_PATH'],'labelImg.py'),
    # The key keeps its historical spelling so existing lookups still work;
    # the file shipped in the labelImg repository is `predefined_classes.txt`.
    'PREDIFINED_CLASSES_TXT' : os.path.join(paths['LABELIMG_PATH'],'data','predefined_classes.txt'),
    'TRAIN_PY' : os.path.join(paths['YOLO5_PATH'],'train.py'),
    'VALIDATE_PY' : os.path.join(paths['YOLO5_PATH'],'val.py'),
    'YOLON_PT' : os.path.join(paths['YOLO5_PATH'],'yolov5n.pt'),
    'YOLOS_PT' : os.path.join(paths['YOLO5_PATH'],'yolov5s.pt')
}

In [4]:
# Create every project folder (including missing parents). Using os.makedirs
# instead of a shell `mkdir` works on every OS, copes with spaces in paths and
# is a no-op when the folder already exists.
for path in paths.values():
    os.makedirs(path, exist_ok=True)

## Install Dependencies

See the PyTorch [installation page](https://pytorch.org/get-started/locally/) to pick the correct version, with or without GPU support.

In [None]:
# Install PyTorch from the CPU-only wheel index, then the other packages used
# by this notebook, and finally upgrade pyqt5 and lxml.
# NOTE(review): `!pip` runs whichever pip is found by the shell, which may not
# be the environment of the selected kernel; IPython's `%pip` magic targets the
# running kernel - consider switching. TODO confirm
!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
!pip install protobuf==3.19.6 opencv-python pyyaml comet_ml bs4 requests matplotlib
!pip install pyqt5 lxml --upgrade

Here we download the YOLOv5 repository. Only the 5s model is there, but you can visit the [GitHub repository](https://github.com/ultralytics/yolov5) to download more models.

In [None]:
# Clone YOLOv5 and install its requirements only when the target folder is
# still empty, so re-running the cell does not clone again.
if not os.listdir(paths['YOLO5_PATH']):
    !git clone https://github.com/ultralytics/yolov5 {paths['YOLO5_PATH']}
    !cd {paths['YOLO5_PATH']} && pip install -r requirements.txt

In [17]:
# Clone labelImg only when the target folder is still empty, then compile its
# Qt resource file with pyrcc5 into libs/resources.py.
if not os.listdir(paths['LABELIMG_PATH']):
    !git clone https://github.com/tzutalin/labelImg {paths['LABELIMG_PATH']}
    !cd {paths['LABELIMG_PATH']} && pyrcc5 -o libs/resources.py resources.qrc

## Import Packages

In [None]:
import shutil
import time

import numpy as np
import random
from matplotlib import pyplot as plt

import requests 
from bs4 import *

import torch
import cv2

import yaml
import contextlib

## Download Images from URL

See this [site](https://www.123rf.com/) for an example of where images can be downloaded from.

In [34]:
def _extract_image_link(image):
    """Return the first usable image URL carried by an ``<img>`` tag, or None.

    The attributes are probed in the same priority order as before:
    ``data-srcset``, ``data-src``, ``data-fallback-src`` and finally ``src``.
    """
    for attribute in ("data-srcset", "data-src", "data-fallback-src", "src"):
        value = (image.get(attribute) or "").strip()
        if not value:
            continue
        if attribute == "data-srcset":
            # A srcset is a list of candidates ("url1 1x, url2 2x"):
            # keep only the first URL, otherwise the request cannot succeed.
            value = value.split()[0].rstrip(',')
        return value
    return None


def download_images(images, folder_name, timeout=10):
    """Download the binary images referenced by a list of ``<img>`` tags.

    Args:
        images: sequence of tag-like objects exposing ``.get(attribute)``
            (e.g. the result of ``soup.find_all('img')``).
        folder_name: folder receiving the files; created when missing.
        timeout: seconds to wait for each HTTP request before giving up.

    Each payload that is not valid UTF-8 text is treated as an image and saved
    as ``<timestamp>_<index>.jpg``. Text payloads are skipped. Failures on a
    single image are reported and never stop the whole download.
    """
    print(f"Total {len(images)} Image Found!")
    if len(images) != 0:
        # Without the folder every write would fail and nothing would be saved.
        os.makedirs(folder_name, exist_ok=True)
        count = 0
        for index, image in enumerate(images):
            # Resolve the link per tag so a tag without any link is skipped
            # instead of silently re-downloading the previous tag's image.
            image_link = _extract_image_link(image)
            if image_link is None:
                continue

            try:
                content = requests.get(image_link, timeout=timeout).content
            except Exception as error:
                # Best effort: one broken link must not abort the others.
                print(f"Skipping {image_link}: {error}")
                continue

            try:
                # Payloads that decode as text are not images.
                str(content, 'utf-8')
            except UnicodeDecodeError:
                # The index keeps names unique when several images are
                # downloaded within the same second.
                file_name = f'{time.strftime("%Y%m%d_%H%M%S")}_{index}.jpg'
                try:
                    with open(os.path.join(folder_name, file_name), "wb") as f:
                        f.write(content)
                except OSError as error:
                    print(f"Could not save {image_link}: {error}")
                    continue
                count += 1
        print(f"Total {count} Images Downloaded Out of {len(images)}")

In [65]:
# Address of the web page whose <img> tags will be downloaded; left empty here.
url = ''

In [None]:
# Fetch the page and download every image it references.
# Images are stored in DOWNLOADED_IMAGES: that folder is created with the
# other project folders and is the one the filtering command reads from.
if not url:
    # requests.get('') would raise; report it like any other unusable URL.
    print('URL not valide')
else:
    response = requests.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        images = soup.find_all('img')
        download_images(images,folder_name=paths['DOWNLOADED_IMAGES'])
    else :
        print('URL not valide')

## Filter Downloaded Images

Images are downloaded from the internet, so some of them may be wrong. Run the next command (in a terminal) and, for each image, choose between tomato, background, or deleting the image. It will go through all the downloaded images and move the ones you want to keep into the images folder.

If you want to use it for other labels, you need to modify the [filter](./filter.py) file.

In the images folder, your images need to be named **label.img_name.jpg**.

In [None]:
# Build the command that runs filter.py with the download folder as `-i` and
# the images folder as `-s`; it is only printed, to be run in a terminal.
command = f'python {files["FILTER_PY"]} -i {paths["DOWNLOADED_IMAGES"]} -s {paths["IMAGES_PATH"]}'
print(command)

## Resize image

In [32]:
def resize_image(img_path,dimension):
    """Resize an image file in place.

    Args:
        img_path: path of the image to read and overwrite.
        dimension: target size as ``(width, height)``.

    Raises:
        ValueError: if the file cannot be read as an image.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising on unreadable files.
        raise ValueError(f'Could not read image: {img_path}')
    # The third positional argument of cv2.resize is `dst`, so the
    # interpolation flag has to be passed by keyword to take effect.
    img = cv2.resize(img,dimension,interpolation=cv2.INTER_AREA)
    cv2.imwrite(img_path,img)


In [8]:
# Target size passed to resize_image, and the folder whose .jpg files are resized.
dimension = (416,416)
folder = paths['TEST_IMAGES_PATH']

In [37]:
# Resize in place every .jpg file found directly inside `folder`.
for file in os.listdir(folder):
    if not file.endswith('.jpg'):
        continue
    img_path = os.path.join(folder,file)
    resize_image(img_path,dimension)

## Label Images

Make sure the output format is set to YOLO when you label your images.

In [38]:
# Use the project's own classes file when it exists, otherwise fall back to
# the predefined classes file located in the labelImg folder.
classes_file_path = (
    files['CLASSES_TXT']
    if os.path.exists(files['CLASSES_TXT'])
    else files['PREDIFINED_CLASSES_TXT']
)


In [None]:
# Build the labelImg command line: script, images folder, classes file, then
# the images folder again as last argument. It is only printed, not executed.
command = f'python {files["LABELIMG_PY"]} {paths["IMAGES_PATH"]} {classes_file_path} {paths["IMAGES_PATH"]}'
print(command)

Transfer the images into the train and test folders.

In [8]:
def round_up(n):
    """Return ``int(n)`` plus one when ``n`` has a non-zero fractional part."""
    truncated = int(n)
    if n % 1:
        return truncated + 1
    return truncated

In [88]:
# Fraction of the labelled images sampled for the test set (count is rounded up).
test_size = 0.2

In [89]:
# Split the labelled images into a train and a test set.
list_files = os.listdir(paths['IMAGES_PATH'])
if 'classes.txt' not in list_files:
    print('No classes defined. Please label images')

# Base name (file name without extension) of every label file; classes.txt
# lists the classes and is not a label file.
list_names = {
    os.path.splitext(file)[0]
    for file in list_files
    if file.endswith('.txt') and file != 'classes.txt'
}

# random.sample requires a sequence (sets are rejected since Python 3.11);
# sorting also makes the draw reproducible for a given random seed.
test = set(random.sample(sorted(list_names), round_up(test_size*len(list_names))))
train = list_names - test

In [90]:
# Move each training sample out of the images folder: first its label file
# (.txt) into the train labels folder, then its picture (.jpg) into the train
# images folder.
for file_name in list(train) :
    for extension, destination in (
        ('txt', paths['TRAIN_LABELS_PATH']),
        ('jpg', paths['TRAIN_IMAGES_PATH']),
    ):
        src_path = os.path.join(paths['IMAGES_PATH'],f'{file_name}.{extension}')
        dst_path = os.path.join(destination,f'{file_name}.{extension}')
        shutil.move(src_path, dst_path)

In [91]:
# Move each test sample out of the images folder: first its label file (.txt)
# into the test labels folder, then its picture (.jpg) into the test images
# folder.
for file_name in list(test) :
    for extension, destination in (
        ('txt', paths['TEST_LABELS_PATH']),
        ('jpg', paths['TEST_IMAGES_PATH']),
    ):
        src_path = os.path.join(paths['IMAGES_PATH'],f'{file_name}.{extension}')
        dst_path = os.path.join(destination,f'{file_name}.{extension}')
        shutil.move(src_path, dst_path)

No background images (images without labels) have been added, but this can be done manually.

## Images Augmentation

If your dataset is not big enough, you can increase it by:
- flipping some images horizontally or vertically, if possible
- rescaling and cropping images to keep the defined size

Here, only horizontal flipping has been implemented. The label file of each flipped image is generated from the label file of the original image.

In [40]:
# Fraction of the selected training images that get a flipped copy (count is rounded up).
flipped_proba = 0.4

In [41]:
# Pick at random the training images that will receive a flipped copy.
list_files = os.listdir(paths['TRAIN_IMAGES_PATH'])

# Base names of the 'tomato' pictures; only .jpg files are kept because the
# flipping step reads `<name>.jpg`.
list_names = {
    os.path.splitext(file)[0]
    for file in list_files
    if 'tomato' in file and file.endswith('.jpg')
}

# random.sample requires a sequence (sets are rejected since Python 3.11);
# sorting also makes the draw reproducible for a given random seed.
flipped = set(random.sample(sorted(list_names), round_up(flipped_proba*len(list_names))))
unmove = list_names-flipped

In [None]:
# Write a horizontally flipped copy of each selected image, together with a
# matching label file, into the `output` sub-folders of the train folders.
flipped_images_dir = os.path.join(paths['TRAIN_IMAGES_PATH'],'output')
flipped_labels_dir = os.path.join(paths['TRAIN_LABELS_PATH'],'output')
# These folders are not part of `paths`; without them every write below fails.
os.makedirs(flipped_images_dir, exist_ok=True)
os.makedirs(flipped_labels_dir, exist_ok=True)

for img_name in list(flipped):

    img_path = os.path.join(paths['TRAIN_IMAGES_PATH'],f'{img_name}.jpg')
    label_path = os.path.join(paths['TRAIN_LABELS_PATH'],f'{img_name}.txt')
    img_flip_path = os.path.join(flipped_images_dir,f'{img_name}_flipped.jpg')
    label_flip_path = os.path.join(flipped_labels_dir,f'{img_name}_flipped.txt')

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising on unreadable files.
        print(f'Image {img_path} could not be read, skipped')
        continue
    # flipCode 1 mirrors the picture around its vertical axis.
    img_flip = cv2.flip(img, 1)

    with open(label_path) as f:
        label_lines = [line.rstrip() for line in f]
    with open(label_flip_path,'w') as f:
        for line in label_lines:
            if not line:
                # Blank lines carry no box.
                continue
            line_split = line.split(' ')
            # Second field is the normalised x centre: mirror it; the other
            # fields are unchanged by a horizontal flip.
            line_split[1] = str(1-float(line_split[1]))
            f.write(" ".join(line_split))
            f.write('\n')
    cv2.imwrite(img_flip_path, img_flip)
    print(f'Image {img_path} flipped into {img_name}_flipped')


## Write Dataset File

In [14]:
# Read classes.txt into a list, one entry per line with trailing whitespace removed.
with open(files['CLASSES_TXT']) as f:
    labels = [line.rstrip() for line in f]

In [15]:
# Content of the dataset description file: data root, image folders relative
# to that root (validation and test both point at the test images) and the
# mapping from class index to class name.
class_names = dict(enumerate(labels))
dataset_yaml = dict(
    path=paths['DATA_PATH'],
    train=os.path.join('train','images'),
    val=os.path.join('test','images'),
    test=os.path.join('test','images'),
    names=class_names,
)

In [16]:
# Serialise the dataset description to dataset.yaml. yaml.dump returns None
# when it is given a stream, so its result is not stored.
with open(files['DATASET_YAML'], 'w') as file:
    yaml.dump(dataset_yaml, file)

## Train Model

It is better to run the training command in a terminal to see its output. The last cell of this section prints the command, so you just have to copy and paste it.

In [9]:
# Training settings inserted into the train.py command line below.
img_size = 416   # value of --img
batch = 16       # value of --batch
epochs = 350     # value of --epochs
workers = 2      # value of --workers
yolo_model_type = 'n' #n or s
# When True, training starts from the best.pt of the model selected below
# instead of the stock yolov5n / yolov5s weights.
use_previous_model = False

In [18]:
# select model from where we start (if use_previous model)
model_number = 0
model_path = os.path.join(paths['MODELS_PATHS'],'train',f'model_{model_number}')

In [46]:
# Choose the weights file training starts from: the best weights of a previous
# run, or the stock nano / small YOLOv5 weights.
if use_previous_model:
    pt_model_path = os.path.join(model_path,'weights','best.pt')
elif yolo_model_type == 'n':
    pt_model_path = files['YOLON_PT']
elif yolo_model_type == 's':
    pt_model_path = files['YOLOS_PT']
else:
    # Fail loudly instead of leaving pt_model_path undefined (or stale from a
    # previous execution of this cell).
    raise ValueError(f"Unknown yolo_model_type {yolo_model_type!r}: expected 'n' or 's'")

In [47]:
# set new model path
model_number = max(int(name[-1]) for name in os.listdir(os.path.join(paths['MODELS_PATHS'],'train')))+1
new_model_path = os.path.join(paths['MODELS_PATHS'],'train',f'model_{model_number}')

In [None]:
# Show the starting weights and the folder chosen for the new model.
print(pt_model_path)
print(new_model_path)

In [None]:
# Build the train.py command line from the settings above; it is only printed
# here, to be copied into a terminal.
command = f'python {files["TRAIN_PY"]} --img {img_size} --batch {batch} --epochs {epochs} --data {files["DATASET_YAML"]} --weights {pt_model_path} --workers {workers} --project {paths["MODELS_PATHS"]} --name {new_model_path}'
print(command)

## Validate Model

In [7]:
# Number of the trained model to validate.
model_number = 0

In [8]:
# Folder passed as --name to val.py, and the best weights of the selected model.
test_model_path = os.path.join(paths['MODELS_PATHS'],'test',f'model_{model_number}')
weigths_path =  os.path.join(paths['MODELS_PATHS'],'train',f'model_{model_number}','weights','best.pt')

In [None]:
# Build the val.py command line; `img_size` comes from the training settings cell.
command = f'python {files["VALIDATE_PY"]} --weights {weigths_path} --data {files["DATASET_YAML"]} --img {img_size} --project {paths["MODELS_PATHS"]} --name {test_model_path}'
print(command)

In [None]:
# Run the command currently stored in `command` in a shell.
!{command}

## Test Custom Model

In [None]:
# Start TensorBoard on the folder of the newly trained model.
# NOTE(review): presumably this keeps the cell running until TensorBoard is
# interrupted - confirm.
!tensorboard --logdir {new_model_path}

In [11]:
# Select the most recent trained model (0 when none is found).
model_number = 0
train_models_dir = os.path.join(paths['MODELS_PATHS'],'train')
# The folder does not exist before the first training run.
if os.path.isdir(train_models_dir):
    # The whole numeric suffix is parsed (not only the last character) so
    # model_10 is recognised as newer than model_9.
    trained_numbers = [
        int(name.rsplit('_', 1)[-1])
        for name in os.listdir(train_models_dir)
        if name.startswith('model_') and name.rsplit('_', 1)[-1].isdigit()
    ]
    model_number = max(trained_numbers, default=0)

In [10]:
# Manual choice of the model to test; replaces whatever value model_number held before.
model_number = 10

In [11]:
# Folder of the selected model and its best weights file.
model_path = os.path.join(paths['MODELS_PATHS'],'train',f'model_{model_number}')
weigths_path = os.path.join(model_path,'weights','best.pt')

In [None]:
# Show which model folder and weights file will be loaded.
print(model_path)
print(weigths_path)

In [None]:
# Load the custom weights through torch.hub using the ultralytics/yolov5 entry point.
# NOTE(review): force_reload=True presumably re-fetches the hub repository on
# every run - confirm it is still needed.
model = torch.hub.load('ultralytics/yolov5', 'custom', path=weigths_path, force_reload=True)

In [15]:
# Set the model's `conf` attribute (presumably the minimum detection confidence - confirm).
model.conf = 0.5

In [None]:
for image in os.listdir(paths['TEST_IMAGES_PATH']):
    results = model(os.path.join(paths['TEST_IMAGES_PATH'],image))
    results.print()
    %matplotlib inline 
    plt.imshow(np.squeeze(results.render()))
    plt.show()

## Live Detection

In [16]:
# Live detection on the default camera; press 'q' in the window to stop.
cap = cv2.VideoCapture(0)

prev_frame_time = 0
new_frame_time = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unplugged or busy): stop cleanly
            # instead of failing inside cv2.resize.
            break
        frame = cv2.resize(frame,dimension)
        # Make detections
        # OpenCV delivers BGR frames while the hub model expects RGB arrays:
        # convert before inference and back to BGR for cv2.imshow.
        results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        results_frame = cv2.cvtColor(np.squeeze(results.render()), cv2.COLOR_RGB2BGR)

        new_frame_time = time.time()
        elapsed = new_frame_time-prev_frame_time
        # Guard against a zero time difference between two frames.
        fps = round(1/elapsed,2) if elapsed > 0 else 0.0
        prev_frame_time = new_frame_time
        cv2.putText(results_frame, f'FPS : {fps}Hz', (2, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (100, 255, 0), 1, cv2.LINE_AA)
        cv2.imshow('YOLO', results_frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even after an error or a
    # kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

## Yolo Label To Cascade Label

In [19]:
def yolo2cascade(cascade_file,img_path,label_path):
    """Convert YOLO label files into pixel boxes written to ``cascade_file``.

    For every entry of ``label_path`` except ``classes.txt`` and ``output``,
    one line is written: the label file name followed by ``x y w h`` (top-left
    corner, width and height, in pixels) for each box of that file.

    Args:
        cascade_file: writable text stream receiving the lines.
        img_path: folder holding the ``.jpg`` pictures.
        label_path: folder holding the YOLO ``.txt`` label files.
    """
    ignored_entries = ('classes.txt', 'output')
    for label_name in os.listdir(label_path):
        if label_name in ignored_entries:
            continue

        # The picture shares the first two dot-separated parts of the label name.
        name_parts = label_name.split('.')
        picture_path = os.path.join(img_path,f'{name_parts[0]}.{name_parts[1]}.jpg')

        # One list of fields per line of the label file.
        with open(os.path.join(label_path,label_name)) as yolo_file:
            boxes = [row.rstrip().split(' ') for row in yolo_file]

        # shape is (height, width, ...); it scales the normalised values to pixels.
        picture_shape = cv2.imread(picture_path).shape
        height, width = picture_shape[0], picture_shape[1]

        fields = [label_name]
        for box in boxes:
            center_x = int(float(box[1])*width)
            center_y = int(float(box[2])*height)
            box_w = int(float(box[3])*width)
            box_h = int(float(box[4])*height)
            # Centre-based box -> top-left corner.
            left = int(center_x-box_w/2)
            top = int(center_y-box_h/2)
            fields += [str(left),str(top),str(box_w),str(box_h)]

        cascade_file.write(" ".join(fields))
        cascade_file.write('\n')

In [20]:
# Write the boxes of the training set to the cascade text file.
with open(files['CASCADE_TXT'],'w') as cascade_file:
    yolo2cascade(cascade_file,paths['TRAIN_IMAGES_PATH'],paths['TRAIN_LABELS_PATH'])

In [21]:
# Write the boxes of the test set to data/cascade_test.txt. The path is built
# from `paths` like every other file, so it no longer depends on the current
# working directory.
with open(os.path.join(paths['DATA_PATH'],'cascade_test.txt'),'w') as cascade_file:
    yolo2cascade(cascade_file,paths['TEST_IMAGES_PATH'],paths['TEST_LABELS_PATH'])

## Stat label size

In [22]:
# Gather the x, y, width and height of every box listed in the train and test
# cascade files.
cascade_file = []
for cascade_path in (files['CASCADE_TXT'], os.path.join(paths['DATA_PATH'],'cascade_test.txt')):
    with open(cascade_path) as f:
        cascade_file += [line.rstrip() for line in f]

x_distrib,y_distrib,w_distrib,h_distrib = [],[],[],[]
for line in cascade_file:
    # The first field is the file name; the rest are groups of four integers.
    tomato_list = line.split(' ')[1:]
    tomato_list = [tomato_list[4*i:4*(i+1)] for i in range(len(tomato_list)//4)]
    for tomato in tomato_list :
        x_distrib.append(int(tomato[0]))
        y_distrib.append(int(tomato[1]))
        w_distrib.append(int(tomato[2]))
        h_distrib.append(int(tomato[3]))
    

In [None]:
# Histograms of the box positions and sizes.
figure, axis = plt.subplots(2, 2, figsize=(12, 7))
figure.suptitle('Label distribution', fontsize=16)

# (axes, values, number of bins, title, x label) for each panel. The x / y
# values stored in the cascade files are the top-left corner of each box, not
# its centre, so the titles say so.
histograms = [
    (axis[0, 0], x_distrib, 50, "X (left edge)", "x (pixel)"),
    (axis[0, 1], y_distrib, 50, "Y (top edge)", "y (pixel)"),
    (axis[1, 0], w_distrib, 100, "Width", "w (pixel)"),
    (axis[1, 1], h_distrib, 100, "Height", "h (pixel)"),
]
for ax, values, bins, title, xlabel in histograms:
    ax.hist(values, bins = bins,color = 'blue', edgecolor = 'black')
    ax.set_title(title,size=10)
    ax.set_xlabel(xlabel)
    ax.set_ylabel("count")

# Adjust the spacing between panels and display
figure.tight_layout()
plt.show()

In [24]:
def filter_array(array,offset):
    """Return, as a NumPy array, the elements of ``array`` strictly below ``offset``."""
    # Boolean mask built element by element: True where the value is kept.
    keep = [element < offset for element in array]
    return np.array(array)[keep]

In [25]:
# Keep only the widths and heights strictly below 100 (values are in pixels).
w_filter = filter_array(w_distrib,100)
h_filter = filter_array(h_distrib,100)

In [None]:
# Histograms of the filtered widths (top) and heights (bottom).
figure, axis = plt.subplots(2, 1, figsize=(12, 7))
figure.suptitle('Label distribution', fontsize=16)

# (panel index, values, title, x label) for each histogram.
panels = (
    (0, w_filter, "Width", "w (pixel)"),
    (1, h_filter, "Height", "h (pixel)"),
)
for row, values, title, xlabel in panels:
    axis[row].hist(values, bins = 100,color = 'blue', edgecolor = 'black')
    axis[row].set_title(title,size=10)
    axis[row].set_xlabel(xlabel)
    axis[row].set_ylabel("count")

# Adjust the spacing between panels and display
figure.tight_layout()
plt.show()