<h1 style="text-align: center;"><b>Object detection</b></h1>

## Tip: Use GPU Acceleration

If you are running this notebook in Google Colab, navigate to `Edit` -> `Notebook settings` -> `Hardware accelerator`, set it to `GPU`, and then click `Save`. This will ensure your notebook uses a `GPU`, which will significantly speed up model training times.

## Preparing a custom dataset

Loading necessary libraries

In [None]:
from PIL import Image
import numpy as np
import torch
import pandas as pd
import glob
import numpy as np
import matplotlib.pyplot as plt
import os
import os.path
from os import path
import shutil
import tqdm
import cv2

I load the <a href="http://shuoyang1213.me/WIDERFACE/">WIDER FACE</a> archives (train images, validation images and annotations) from my Google Drive

In [None]:
# Remember the notebook's working directory; later cells build their paths from it.
HOME = os.getcwd()
HOME

In [None]:
# Mount Google Drive under HOME/drive so the dataset archives stored there can be read.
from google.colab import drive
drive.mount(f'{HOME}/drive')

To train *YoloV8*, we need to preprocess the data so that it looks like this:  
![](https://raw.githubusercontent.com/IvanPodoynikov/YoloV8-Object-Detection/main/assets/YoloV8DataFormat.jpeg)


Making directories to fit the format

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Create the dataset root folder unless it is already there.
dataset_root = f'{HOME}/Face_dataset'
if not os.path.exists(dataset_root):
  os.mkdir(dataset_root)

In [None]:
# Create the train/valid split folders.
# exist_ok=True makes the cell safe to re-run and avoids a separate existence
# check whose path could drift from the one being created (e.g. 'Face_Dataset'
# vs 'Face_dataset' on a case-sensitive filesystem).
for split in ('train', 'valid'):
  os.makedirs(f'{HOME}/Face_dataset/{split}', exist_ok=True)

In [None]:
# Each split gets an images/ and a labels/ folder; create whichever is missing.
for split in ('train', 'valid'):
  for sub in ('images', 'labels'):
    target = f'{HOME}/Face_dataset/{split}/{sub}'
    if not os.path.exists(target):
      os.mkdir(target)

In [None]:
# Unpack the WIDER train/val image archives from Google Drive into HOME.
# Adjust the Drive folder in the path below if your archives live elsewhere.
for archive_name in ('WIDER_train', 'WIDER_val'):
    shutil.unpack_archive(f"{HOME}/drive/MyDrive/ColabNotebooks/{archive_name}.zip", f'{HOME}')

In [None]:
# Unpack the annotation archive from Google Drive into HOME.
shutil.unpack_archive(f"{HOME}/drive/MyDrive/ColabNotebooks/wider_face_split.zip", f'{HOME}') # Set this to your own path to wider_face_split.zip

Making YAML

In [None]:
# Dataset config: a single class ('face') plus the train/val image folders.
yaml_lines = [
    "names:",
    "- face",
    "nc: 1",
    "",
    f"train: {HOME}/Face_dataset/train/images",
    f"val: {HOME}/Face_dataset/valid/images",
]
with open("dataV8.yaml", "w") as f:
    f.write("\n".join(yaml_lines))

If you use a Mac, uncomment the next line

In [None]:
#!rm -rf __MACOSX

I want to move files to *Face_dataset* from *WIDER_train* and *WIDER_val*

In [None]:
# Move the dataset config into the Face_dataset folder, next to the data it describes.
shutil.move(f'{HOME}/dataV8.yaml', f'{HOME}/Face_dataset')

In [None]:
# WIDER stores images in one sub-folder per event; flatten every sub-folder
# into the matching split's images/ folder.
for source_name, split in (('WIDER_train', 'train'), ('WIDER_val', 'valid')):
    directory = f'{HOME}/{source_name}/images'
    destination = f'{HOME}/Face_dataset/{split}/images'
    for folder in os.listdir(directory):
        folder_path = f'{directory}/{folder}'
        for image in os.listdir(folder_path):
            shutil.move(f'{folder_path}/{image}', destination)

In [None]:
# The images have been moved out, so drop the unpacked source trees.
for leftover in ('WIDER_train', 'WIDER_val'):
    shutil.rmtree(f'{HOME}/{leftover}')

Let's see what *wider_face_split* contains

In [None]:
# Read the training annotation file without a header row, then preview the first rows.
path = f'{HOME}/wider_face_split/wider_face_train_bbx_gt.txt'
df_train = pd.read_csv(path, header = None)
df_train.head()

In [None]:
# Read the validation annotation file without a header row, then preview the first rows.
path = f'{HOME}/wider_face_split/wider_face_val_bbx_gt.txt'
df_val = pd.read_csv(path, header = None)
df_val.head()

Getting bboxes for each image

In [None]:
def make_filenames_bboxes(df):
  """Parse an annotation table into image names and their bounding boxes.

  Column 0 of ``df`` is read row by row. A row whose text after the last '.'
  is "jpg" starts a record: the next row holds the number of boxes and the
  following rows hold one box each. Any other row reached between records
  is skipped.

  Args:
      df: Table whose column 0 contains the annotation lines as strings.

  Returns:
      A tuple ``(filenames, bboxes)``: the sorted list of image base names
      (the part after the last '/'), and a dict mapping each base name to a
      numpy array built from the first four space-separated fields of each
      of its box rows (the fields are not converted to numbers here).
  """
  column = df[0]
  total_rows = len(df)
  names = []
  boxes_by_name = {}

  row = 0
  while row < total_rows:
      entry = column[row]
      if entry.rsplit(".", 1)[-1] != "jpg":
          # Not the start of a record; move on.
          row += 1
          continue

      name = entry.rsplit("/", 1)[-1]
      names.append(name)

      n_boxes = int(column[row + 1])
      first_box_row = row + 2
      box_rows = range(first_box_row, first_box_row + n_boxes)
      boxes_by_name[name] = np.array([column[k].split(' ')[:4] for k in box_rows])

      # Continue right after the last box row of this record.
      row = first_box_row + n_boxes

  names.sort()
  return (names, boxes_by_name)

In [None]:
# Parse both annotation tables into (sorted file names, {file name: boxes}).
filenames_train, bboxes_train = make_filenames_bboxes(df_train)
filenames_val, bboxes_val = make_filenames_bboxes(df_val)

Let's look at an example

In [None]:
# First entry of the sorted list of training file names.
filenames_train[0]

In [None]:
# Inspect one parsed entry: all of its boxes, the element type, and one coordinate.
example_boxes = bboxes_train['0_Parade_Parade_0_1014.jpg']
print(example_boxes)
print(type(example_boxes[0][0]))
print(example_boxes[0][0])

We now write a function that takes the annotations in the given format <a href="http://shuoyang1213.me/WIDERFACE/">(from here)</a> and converts them to a format where information about the bounding boxes is stored in a dictionary, like this (an example from a dataset with many classes; our function names the keys `x_min`, `y_min`, `x_max`, `y_max`):  
{'bboxes':  
[{'class': 'trafficlight', 'xmin': 20, 'ymin': 109, 'xmax': 81, 'ymax': 237},  
{'class': 'trafficlight', 'xmin': 116, 'ymin': 162, 'xmax': 163, 'ymax': 272},  
{'class': 'trafficlight', 'xmin': 189, 'ymin': 189, 'xmax': 233, 'ymax': 295}],  
'filename': 'road4.png',  
'image_size': (267, 400, 3)}






In [None]:
def get_info_dict(filename, bboxes, sample):
    """Collect the boxes, file name and image size of one image in a dict.

    Args:
        filename: Image file name inside ``{HOME}/Face_dataset/{sample}/images``.
        bboxes: Sequence of boxes, each beginning with x_min, y_min, width,
            height (any values ``int()`` accepts); may be empty.
        sample: Name of the split folder, e.g. 'train' or 'valid'.

    Returns:
        A dict with the keys 'bboxes' (list of dicts holding 'class',
        'x_min', 'y_min', 'x_max', 'y_max'), 'filename', and 'image_size'
        (the ``shape`` of the array returned by ``cv2.imread``).

    Raises:
        OSError: If ``cv2.imread`` cannot read the image.
    """
    image_path = f'{HOME}/Face_dataset/{sample}/images' + '/' + filename
    im = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an unhelpful AttributeError on `.shape`.
    if im is None:
        raise OSError(f'Could not read image (missing or not decodable): {image_path}')

    info_dict = {'bboxes': [], 'filename': filename, 'image_size': im.shape}

    # Each box arrives as x_min, y_min, width, height; store it as corner
    # coordinates. Every box belongs to the single class 'face'.
    for box in bboxes:
        x_min, y_min, width, height = (int(value) for value in box[:4])
        info_dict['bboxes'].append({
            'class': 'face',
            'x_min': x_min,
            'y_min': y_min,
            'x_max': x_min + width,
            'y_max': y_min + height,
        })

    return info_dict

This outputs:

In [None]:
# Build the info dict for the second training image as a quick check.
filename = filenames_train[1]
get_info_dict(filename, bboxes_train[filename], 'train')

In [None]:
# Build the info dict for the third validation image as a quick check.
filename = filenames_val[2]
get_info_dict(filename, bboxes_val[filename], 'valid')

We got a convenient file format.  
Now we write a function to convert info contained in `info_dict` to YoloV8 style annotations and write them to a `txt` file


In [None]:
def convert_to_yolov8(info_dict, sample):
    """Write the YOLOv8 label file for one image.

    Each box becomes one line ``class_id x_center y_center width height``,
    with the four numbers divided by the image width/height and printed with
    three decimals. The file is written to
    ``{HOME}/Face_dataset/{sample}/labels`` under the image's name with its
    extension replaced by ``.txt``.

    Args:
        info_dict: Dict with 'bboxes' (dicts holding 'class', 'x_min',
            'y_min', 'x_max', 'y_max'), 'filename', and 'image_size'
            (height, width, ...).
        sample: Name of the split folder, e.g. 'train' or 'valid'.
    """
    class_name_to_id_mapping = {'face': 0}

    # The image size is the same for every box, so read it once. Slicing keeps
    # this working whether or not the shape carries a channel entry.
    image_h, image_w = info_dict["image_size"][:2]

    print_buffer = []
    for b in info_dict["bboxes"]:
        try:
            class_id = class_name_to_id_mapping[b["class"]]
        except KeyError:
            print("Invalid Class. Must be one from ", class_name_to_id_mapping.keys())
            # There is no valid id for this box; skip it rather than writing
            # a line with an undefined or stale class id.
            continue

        # Transform the bbox co-ordinates as per the format required by YOLO v8
        b_center_x = (b["x_min"] + b["x_max"]) / 2
        b_center_y = (b["y_min"] + b["y_max"]) / 2
        b_width    = (b["x_max"] - b["x_min"])
        b_height   = (b["y_max"] - b["y_min"])

        # Normalise the co-ordinates by the dimensions of the image
        b_center_x /= image_w
        b_center_y /= image_h
        b_width    /= image_w
        b_height   /= image_h

        print_buffer.append("{} {:.3f} {:.3f} {:.3f} {:.3f}".format(class_id, b_center_x, b_center_y, b_width, b_height))

    # Swap only the extension; a plain str.replace would also rewrite any
    # "jpg" that happens to appear earlier in the file name.
    label_name = os.path.splitext(info_dict["filename"])[0] + ".txt"
    save_file_name = os.path.join(f'{HOME}/Face_dataset/{sample}/labels', label_name)

    # Save the annotations; the context manager closes the file handle.
    with open(save_file_name, "w") as label_file:
        print("\n".join(print_buffer), file=label_file)

In [None]:
# Write a YOLOv8 label file for every training image.
# The position of the file in the list is not needed, so iterate the names directly.
for filename in filenames_train:
    info_dict = get_info_dict(filename, bboxes_train[filename], 'train')
    convert_to_yolov8(info_dict, 'train')

In [None]:
# Write a YOLOv8 label file for every validation image.
# The position of the file in the list is not needed, so iterate the names directly.
for filename in filenames_val:
    info_dict = get_info_dict(filename, bboxes_val[filename], 'valid')
    convert_to_yolov8(info_dict, 'valid')

## Training

**I got two warnings:**  
---train: WARNING ⚠️ /content/Face_Dataset/images/train/54_Rescue_rescuepeople_54_29.jpg:  
ignoring corrupt image/label: non-normalized or out of bounds coordinates [      1.025]  
---val: WARNING ⚠️ /content/Face_Dataset/images/val/39_Ice_Skating_iceskiing_39_583.jpg:  
ignoring corrupt image/label: non-normalized or out of bounds coordinates [      1.002]  
This is not caused by my conversion: I checked manually, and the coordinates in the original annotation `txt` file are wrong:  
In the first case image has width = 1024 with x_min = x_max = 1050   
In the second case image has width = 1024 with x_max = 1026  
**Yolo will skip these images during training**


In [None]:
!pip install ultralytics

In [None]:
from ultralytics import YOLO

In [None]:
# Build the model from the 'yolov8n.yaml' definition and move it to the selected device.
model = YOLO('yolov8n.yaml')
model.to(device);  # trailing ';' keeps the notebook from echoing the return value

In [None]:
# Location of the dataset config that was moved into Face_dataset earlier.
path = f'{HOME}/Face_dataset/dataV8.yaml'
path

In [None]:
# Train on the dataset described by the YAML at `path`: 5 epochs, imgsz=640.
model.train(data = path, epochs = 5, imgsz = 640)

## Results

Results saved in ./runs  
We need ./runs/detect/train/weights/best.pt for Streamlit  
We have to download this file and follow instructions on Git

### Metrics and Losses

In [None]:
# Show the results.png image saved by the training run on a large figure.
fig, ax = plt.subplots(figsize = (19.2, 10.8))
im = plt.imread(f'{HOME}/runs/detect/train/results.png')
ax.imshow(im)
ax.set_title('Results');

### Predictions

In [None]:
# Show the model's predictions for validation batch 0.
# The *_pred.jpg mosaic holds the predicted boxes; *_labels.jpg is the
# ground-truth counterpart and would not show what the model detected.
im = plt.imread(f'{HOME}/runs/detect/train/val_batch0_pred.jpg')
fig, ax = plt.subplots(figsize = (19.2, 10.8))
ax.set_title('Prediction')
ax.imshow(im);

In [None]:
# Show the model's predictions for validation batch 1.
# The *_pred.jpg mosaic holds the predicted boxes; *_labels.jpg is the
# ground-truth counterpart and would not show what the model detected.
im = plt.imread(f'{HOME}/runs/detect/train/val_batch1_pred.jpg')
fig, ax = plt.subplots(figsize = (19.2, 10.8))
ax.set_title('Prediction')
ax.imshow(im);