# Requirements
Windows or Linux<br>
CMake >= 3.12: https://cmake.org/download/<br>
CUDA >= 10.0: https://developer.nvidia.com/cuda-toolkit-archive (on Linux do Post-installation Actions)<br>
OpenCV >= 2.4: use your preferred package manager (brew, apt), build from source using vcpkg, or download it from the official OpenCV site (on Windows set the system variable OpenCV_DIR = C:\opencv\build, i.e. the folder that contains the include and x64 folders)<br>
cuDNN >= 7.0: https://developer.nvidia.com/rdp/cudnn-archive (on Linux copy cudnn.h, libcudnn.so... as described here 
https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installlinux-tar , on Windows copy cudnn.h, cudnn64_7.dll, cudnn64_7.lib as described here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installwindows )<br>
GPU with CC >= 3.0: https://en.wikipedia.org/wiki/CUDA#GPUs_supported<br>
on Linux GCC or Clang, on Windows MSVC 2017/2019 https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community

# Step 1: Cloning and Building Darknet
The following cells will clone darknet from AlexeyAB's famous repository, adjust the Makefile to enable OPENCV and GPU for darknet and then build darknet.

Do not worry about any warnings when you run the '!make' cell!

In [None]:
# verify CUDA
# !nvcc --version
!/usr/local/cuda/bin/nvcc --version

In [None]:
# clone Yolov4 darknet repo
!git clone https://github.com/AlexeyAB/darknet

In [None]:
# change makefile to have GPU and OPENCV enabled
%cd darknet
!sed -i 's/OPENCV=0/OPENCV=1/' Makefile
!sed -i 's/GPU=0/GPU=1/' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/' Makefile
!sed -i 's/CUDNN_HALF=0/CUDNN_HALF=1/' Makefile

In [None]:
# make darknet (builds darknet so that you can then use the darknet executable file to run or train object detectors)
!make

# Step 2: Download pre-trained YOLOv4 weights
YOLOv4 has been trained already on the coco dataset which has 80 classes that it can predict. We will grab these pretrained weights so that we can run YOLOv4 on these pretrained classes and get detections.

In [None]:
!wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137

# Step 3: Download the COCO Dataset for Selected Categories
The selected categories are person, car, bus and truck.

In [None]:
# import libraries
import io
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import cv2
import os

In [None]:
# download coco dataset annotation zip file
%cd data
!wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
    

In [None]:
# unzip annotation zip file
!unzip annotations_trainval2017.zip

In [None]:
!mkdir coco
!mkdir coco/obj
!dir

In [None]:
# get current directory; the dataset paths built from it are therefore absolute
cdir = os.getcwd()

In [None]:
# dataset_path: folder that receives the images and their per-image label .txt files
dataset_path = os.path.join(cdir, 'coco/obj')
# coco_path: folder that receives the train.txt / valid.txt image lists
coco_path = os.path.join(cdir, 'coco')

In [None]:
def convertBbox2YoloFormat(bbox, size):
  """Turn a COCO bounding box into a normalized YOLO bounding box.

  COCO stores a box as [x(left), y(top), width, height] in pixels, e.g.
  [477.88, 87.8, 13.7, 33.4]. YOLO expects [center x, center y, width,
  height] expressed as fractions of the image size, e.g.
  [0.757391, 0.244731, 0.021406, 0.07822].

  bbox: COCO box, indexable as described above.
  size: (image width, image height) in pixels.
  Returns the tuple (x, y, w, h), each value rounded to 6 decimal places.
  """
  img_w, img_h = size
  left, top = bbox[0], bbox[1]
  box_w, box_h = bbox[2], bbox[3]
  # the centre is the top-left corner shifted by half of the box extent
  center_x = (left + box_w/2) / img_w
  center_y = (top + box_h/2) / img_h
  return (
    round(center_x, 6),
    round(center_y, 6),
    round(box_w/img_w, 6),
    round(box_h/img_h, 6),
  )

In [None]:
# Image-list files: BuildCustomDatasetFromCOCO writes one image path per line
# into the handle it is given.
train_txt = "train.txt"
train_txt_path = os.path.join(coco_path, train_txt)
valid_txt = "valid.txt"
valid_txt_path = os.path.join(coco_path, valid_txt)
# Opened in 'w' mode, so re-running this cell truncates any existing list.
# The handles stay open after this cell; they must be closed once the dataset
# has been built so that buffered lines reach the disk.
trainfile = open(train_txt_path, 'w')
validfile = open(valid_txt_path, 'w')

In [None]:
def BuildCustomDatasetFromCOCO(annFile, description_file):
    """Download the person/car/bus/truck images of a COCO split and write YOLO labels.

    For every image that contains at least one of the four categories, the
    image is fetched from its ``coco_url`` and saved into ``dataset_path``, a
    ``<basename>.txt`` label file is written next to it, and the image path is
    appended to ``description_file``.

    Args:
        annFile: path of the COCO instances annotation JSON file.
        description_file: writable text file that receives one image path per
            line (train.txt or valid.txt).
    """
    coco = COCO(annFile)
    # get Category Ids
    catNames = ['person', 'car', 'bus', 'truck']
    catIds = coco.getCatIds(catNms=catNames)
    # get Image Ids for 4 categories
    imgIds = []
    for catId in catIds:
        sub_imgIds = coco.getImgIds(catIds=catId)
        print(len(sub_imgIds))
        imgIds += sub_imgIds
    # an image can contain several of the categories; keep each id once
    imgIds = list(set(imgIds))

    # loop Image Ids
    # get Image Information from Coco dataset
    for imgId in imgIds:
        img_info = coco.loadImgs(ids=imgId)[0]
        # Load Image and annotation (crowd annotations are excluded)
        img = io.imread(img_info['coco_url'])
        annIds = coco.getAnnIds(imgIds=img_info['id'], catIds=catIds, iscrowd=0)
        anns = coco.loadAnns(annIds)

        # get file name, e.g. 000000262145.jpg
        filename = img_info['file_name']
        basename = os.path.splitext(filename)[0]
        txtfile_path = os.path.join(dataset_path, basename + '.txt')
        image_path = os.path.join(dataset_path, filename)

        # download image in coco/obj folder
        io.imsave(image_path, img)

        # build one "<class index> <x> <y> <w> <h>" line per annotation;
        # the class index is the position of the category id inside catIds
        # NOTE(review): assumes catIds comes back in the same order as the
        # class names file lists the classes - confirm
        size = (img_info['width'], img_info['height'])
        label_lines = []
        for ann in anns:
            bbox = convertBbox2YoloFormat(ann['bbox'], size)
            class_index = catIds.index(ann['category_id'])
            label_lines.append(" ".join([str(class_index)] + [str(entry) for entry in bbox]))

        # write the yolo format bounding boxes in the image's .txt file
        # (lines separated by "\n", no trailing newline); the context manager
        # closes the file so the labels are flushed to disk
        with open(txtfile_path, 'w') as txtfile:
            txtfile.write("\n".join(label_lines))

        # list the image only after it and its labels were written, e.g.
        # <dataset_path>/000000262145.jpg
        description_file.write(image_path + "\n")

In [None]:
# Build the train and validation subsets from the COCO 2017 instance annotations.
# Each annotation file is loaded through the COCO api inside BuildCustomDatasetFromCOCO.
dataDir = '.'
try:
    for dataType, list_file in (('train2017', trainfile), ('val2017', validfile)):
        annFile = '{}/annotations/instances_{}.json'.format(dataDir, dataType)
        print(annFile)
        BuildCustomDatasetFromCOCO(annFile, list_file)
finally:
    # close the list files so that every buffered image path is written to
    # train.txt / valid.txt, even if a download failed half-way
    trainfile.close()
    validfile.close()

# Step 4: Write Custom Training Config for YOLOv4
<br>
<font size=5>Modify following files</font><br>
&emsp;&emsp;cfg/yolov4-custom.cfg<br>
&emsp;&emsp;cfg/coco.data<br>
&emsp;&emsp;data/coco.names<br>

<font size=4>Modify data/coco.names with our own categories</font><br>
&emsp;&emsp;person<br>
&emsp;&emsp;car<br>
&emsp;&emsp;bus<br>
&emsp;&emsp;truck<br>

<font size=4>Modify cfg/coco.data</font><br>
&emsp;&emsp;classes= 4<br>
&emsp;&emsp;train  = /content/darknet/data/coco/train.txt<br>
&emsp;&emsp;valid  = /content/darknet/data/coco/valid.txt<br>
&emsp;&emsp;names = /content/darknet/data/coco.names<br>
&emsp;&emsp;backup = /content/darknet/backup<br>

<font size=4>Modify cfg/yolov4-custom.cfg</font><br>
&emsp;&emsp;line 20, max_batches = 8000(4*2000)<br>
&emsp;&emsp;line 22, steps = 6400, 7200(0.8, 0.9*8000)<br>
&emsp;&emsp;change yolo layer classes to 4(class number), line 970, 1058, 1146<br>
&emsp;&emsp;change filters of convolution to 27((classes + 5)x3) immediately before each 3 yolo layers, line 963, 1051, 1139

# Step 5: Train the Model with Custom Dataset

In [None]:
# train
!./darknet detector train cfg/coco.data cfg/yolov4-custom.cfg yolov4.conv.137
# train with multiple GPU
# ./darknet detector train cfg/coco.data cfg/yolov4-custom.cfg yolov4.conv.137 -gpus 0,1,2,3
# If want to stop and restart training from a checkpoint:
# ./darknet detector train cfg/coco.data cfg/yolov4-custom.cfg backup/yolov3.backup -gpus 0,1,2,3

# Step 6: Run Inference on Custom Objects with the Saved YOLOv4 Weights

In [None]:
#define utility function
def imShow(path):
  import matplotlib.pyplot as plt
  %matplotlib inline

  image = cv2.imread(path)
  height, width = image.shape[:2]
  resized_image = cv2.resize(image,(3*width, 3*height), interpolation = cv2.INTER_CUBIC)

  fig = plt.gcf()
  fig.set_size_inches(18, 10)
  plt.axis("off")
  #plt.rcParams['figure.figsize'] = [10, 5]
  plt.imshow(cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB))
  plt.show()

In [None]:
# test the images with trained yolov4 model
#test out our detector!
img_path = 'test.jpg'
!./darknet detect cfg/yolov4-custom.cfg backup/custom-yolov4-detector_last.weights {img_path} -dont-show
imShow('predictions.jpg')

# Step 7: Download the VOC Dataset for Selected Categories

In [None]:
# download VOC 2007, 2012 dataset tar files from url
!wget http://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar
!wget http://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar

In [None]:
# extract the tar files
!tar xf VOCtrainval_06-Nov-2007.tar
!tar xf VOCtrainval_11-May-2012.tar

In [None]:
# load libraries
import xml.etree.ElementTree as ET
import os
from os import listdir, getcwd
from os.path import join

In [None]:
# dataset description and classes
# the VOC dataset has no truck class, so only 3 classes are used - person, car, bus
sets = [('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val')]
classes = ["person", "car", "bus"]

In [None]:
# convert VOC bounding box to yolo darknet format
def ConvertVOCBbox2YoloFormat(box, size):
    """Normalize a VOC box for darknet.

    box: (xmin, xmax, ymin, ymax) in pixels.
    size: (image width, image height) in pixels.
    Returns (x, y, w, h): box centre and box extent, each scaled by the
    reciprocal of the matching image dimension.
    """
    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]
    # centre point and extent, still in pixels
    center_x = (xmin + xmax)/2.0
    center_y = (ymin + ymax)/2.0
    span_x = xmax - xmin
    span_y = ymax - ymin
    # scale factors that map pixels to the 0..1 range
    scale_x = 1./size[0]
    scale_y = 1./size[1]
    return (center_x * scale_x, center_y * scale_y, span_x * scale_x, span_y * scale_y)

In [None]:
# Read Annotation Xml files from VOC Dataset and create the Yolo darknet text files
def ConvertVOCAnns(year, image_id):
    """Convert one VOC annotation XML file into a darknet label text file.

    Reads ``VOCdevkit/VOC<year>/Annotations/<image_id>.xml`` and writes
    ``VOCdevkit/VOC<year>/labels/<image_id>.txt`` with one
    ``<class id> <x> <y> <w> <h>`` line per kept object. Objects whose class
    is not in ``classes`` or that are flagged as difficult are skipped.

    Args:
        year: VOC release year used in the folder name, e.g. '2007'.
        image_id: annotation/image file name without extension.
    """
    in_path = 'VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id)
    out_path = 'VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id)

    # parse xml file; the context manager releases the handle right away
    with open(in_path) as in_file:
        root = ET.parse(in_file)

    # image width and height come from the size element
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    size = (w, h)

    items = []
    for obj in root.iter('object'):
        # a missing difficult tag is treated as "not difficult"
        difficult_node = obj.find('difficult')
        difficult = int(difficult_node.text) if difficult_node is not None else 0
        cls = obj.find('name').text
        # filter object by class and difficult
        if cls not in classes or difficult == 1:
            continue
        # class id is the position of the class name in classes
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        xmin = float(xmlbox.find('xmin').text)
        ymin = float(xmlbox.find('ymin').text)
        xmax = float(xmlbox.find('xmax').text)
        ymax = float(xmlbox.find('ymax').text)
        # the converter expects the order (xmin, xmax, ymin, ymax)
        box = (xmin, xmax, ymin, ymax)
        bbox = ConvertVOCBbox2YoloFormat(box, size)
        # make darknet format annotation item
        items.append(str(cls_id) + " " + " ".join([str(e) for e in bbox]) + "\n")

    # write all items; the file is closed (and flushed) on leaving the block
    with open(out_path, 'w') as out_file:
        out_file.writelines(items)

In [None]:
# get current directory; it prefixes the image paths written to the list files
cdir = getcwd()

In [None]:
# loop all datasets
# for every (year, image set) pair, write <year>_<image set>.txt with one
# absolute image path per line and create the matching darknet label files
# this stage is the same as COCO dataset
for year, image_set in sets:
    os.makedirs('VOCdevkit/VOC%s/labels/'%(year), exist_ok=True)
    # read the image ids of this set; the handle is closed right after reading
    with open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)) as ids_file:
        image_ids = ids_file.read().strip().split()
    with open('%s_%s.txt'%(year, image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(cdir, year, image_id))
            ConvertVOCAnns(year, image_id)

# Step 8: Write Custom Configuration for COCO and VOC Dataset
<br>
If you train with both the COCO and VOC datasets, the following files do not need to be modified again:<br>
&emsp;&emsp;cfg/yolov4-custom.cfg<br>
&emsp;&emsp;cfg/coco.data<br>
&emsp;&emsp;data/coco.names<br>
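But you need to concatenate the train.txt and valid.txt files of the COCO and VOC datasets.<br>
You can also move the VOC image files into the directory that holds the COCO image files - in our case, darknet/data/coco/obj (move the matching label .txt files along with them and update the image paths in the list files accordingly)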

# Step 9: Train the Model with COCO and VOC Dataset
<br>
This step is the same as Step 5.

# Step 10: Download Open Images Dataset

In [None]:
# install OpenImage Dataset ToolKit from git
!git clone https://github.com/theAIGuysCode/OIDv4_ToolKit.git

In [None]:
%cd OIDv4_ToolKit/

In [None]:
!pip install -r requirements.txt

In [None]:
# Download images and annotation files from the Open Images Dataset
# arguments:
#    --classes : list of classes
#    --type_csv : 'train' or 'validation' or 'test' or 'all'
#    --limit : integer number of each class
!python main.py downloader --classes Person Car Bus Truck 'Vehicle registration plate' 'Human face' --type_csv train --multiclasses 1 --limit 300

In [None]:
!python main.py downloader --classes Person Car Bus Truck 'Vehicle registration plate' 'Human face' --type_csv validation --multiclasses 1 --limit 50

<font size=4>Modify the classes.txt file as follows</font><br>

&emsp;&emsp;Person<br>
&emsp;&emsp;Car<br>
&emsp;&emsp;Bus<br>
&emsp;&emsp;Truck<br>
&emsp;&emsp;Vehicle registration plate<br>
&emsp;&emsp;Human face

In [None]:
# convert annotations text file to darknet format annotation text file
# make the image list file for train,validation, test subset
import os
import cv2
import numpy as np
from tqdm import tqdm
import argparse
import fileinput

# function that turns XMin, YMin, XMax, YMax coordinates to normalized yolo format
def convert(filename_str, coords):
    """Convert absolute corner coordinates into a normalized YOLO box.

    Must be called while the current working directory is the ``Label``
    folder of a class directory: the image is read from the parent folder.

    Args:
        filename_str: image file name without the ".jpg" extension.
        coords: mutable sequence [XMin, YMin, XMax, YMax] in pixels; it is
            modified in place.

    Returns:
        The same ``coords`` object, now holding [x center, y center, width,
        height] as fractions of the image size, rounded to 6 decimals.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    # read the image through a relative path instead of changing directory,
    # so a failure cannot leave the process in the wrong working directory
    image_path = os.path.join(os.pardir, filename_str + ".jpg")
    image = cv2.imread(image_path)
    # cv2.imread returns None instead of raising when the file is unreadable
    if image is None:
        raise FileNotFoundError("Could not read image: {}".format(image_path))
    img_h, img_w = image.shape[:2]

    # corners -> width / height
    coords[2] -= coords[0]
    coords[3] -= coords[1]
    # top-left corner -> centre; the half extent is kept fractional so the
    # centre is not shifted by up to half a pixel
    coords[0] += coords[2] / 2
    coords[1] += coords[3] / 2
    # pixels -> fraction of the image size
    coords[0] /= img_w
    coords[1] /= img_h
    coords[2] /= img_w
    coords[3] /= img_h
    for i in range(4):
        coords[i] = round(coords[i], 6)
    return coords

# directory this cell was started from
ROOT_DIR = os.getcwd()

# create dict to map class names to numbers for yolo
# (one class name per line of classes.txt, numbered from 0 in file order)
classes = {}
with open("classes.txt", "r") as myFile:
    for num, line in enumerate(myFile, 0):
        line = line.rstrip("\n")
        classes[line] = num

# step into dataset directory
os.chdir(os.path.join("OID", "Dataset"))
# absolute dataset root: the image paths written to the list files must
# include it, otherwise they point to folders that do not exist
DATASET_DIR = os.getcwd()
DIRS = os.listdir(DATASET_DIR)

# the image lists are closed automatically, even if a conversion fails
with open("train.txt", "w") as train_file, \
        open("validation.txt", "w") as validation_file, \
        open("test.txt", "w") as test_file:
    # image-list file for each subset folder name
    list_files = {
        "train": train_file,
        "validation": validation_file,
        "test": test_file,
    }

    # for all train, validation and test folders
    for DIR in DIRS:
        if not os.path.isdir(DIR):
            continue
        os.chdir(DIR)
        print("Currently in subdirectory:", DIR)

        # for all class folders step into directory to change annotations
        for CLASS_DIR in os.listdir(os.getcwd()):
            if not os.path.isdir(CLASS_DIR):
                continue
            os.chdir(CLASS_DIR)
            print("Converting annotations for class: ", CLASS_DIR)

            # record the absolute path of every image of this subset
            list_file = list_files.get(DIR)
            for filename in tqdm(os.listdir(os.getcwd())):
                if list_file is not None and filename.endswith(".jpg"):
                    filepath = os.path.join(DATASET_DIR, DIR, CLASS_DIR, filename)
                    list_file.write(filepath + "\n")

            # Step into Label folder where annotations are generated;
            # convert() expects this to be the working directory
            os.chdir("Label")

            for filename in tqdm(os.listdir(os.getcwd())):
                if not filename.endswith(".txt"):
                    continue
                filename_str = str.split(filename, ".")[0]
                annotations = []
                with open(filename) as f:
                    for line in f:
                        # class names may contain spaces, so swap each name
                        # for its number before splitting on whitespace
                        for class_type, class_id in classes.items():
                            line = line.replace(class_type, str(class_id))
                        labels = line.split()
                        if not labels:
                            # skip blank lines instead of failing on them
                            continue
                        coords = np.asarray([float(labels[1]), float(labels[2]), float(labels[3]), float(labels[4])])
                        coords = convert(filename_str, coords)
                        annotations.append(" ".join([str(labels[0])] + [str(value) for value in coords]))
                # the darknet label file is written next to the image, one
                # level above the Label folder
                with open(os.path.join(os.pardir, filename), "w") as outfile:
                    outfile.writelines(entry + "\n" for entry in annotations)

            os.chdir("..")  # Label -> class folder
            os.chdir("..")  # class folder -> subset folder
        os.chdir("..")  # subset folder -> dataset folder

In [None]:
# go to folder OIDv4_ToolKit
%cd /content/OIDv4_ToolKit

In [None]:
# move OpenImages Dataset to Yolov4 darknet Dataset Folder
import shutil

DataSet_DIR = os.path.join(os.getcwd(), 'OID', 'Dataset')
Dest_DIR = os.path.join('/content', 'darknet', 'data/coco/obj')
# make sure the destination exists before anything is moved into it
os.makedirs(Dest_DIR, exist_ok=True)

# get images and write the path in Train.txt and Valid.txt
# move images and annotation files to Yolov4 Dataset folder
def move_dataset(from_dir, to_dir, file_handle):
  """Move every .jpg/.txt file of from_dir into to_dir.

  from_dir: folder holding the images and their label files.
  to_dir: existing destination folder.
  file_handle: writable text file; receives the new path of every moved
    image, one per line.
  """
  for filename in tqdm(os.listdir(from_dir)):
    print(filename)
    if not filename.endswith(('.txt', '.jpg')):
      continue
    if filename.endswith('.jpg'):
      file_handle.write(os.path.join(to_dir, filename))
      file_handle.write('\n')
    # shutil.move also works when source and destination are on different
    # filesystems, where os.rename fails
    shutil.move(os.path.join(from_dir, filename), os.path.join(to_dir, filename))

Train_DIR = os.path.join(DataSet_DIR, 'train', 'Person_Car_Bus_Truck_Vehicle registration plate_Human face')
Valid_DIR = os.path.join(DataSet_DIR, 'validation', 'Person_Car_Bus_Truck_Vehicle registration plate_Human face')

# append to the existing image lists; the files are closed even if a move fails
with open(os.path.join('/content', 'darknet', 'data/coco', 'train.txt'), 'a') as train_file:
  move_dataset(Train_DIR, Dest_DIR, train_file)
with open(os.path.join('/content', 'darknet', 'data/coco', 'valid.txt'), 'a') as valid_file:
  move_dataset(Valid_DIR, Dest_DIR, valid_file)

# Step 11: Write Custom Configuration for OpenImages Dataset
<br>
<font size=5>This step is similar to the custom configuration for COCO. Modify the following files</font><br>
&emsp;&emsp;cfg/yolov4-custom.cfg<br>
&emsp;&emsp;cfg/coco.data<br>
&emsp;&emsp;data/coco.names

<font size=4>Modify data/coco.names with our own categories</font><br>
&emsp;&emsp;Person<br>
&emsp;&emsp;Car<br>
&emsp;&emsp;Bus<br>
&emsp;&emsp;Truck<br>
&emsp;&emsp;License Plate<br>
&emsp;&emsp;Face<br>

<font size=4>Modify cfg/coco.data</font><br>
&emsp;&emsp;classes= 6<br>
&emsp;&emsp;train  = path/to/train.txt<br>
&emsp;&emsp;valid  = path/to/valid.txt<br>
&emsp;&emsp;names = /content/darknet/data/coco.names<br>
&emsp;&emsp;backup = /content/darknet/backup<br>
<br>
&emsp;train.txt and validation.txt are taken from the /content/OIDv4_ToolKit/OID/Dataset folder

<font size=4>Modify cfg/yolov4-custom.cfg</font><br>
&emsp;&emsp;line 20, max_batches = 12000(6*2000)<br>
&emsp;&emsp;line 22, steps = 9600, 10800(0.8, 0.9*12000)<br>
&emsp;&emsp;change yolo layer classes to 6(class number), line 970, 1058, 1146<br>
&emsp;&emsp;change filters of convolution to 33((classes + 5)x3) immediately before each 3 yolo layers, line 963, 1051, 1139

# Step 12: Train the Model with the OpenImages Dataset
<br>
This step is the same as Step 5.