In [None]:
# Clone YOLOv5 into the home directory and install its Python requirements
# into the kernel's environment.
%cd ~
!git clone https://github.com/ultralytics/yolov5
%cd ~/yolov5
%pip install -r requirements.txt
# Go back to the previously visited directory (~) so that the relative
# "yolov5/..." paths used by later cells are resolved from there.
%cd -

In [None]:
# Extra packages for the cells below: tqdm (progress bar) and datasets
# (load_dataset).
# NOTE(review): wandb is not imported anywhere in the visible cells;
# presumably it is picked up by YOLOv5's training logger — confirm.
%pip install tqdm
%pip install datasets
%pip install wandb

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
from datasets import load_dataset


def download_dataset(split: str = 'train', sample_index: int = 110):
    """Load one split of the `wider_face` dataset and preview a single sample.

    The preview draws the sample image with every annotated face box as a red
    rectangle. Each box is read as ``[x_min, y_min, width, height]`` in pixels
    (that is how the values are handed to ``patches.Rectangle``).

    Args:
        split: Dataset split passed to ``load_dataset`` (default ``'train'``).
        sample_index: Index of the sample shown in the preview plot.

    Returns:
        The dataset object returned by ``load_dataset`` for ``split``.

    Raises:
        IndexError: If ``sample_index`` is out of range for the split.
    """
    wider_face = load_dataset('wider_face', split=split)
    # Keep the original wording for the default split.
    split_label = "training" if split == 'train' else split
    print("Num images in wider_face %s set: %i" % (split_label, len(wider_face)))

    # Fetch the row once: every access decodes the image again.
    sample = wider_face[sample_index]
    img = np.array(sample['image'], dtype=np.uint8)
    bboxes = sample['faces']['bbox']

    fig, ax = plt.subplots()
    ax.imshow(img)

    for bbox in bboxes:
        rect = patches.Rectangle((bbox[0], bbox[1]),
                                 bbox[2], bbox[3],
                                 linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(rect)

    plt.show()

    return wider_face

In [None]:
import os
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Tuple

from datasets import Dataset, IterableDataset
from tqdm import tqdm


def _yolo_label_line(bbox, img_width, img_height, class_id=0):
    """Format one ``[x_min, y_min, w, h]`` pixel box as a normalized YOLOv5 label line."""
    x_min, y_min, box_w, box_h = bbox[0], bbox[1], bbox[2], bbox[3]
    # Compute the centre in pixels first, then normalize exactly once.
    cx = (x_min + box_w / 2.0) / img_width
    cy = (y_min + box_h / 2.0) / img_height
    return "%d %f %f %f %f\n" % (
        class_id, cx, cy, box_w / img_width, box_h / img_height)


def convert_to_yolov5_format(
    dataset: "Dataset",
    dst_dir: str,
) -> None:
    """Export a face-detection dataset into YOLOv5's images/labels layout.

    For sample ``i`` this writes ``<dst_dir>/images/<i>.png`` and
    ``<dst_dir>/labels/<i>.txt``. Each label line is
    ``class_id center_x center_y box_width box_height`` with all four
    coordinates normalized by the image size.

    Args:
        dataset: Map-style dataset supporting ``len()`` and integer indexing.
            Each item provides ``'image'`` (an object with ``.size`` and
            ``.save(path)``) and ``'faces'`` whose ``'bbox'`` is a list of
            ``[x_min, y_min, width, height]`` boxes in pixels.
        dst_dir: Output root; the ``images`` and ``labels`` sub-directories
            are created if missing.

    Note:
        A sample is skipped when both of its output files already exist, so
        an interrupted export can be resumed. Delete ``dst_dir`` to force a
        full re-export (e.g. for label files written by an older version).
    """
    images_dir = os.path.join(dst_dir, "images")
    labels_dir = os.path.join(dst_dir, "labels")
    Path(images_dir).mkdir(parents=True, exist_ok=True)
    Path(labels_dir).mkdir(parents=True, exist_ok=True)

    class_id = 0  # single class: face

    for i in tqdm(range(len(dataset))):
        dst_image_file = os.path.join(images_dir, "%d.png" % i)
        dst_label_file = os.path.join(labels_dir, "%d.txt" % i)
        # Check before touching dataset[i]: indexing decodes the image.
        if os.path.exists(dst_label_file) and os.path.exists(dst_image_file):
            continue

        data_point = dataset[i]
        pil_img = data_point['image']
        label = data_point['faces']
        width, height = pil_img.size

        # Write the image first and the label last, so a label file on disk
        # implies its image was fully written.
        pil_img.save(dst_image_file)
        with open(dst_label_file, "w") as wobj:
            for bbox in label['bbox']:
                wobj.write(_yolo_label_line(bbox, width, height, class_id))

In [None]:
def create_yolov5_dataset_yaml(
    yolo_train_dir: str,
    yolo_test_dir: str,
    yaml_file: str = "yolov5/data/wider_face.yaml",
) -> None:
    """Write the YOLOv5 dataset description file for the face dataset.

    The file contains the ``train`` and ``val`` image directories, the class
    count ``nc`` and the class ``names`` list.

    Args:
        yolo_train_dir: Directory holding the training ``images`` folder.
        yolo_test_dir: Directory holding the validation ``images`` folder.
        yaml_file: Destination path of the YAML file. Its parent directory
            must already exist.

    Raises:
        FileNotFoundError: If the parent directory of ``yaml_file`` is missing.
    """
    # Absolute paths keep the YAML valid regardless of the working directory
    # train.py is launched from.
    train_images_dir = os.path.abspath(os.path.join(yolo_train_dir, "images"))
    val_images_dir = os.path.abspath(os.path.join(yolo_test_dir, "images"))

    classes = ['Face']
    names_str = "names: [" + ", ".join("'%s'" % name for name in classes) + "]"

    with open(yaml_file, "w") as wobj:
        wobj.write("train: %s\n" % train_images_dir)
        wobj.write("val: %s\n" % val_images_dir)
        wobj.write("nc: %d\n" % len(classes))
        wobj.write(names_str + "\n")

In [None]:
from download_dataset import download_dataset
from convert_to_yolov5_format import convert_to_yolov5_format
from create_yolov5_dataset_yaml import create_yolov5_dataset_yaml
from yolov5.utils.downloads import attempt_download

# Export the training split in YOLOv5 layout.
wider_face = download_dataset()

yolo_train_dir = "./yolov5/data/train"
convert_to_yolov5_format(wider_face, yolo_train_dir)

# Validate on the held-out split: exporting the training split again would
# make every validation metric a training-set metric.
# convert_to_yolov5_format skips samples whose output already exists, so
# delete ./yolov5/data/test first if an earlier run filled it with training
# images.
from datasets import load_dataset  # local import keeps this cell self-contained

wider_face_val = load_dataset('wider_face', split='validation')
yolo_test_dir = "./yolov5/data/test"
convert_to_yolov5_format(wider_face_val, yolo_test_dir)

create_yolov5_dataset_yaml(yolo_train_dir, yolo_test_dir)

# Fetch the pretrained checkpoints used as starting weights below.
attempt_download('weights/yolov5s.pt')
attempt_download('weights/yolov5m.pt')
attempt_download('weights/yolov5l.pt')

In [None]:
# Fine-tune YOLOv5s on 2 GPUs. Run from the repo root (train.py and
# data/wider_face.yaml live there) and expose one GPU per DDP process.
!cd ~/yolov5 && python -m torch.distributed.launch --nproc_per_node 2 train.py --data data/wider_face.yaml --batch-size 32 --epochs 10 --img-size 768 --project runs/train --name wider_face --weights weights/yolov5s.pt --device 0,1

In [None]:
# Fine-tune YOLOv5m on 2 GPUs. Run from the repo root (train.py and
# data/wider_face.yaml live there) and expose one GPU per DDP process.
!cd ~/yolov5 && python -m torch.distributed.launch --nproc_per_node 2 train.py --data data/wider_face.yaml --batch-size 32 --epochs 10 --img-size 768 --project runs/train --name wider_face --weights weights/yolov5m.pt --device 0,1

In [None]:
# Fine-tune YOLOv5l on 2 GPUs. Run from the repo root (train.py and
# data/wider_face.yaml live there) and expose one GPU per DDP process.
!cd ~/yolov5 && python -m torch.distributed.launch --nproc_per_node 2 train.py --data data/wider_face.yaml --batch-size 32 --epochs 10 --img-size 768 --project runs/train --name wider_face --weights weights/yolov5l.pt --device 0,1