In [26]:
import os
import shutil

import torch
from torch.cuda import is_available
import torchvision

## Dataset creation

In [9]:
# Download the COCO annotations_trainval2017 archive, unpack it, and move the
# extracted annotations/ folder under ./datasets/motorcycles.

!wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
!unzip ./annotations_trainval2017
!rm annotations_trainval2017.zip
!ls annotations/
!mkdir -p ./datasets/motorcycles/annotations
!mv ./annotations ./datasets/motorcycles

In [123]:
# create COCO motorcycle dataset

from urllib import request
import json

def coco_download(instance_path, classes, images_dst, labels_dst, download=False):
    '''
        Build the label files (and optionally fetch the images) for a subset of COCO classes.
        Parameters:
            instance_path: str, Path to a COCO instances JSON file
            classes: List[str], COCO category names to keep
            images_dst: str, Directory that receives the images (only used when download is True)
            labels_dst: str, Directory that receives one .txt label file per image
            download: bool, When True the images are fetched with download_images and
                      labels are written only for the images that were retrieved.
                      Defaults to False, which only writes the label files.
        Return: None
    '''
    coco_instance = get_instance(instance_path)

    # Map the requested class names to COCO ids, then to contiguous 0-based ids.
    classes_ids = get_classes_ids(coco_instance, classes)
    remaped = remap_classes_ids(classes_ids)
    annotations = annotations_to_download(coco_instance, classes_ids.values())

    images_ids = from_annotation_to_images_ids(annotations)
    images = get_images(coco_instance, images_ids)

    if download:
        # download_images returns only the images that were fetched successfully.
        images = download_images(images, images_dst)
    create_annotations(remaped, annotations, images, images_ids, labels_dst)

def get_instance(coco_instance_path):
    '''
        Load a COCO instances JSON file.
        Parameters:
            coco_instance_path: str, Path to the JSON file
        Return: The parsed JSON content
    '''
    # The context manager closes the file even when reading or parsing fails.
    with open(coco_instance_path, 'r', encoding='utf-8') as coco_file:
        return json.load(coco_file)

def get_classes_ids(coco_instance, classes):
    '''
        Look up the COCO category id of each requested class name.
        Parameters:
            coco_instance: dict, Parsed COCO instances file (reads its 'categories' list)
            classes: List[str], Category names to look up
        Return: Dict[str, id], class name -> COCO category id
        Raises: ValueError when a class name is not present in the categories
    '''
    ids = {}
    categories = coco_instance['categories']
    for c in classes:
        # First category whose name matches, or None when there is no match.
        class_instance = next((category for category in categories if category['name'] == c), None)
        if class_instance is None:
            # Exceptions accept no keyword arguments, so the message is passed positionally.
            raise ValueError(f"The class {c} doesn't exist in these COCO annotations")
        ids[c] = class_instance['id']
    return ids

def remap_classes_ids(classes_ids):
    '''
        Map each id in classes_ids to its position in the dict's value order.
        Parameters:
            classes_ids: dict, class name -> original id
        Return: dict, original id -> 0-based index
    '''
    return {original_id: position for position, original_id in enumerate(classes_ids.values())}

def annotations_to_download(coco_instance, classes_ids):
    '''
        Keep only the annotations whose 'category_id' is contained in classes_ids.
        Parameters:
            coco_instance: dict, Parsed COCO instances file (reads its 'annotations' list)
            classes_ids: Container of category ids to keep
        Return: list of the matching annotation dicts, in their original order
    '''
    return [
        annotation
        for annotation in coco_instance['annotations']
        if annotation['category_id'] in classes_ids
    ]

def from_annotation_to_images_ids(annotations):
    '''
        Collect the distinct 'image_id' values referenced by the annotations.
        Parameters:
            annotations: Iterable of annotation dicts
        Return: list of unique image ids (order follows set iteration)
    '''
    return list({annotation['image_id'] for annotation in annotations})

def get_images(coco_instance, images_ids):
    '''
        Select the image entries whose 'id' is one of images_ids.
        Parameters:
            coco_instance: dict, Parsed COCO instances file (reads its 'images' list)
            images_ids: Iterable of (hashable) image ids to keep
        Return: list of the matching image dicts, in their original order
    '''
    # A set gives O(1) membership tests; testing against the list made this
    # O(len(images) * len(images_ids)).
    wanted_ids = set(images_ids)
    return [image for image in coco_instance['images'] if image['id'] in wanted_ids]

def download_images(images, target_path):
    '''
        Download every image to target_path, skipping the ones that fail.
        Parameters:
            images: List[dict], Image entries providing 'file_name' and 'coco_url'
            target_path: str, Existing directory that receives the files
        Return: list of the image dicts that were downloaded successfully
    '''
    downloaded = []
    for img in images:
        dst = os.path.join(target_path, img['file_name'])
        try:
            request.urlretrieve(img['coco_url'], dst)
        except Exception:
            # Best effort: drop the failed image, but let KeyboardInterrupt and
            # SystemExit propagate (a bare except would swallow them).
            if os.path.exists(dst):
                os.remove(dst)  # do not leave a partial file behind
        else:
            downloaded.append(img)
    return downloaded

def _format_id_to_name(image_id):
    '''
        Left-pad the string form of image_id with zeros up to 12 characters.
        Values already 12 characters or longer are returned unchanged.
    '''
    return str(image_id).rjust(12, '0')

def _max_min(e1, e2):
    '''
        Return the two values as a pair in ascending order: (smaller, larger).
        Note that, despite the name, the smaller value comes first.
    '''
    return (e1, e2) if e1 < e2 else (e2, e1)

def _find(func, elements):
    '''
        Return the first element for which func(element) is truthy,
        or None when no element matches.
    '''
    return next((candidate for candidate in elements if func(candidate)), None)
    

def create_annotations(remaped, annotations, images, images_ids, target_path):
    '''
        Write one label file per image in the "class x_center y_center width height"
        format, with every coordinate normalized by the image size.
        Parameters:
            remaped: dict, original category id -> class index written to the file
            annotations: List[dict], Annotations providing 'image_id', 'category_id' and 'bbox'
            images: List[dict], Image entries providing 'id', 'width' and 'height'
            images_ids: Unused, kept so existing callers keep working
            target_path: str, Existing directory that receives the .txt files
        Return: None
        Annotations whose image is not in images are skipped. Files are opened in
        append mode, so running this twice on the same directory duplicates lines.
    '''
    # Index the images once so each annotation lookup is O(1) instead of a linear scan.
    images_by_id = {}
    for image in images:
        images_by_id.setdefault(image['id'], image)  # first entry wins on duplicate ids

    lines_by_image = {}
    for annotation in annotations:
        img_id = annotation['image_id']
        img = images_by_id.get(img_id)
        if img is None:
            continue

        width = img['width']
        height = img['height']
        category_id = remaped[annotation['category_id']]

        # COCO boxes are [x_min, y_min, box_width, box_height] in pixels; the third
        # and fourth values are sizes, not the bottom-right corner.
        x_min, y_min, box_width, box_height = annotation['bbox'][:4]

        x_center = (x_min + box_width / 2) / width
        y_center = (y_min + box_height / 2) / height
        w = box_width / width
        h = box_height / height

        lines_by_image.setdefault(img_id, []).append(
            f'{category_id} {x_center} {y_center} {w} {h}\n'
        )

    # One open per image instead of one per annotation.
    for img_id, lines in lines_by_image.items():
        file_name = str(img_id).rjust(12, '0') + '.txt'  # 12-digit zero-padded image id
        with open(os.path.join(target_path, file_name), 'a') as label_file:
            label_file.writelines(lines)
            
def image_without_label(image_path, labels_path):
    '''
        List the images that have no label file with the same base name.
        Parameters:
            image_path: str, Directory containing the 'train' and 'val' image folders
            labels_path: str, Directory whose entries are the label files
        Return: list of base names (file name minus its last four characters)
                found under image_path but not under labels_path
    '''
    image_stems = []
    for split in ('/train', '/val'):
        for entry in os.listdir(image_path + split):
            image_stems.append(entry[:-4])  # drop the last 4 chars, e.g. '.jpg'

    label_stems = [entry[:-4] for entry in os.listdir(labels_path)]
    return [stem for stem in image_stems if stem not in label_stems]

In [124]:
# Arguments for coco_download(instance_path, classes, images_dst, labels_dst).
# All three locations live under the same dataset root folder.

dataset_root = '/media/nalbertgml/files/linux/code/python/YOLOv5/motorcycle_network/datasets/motorcycles'

coco_instance = f'{dataset_root}/annotations/instances_train2017.json'
images_dst = f'{dataset_root}/images'
labels_dst = f'{dataset_root}/labels'

In [125]:
# Generate the label .txt files for the 'motorcycle' class under labels_dst.
coco_download(coco_instance, ['motorcycle'], images_dst, labels_dst)

In [126]:
# 80/20 train/val split.
# os.listdir returns entries in arbitrary order, so the image listing is sorted to
# make the split reproducible, and every label name is derived from its image
# name so an image and its label always land in the same subset.
image_names = sorted(
    name for name in os.listdir(images_dst)
    if os.path.isfile(os.path.join(images_dst, name))  # ignore train/ and val/ on re-runs
)
train_size = int(len(image_names) * 0.8)
train_images = image_names[:train_size]
val_images = image_names[train_size:]

existing_labels = set(os.listdir(labels_dst))

train_labels = [
    label
    for label in (os.path.splitext(name)[0] + '.txt' for name in train_images)
    if label in existing_labels
]
val_labels = [
    label
    for label in (os.path.splitext(name)[0] + '.txt' for name in val_images)
    if label in existing_labels
]

In [127]:
# Create the train/ and val/ sub-folders for both images and labels.
!mkdir -p /media/nalbertgml/files/linux/code/python/YOLOv5/motorcycle_network/datasets/motorcycles/images/train
!mkdir -p /media/nalbertgml/files/linux/code/python/YOLOv5/motorcycle_network/datasets/motorcycles/images/val
!mkdir -p /media/nalbertgml/files/linux/code/python/YOLOv5/motorcycle_network/datasets/motorcycles/labels/train
!mkdir -p /media/nalbertgml/files/linux/code/python/YOLOv5/motorcycle_network/datasets/motorcycles/labels/val

In [128]:
images_train_dst = '/media/nalbertgml/files/linux/code/python/YOLOv5/motorcycle_network/datasets/motorcycles/images/train'
images_val_dst = '/media/nalbertgml/files/linux/code/python/YOLOv5/motorcycle_network/datasets/motorcycles/images/val'
labels_train_dst = '/media/nalbertgml/files/linux/code/python/YOLOv5/motorcycle_network/datasets/motorcycles/labels/train'
labels_val_dst = '/media/nalbertgml/files/linux/code/python/YOLOv5/motorcycle_network/datasets/motorcycles/labels/val'

# One row per copy pass: (file names, source folder, destination folder).
copy_jobs = (
    (train_images, images_dst, images_train_dst),
    (val_images, images_dst, images_val_dst),
    (train_labels, labels_dst, labels_train_dst),
    (val_labels, labels_dst, labels_val_dst),
)

for file_names, src_dir, dst_dir in copy_jobs:
    for file_name in file_names:
        shutil.copyfile(f'{src_dir}/{file_name}', f'{dst_dir}/{file_name}')

In [129]:
# Remove the .jpg/.txt files sitting directly in images/ and labels/; the globs do
# not reach into the train/ and val/ sub-folders.
!rm /media/nalbertgml/files/linux/code/python/YOLOv5/motorcycle_network/datasets/motorcycles/images/*.jpg
!rm /media/nalbertgml/files/linux/code/python/YOLOv5/motorcycle_network/datasets/motorcycles/labels/*.txt

## YOLOv5

In [20]:
# download YOLOv5

!git clone https://github.com/ultralytics/yolov5
%cd yolov5
!pip install -r requirements.txt

Cloning into 'yolov5'...
remote: Enumerating objects: 11742, done.[K
remote: Total 11742 (delta 0), reused 0 (delta 0), pack-reused 11742[K
Receiving objects: 100% (11742/11742), 11.37 MiB | 6.68 MiB/s, done.
Resolving deltas: 100% (8127/8127), done.
/media/nalbertgml/files/linux/code/python/YOLOv5/motorcycle_network/yolov5
Collecting matplotlib>=3.2.2
  Downloading matplotlib-3.5.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl (11.2 MB)
[K     |████████████████████████████████| 11.2 MB 5.5 MB/s eta 0:00:01
Collecting opencv-python>=4.1.2
  Downloading opencv_python-4.5.5.64-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (60.5 MB)
[K     |████████████████████████████████| 60.5 MB 6.1 MB/s eta 0:00:01
Collecting PyYAML>=5.3.1
  Downloading PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (661 kB)
[K     |████████████████████████████████| 661 kB 14.1 MB/s eta 0:00:01
Collecting scipy>=1.4.1
  Downloading scip

Collecting pyasn1<0.5.0,>=0.4.6
  Downloading pyasn1-0.4.8-py2.py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 3.8 MB/s eta 0:00:011
[?25hCollecting oauthlib>=3.0.0
  Downloading oauthlib-3.2.0-py3-none-any.whl (151 kB)
[K     |████████████████████████████████| 151 kB 7.0 MB/s eta 0:00:01
[?25hInstalling collected packages: pyasn1, rsa, pyasn1-modules, oauthlib, cachetools, requests-oauthlib, pytz, numpy, kiwisolver, google-auth, fonttools, cycler, werkzeug, tensorboard-plugin-wit, tensorboard-data-server, scipy, protobuf, pandas, matplotlib, markdown, grpcio, google-auth-oauthlib, absl-py, tqdm, thop, tensorboard, seaborn, PyYAML, opencv-python
  Attempting uninstall: numpy
    Found existing installation: numpy 1.19.2
    Uninstalling numpy-1.19.2:
      Successfully uninstalled numpy-1.19.2
Successfully installed PyYAML-6.0 absl-py-1.0.0 cachetools-5.0.0 cycler-0.11.0 fonttools-4.31.2 google-auth-2.6.2 google-auth-oauthlib-0.4.6 grpcio-1.44.0 kiwisolver-

In [21]:
# Import the cloned repository's utils package and call notebook_init(), which
# prints the environment summary shown in the cell output.
from yolov5 import utils

utils.notebook_init()
# Go back up to the parent directory of yolov5/.
%cd ..

YOLOv5 🚀 v6.1-60-gc3ae4e4 torch 1.11.0 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 3911MiB)


Setup complete ✅ (12 CPUs, 15.4 GB RAM, 18.2/47.4 GB disk)
/media/nalbertgml/files/linux/code/python/YOLOv5/motorcycle_network


In [44]:
def create_yaml_dataset(target_path: str, yaml_name: str, project_path: str, train_path: str, val_path: str, classes: list):
    '''
        Create the YAML used to train the YOLOv5.
        Parameters:
            target_path: str, The directory where the YAML file is written
            yaml_name: str, The file name, without the ".yaml" extension
            project_path: str, The project path inserted into the YAML field "path"
            train_path: str, The train path inserted into the YAML field "train"
            val_path: str, The val path inserted into the YAML field "val"
            classes: List[str], The classes on the dataset
        Return: None
    '''
    # Render the names as a list literal even when a tuple (or another iterable)
    # is passed; the repr of a tuple is not a valid YAML sequence.
    class_names = list(classes)

    content = f'''path: {project_path}
train: {train_path}
val: {val_path}

# Classes
nc: {len(class_names)}
names: {class_names}'''

    # The context manager closes the file even if the write fails.
    with open(os.path.join(target_path, f'{yaml_name}.yaml'), 'w') as yaml_file:
        yaml_file.write(content)

In [49]:
# create the YAML file

# Write motorcycles.yaml into the yolov5/models folder.
target_path = '/media/nalbertgml/files/linux/code/python/YOLOv5/motorcycle_network/yolov5/models'
project_path = '../datasets/motorcycles'
train_path = 'images/train'
# NOTE: despite its name, labels_path holds the validation images folder and is
# passed as create_yaml_dataset's val_path argument.
labels_path = 'images/val'
create_yaml_dataset(target_path, 'motorcycles', project_path, train_path, labels_path, ['motorcycles'])

In [131]:
# Train starting from the yolov5s.pt weights on the motorcycles dataset:
# image size 640, batch size 1, 10 epochs, with image caching enabled.
!python yolov5/train.py --img 640 --batch 1 --epochs 10 --data yolov5/models/motorcycles.yaml --weights yolov5s.pt --cfg yolov5//models/yolov5s.yaml --cache

[34m[1mtrain: [0mweights=yolov5s.pt, cfg=yolov5//models/yolov5s.yaml, data=yolov5/models/motorcycles.yaml, hyp=yolov5/data/hyps/hyp.scratch-low.yaml, epochs=10, batch_size=1, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=yolov5/runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v6.1-60-gc3ae4e4 torch 1.11.0 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 3911MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1