# YOLOv5 training tutorial

## Step 0: Pull the YOLO repository and the data

In [None]:
# !git clone https://github.com/PikachuDeveloper/yolo_data.git
# %cd yolo_data
!git clone https://github.com/ultralytics/yolov5  # clone repo
%cd yolov5
%pip install -qr requirements.txt  # install dependencies

import torch
from IPython.display import Image, clear_output  # to display images

clear_output()
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

Setup complete. Using torch 1.12.1+cu113 (Tesla T4)


### Step 0.1: Transform the existing annotations

To create a folder "datasets/labels" inside the project folder.
Transform the Labelbox annotations (can be downloaded from [Lumais Cloud](https://cloud.lumais.com/s/3XyiFZzosC3wx6w) or LabelBox) into the YOLOv5 format by running the following row.

In [2]:
%cd /content/  # yolo_data/
!python lbxTorch.py -o datasets/labels -json-path 'Cflo_troph_count_masked_5-30_6-03-rand1.json' -s '1920x1080' -pname 'Cflo_troph_count_masked_5-30_6-03-rand1'
!python lbxTorch.py -o datasets/labels -json-path 'Cflo_troph_count_3-38_3-52.json' -s '1920x1080' -pname 'Cflo_troph_count_3-38_3-52'
!python lbxTorch.py -o datasets/labels -json-path '523cropped_s50p_f10.json' -s '1194x1194' -pname '523cropped_s50p_f10'
!python lbxTorch.py -o datasets/labels -json-path '6.json' -s '2496x2200' -pname '6'
!python lbxTorch.py -o datasets/labels -json-path '100testimages_s25p_f10.json' -s '1000x1000' -pname '100testimages_s25p_f10'

[Errno 2] No such file or directory: '/content/yolo_data/'
/content/yolov5
python3: can't open file 'lbxTorch.py': [Errno 2] No such file or directory
python3: can't open file 'lbxTorch.py': [Errno 2] No such file or directory
python3: can't open file 'lbxTorch.py': [Errno 2] No such file or directory
python3: can't open file 'lbxTorch.py': [Errno 2] No such file or directory
python3: can't open file 'lbxTorch.py': [Errno 2] No such file or directory


 To do the same, for your own data follow the format:
 ```
 !python lbxTorch.py -o datasets/labels -json-path <annotation_file_name> -s <frame_size_WxH> -pname <res_annontations_name>
 ```

### Step 0.2: Transform the existing videos into the frames

The following code splits videos into the images with the name in format `<vidname>_<framenumber>.jpg`.

To perform the same for your video, add your video name into the downLoad dictionary as it is written in a commented instruction.

In [3]:
import os, cv2

downLoad = {
    # vidname: number of annotated frames or -1 if all frames were annoted
    # The videos can be downloaded from [Lumais Cloud](https://cloud.lumais.com/s/nKHYDLDzWKnLgp3) or Labelbox
    'Cflo_troph_count_masked_5-30_6-03-rand1.mp4': -1,
    'Cflo_troph_count_3-38_3-52.mp4': -1,
    '523cropped_s50p_f10.mp4': 111,
    '6.mp4': 175,
    '100testimages_s25p_f10.mp4': -1
    }

img_dir = os.path.join('datasets', 'images')
if not os.path.isdir(img_dir):
  os.makedirs(img_dir)

path = os.getcwd()
for filename, num in downLoad.items():
  vidpath = os.path.join(path, filename)
  vid = cv2.VideoCapture(vidpath)
  num = int(vid.get(cv2.CAP_PROP_FRAME_COUNT)) if num == -1 else num
  for i in range(1, num + 1):
    _, frame = vid.read()
    annpath = os.path.join(path, img_dir,
                           '{}_{}.jpg'.format(filename.split('.')[0], i))
    cv2.imwrite(annpath, frame)
    
  vid.release()

error: ignored

### Step 0.3: Test split

In [None]:
%cd /content/  # yolo_data/

from random import shuffle
from shutil import move
from glob import glob

start = os.getcwd()
imgs = os.path.join(os.getcwd(), img_dir)
labs = os.getcwd() + '/datasets/labels'
img_path = 'images'
lbs_path = 'labels'
test_folder = 'val'
train_folder = 'train'

test_percent = 0.2
try:
  os.remove(
      os.path.join(imgs + '/523cropped_s50p_f10_1.jpg')
      )
  os.remove(
      os.path.join(imgs + '/523cropped_s50p_f10_2.jpg')
      )
except: pass
os.chdir(imgs)
files = glob(
    "*.jpg"
    )
os.chdir(start)
shuffle(files)

for folder in (test_folder, train_folder):
  end = os.path.join(start, folder)
  images =  os.path.join(end, img_path)
  labels = os.path.join(end, lbs_path)

  if not os.path.isdir(images):
    os.makedirs(images)
  if not os.path.isdir(labels):
    os.makedirs(labels)
  
  if folder == test_folder:
    filenames = files[: int(test_percent * len(files))]
  else:
    filenames = files[int(test_percent * len(files)):]

  for file in filenames:
    try:
      os.chdir(labels)
      move(os.path.join(labs, file.rstrip('.jpg') + '.txt'), os.path.join(labels, file.rstrip('.jpg') + '.txt'))
      os.chdir(images)
      move(os.path.join(imgs, file), os.path.join(images, file))
    except Exception as e: print(e)

In [None]:
if len(glob('/content/yolo_data/val/images/*')) == len(glob('/content/yolo_data/val/labels/*')) > 0:
  print("Correct length in test data", len(glob('/content/yolo_data/val/images/*')))
if len(glob('/content/yolo_data/train/images/*')) == len(glob('/content/yolo_data/train/labels/*')) > 0:
  print("Correct length in train data", len(glob('/content/yolo_data/train/images/*')))

## Step 1: Preparing for training

Open `yolo_data/yolov5/utils/metrics.py` and replace `w = [0.0, 0.0, 0.1, 0.9]` on 19th row of the file with `[0.0, 0.1, 0.05, 0.85]` or `[0.0, 0.2, 0.05, 0.75]` to raise the valuability of the recall.


## Step 2: Training
### Step 2.1: First training using yolov5

To enlarge the number of epochs tune the `epochs` parameter.

In [9]:
# %cd yolov5
!python train.py --img 1024 --batch 4 --epochs 10 --data ../data.yaml --weights yolov5s.pt  
#--hyp ../hyp_evolve1.yaml

[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=../data.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=10, batch_size=4, imgsz=1024, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v6.2-183-gc98128f Python-3.7.14 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=

### Step 2.2: Training using new weights and hyperparameters

The weights are usually saved in the folder `yolov5/runs/train/exp<INDEX>/weights/best.pt` and hyperparameters are saved in `yolov5/runs/evolve/exp/hyp_evolve.yaml`

In [None]:
!python train.py --img 1024 --batch 4 --epochs 10 --data ../data.yaml --weights runs/train/exp<INDEX>/weights/best.pt  --hyp runs/evolve/exp/hyp_evolve.yaml

## Step 3: Hyperparameter evolution

Updates hyperparameters to speed up the training.

If you want to evolve your current hyperparameters, add a parameter `--hyp` and a path to the hyperparameters path.

In [5]:
!python train.py --img 1024 --batch 4 --epochs 5 --data ../data.yaml --weights <weights_path> --evolve 5
# --hyp yolov5/runs/evolve/exp/hyp_evolve.yaml

[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=data/coco128.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=5, batch_size=4, imgsz=1024, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=5, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v6.2-183-gc98128f Python-3.7.14 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma

## Step 4: Testing

In [None]:
!python detect.py --save-txt --weights <weights>.pt --source <test_folder> --line-thickness 1

In [None]:
!python val.py --save-txt --weights <weights>.pt --data ../data.yaml

[34m[1mval: [0mdata=../data.yaml, weights=['runs/train/exp9/weights/best.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, max_det=300, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=True, save_hybrid=False, save_conf=False, save_json=False, project=runs/val, name=exp, exist_ok=False, half=False, dnn=False
YOLOv5 🚀 v6.2-182-g1158a50 Python-3.7.14 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)

Fusing layers... 
Model summary: 157 layers, 7031701 parameters, 0 gradients, 15.8 GFLOPs
[34m[1mval: [0mScanning '/content/yolo_data/val/labels.cache' images and labels... 2 found, 0 missing, 0 empty, 0 corrupt: 100% 2/2 [00:00<?, ?it/s]
                 Class     Images  Instances          P          R      mAP50   mAP50-95: 100% 1/1 [00:00<00:00,  2.85it/s]
                   all          2        184          0          0          0          0
Speed: 0.3ms pre-process, 20.3ms inference, 47.0ms NMS per image at shape (32, 3, 640, 640

## Warning!
If something went wrong, you can remove the folders, but be careful!

In [None]:
# import shutil

# shutil.rmtree('/content/yolo_data/datasets/')
# shutil.rmtree('/content/yolo_data/val/')
# shutil.rmtree('/content/yolo_data/train/')