# YOLOv5 training notebook

**Note:** Training was conducted on Kaggle.

This notebook is part of the repository:
[https://gitlab.com/a-potemkin/allium-cepa-tool](https://gitlab.com/a-potemkin/allium-cepa-tool)

In [None]:
import os
import random
import shutil
import yaml

import torch

## 1 Download the YOLOv5 model

In [None]:
%mkdir tmp
%cd tmp

In [None]:
# Fetch the YOLOv5 sources into the current directory and install their Python requirements.
!git clone https://github.com/ultralytics/yolov5  # clone
%cd yolov5
%pip install -qr requirements.txt  # install
# NOTE: the runtime may need to be restarted at this point before continuing
# (presumably so that the freshly installed package versions are picked up - confirm).

In [None]:
# Step back out of the yolov5 checkout (assumes the previous cell left the kernel inside it).
%cd ../

In [None]:
# Enter the cloned repository and run its notebook initialisation helper.
%cd yolov5
# NOTE(review): this import needs `yolov5` to be importable as a package from the
# kernel's module search path - confirm it still resolves after the `%cd` above.
from yolov5 import utils  
display = utils.notebook_init()  # checks

## 2 Auxiliary code

In [None]:
# Sanity check: report the torch version and the device that will be used.
if torch.cuda.is_available():
    # Properties of the first visible CUDA device.
    device_description = torch.cuda.get_device_properties(0)
else:
    device_description = 'CPU'
print('Setup complete. Using torch %s %s' % (torch.__version__, device_description))

In [None]:
# Install W&B 
!pip install -q --upgrade wandb
# Login 
import wandb
wandb.login()

In [None]:
# System-level setup: add the ubuntu-toolchain-r/test PPA, install gcc-4.9 and upgrade libstdc++6.
# NOTE(review): the notebook does not say why this is required - presumably it works around a
# libstdc++ version problem on the training image; confirm it is still needed.
!apt-get install -yy software-properties-common
!add-apt-repository -yy ppa:ubuntu-toolchain-r/test
!apt-get update -yy
!apt-get install -yy gcc-4.9
!apt-get upgrade -yy libstdc++6

## 3 Split the dataset

In [None]:
# Move one level up; when the cells are run in order this goes from the yolov5 checkout
# back to tmp/, where the dataset folder is built.
%pwd
%cd ../

In [None]:
# Seed Python's and torch's global random number generators with the same value.
# The dataset shuffle performed later relies on the `random` module's state.
SEED = 135
random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Fractions of the dataset used for training and validation;
# whatever remains after these two slices becomes the test set.
train_frac = 0.8
val_frac = 0.1

# Location of the source dataset (it contains the `images` folder listed below).
root = '../../input/all-yolo-gray'

# List the image files in a sorted order first, so that the seeded shuffle
# starts from the same sequence on every run.
images = sorted(os.listdir(os.path.join(root, 'images')))
random.shuffle(images)

# Split sizes are truncated to whole numbers of images.
train_size = int(train_frac * len(images))
val_size = int(val_frac * len(images))
val_end = train_size + val_size

# Consecutive, non-overlapping slices of the shuffled list.
training_set_filenames = images[:train_size]
validation_set_filenames = images[train_size:val_end]
test_set_filenames = images[val_end:]

print(f'The training set length: {len(training_set_filenames)}\n'
      f'The validation set length: {len(validation_set_filenames)}\n'
      f'The test set length: {len(test_set_filenames)}')

Create the folder structure:

In [None]:
# Source folders inside the dataset root: images and their label files.
root_images = os.path.join(root, 'images')
root_labels = os.path.join(root, 'labels')

In [None]:
# Rebuild the output tree from scratch so that files left over from a previous
# split can never end up in the wrong subset.
if os.path.exists('dataset'):
    shutil.rmtree('dataset')
else:
    print('There is not such a folder')

# Split folder name -> image filenames assigned to that split.
split_filenames = {
    'train': training_set_filenames,
    'valid': validation_set_filenames,
    'test': test_set_filenames,
}

# Create the parallel images/ and labels/ folders for every split up front.
for split_name in split_filenames:
    os.makedirs(os.path.join('dataset/images', split_name), exist_ok=True)
    os.makedirs(os.path.join('dataset/labels', split_name), exist_ok=True)

# Copy every image together with its label file, which shares the image's
# base name and carries a .txt extension.
for split_name, filenames in split_filenames.items():
    for filename in filenames:
        txt_filename = os.path.splitext(filename)[0] + '.txt'

        shutil.copyfile(
            os.path.join(root_images, filename),
            os.path.join('dataset/images', split_name, filename)
        )
        shutil.copyfile(
            os.path.join(root_labels, txt_filename),
            os.path.join('dataset/labels', split_name, txt_filename)
        )

In [None]:
# Optional step, disabled by default: archive the test-set label files from the parent directory.
# If this is enabled, the commented-out `%cd tmp` cell that follows must be enabled as well.
# %cd ../
# !zip test_labels.zip -r tmp/dataset/labels/test

In [None]:
# Optional step, disabled by default: return to tmp/ after the optional archiving cell above.
# %cd tmp

Create a YAML file required for YOLOv5:

In [None]:
data_yaml = dict(
    train = '../dataset/images/train',
    val = '../dataset/images/valid',
    test = '../dataset/images/test',
    nc = 2,
    names = ['DIV', 'NOTDIV']
)

with open('dataset/data.yaml', 'w') as outfile:
    yaml.dump(data_yaml, outfile, default_flow_style=True)
    
%cat dataset/data.yaml

## 4 Setting up the YOLOv5 model

In [None]:
# Enter the YOLOv5 checkout; the following cells use paths relative to it.
%cd yolov5

In [None]:
# Take a private copy of the stock hyperparameter file so that the edits made in the
# following cells do not touch the original inside the repository.
orig = 'data/hyps/hyp.scratch-low.yaml'
dest = '../dataset/myhyp.yaml'

shutil.copyfile(orig, dest)

In [None]:
# Locate (with line number) the exact `time_limit` expression that the next cell rewrites.
!grep -Fn 'time_limit = 0.3 + 0.03 * bs' utils/general.py

In [None]:
# Patch utils/general.py in place: replace `time_limit = 0.3 + 0.03 * bs` with the much larger
# `time_limit = 1 + 3 * bs`.
# NOTE(review): presumably this is the NMS time limit, raised because of the large images - confirm.
# The chmod below is left disabled; enable it only if the file turns out not to be writable.
#!chmod +rwx utils/general.py
!sed -i "s/time_limit = 0.3 + 0.03 \* bs/time_limit = 1 + 3 \* bs/" utils/general.py

In [None]:
# Learning parameters: edit the private hyperparameter copy in place.
# The initial learning rate is lowered from 0.01 to 0.001.
!sed -i "s/lr0: 0.01/lr0: 0.001/" ../dataset/myhyp.yaml
# This substitution replaces the text with itself, so it changes nothing;
# it only marks where fl_gamma would be tuned.
!sed -i "s/fl_gamma: 0.0/fl_gamma: 0.0/" ../dataset/myhyp.yaml

# Remove augmentations: set the flip, mosaic, scale and translate settings to 0.0.
!sed -i "s/fliplr: 0.5/fliplr: 0.0/" ../dataset/myhyp.yaml
!sed -i "s/mosaic: 1.0/mosaic: 0.0/" ../dataset/myhyp.yaml
!sed -i "s/scale: 0.5/scale: 0.0/" ../dataset/myhyp.yaml
!sed -i "s/translate: 0.1/translate: 0.0/" ../dataset/myhyp.yaml

In [None]:
# Delete the lines containing the stock Blur and MedianBlur transforms from utils/augmentations.py.
!sed -i "/A.Blur(p=0.01)/d" utils/augmentations.py
!sed -i "/A.MedianBlur(p=0.01)/d" utils/augmentations.py

In [None]:
# Add augmentations by inserting new transform lines into utils/augmentations.py.
# Every `a` command appends directly after the `A.ToGray(p=0.01),` anchor line, so the
# inserted lines end up in the file in the reverse order of the commands below.
# This cell is not idempotent: running it again while the anchor line is still present
# inserts the same transforms a second time.
# NOTE(review): RandomResizedCrop is hard-coded to 2048x2048, which matches the IMG_SIZE
# value set in the training section - keep the two in sync.
!sed -i "/A.ToGray(p=0.01),/a \	\	A.VerticalFlip(p=0.2)," utils/augmentations.py
!sed -i "/A.ToGray(p=0.01),/a \	\	A.HorizontalFlip(p=0.2)," utils/augmentations.py
!sed -i "/A.ToGray(p=0.01),/a \	\	A.RandomRotate90(p=0.2)," utils/augmentations.py
!sed -i "/A.ToGray(p=0.01),/a \	\	A.OneOf([A.Rotate(limit=20, p=1), A.RandomResizedCrop(2048, 2048, scale=(0.8, 1), p=1)], p=0.2)," utils/augmentations.py
!sed -i "/A.ToGray(p=0.01),/a \	\	A.OneOf([A.MedianBlur(p=1, blur_limit=5), A.GaussianBlur(p=1), A.GaussNoise(p=1), A.Sharpen(p=1)], p=0.3)," utils/augmentations.py
!sed -i "s/A.RandomBrightnessContrast(p=0.0),/A.RandomBrightnessContrast(p=0.1),/" utils/augmentations.py

In [None]:
# Delete the ToGray transform line. The previous cell uses this line as its insertion anchor,
# so this cell has to run after it.
!sed -i "/A.ToGray(p=0.01),/d" utils/augmentations.py

In [None]:
# Print the patched file to verify the sed edits made above.
!cat utils/augmentations.py

## 5 Training

In [None]:
# Run configuration, interpolated into the train/val/detect shell commands in later cells.
IMG_SIZE = 2048  # passed as --img
BATCH_SIZE = 6  # passed as --batch to train.py
EPOCHS = 300  # passed as --epochs to train.py

In [None]:
# Train starting from the yolov5s.pt weights with the Adam optimizer, using the dataset
# description and the edited hyperparameter file prepared in the cells above.
# Later cells read the outputs from yolov5-allium/experiment/ (weights/ subfolder).
# NOTE(review): YOLOv5 presumably appends a numeric suffix to the run name when that folder
# already exists; the hard-coded `experiment` paths below would then point at an older run - confirm.
!python train.py --img {IMG_SIZE} \
                 --batch {BATCH_SIZE} \
                 --epochs {EPOCHS} \
                 --data ../dataset/data.yaml \
                 --weights yolov5s.pt \
                 --hyp ../dataset/myhyp.yaml \
                 --optimizer Adam \
                 --save-period 5 \
                 --project "yolov5-allium" \
                 --name "experiment"

In [None]:
# Go up two levels (out of yolov5/ and tmp/); the following cells use paths that start with tmp/.
%cd ../
%cd ../

In [None]:
# List the checkpoint files written by the training run.
!ls 'tmp/yolov5/yolov5-allium/experiment/weights'

Save weights:

In [None]:
# Archive only the checkpoints whose names end in "st.pt"
# (presumably best.pt and last.pt - confirm against the listing above).
!zip train_results.zip tmp/yolov5/yolov5-allium/experiment/weights/*st.pt

## 6 Evaluation on the test set

In [None]:
# Re-enter the YOLOv5 checkout; the evaluation cells use paths relative to it.
%cd tmp/yolov5

In [None]:
# Checkpoint used for evaluation and inference, relative to the yolov5/ directory.
BEST = 'yolov5-allium/experiment/weights/best.pt'

In [None]:
# Show the images that make up the test split.
!ls ../dataset/images/test

In [None]:
!python val.py --weights best.pt \
               --img {IMG_SIZE} \
               --batch 1 \
               --data ../dataset/data.yaml \
               --weights {BEST} \
               --project "yolov5-allium" \
               --name "test" \
               --save-txt \
               --task test \
               --conf-thres 0.3

In [None]:
# List the prediction text files produced by the evaluation run.
!ls yolov5-allium/test/labels

In [None]:
# Go up two levels (out of yolov5/ and tmp/) before archiving the evaluation results.
%cd ..
%cd ..

In [None]:
# Archive the whole evaluation output folder recursively.
!zip test_results.zip -r tmp/yolov5/yolov5-allium/test

## 7 Inference

In [None]:
# Re-enter the YOLOv5 checkout; the inference cells use paths relative to it.
%cd tmp/yolov5

In [None]:
# Input for detect.py; the test images are used here only as an example,
# point this at any other image source to run inference on new data.
SOURCE = '../dataset/images/test'  # For example

In [None]:
# Run inference with the trained checkpoint on SOURCE at a 0.3 confidence threshold and
# save the detections as text files. BEST and IMG_SIZE come from earlier cells.
# NOTE(review): the following cells assume the output folder is runs/detect/exp; a repeated run
# presumably gets a numbered folder (exp2, ...) instead, leaving those paths stale - confirm.
!python detect.py --weights {BEST} --img {IMG_SIZE} --conf 0.3 --save-txt --source {SOURCE}

In [None]:
# List the files written by the inference run.
!ls runs/detect/exp

In [None]:
# List the per-image detection text files written because of --save-txt.
!ls runs/detect/exp/labels

In [None]:
# Go up two levels (out of yolov5/ and tmp/) before archiving the inference results.
%cd ..
%cd ..

In [None]:
# Archive the whole inference output folder recursively.
!zip inference_results.zip -r tmp/yolov5/runs/detect/exp