# YoloV5 Training Notebook
A ready-to-use deployed version is available on Colab: https://colab.research.google.com/drive/1ITSR4jkFPuusqCM6Ob2yUkI43wmDLv0T?usp=sharing  
YoloV5 uses TensorBoard by default; some of my logs can be found here: https://tensorboard.dev/experiment/0UjGUJATQ5CpbNuaBGOv8w/#scalars

## 1. Setup

In [None]:
# Necessary package installs for Google Colab (these are installs, not imports).
# Each command force-reinstalls the newest release of the package while leaving
# its already-installed dependencies untouched (--no-deps).
!pip install --upgrade --force-reinstall --no-deps kaggle
!pip install --upgrade --force-reinstall --no-deps albumentations

In [None]:
# Show which GPU this runtime was given (model, driver/CUDA version, memory use).
!nvidia-smi

Fri Jul 17 17:19:18 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.51.05    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!mkdir "./drive/My Drive/global-wheat-detection/NEWyolov5"
%cd "./drive/My Drive/global-wheat-detection/NEWyolov5"

/content/drive/My Drive/global-wheat-detection/NEWyolov5


In [None]:
import os
from getpass import getpass

# The kaggle CLI used in the next cell reads its credentials from these two
# environment variables. Values that are already set are kept; anything missing
# is asked for interactively so that no secret has to be typed into (and saved
# with) the notebook itself.
for _var, _prompt in (('KAGGLE_USERNAME', 'Kaggle username: '),
                      ('KAGGLE_KEY', 'Kaggle API key: ')):
    if not os.environ.get(_var):
        os.environ[_var] = getpass(_prompt)

In [None]:
# Competition data: download the archive, extract it into ./global-wheat-detection
# and delete the archive afterwards.
!kaggle competitions download -c global-wheat-detection
!unzip global-wheat-detection.zip -d global-wheat-detection
!rm -r global-wheat-detection.zip

# Kaggle dataset "qiyaowei/yolov5-files": the training cell loads the
# pretrained yolov5x.pt weights from this folder.
!kaggle datasets download -d qiyaowei/yolov5-files
!unzip yolov5-files -d yolov5-files
!rm -r yolov5-files.zip

# Kaggle dataset "tanmaypandey/configyolov5-newpath": the training cell reads
# wheat0.yaml (--data) and yolov5x.yaml (--cfg) from this folder.
!kaggle datasets download -d tanmaypandey/configyolov5-newpath
!unzip configyolov5-newpath -d configyolov5-newpath
!rm -r configyolov5-newpath.zip

In [None]:
!mkdir repos
%cd repos

In [None]:
# Unpack a saved snapshot of the YOLOv5 repository into /content/yolov5.
# NOTE(review): y5.zip is not downloaded anywhere in this notebook — presumably
# it has to be uploaded to /content by hand before running this cell; confirm.
%cd /content/
!unzip y5.zip -d yolov5
#!git clone https://github.com/ultralytics/yolov5  # This repo is frequently updated, to prevent compatibility issues in future use saved repo

In [None]:
# Move the unpacked repository contents into the project's repos/ folder on Drive.
# Note: the shell glob yolov5/* does not match dot-files, so hidden files are left behind.
!mv yolov5/* '/content/drive/My Drive/global-wheat-detection/NEWyolov5/repos/'

In [None]:
# Switch into the repository folder; the next cell installs its requirements.txt from here.
%cd '/content/drive/My Drive/global-wheat-detection/NEWyolov5/repos'

/content/drive/My Drive/global-wheat-detection/NEWyolov5/repos


In [None]:
# Install the packages listed in the requirements.txt of the current directory.
!pip install -r requirements.txt

In [None]:
# Sanity check: print the current working directory before changing it again.
!pwd

/content/drive/My Drive/global-wheat-detection/NEWyolov5/repos


In [None]:
# Go one level up; the pre-processing cells below read
# ./global-wheat-detection/train.csv relative to this directory.
%cd ../
#%cd /content/drive/My Drive/global-wheat-detection/yolov5

/content/drive/My Drive/global-wheat-detection/NEWyolov5


In [None]:
import numpy as np
import pandas as pd 
import os
from tqdm.auto import tqdm
import shutil as sh
from sklearn.model_selection import StratifiedKFold

SEED = 42

def seed_everything(seed):
    """Seed the random number generators used by this notebook.

    Seeds Python's built-in ``random`` module and NumPy's global RNG, and
    exports ``PYTHONHASHSEED`` to the environment.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Local import keeps this cell self-contained without touching the import list above.
    import random

    # Setting PYTHONHASHSEED at runtime cannot change hashing in the already
    # running interpreter; it is inherited by child processes started later.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
seed_everything(SEED)

## 2. Pre-Processing

In [None]:
# Load the training annotations (one row per bounding box).
marking = pd.read_csv('./global-wheat-detection/train.csv')

# Every 'bbox' cell is a bracketed, comma-separated string: strip the brackets,
# parse the four numbers and spread them over the columns x, y, w, h.
bboxs = np.stack([np.fromstring(raw[1:-1], sep=',') for raw in marking['bbox']])
for col_pos, column in enumerate(['x', 'y', 'w', 'h']):
    marking[column] = bboxs[:, col_pos]
marking = marking.drop(columns=['bbox'])

# Keep only boxes whose area (w * h) is below 100000 and renumber the rows.
box_is_small = (marking['w'] * marking['h']) < 100000
marking = marking[box_is_small].reset_index(drop=True)

In [None]:
# 10-fold split over images, stratified by data source and by box count per
# image (bucketed in steps of 15).
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# One row per image_id holding the number of boxes annotated on that image.
df_folds = marking[['image_id']].assign(bbox_count=1).groupby('image_id').count()
# Source of each image (minimum per image_id), aligned on the image_id index.
df_folds['source'] = marking.groupby('image_id')['source'].min()
# Stratification label "<source>_<bbox_count // 15>".
df_folds['stratify_group'] = (
    df_folds['source'].astype(str) + '_' + (df_folds['bbox_count'] // 15).astype(str)
)
df_folds['fold'] = 0

# Each image is tagged with the number of the fold in which it is validation data.
splits = skf.split(X=df_folds.index, y=df_folds['stratify_group'])
for fold_number, (train_index, val_index) in enumerate(splits):
    df_folds.loc[df_folds.index[val_index], 'fold'] = fold_number



In [None]:
# Flatten the fold table so image_id is a regular column again.
folds = df_folds.reset_index()
index = list(set(folds['image_id']))

# Images assigned to the chosen fold form the validation set.
fold = 0
val_fold = folds.loc[folds['fold'] == fold]
val_index = list(set(val_fold['image_id']))

n_val = len(val_index)
n_train = len(index) - n_val
print(f'Train size: {n_train} \nVal size: {n_val}')

Train size: 3035 
Val size: 338


In [None]:
# Re-read the annotations and build the per-box table used to write the label files.
df = pd.read_csv('./global-wheat-detection/train.csv')

# Parse the bracketed "x, y, w, h" strings into four numeric columns.
bboxs = np.stack(df['bbox'].map(lambda text: np.fromstring(text[1:-1], sep=',')))
df = df.drop(columns=['bbox']).assign(
    x=bboxs[:, 0],
    y=bboxs[:, 1],
    w=bboxs[:, 2],
    h=bboxs[:, 3],
)

# Box centre from the x/y corner plus half of the width/height; a single class with id 0.
df = df.assign(
    x_center=df['x'] + df['w'] / 2,
    y_center=df['y'] + df['h'] / 2,
    classes=0,
)

# Keep boxes with area (w * h) below 100000 and only the columns needed later.
wanted_columns = ['image_id', 'x', 'y', 'w', 'h', 'x_center', 'y_center', 'classes']
area_ok = (df['w'] * df['h']) < 100000
df = df.loc[area_ok, wanted_columns].reset_index(drop=True)

In [None]:
# Enter the repository folder: the conversion cell below writes "convertor/..."
# relative to it and reads the images from "../global-wheat-detection".
%cd ./repos

/content/drive/My Drive/global-wheat-detection/NEWyolov5/repos


In [None]:
source = 'train'
# Divisor that turns pixel coordinates into the 0-1 range written to the label files.
# NOTE(review): assumes every image is 1024x1024 px (the training cell also uses --img 1024) — confirm.
IMG_SIZE = 1024

fold=0    #Always 0 for the pipeline to work
val_ids = set(val_index)  # set membership is O(1); the list lookup was O(n) per image

# For every image: write one label file with a "class x_center y_center w h"
# line per box, and copy the image next to it into the train/val folder tree.
for name, mini in tqdm(df.groupby('image_id')):
    split = 'val2017' if name in val_ids else 'train2017'
    label_dir = os.path.join('convertor', f'fold{fold}', 'labels', split)
    image_dir = os.path.join('convertor', f'fold{fold}', 'images', split)
    os.makedirs(label_dir, exist_ok=True)
    os.makedirs(image_dir, exist_ok=True)

    # Only the coordinates are normalised; the class id is written unchanged as
    # an integer (previously it was divided by the image size as well, which
    # only gave a valid id because the single class is 0).
    boxes = (mini[['x_center', 'y_center', 'w', 'h']].values.astype(float) / IMG_SIZE).astype(str)
    class_ids = mini['classes'].values.astype(int)
    with open(os.path.join(label_dir, name + '.txt'), 'w') as f:
        for class_id, box in zip(class_ids, boxes):
            f.write(' '.join([str(class_id), *box]))
            f.write("\n")

    sh.copy(
        "../global-wheat-detection/{}/{}.jpg".format(source, name),
        os.path.join(image_dir, name + '.jpg'),
    )

HBox(children=(FloatProgress(value=0.0, max=3373.0), HTML(value='')))




## 3. Training
Training writes its logs to TensorBoard; the same records can also be found in the repo folder after training.

In [None]:
# Train YOLOv5x for 70 epochs at image size 1024 with batch size 4, starting
# from the pretrained yolov5x.pt weights and using the dataset/model configs
# downloaded earlier. The run is named yolov5x_4M70fold0.
!python train.py --img 1024 --batch 4 --epochs 70 --data ../configyolov5-newpath/wheat0.yaml --cfg ../configyolov5-newpath/yolov5x.yaml --name yolov5x_4M70fold0 --weights ../yolov5-files/yolov5_weights/yolov5_models/yolov5x.pt
# Delete the generated "convertor" dataset folder once the training command has returned.
# NOTE(review): this line runs even if the training command above failed — confirm that is intended.
!rm -rf convertor

Apex recommended for faster mixed precision training: https://github.com/NVIDIA/apex
{'lr0': 0.01, 'momentum': 0.937, 'weight_decay': 0.0005, 'giou': 0.05, 'cls': 0.58, 'cls_pw': 1.0, 'obj': 1.0, 'obj_pw': 1.0, 'iou_t': 0.2, 'anchor_t': 4.0, 'fl_gamma': 0.0, 'hsv_h': 0.014, 'hsv_s': 0.68, 'hsv_v': 0.36, 'degrees': 0.0, 'translate': 0.0, 'scale': 0.5, 'shear': 0.0}
Namespace(adam=False, batch_size=4, bucket='', cache_images=False, cfg='../configyolov5-newpath/yolov5x.yaml', data='../configyolov5-newpath/wheat0.yaml', device='', epochs=70, evolve=False, img_size=[1024], multi_scale=False, name='yolov5x_4M70fold0', noautoanchor=False, nosave=False, notest=False, rect=False, resume=False, single_cls=False, weights='../yolov5-files/yolov5_weights/yolov5_models/yolov5x.pt')
Using CUDA device0 _CudaDeviceProperties(name='Tesla P100-PCIE-16GB', total_memory=16280MB)

2020-07-17 17:29:57.428574: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library 