In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/global-wheat-detection/sample_submission.csv
/kaggle/input/global-wheat-detection/train.csv
/kaggle/input/global-wheat-detection/test/796707dd7.jpg
/kaggle/input/global-wheat-detection/test/2fd875eaa.jpg
/kaggle/input/global-wheat-detection/test/cc3532ff6.jpg
/kaggle/input/global-wheat-detection/test/53f253011.jpg
/kaggle/input/global-wheat-detection/test/f5a1f0358.jpg
/kaggle/input/global-wheat-detection/test/51f1be19e.jpg
/kaggle/input/global-wheat-detection/test/aac893a91.jpg
/kaggle/input/global-wheat-detection/test/cb8d261a3.jpg
/kaggle/input/global-wheat-detection/test/51b3e36ab.jpg
/kaggle/input/global-wheat-detection/test/348a992bb.jpg
/kaggle/input/global-wheat-detection/train/944c60a15.jpg
/kaggle/input/global-wheat-detection/train/dd5dd0234.jpg
/kaggle/input/global-wheat-detection/train/72f8aaa4f.jpg
/kaggle/input/global-wheat-detection/train/69595016d.jpg
/kaggle/input/global-wheat-detection/train/28b8ba0aa.jpg
/kaggle/input/global-wheat-detection/train/2d6357

# YOLOv5

## Config

In [2]:
import seaborn as sns
import ast
from tqdm import trange, tqdm
from colorama import Fore
from enum import Enum
from glob import glob

# For Data preparation
from sklearn.preprocessing import *
from sklearn.model_selection import *
from sklearn.metrics import *

In [3]:
class Config(Enum):
    def __str__(self):
        return self.value
    
    TRAIN_CSV = '../input/global-wheat-detection/train.csv'
    TEST_CSV = "../input/global-wheat-detection/sample_submission.csv"
    TRAIN_DIR = "../input/global-wheat-detection/train"
    TEST_DIR = "../input/global-wheat-detection/test"
    OUTPUT_PATH = "./yolov5/output"
    IMG_SHAPE = 1024
    CONFIG_FILENAME = "ws_data"
    EPOCHS = 20
    BATCH_SIZE = 8

In [4]:
def process_data(data_df, image_id_col, bbox_col, label_col, path_col, config_filename = 'data', test_size = 0.2):
    """
    Build dataset for Yolo training
        + Expect form: (label, x_center, y_center, width, height)
        + return df_train, df_val
    """
    os.system('git clone https://github.com/ultralytics/yolov5.git')
    OUTPUT_FOLDER_NAME = Config.OUTPUT_PATH.value.split('/')[-1]
    if not os.path.exists(Config.OUTPUT_PATH.value):
        os.system(
            f'''
                cd ./yolov5
                mkdir {OUTPUT_FOLDER_NAME} 
                cd {OUTPUT_FOLDER_NAME}
                mkdir images
                mkdir labels
                cd images
                mkdir train
                mkdir validation
                cd ..
                cd labels
                mkdir train
                mkdir validation
                cd ../../
                tree {OUTPUT_FOLDER_NAME}
                cd ../
            '''
        )
    
    # Convert string form of list to original form
    data_df.bbox = data_df.bbox.apply(ast.literal_eval)
    
    # Encoding all labels
    mapper = {k: d for d, k in enumerate(set(data_df[label_col]))}
    data_df[label_col] = data_df[label_col].apply(lambda x: int(mapper[x]))
    
    # Group the bounding boxes wrt image_id, label_col and path_col
    data_df = data_df.groupby(by = [image_id_col, label_col, path_col])[bbox_col].apply(list).reset_index(name = bbox_col)
    
    # Divide data into train and val set
    df_train, df_val = train_test_split(data_df, test_size = test_size, random_state = 1234, shuffle = True)
    df_train = df_train.reset_index(drop = True)
    df_val = df_val.reset_index(drop = True)    
    
    print(f"[INFO] Train_SHAPE : {df_train.shape}, VAL_SHAPE: {df_val.shape}")
    data_dict = {'train': df_train, 'validation': df_val}
    for data_type, data in data_dict.items():
        for idx in trange(len(data), desc=f'Processing {data_type}...', bar_format="{l_bar}%s{bar:50}%s{r_bar}" % (Fore.CYAN, Fore.RESET), position=0, leave=True):
            row = data.iloc[idx]
            image_name = row[image_id_col]
            bounding_boxes = row[bbox_col]
            label = row[label_col]
            path = row[path_col]
            yolo_data = []
            for bbox in bounding_boxes:
                x = bbox[0]
                y = bbox[1]
                w = bbox[2]
                h = bbox[3]
                
                x_center = x + w / 2
                y_center = y + h / 2
                
                x_center, y_center, w, h = tuple(map(lambda x: x/Config.IMG_SHAPE.value, (x_center, y_center, w, h)))
                yolo_data.append([label, x_center, y_center, w, h])
                
            yolo_data = np.array(yolo_data)
            np.savetxt(
                f"{Config.OUTPUT_PATH.value}/labels/{data_type}/{image_name}.txt",
                yolo_data,
                fmt = ["%d", "%f", "%f", "%f", "%f"]
            )
            os.system(
                f"""
                cp {path} {Config.OUTPUT_PATH.value}/images/{data_type}/{path.split("/")[-1]}

                """
            )
    with open(f"./yolov5/{config_filename}.yaml", "w+") as file_:
        file_.write(
            f"""
            
            train: {OUTPUT_FOLDER_NAME}/images/train
            val: {OUTPUT_FOLDER_NAME}/images/validation
            nc: {len(mapper)}
            names: {list(mapper.keys())}
            
            """
        )
    file_.close()
    print("[INFO] Done with data processing")   

## Load data

In [5]:
df = pd.read_csv(Config.TRAIN_CSV.value)
df_test = pd.read_csv(Config.TEST_CSV.value)

df['path'] = df.image_id.apply(lambda x: f'{Config.TRAIN_DIR.value}/{x}.jpg')
df_test['path'] = df_test.image_id.apply(lambda x: f'{Config.TEST_DIR.value}/{x}.jpg')

df['label'] = ['Wheat']*len(df)

print(df.dtypes)
df.head()

image_id    object
width        int64
height       int64
bbox        object
source      object
path        object
label       object
dtype: object


Unnamed: 0,image_id,width,height,bbox,source,path,label
0,b6ab77fd7,1024,1024,"[834.0, 222.0, 56.0, 36.0]",usask_1,../input/global-wheat-detection/train/b6ab77fd...,Wheat
1,b6ab77fd7,1024,1024,"[226.0, 548.0, 130.0, 58.0]",usask_1,../input/global-wheat-detection/train/b6ab77fd...,Wheat
2,b6ab77fd7,1024,1024,"[377.0, 504.0, 74.0, 160.0]",usask_1,../input/global-wheat-detection/train/b6ab77fd...,Wheat
3,b6ab77fd7,1024,1024,"[834.0, 95.0, 109.0, 107.0]",usask_1,../input/global-wheat-detection/train/b6ab77fd...,Wheat
4,b6ab77fd7,1024,1024,"[26.0, 144.0, 124.0, 117.0]",usask_1,../input/global-wheat-detection/train/b6ab77fd...,Wheat


## Train YOLO model

In [6]:
process_data(data_df = df, 
             image_id_col = "image_id", 
             bbox_col = "bbox", 
             label_col = "label",
             path_col = "path", 
             config_filename = Config.CONFIG_FILENAME.value)

Cloning into 'yolov5'...


output
├── images
│   ├── train
│   └── validation
└── labels
    ├── train
    └── validation

6 directories, 0 files
[INFO] Train_SHAPE : (2698, 4), VAL_SHAPE: (675, 4)


Processing train...: 100%|[36m██████████████████████████████████████████████████[39m| 2698/2698 [00:29<00:00, 90.92it/s] 
Processing validation...: 100%|[36m██████████████████████████████████████████████████[39m| 675/675 [00:07<00:00, 87.89it/s]

[INFO] Done with data processing





In [7]:
def train(model_name, config_filename, preTrainedWeights_path = None):
    mapper = {}
    for idx, model_ in enumerate(glob('yolov5/models/*yaml')):
        mapper[idx + 1] = model_
        print(f"{idx + 1} => {model_.split('/')[-1].split('.')[0]}")
    
    model = mapper[int(input(f'Select model from idx'))]
    if preTrainedWeights_path:
        os.system(
        f'''
        python yolov5/train.py --img {Config.IMG_SHAPE.value} --batch {Config.BATCH_SIZE.value} --epochs {Config.EPOCHS.value} --data yolov5/{config_filename}.yaml --cfg {model} --name {model_name} --weights {preTrainedweights_path}
        '''
        )
    else:
        os.system(
            f"""
                python yolov5/train.py --img {Config.IMG_SHAPE.value} --batch {Config.BATCH_SIZE.value} --epochs {Config.EPOCHS.value} --data yolov5/{config_filename}.yaml --cfg {model} --name {model_name}
            """
        )
train(model_name = 'ws_yolov5', config_filename = Config.CONFIG_FILENAME.value)

1 => yolov5m
2 => yolov5s
3 => yolov5n
4 => yolov5l
5 => yolov5x


Select model from idx 2


Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...
[34m[1mtrain: [0mweights=yolov5/yolov5s.pt, cfg=yolov5/models/yolov5s.yaml, data=yolov5/ws_data.yaml, hyp=yolov5/data/hyps/hyp.scratch-low.yaml, epochs=20, batch_size=8, imgsz=1024, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=yolov5/runs/train, name=ws_yolov5, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v6.1-40-gb0ba101 torch 1.9.1 CUDA:0 (Tesla P100-PCIE-16GB, 16281MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmu

[34m[1mWeights & Biases: [0mrun 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs (RECOMMENDED)
Downloading https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt to yolov5/yolov5s.pt...


100%|██████████| 14.1M/14.1M [00:00<00:00, 39.5MB/s]
Overriding model.yaml nc=80 with nc=1

                 from  n    params  module                                  arguments                     
  0                -1  1      3520  models.common.Conv                      [3, 32, 6, 2, 2]              
  1                -1  1     18560  models.common.Conv                      [32, 64, 3, 2]                
  2                -1  1     18816  models.common.C3                        [64, 64, 1]                   
  3                -1  1     73984  models.common.Conv                      [64, 128, 3, 2]               
  4                -1  2    115712  models.common.C3                        [128, 128, 2]                 
  5                -1  1    295424  models.common.Conv                      [128, 256, 3, 2]              
  6                -1  3    625152  models.common.C3                        [256, 256, 3]                 
  7                -1  1   1180672  models.common.Co




[34m[1mtrain: [0mScanning '/kaggle/working/yolov5/output/labels/train' images and labels...2698 found, 0 missing, 0 empty, 0 corrupt: 100%|██████████| 2698/2698 [00:01<00:00, 1533.83it/s]
[34m[1mtrain: [0mNew cache created: /kaggle/working/yolov5/output/labels/train.cache
[34m[1mval: [0mScanning '/kaggle/working/yolov5/output/labels/validation' images and labels...675 found, 0 missing, 0 empty, 0 corrupt: 100%|██████████| 675/675 [00:00<00:00, 748.70it/s]
[34m[1mval: [0mNew cache created: /kaggle/working/yolov5/output/labels/validation.cache
Plotting labels to yolov5/runs/train/ws_yolov5/labels.jpg... 

[34m[1mAutoAnchor: [0m5.72 anchors/target, 0.999 Best Possible Recall (BPR). Current anchors are a good fit to dataset ✅
Image sizes 1024 train, 1024 val
Using 2 dataloader workers
Logging results to [1myolov5/runs/train/ws_yolov5[0m
Starting training for 20 epochs...

     Epoch   gpu_mem       box       obj       cls    labels  img_size
      0/19     4.91G   0.07942 

In [8]:
def predict(images_path:"path to the test images", weights_path: "path to the weights folder"):
    """
    Helper function to make predictions over images using Yolo
    """
    os.system(
        f"""
            python yolov5/detect.py --source {images_path} --weights {weights_path}
        """)

predict(images_path = "../input/global-wheat-detection/test",
       weights_path = "yolov5/runs/train/ws_yolov54/weights/best.pt")

[34m[1mdetect: [0mweights=['yolov5/runs/train/ws_yolov54/weights/best.pt'], source=../input/global-wheat-detection/test, data=yolov5/data/coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=yolov5/runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False
YOLOv5 🚀 v6.1-40-gb0ba101 torch 1.9.1 CUDA:0 (Tesla P100-PCIE-16GB, 16281MiB)

Traceback (most recent call last):
  File "yolov5/detect.py", line 252, in <module>
    main(opt)
  File "yolov5/detect.py", line 247, in main
    run(**vars(opt))
  File "/opt/conda/lib/python3.7/site-packages/torch/autograd/grad_mode.py", line 28, in decorate_context
    return func(*args, **kwargs)
  File "yolov5/detect.py", line 92, in run
    model = DetectMultiBackend(weights, device=device, d