In [1]:
import numpy as np 
import pandas as pd 
import os
from tqdm.auto import tqdm
import shutil as sh

from sklearn.model_selection import StratifiedKFold

# Data

In [2]:
# training data - bboxes
df = pd.read_csv('../input/global-wheat-detection/train.csv')
df.head(3)

Unnamed: 0,image_id,width,height,bbox,source
0,b6ab77fd7,1024,1024,"[834.0, 222.0, 56.0, 36.0]",usask_1
1,b6ab77fd7,1024,1024,"[226.0, 548.0, 130.0, 58.0]",usask_1
2,b6ab77fd7,1024,1024,"[377.0, 504.0, 74.0, 160.0]",usask_1


In [3]:
bboxs = np.stack(df['bbox'].apply(lambda x: np.fromstring(x[1:-1], sep=',')))
bboxs

array([[834., 222.,  56.,  36.],
       [226., 548., 130.,  58.],
       [377., 504.,  74., 160.],
       ...,
       [134., 228., 141.,  71.],
       [430.,  13., 184.,  79.],
       [875., 740.,  94.,  61.]])

In [4]:
# reformat to yolo 
for i, column in enumerate(['x', 'y', 'w', 'h']):
    df[column] = bboxs[:,i]
df.drop(columns=['bbox'], inplace=True)
df['x_center'] = df['x'] + df['w']/2
df['y_center'] = df['y'] + df['h']/2
df['classes'] = 0

df.head(3)


Unnamed: 0,image_id,width,height,source,x,y,w,h,x_center,y_center,classes
0,b6ab77fd7,1024,1024,usask_1,834.0,222.0,56.0,36.0,862.0,240.0,0
1,b6ab77fd7,1024,1024,usask_1,226.0,548.0,130.0,58.0,291.0,577.0,0
2,b6ab77fd7,1024,1024,usask_1,377.0,504.0,74.0,160.0,414.0,584.0,0


In [5]:
# stratify on source
fold_id = np.zeros((df.shape[0],1))

skf = StratifiedKFold(n_splits = 5, random_state = 42, shuffle = True)

for (ff, (train_index, test_index)) in enumerate(skf.split(df, df['source'])):
    fold_id[test_index]= int(ff)

df['fold'] = fold_id.copy()

In [6]:
df.head(3)

Unnamed: 0,image_id,width,height,source,x,y,w,h,x_center,y_center,classes,fold
0,b6ab77fd7,1024,1024,usask_1,834.0,222.0,56.0,36.0,862.0,240.0,0,0.0
1,b6ab77fd7,1024,1024,usask_1,226.0,548.0,130.0,58.0,291.0,577.0,0,2.0
2,b6ab77fd7,1024,1024,usask_1,377.0,504.0,74.0,160.0,414.0,584.0,0,3.0


In [7]:
df = df[['image_id','x', 'y', 'w', 'h','x_center','y_center','classes', 'fold']]



# Yolo preparation

The implementation from Ultralytics has some requirements on the structure of the dataset - where the annotations are stored and the folders for training / validation data. The creation of the folders in the code below is fairly straightforward, but a more inquisitive reader is encouraged to consult the official documentation: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data



In [8]:
source = 'train'

# pick a single fold for demonstration sake
fold = 0 

val_index = set(df[df['fold'] == fold]['image_id'])

# loop through the bounding boxes per image
for name,mini in tqdm(df.groupby('image_id')):    
    # where to save the files
    if name in val_index:
        path2save = 'valid/'
    else:
        path2save = 'train/'   
    # storage path for labels
    if not os.path.exists('convertor/fold{}/labels/'.format(fold)+path2save):
        os.makedirs('convertor/fold{}/labels/'.format(fold)+path2save)
    with open('convertor/fold{}/labels/'.format(fold)+path2save+name+".txt", 'w+') as f:
        # normalize the coordinates in accordance with the Yolo format requirements
        row = mini[['classes','x_center','y_center','w','h']].astype(float).values
        row = row/1024
        row = row.astype(str)
        for j in range(len(row)):
            text = ' '.join(row[j])
            f.write(text)
            f.write("\n")
    if not os.path.exists('convertor/fold{}/images/{}'.format(fold,path2save)):
        os.makedirs('convertor/fold{}/images/{}'.format(fold,path2save))
    # no preprocessing needed for images => copy them as a batch
    sh.copy("../input/global-wheat-detection/{}/{}.jpg".format(source,name),'convertor/fold{}/images/{}/{}.jpg'.format(fold,path2save,name))

  0%|          | 0/3373 [00:00<?, ?it/s]

# Model

Actual Yolo

In [9]:
!git clone https://github.com/ultralytics/yolov5  && cd yolov5 && pip install -r requirements.txt  

Cloning into 'yolov5'...
remote: Enumerating objects: 12530, done.[K
remote: Counting objects: 100% (19/19), done.[K
remote: Compressing objects: 100% (12/12), done.[K
remote: Total 12530 (delta 7), reused 17 (delta 7), pack-reused 12511[K
Receiving objects: 100% (12530/12530), 11.62 MiB | 21.32 MiB/s, done.
Resolving deltas: 100% (8715/8715), done.
Collecting thop
  Downloading thop-0.0.31.post2005241907-py3-none-any.whl (8.7 kB)
Installing collected packages: thop
Successfully installed thop-0.0.31.post2005241907


In [10]:
# check the assigned GPU type
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
    print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
    print('and then re-execute this cell.')
    
else:
    print(gpu_info)

Tue Apr  5 23:11:00 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.119.04   Driver Version: 450.119.04   CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [11]:
yaml_text = """train: /kaggle/working/convertor/fold0/images/train/
val: /kaggle/working/convertor/fold0/images/valid/

nc: 1
names: ['wheat']"""

In [12]:
with open("wheat.yaml", 'w') as f:
    f.write(yaml_text)
%cat wheat.yaml

train: /kaggle/working/convertor/fold0/images/train/
val: /kaggle/working/convertor/fold0/images/valid/

nc: 1
names: ['wheat']

In [13]:
!python ./yolov5/train.py --img 512 --batch 2 --epochs 3 --workers 2 --data wheat.yaml --cfg "./yolov5/models/yolov5s.yaml" --name yolov5x_fold0 --cache

Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice: (30 second timeout) 
[34m[1mwandb[0m: W&B disabled due to login timeout.
[34m[1mtrain: [0mweights=yolov5/yolov5s.pt, cfg=./yolov5/models/yolov5s.yaml, data=wheat.yaml, hyp=yolov5/data/hyps/hyp.scratch-low.yaml, epochs=3, batch_size=2, imgsz=512, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=2, project=yolov5/runs/train, name=yolov5x_fold0, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[3

In [14]:
!ls ./yolov5/runs/train/yolov5x_fold0/weights/ -lh

total 28M
-rw-r--r-- 1 root root 14M Apr  5 23:16 best.pt
-rw-r--r-- 1 root root 14M Apr  5 23:16 last.pt


# Prediction

In [15]:
!ls /kaggle/input/global-wheat-detection/test

2fd875eaa.jpg  51b3e36ab.jpg  53f253011.jpg  aac893a91.jpg  cc3532ff6.jpg
348a992bb.jpg  51f1be19e.jpg  796707dd7.jpg  cb8d261a3.jpg  f5a1f0358.jpg


In [16]:
!python ./yolov5/detect.py --weights ./yolov5/runs/train/yolov5x_fold0/weights/best.pt --img 512 --conf 0.1 --source /kaggle/input/global-wheat-detection/test --save-txt --save-conf --exist-ok

[34m[1mdetect: [0mweights=['./yolov5/runs/train/yolov5x_fold0/weights/best.pt'], source=/kaggle/input/global-wheat-detection/test, data=yolov5/data/coco128.yaml, imgsz=[512, 512], conf_thres=0.1, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=True, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=yolov5/runs/detect, name=exp, exist_ok=True, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False
YOLOv5 🚀 v6.1-105-gd257c75 torch 1.9.1 CUDA:0 (Tesla P100-PCIE-16GB, 16281MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
image 1/10 /kaggle/input/global-wheat-detection/test/2fd875eaa.jpg: 512x512 2 wheats, Done. (0.018s)
image 2/10 /kaggle/input/global-wheat-detection/test/348a992bb.jpg: 512x512 2 wheats, Done. (0.018s)
image 3/10 /kaggle/input/global-wheat-detection/test/51b3e36ab.jpg: 512x512 Done. (0.018s)
im

In [17]:
!ls ./yolov5/runs/detect/exp/labels/

2fd875eaa.txt  53f253011.txt  aac893a91.txt  f5a1f0358.txt
348a992bb.txt  796707dd7.txt  cc3532ff6.txt


In [18]:
def convert(s):
    x = int(1024 * (s[1] - s[3]/2))
    y = int(1024 * (s[2] - s[4]/2))
    w = int(1024 * s[3])
    h = int(1024 * s[4])
    
    return(str(s[5]) + ' ' + str(x) + ' ' + str(y) + ' ' + str(w) + ' ' + str(h))

In [19]:
with open('submission.csv', 'w') as myfile:

    # prepare submission
    wfolder = './yolov5/runs/detect/exp/labels/'
    for f in os.listdir(wfolder):
        fname = wfolder + f
        xdat = pd.read_csv(fname, sep = ' ', header = None)
        outline = f[:-4] + ' ' + ' '.join(list(xdat.apply(lambda s: convert(s), axis = 1)))
        myfile.write(outline + '\n')
        
myfile.close()        

In [20]:
!cat submission.csv

53f253011 0.100472 61 669 961 57 0.106223 0 125 234 183 0.1082 96 696 928 126 0.108863 515 393 86 161 0.11459 31 0 167 209 0.120246 517 466 89 147
aac893a91 0.108037 376 435 325 188
796707dd7 0.235373 684 128 234 113
cc3532ff6 0.100443 406 752 144 108 0.102479 405 87 4 89 0.107173 576 537 138 94 0.113459 256 498 179 211 0.114847 836 618 186 65 0.121121 154 544 248 115 0.125105 40 567 483 199
2fd875eaa 0.101398 439 163 204 860 0.112546 807 440 216 323
348a992bb 0.100572 0 10 440 298 0.101236 344 445 401 211
f5a1f0358 0.102549 398 424 295 96
