In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
 #   for filename in filenames:
  #      print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
pwd


'/kaggle/working'

In [3]:
%cd ../
!mkdir tmp
%cd tmp

/kaggle
/kaggle/tmp


In [4]:
# Download YOLOv5
!git clone https://github.com/ultralytics/yolov5  # clone repo
%cd yolov5
# Install dependencies
%pip install -qr requirements.txt  # install dependencies

%cd ../
import torch
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

Cloning into 'yolov5'...
remote: Enumerating objects: 10147, done.[K
remote: Counting objects: 100% (15/15), done.[K
remote: Compressing objects: 100% (15/15), done.[K
remote: Total 10147 (delta 4), reused 7 (delta 0), pack-reused 10132[K
Receiving objects: 100% (10147/10147), 10.42 MiB | 30.22 MiB/s, done.
Resolving deltas: 100% (7035/7035), done.
/kaggle/tmp/yolov5
Note: you may need to restart the kernel to use updated packages.
/kaggle/tmp
Setup complete. Using torch 1.9.1 (Tesla P100-PCIE-16GB)


In [5]:
pip install wandb

Note: you may need to restart the kernel to use updated packages.


In [6]:
import wandb

In [7]:
from kaggle_secrets import UserSecretsClient

# Client for the secrets attached to this Kaggle notebook; the W&B key is fetched
# from it instead of being hard-coded in a cell.
user_secrets = UserSecretsClient()

# The W&B API token is read from the secret whose label is "nfl_wb".
# If you saved your token under a different label, change the label below to match.
wandb_api = user_secrets.get_secret("nfl_wb") 

# Log this kernel session in to W&B with the retrieved key.
wandb.login(key=wandb_api)

[34m[1mwandb[0m: W&B API key is configured (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [8]:
# Install W&B 
!pip install -q --upgrade wandb


# Login 
#import wandb
print(wandb.__version__)
#wandb.login()

0.12.7


In [9]:
import wandb

# Start a W&B run from this kernel in project "test-project" under entity "aml8850".
# NOTE(review): training is launched later as a separate process (`!python train.py`)
# and its log reports its own run in a different project, so the run opened here
# looks unused by training — confirm whether this call is still needed.
wandb.init(project="test-project", entity="aml8850")

[34m[1mwandb[0m: Currently logged in as: [33marchanabc[0m (use `wandb login --relogin` to force relogin)


In [10]:
import os
import gc
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from shutil import copyfile
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [11]:
pwd

'/kaggle/tmp'

In [12]:
%cd ../
TRAIN_PATH = 'input/nfl-health-and-safety-helmet-assignment/images/'
IMG_SIZE = 256
BATCH_SIZE = 16
EPOCHS = 10

print(f'Number of extra images: {len(os.listdir(TRAIN_PATH))}') 

/kaggle
Number of extra images: 9947


In [13]:
# Read the image-level annotation file (one row per ground-truth bounding box).
extra_df = pd.read_csv('input/nfl-health-and-safety-helmet-assignment/image_labels.csv')
print('Number of ground truth bounding boxes: ', len(extra_df))

# Assign an integer class id to every distinct label, in order of first appearance.
unique_labels = extra_df.label.unique()
label_to_id = dict(zip(unique_labels, range(len(unique_labels))))
print('Unique labels: ', label_to_id)

# Per-image lookup: image file name -> DataFrame holding that image's labels and
# bbox coordinates, re-indexed from 0.
image_bbox_label = {
    image_name: boxes.reset_index(drop=True)
    for image_name, boxes in extra_df.groupby('image')
}

# Preview the first rows
extra_df.head(5)

Number of ground truth bounding boxes:  193736
Unique labels:  {'Helmet': 0, 'Helmet-Blurred': 1, 'Helmet-Difficult': 2, 'Helmet-Sideline': 3, 'Helmet-Partial': 4}


Unnamed: 0,image,label,left,width,top,height
0,57503_000116_Endzone_frame443.jpg,Helmet,1099,16,456,15
1,57503_000116_Endzone_frame443.jpg,Helmet,1117,15,478,16
2,57503_000116_Endzone_frame443.jpg,Helmet,828,16,511,15
3,57503_000116_Endzone_frame443.jpg,Helmet,746,16,519,16
4,57503_000116_Endzone_frame443.jpg,Helmet,678,17,554,17


In [14]:
# Split the image names 80/20 into training and validation sets (fixed seed).
image_names = list(image_bbox_label)
train_names, valid_names = train_test_split(image_names, test_size=0.2, random_state=42)

# Report the three sizes on a single output line.
print(
    f'Size of dataset: {len(image_bbox_label)},'
    f'       training images: {len(train_names)},'
    f'       validation images: {len(valid_names)}'
)

Size of dataset: 9947,       training images: 7957,       validation images: 1990


In [15]:
pwd

'/kaggle'

In [16]:
# Build the dataset layout: tmp/nfl_extra/{images,labels}/{train,valid}.
for kind in ('images', 'labels'):
    for split in ('train', 'valid'):
        os.makedirs(f'tmp/nfl_extra/{kind}/{split}', exist_ok=True)

# Copy (the originals are left in place) each image into its split's folder.
for split, names in (('train', train_names), ('valid', valid_names)):
    for img_name in tqdm(names):
        copyfile(f'{TRAIN_PATH}/{img_name}', f'tmp/nfl_extra/images/{split}/{img_name}')

100%|██████████| 7957/7957 [01:00<00:00, 132.56it/s]
100%|██████████| 1990/1990 [00:14<00:00, 136.34it/s]


In [17]:
pwd

'/kaggle'

In [18]:
# Create .yaml file 
import yaml

data_yaml = dict(
    train = '../nfl_extra/images/train',
    val = '../nfl_extra/images/valid',
    nc = 5,
    names = list(extra_df.label.unique())
)

# Note that I am creating the file in the yolov5/data/ directory.
with open('tmp/yolov5/data/data.yaml', 'w') as outfile:
    yaml.dump(data_yaml, outfile, default_flow_style=True)
    
%cat tmp/yolov5/data/data.yaml

{names: [Helmet, Helmet-Blurred, Helmet-Difficult, Helmet-Sideline, Helmet-Partial],
  nc: 5, train: ../nfl_extra/images/train, val: ../nfl_extra/images/valid}


In [19]:
pwd

'/kaggle'

In [20]:
def get_yolo_format_bbox(img_w, img_h, box):
    """
    Convert a bounding box to YOLO format.

    Input:
    img_w - Original/Scaled image width
    img_h - Original/Scaled image height
    box - Object exposing the attributes `left`, `width`, `top` and `height`
          (e.g. a DataFrame row), all in pixels

    Output:
    Return the YOLO formatted bounding box as a list,
    [x_center, y_center, width, height], each value divided by the
    corresponding image dimension.
    """
    w = box.width # width
    h = box.height # height
    # Use the exact box centre. Rounding the half-size to an integer pixel
    # (np.round rounds halves to even) shifted odd-sized boxes by half a pixel
    # in an inconsistent direction, which is significant for small boxes.
    xc = box.left + w / 2 # xmin + width/2
    yc = box.top + h / 2 # ymin + height/2

    return [xc/img_w, yc/img_h, w/img_w, h/img_h] # x_center y_center width height
    
# Iterate over each image and write the labels and bbox coordinates to a .txt file.
# Sets give constant-time membership tests (the split results are plain lists).
train_name_set = set(train_names)
valid_name_set = set(valid_names)

for img_name, df in tqdm(image_bbox_label.items()):
    # Decide which split's label folder this image belongs to.
    if img_name in train_name_set:
        split = 'train'
    elif img_name in valid_name_set:
        split = 'valid'
    else:
        # Not part of either split: skip it rather than writing to a stale or
        # undefined file name.
        continue

    # Open the image file to get its height and width.
    image_path = os.path.join(TRAIN_PATH, img_name)
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    height, width = img.shape[:2]

    # Label file shares the image's base name, with a .txt extension.
    stem = os.path.splitext(img_name)[0]
    file_name = f'tmp/nfl_extra/labels/{split}/{stem}.txt'

    # One line per box: "<class_id> <x_center> <y_center> <width> <height>".
    with open(file_name, 'w') as f:
        for row in df.itertuples(index=False):
            bbox = get_yolo_format_bbox(width, height, row)
            fields = [label_to_id[row.label]] + bbox
            f.write(' '.join(str(value) for value in fields))
            f.write('\n')

100%|██████████| 9947/9947 [03:17<00:00, 50.32it/s]


In [21]:
# Enter the YOLOv5 checkout; the training cell below runs train.py from here.
%cd tmp/yolov5/

/kaggle/tmp/yolov5


In [22]:
#!add-apt-repository ppa:ubuntu-toolchain-r/test -y
#!apt-get update
#!apt-get upgrade libstdc++6 -y

In [23]:
!python train.py --img 720 \
                 --batch 16 \
                 --epochs 10 \
                 --data data.yaml \
                 --weights yolov5s.pt \
                 #--save_period 1 \
                 --project nfl2021
#test-project\--entity aml8850

Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...
[34m[1mwandb[0m: Currently logged in as: [33marchanabc[0m (use `wandb login --relogin` to force relogin)
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
[34m[1mwandb[0m: Tracking run with wandb version 0.12.7
[34m[1mwandb[0m: Syncing run [33mlegendary-night-2[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/archanabc/YOLOv5[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/archanabc/YOLOv5/runs/3hfrmrzo[0m
[34m[1mwandb[0m: Run data is saved locally in /kaggle/tmp/yolov5/wandb/run-20211204_151739-3hfrmrzo
[34m[1mwandb[0m: Run `wandb offline` to turn off syncing.































[34m[1mwandb[0m: Waiting for W&B process to finish, PID 381... (success).
[34m[1mwandb[0m:                                                                                
[34m