In [None]:
import pandas as pd
from google.cloud import storage
import os 
from yolov5 import train 
import sys 
import yaml
from collections import OrderedDict

# Train YOLO v5

**Author:** Madhava Paliyam (madhavapaliyam@gmail.com)

**Description:** Downloads the images from train.csv and val.csv and trains a YOLO model on it. 

**Inputs**: Folder to save dataset to 

**Outputs**: Trained Model Weights

This is done through the following steps: 

NOTE: First perform a dvc pull in the repository to get the most up to date train/val sets

1. Read and download images to local directory
2. Run YOLOv5 Training Script
3. Run YOLOv5 Validation Script


In [19]:
#### SET PARAMETERS HERE #####
FOLDER = 'dataset'


!dvc pull -q -f

#### Read and download images to local directory

In [34]:
train_set = pd.read_csv('../data/train.csv')
val_set = pd.read_csv('../data/val.csv')

client = storage.Client()
gcloud_uploaded_bucket = client.bucket('street2sat-uploaded')

# This function downloads the images into directories as needed for training YOLO
def download_to_folder(folder, dataset):
    if not os.path.exists(folder):
        os.makedirs(os.path.join(folder, 'images'))
        os.makedirs(os.path.join(folder, 'labels'))

    for i,image in dataset.iterrows():
        # download image to directory 
        path = image['path'].replace('gs://street2sat-uploaded/', '')
        blob = gcloud_uploaded_bucket.blob(path)
        blob.download_to_filename(os.path.join(folder,'images',str(i) + '.jpg'))
        
        # create txt file and download 
        if isinstance(image['bounding_boxes'], str): 
            with open(os.path.join(folder, 'labels', str(i) + '.txt'), 'w') as f: 
                f.write(image['bounding_boxes'])
                

download_to_folder(os.path.join(FOLDER, 'train'), train_set)
download_to_folder(os.path.join(FOLDER, 'val'), val_set)


In [42]:
# open the classes to index dictionary
classes_dict = OrderedDict()
with open('../street2sat_utils/crop_info/classes.txt') as classes_file: 
    for i, line in enumerate(classes_file):
        classes_dict[line.strip()] = i


# Create yaml file 
path = os.path.abspath(FOLDER)
training_yaml = {'train' : os.path.join(path, 'train', 'images'), 
                    'val' : os.path.join(path, 'val', 'images'), 
                    'nc' : len(classes_dict), 
                    'names' : list(classes_dict.keys())}

with open(f"{FOLDER}/data_info.yaml", 'w') as file: 
    yaml.dump(training_yaml, file, default_flow_style=None)

#### Run YOLOv5 Training Script

In [61]:
# runs the detect script from yolov5 libary
# change run parameters here if doing experiments: 
# Full list here: https://github.com/ultralytics/yolov5/blob/master/train.py

train.run(data = f'{FOLDER}/data_info.yaml', imgsz = 800, weights = 'yolov5s.pt', multi_scale = False, batch = 2, epochs = 2)

[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=dataset/data_info.yaml, hyp=, epochs=2, batch_size=8, imgsz=600, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=None, image_weights=False, device=cpu, multi_scale=False, single_cls=False, adam=False, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, patience=100, freeze=0, save_period=-1, local_rank=-1, mmdet_tags=False, entity=None, bbox_interval=-1, artifact_alias=latest, neptune_token=None, neptune_project=None, s3_upload_dir=None, upload_dataset=False, batch=2
[34m[1mWeights & Biases: [0mrun 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs
[34m[1mNeptune AI: [0mrun 'pip install neptune-client' to automatically track and visualize YOLOv5 🚀 runs


[34m[1mtrain: [0mScanning '/gpfs/data1/cmongp1/mpaliyam/street2sat/street2sat/notebooks/dataset/train/labels.cache' images and labels... 206 found, 177 missing, 0 empty, 0 corrupted: 100%|██████████| 383/383 [00:00<?, ?it/s]
[34m[1mval: [0mScanning '/gpfs/data1/cmongp1/mpaliyam/street2sat/street2sat/notebooks/dataset/val/labels.cache' images and labels... 60 found, 33 missing, 0 empty, 0 corrupted: 100%|██████████| 93/93 [00:00<?, ?it/s]


Plotting labels... 

[34m[1mautoanchor: [0mAnalyzing anchors... anchors/target = 3.71, Best Possible Recall (BPR) = 0.9976


       0/1    0.363G   0.06912  0.009045   0.02482        48       608: 100%|██████████| 48/48 [05:35<00:00,  7.00s/it]
               Class     Images     Labels          P          R     mAP@.5 mAP@.5:.95: 100%|██████████| 6/6 [00:34<00:00,  5.75s/it]


                 all         93          0          0          0          0          0


       1/1    0.363G   0.06846  0.008906    0.0245        32       608: 100%|██████████| 48/48 [04:53<00:00,  6.11s/it]
               Class     Images     Labels          P          R     mAP@.5 mAP@.5:.95: 100%|██████████| 6/6 [00:28<00:00,  4.82s/it]


                 all         93          0          0          0          0          0
Optimizer stripped from runs/train/exp13/weights/last.pt, 14.4MB
Optimizer stripped from runs/train/exp13/weights/best.pt, 14.4MB


               Class     Images     Labels          P          R     mAP@.5 mAP@.5:.95: 100%|██████████| 6/6 [00:28<00:00,  4.74s/it]


                 all         93          0          0          0          0          0


In [None]:
# TODO: Move model weights to folder and commit to dvc 