In [1]:
from google.cloud import storage
import pandas as pd
from yolov5 import val
import sys
import yaml
import os
import shutil
import reverse_geocoder as rg
from collections import OrderedDict

# Analyze Predictions

**Author:** Madhava Paliyam (madhavapaliyam@gmail.com)

**Description:** Analyzes the model output using the YOLOv5 validation script. Breaks down model performance by crop type for a given region. 


**Inputs**: Region to analyze, train/val dataset CSV files

**Outputs**: YOLOv5 validation performance 



In [9]:
# Pull the DVC-tracked files before running the rest of the notebook.
# Per the DVC CLI, -q silences output and -f skips the overwrite prompt -- TODO confirm against the installed DVC version.
!dvc pull -q -f

[0m

In [10]:
#### SET PARAMETERS HERE #####

# Country whose images are analyzed; one of US, KE, UG
COUNTRY = 'KE'

# Local folder the dataset is downloaded into
FOLDER = 'dataset'

# Folder where output images are saved
SAVE_FOLDER = 'runs/val'

# Experiment name
EXP_NAME = 'all'

# Path to the trained model weights.
# NOTE(review): machine-specific absolute path -- update for your environment.
model_weights_path = '/gpfs/data1/cmongp1/mpaliyam/street2sat/yolov5/runs/train/exp18/weights/best.pt'

# Confidence threshold passed to the validation script
conf_thresh = 0.0001

# IoU threshold passed to the validation script
iou_thresh = 0.1

#### Run the following cells to download images from a particular country

In [12]:
# Image metadata table (first CSV column is the index) and the train/val splits
database_info = pd.read_csv('../data/database-info.csv', index_col=0)
train_set = pd.read_csv('../data/train.csv')
val_set = pd.read_csv('../data/val.csv')

# Handle to the Google Cloud Storage bucket the images are downloaded from
client = storage.Client()
gcloud_uploaded_bucket = client.bucket('street2sat-uploaded')

# This function downloads the images into directories as needed for training YOLO
def download_to_folder(folder, dataset, country_code):
    """Download one country's images and YOLO label files into ``folder``.

    Ensures ``<folder>/images`` and ``<folder>/labels`` exist. For every row of
    ``dataset`` whose entry in the module-level ``database_info`` table has
    ``cc == country_code``, the image is downloaded from the module-level
    ``gcloud_uploaded_bucket`` to ``images/<row index>.jpg``. When the row's
    ``bounding_boxes`` value is a string, it is written verbatim to
    ``labels/<row index>.txt``; rows without a string value get no label file.

    Args:
        folder: Destination directory for this split.
        dataset: DataFrame with ``path`` and ``bounding_boxes`` columns.
        country_code: Value compared against the ``cc`` column of
            ``database_info``.

    Raises:
        AssertionError: If a row's path does not match exactly one row of
            ``database_info``.
    """
    images_dir = os.path.join(folder, 'images')
    labels_dir = os.path.join(folder, 'labels')
    # exist_ok=True keeps the cell re-runnable and also repairs a folder that
    # already exists but is missing one of its sub-directories.
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    for i, image in dataset.iterrows():
        # Look up the metadata row for this image; it must be unique.
        match = database_info[database_info['input_img'] == image['path']]
        assert len(match) == 1, (
            f"Expected exactly one database entry for {image['path']}, "
            f"found {len(match)}"
        )

        # Only download images whose country code matches.
        if match.iloc[0]['cc'] != country_code:
            continue
        print(image['path'])

        # Blob names are relative to the bucket, so strip the gs:// prefix.
        path = image['path'].replace('gs://street2sat-uploaded/', '')
        blob = gcloud_uploaded_bucket.blob(path)
        blob.download_to_filename(os.path.join(images_dir, str(i) + '.jpg'))

        # Write the label file only when bounding boxes are present as text.
        if isinstance(image['bounding_boxes'], str):
            with open(os.path.join(labels_dir, str(i) + '.txt'), 'w') as f:
                f.write(image['bounding_boxes'])
                

# Download the train and validation splits for the selected country
for split_name, split_frame in (('train', train_set), ('val', val_set)):
    download_to_folder(os.path.join(FOLDER, split_name), split_frame, COUNTRY)


  exec(code_obj, self.user_global_ns, self.user_ns)


gs://street2sat-uploaded/KENYA/2021_07_10_T2/110GOPRO/GPED4063.JPG
gs://street2sat-uploaded/KENYA/2021_07_15_T2/103GOPRO/GPCV6094.JPG
gs://street2sat-uploaded/KENYA/2021_07_13_T2/106GOPRO/GPBY2219.JPG
gs://street2sat-uploaded/KENYA/2021-07-06-T1/GPBC3759.JPG
gs://street2sat-uploaded/KENYA/2021-07-23-T1/GPAR2964.JPG
gs://street2sat-uploaded/KENYA/2021_07_10_T2/111GOPRO/GPET5778.JPG
gs://street2sat-uploaded/KENYA/2021-08-03-T1/GPAJ5589.JPG
gs://street2sat-uploaded/KENYA/2021_07_12_T2/105GOPRO/GPCR0235.JPG
gs://street2sat-uploaded/KENYA/2021-07-17-T1/GPQP2454.JPG
gs://street2sat-uploaded/KENYA/2021-08-02-T1/GPAM4051.JPG
gs://street2sat-uploaded/KENYA/2021-07-26-T1/GPAE8743.JPG
gs://street2sat-uploaded/KENYA/2021-07-06-T1/GPBO4072.JPG
gs://street2sat-uploaded/KENYA/2021_07_12_T2/103GOPRO/GPCA9066.JPG
gs://street2sat-uploaded/KENYA/2021_07_10_T2/111GOPRO/GPES5766.JPG
gs://street2sat-uploaded/KENYA/2021-08-04-T1/GPAS9205.JPG
gs://street2sat-uploaded/KENYA/2021_07_12_T2/108GOPRO/GPEE3512.JPG


In [13]:
# Build the class-name -> index mapping, preserving the order of classes.txt
with open('../street2sat_utils/crop_info/classes.txt') as classes_file:
    classes_dict = OrderedDict(
        (class_name.strip(), index) for index, class_name in enumerate(classes_file)
    )

# Absolute location of the downloaded dataset and of its YAML description
path = os.path.abspath(FOLDER)
folder = os.path.abspath(FOLDER)
yaml_file = os.path.join(folder, 'data_info.yaml')

# Dataset description: image folders, number of classes and class names
training_yaml = {
    'train': os.path.join(path, 'train', 'images'),
    'val': os.path.join(path, 'val', 'images'),
    'nc': len(classes_dict),
    'names': list(classes_dict),
}

# Write the description into the dataset folder
with open(f"{FOLDER}/data_info.yaml", 'w') as file:
    yaml.dump(training_yaml, file, default_flow_style=None)

### Runs validation script with parameters

https://github.com/ultralytics/yolov5/blob/63ddb6f0d06f6309aa42bababd08c859197a27af/val.py#L319


In [14]:
# Runs the validation script from the yolov5 library.
# The argument vector is built as a list (instead of splitting one long string
# on whitespace) so that paths containing spaces stay single arguments.
to_parse = [
    "val.py",
    "--data", str(yaml_file),
    "--weights", str(model_weights_path),
    "--batch-size", "1",
    "--imgsz", "800",
    "--conf-thres", str(conf_thresh),
    "--iou-thres", str(iou_thresh),
    "--verbose",
    "--exist-ok",
    "--task", "val",
    # "--device", "cpu",
]

# val.main() is called without arguments and takes its options from sys.argv,
# so swap argv in for the call and restore it afterwards to avoid leaving the
# kernel's global state modified (even if validation raises).
original_argv = sys.argv
sys.argv = to_parse
try:
    val.main()
finally:
    sys.argv = original_argv

YOLOv5 🚀 2022-2-16 torch 1.10.2+cu102 CUDA:0 (Tesla V100-PCIE-16GB, 16160.5MB)



[34m[1mval: [0mdata=/gpfs/data1/cmongp1/mpaliyam/street2sat/street2sat/notebooks/dataset/data_info.yaml, weights=['/gpfs/data1/cmongp1/mpaliyam/street2sat/yolov5/runs/train/exp18/weights/best.pt'], batch_size=1, imgsz=800, conf_thres=0.0001, iou_thres=0.1, task=val, device=, single_cls=False, augment=False, verbose=True, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project=runs/val, name=exp, exist_ok=True, half=False


Fusing layers... 
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Model Summary: 369 layers, 20919810 parameters, 0 gradients, 48.2 GFLOPs
[34m[1mval: [0mScanning '/gpfs/data1/cmongp1/mpaliyam/street2sat/street2sat/notebooks/dataset/val/labels' images and labels...20 found, 5 missing, 0 empty, 0 corrupted: 100%|██████████| 25/25 [00:00<00:00, 71.85it/s]
[34m[1mval: [0mNew cache created: /gpfs/data1/cmongp1/mpaliyam/street2sat/street2sat/notebooks/dataset/val/labels.cache
               Class     Images     Labels          P          R     mAP@.5 mAP@.5:.95: 100%|██████████| 25/25 [00:03<00:00,  6.73it/s]


                 all         25         60       0.34      0.362      0.312      0.136
              banana         25          1          0          0          0          0
               maize         25         54      0.491      0.685      0.536      0.228
           sugarcane         25          5       0.53        0.4      0.401       0.18
Speed: 0.2ms pre-process, 10.5ms inference, 5.0ms NMS per image at shape (1, 3, 800, 800)
Results saved to [1mruns/val/exp[0m


![Confusion matrix](runs/val/exp/confusion_matrix.png "Confusion Matrix")

![F1 curve](runs/val/exp/F1_curve.png "F1")