In [None]:
# This notebook uses the TrashCAN 1.0 dataset (https://conservancy.umn.edu/handle/11299/214865),
# specifically the TrashCAN-Material sub-dataset, which has the following classes:

'''
 'rov': 0,
 'plant': 1,
 'animal_fish': 2,
 'animal_starfish': 3,
 'animal_shells': 4,
 'animal_crab': 5,
 'animal_eel': 6,
 'animal_etc': 7,
 'trash_etc': 8,
 'trash_fabric': 9,
 'trash_fishing_gear': 10,
 'trash_metal': 11,
 'trash_paper': 12,
 'trash_plastic': 13,
 'trash_rubber': 14,
 'trash_wood': 15
'''

# Reference paper: https://arxiv.org/pdf/2007.08097.pdf

# This dataset includes both detection and segmentation labels; this notebook uses only the detection (bounding-box) labels.

In [None]:
# Debugging aid: export CUDA_LAUNCH_BLOCKING=1 through the process environment.
import os
os.environ.update({'CUDA_LAUNCH_BLOCKING': "1"})

In [None]:
import json
from PIL import Image
from PIL import ImageDraw
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torchvision
import pycocotools
import numpy as np
import copy
import math

# Pickle compatibility:
# Colab ships an older pandas version, so the pickle files must be saved with pickle protocol 4 to be readable here.

In [None]:
# Mount Google Drive at /content/gdrive; every path used below lives under this mount point.
# NOTE(review): force_remount=True presumably re-mounts even if Drive is already mounted -- confirm in the Colab docs.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [None]:
# Make the project folder on Drive the working directory; the relative paths used
# by the following cells ('yolov5', 'UTD_train.pkl') are resolved against it.
os.chdir('/content/gdrive/MyDrive/UTD')

In [None]:
# Clone the YOLOv5 repository into ./yolov5 (relative to the current working directory).
# NOTE(review): the clone is not pinned to a commit; the training log recorded in this
# notebook reports v5.0-59-g31ee54c -- consider checking out that revision for reproducibility.
!git clone https://github.com/ultralytics/yolov5 

Cloning into 'yolov5'...
remote: Enumerating objects: 6123, done.[K
remote: Counting objects: 100% (143/143), done.[K
remote: Compressing objects: 100% (82/82), done.[K
remote: Total 6123 (delta 82), reused 96 (delta 61), pack-reused 5980[K
Receiving objects: 100% (6123/6123), 8.72 MiB | 10.16 MiB/s, done.
Resolving deltas: 100% (4171/4171), done.


In [None]:
!pip install -r yolov5/requirements.txt

Collecting PyYAML>=5.3.1
[?25l  Downloading https://files.pythonhosted.org/packages/7a/a5/393c087efdc78091afa2af9f1378762f9821c9c1d7a22c5753fb5ac5f97a/PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636kB)
[K     |████████████████████████████████| 645kB 6.5MB/s 
Collecting thop
  Downloading https://files.pythonhosted.org/packages/6c/8b/22ce44e1c71558161a8bd54471123cc796589c7ebbfc15a7e8932e522f83/thop-0.0.31.post2005241907-py3-none-any.whl
Installing collected packages: PyYAML, thop
  Found existing installation: PyYAML 3.13
    Uninstalling PyYAML-3.13:
      Successfully uninstalled PyYAML-3.13
Successfully installed PyYAML-5.4.1 thop-0.0.31.post2005241907


In [None]:
# Data Preparation (training split): write one YOLOv5 label file per image.
#
# Every output line has the form 'class x_center y_center width height'. Boxes are
# stored in the pickle as (x_min, y_min, width, height) in pixels and are converted
# to centre/size values normalised by the image width and height.

# Absolute paths are used instead of os.chdir() into the label folder: the working
# directory is left untouched, so cells that read files relative to it (such as the
# validation pickle) still work on a fresh Restart & Run All.
train_pickle_path = '/content/gdrive/MyDrive/UTD/UTD_train.pkl'
train_label_dir = '/content/gdrive/MyDrive/UTD/data/labels/train'

# NOTE(review): unpickling can execute arbitrary code -- only load pickle files you trust.
train = pd.read_pickle(train_pickle_path)
os.makedirs(train_label_dir, exist_ok=True)

# groupby(sort=False) visits the files in order of first appearance and avoids
# re-scanning the whole frame with a boolean mask for every file name.
for fn, temp in train.groupby('file_name', sort=False):
    # splitext strips the image extension whatever its length (fn[:-4] assumed 3 characters)
    label_path = os.path.join(train_label_dir, os.path.splitext(fn)[0] + '.txt')
    with open(label_path, 'w') as label_file:
        # Remove duplicate bounding boxes, keeping the first occurrence of each box
        # together with the label and image size taken from that same row.
        # (Popping labels by ascending index shifts the remaining indices, which
        # dropped the wrong labels or raised IndexError when a file had several duplicates.)
        seen_boxes = set()
        rows = zip(temp.category_id.values, temp.bbox.values, temp.width.values, temp.height.values)
        for label, box, im_width, im_height in rows:
            box_key = tuple(box)
            if box_key in seen_boxes:
                continue
            seen_boxes.add(box_key)

            # currently: x_min, y_min, width, height (pixels)
            # YOLO needs: x_center, y_center, width, height (fractions of the image size)
            width = box[-2]
            height = box[-1]
            x_center = (box[0] + 0.5 * width) / im_width    # x_center = x_min + 0.5*width
            y_center = (box[1] + 0.5 * height) / im_height  # y_center = y_min + 0.5*height
            width = width / im_width
            height = height / im_height

            # Some boxes reach or cross the image border; any normalised value that is
            # <= 0 or >= 1 is pulled just inside the open interval (0, 1).
            x_center, y_center, width, height = [
                0.001 if value <= 0.0 else 0.999 if value >= 1.0 else value
                for value in (x_center, y_center, width, height)
            ]

            # label - 1: presumably shifts 1-based category ids to the 0-based class
            # indices listed at the top of the notebook -- TODO confirm against the pickle.
            line = str(label - 1)
            line += ' ' + str(x_center) + ' ' + str(y_center) + ' ' + str(width) + ' ' + str(height)
            print(line)

            label_file.write(line + '\n')

In [None]:
# Data Preparation (validation split): write one YOLOv5 label file per image.
#
# Every output line has the form 'class x_center y_center width height'. Boxes are
# stored in the pickle as (x_min, y_min, width, height) in pixels and are converted
# to centre/size values normalised by the image width and height.

# Absolute paths are used instead of a relative read followed by os.chdir(): this
# cell no longer depends on which directory an earlier cell left the kernel in.
# NOTE(review): assumes UTD_val.pkl sits in the project root next to UTD_train.pkl -- confirm.
val_pickle_path = '/content/gdrive/MyDrive/UTD/UTD_val.pkl'
val_label_dir = '/content/gdrive/MyDrive/UTD/data/labels/val'

# NOTE(review): unpickling can execute arbitrary code -- only load pickle files you trust.
val = pd.read_pickle(val_pickle_path)
os.makedirs(val_label_dir, exist_ok=True)

# groupby(sort=False) visits the files in order of first appearance and avoids
# re-scanning the whole frame with a boolean mask for every file name.
for fn, temp in val.groupby('file_name', sort=False):
    # splitext strips the image extension whatever its length (fn[:-4] assumed 3 characters)
    label_path = os.path.join(val_label_dir, os.path.splitext(fn)[0] + '.txt')
    with open(label_path, 'w') as label_file:
        # Remove duplicate bounding boxes, keeping the first occurrence of each box
        # together with the label and image size taken from that same row.
        # (Popping labels by ascending index shifts the remaining indices, which
        # dropped the wrong labels or raised IndexError when a file had several duplicates.)
        seen_boxes = set()
        rows = zip(temp.category_id.values, temp.bbox.values, temp.width.values, temp.height.values)
        for label, box, im_width, im_height in rows:
            box_key = tuple(box)
            if box_key in seen_boxes:
                continue
            seen_boxes.add(box_key)

            # currently: x_min, y_min, width, height (pixels)
            # YOLO needs: x_center, y_center, width, height (fractions of the image size)
            width = box[-2]
            height = box[-1]
            x_center = (box[0] + 0.5 * width) / im_width    # x_center = x_min + 0.5*width
            y_center = (box[1] + 0.5 * height) / im_height  # y_center = y_min + 0.5*height
            width = width / im_width
            height = height / im_height

            # Some boxes reach or cross the image border; any normalised value that is
            # <= 0 or >= 1 is pulled just inside the open interval (0, 1).
            x_center, y_center, width, height = [
                0.001 if value <= 0.0 else 0.999 if value >= 1.0 else value
                for value in (x_center, y_center, width, height)
            ]

            # label - 1: presumably shifts 1-based category ids to the 0-based class
            # indices listed at the top of the notebook -- TODO confirm against the pickle.
            line = str(label - 1)
            line += ' ' + str(x_center) + ' ' + str(y_center) + ' ' + str(width) + ' ' + str(height)
            print(line)

            label_file.write(line + '\n')

In [None]:
# Sanity check before training: displays True when PyTorch can see a CUDA device.
torch.cuda.is_available() # check GPU availability

True

In [None]:
# trains the model
# Running this line will create a sub-folder in yolov5 that contains the weights of the trained model
if os.getcwd() != '/content/gdrive/MyDrive/UTD/yolov5':
  os.chdir('/content/gdrive/MyDrive/UTD/yolov5')

!python train.py --img 480 --rect --batch 32 --epochs 200 --data '/content/gdrive/MyDrive/UTD/dataset.yaml' --cfg '/content/gdrive/MyDrive/UTD/yolov5/models/yolov5x.yaml' \
--weights yolov5x.pt --name utd_yolo5_t7 --cache


YOLOv5 🚀 v5.0-59-g31ee54c torch 1.8.1+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16160.5MB)

Namespace(adam=False, artifact_alias='latest', batch_size=32, bbox_interval=-1, bucket='', cache_images=True, cfg='/content/gdrive/MyDrive/UTD/yolov5/models/yolov5x.yaml', data='/content/gdrive/MyDrive/UTD/dataset.yaml', device='', entity=None, epochs=200, evolve=False, exist_ok=False, global_rank=-1, hyp='data/hyp.scratch.yaml', image_weights=False, img_size=[480, 480], label_smoothing=0.0, linear_lr=False, local_rank=-1, multi_scale=False, name='utd_yolo5_t7', noautoanchor=False, nosave=False, notest=False, project='runs/train', quad=False, rect=True, resume=False, save_dir='runs/train/utd_yolo5_t7', save_period=-1, single_cls=False, sync_bn=False, total_batch_size=32, upload_dataset=False, weights='yolov5x.pt', workers=8, world_size=1)
[34m[1mtensorboard: [0mStart with 'tensorboard --logdir runs/train', view at http://localhost:6006/
2021-05-02 23:53:42.189168: I tensorflow/stream_executor/platf

In [None]:
# trying it out with a test video:

if os.getcwd() != '/content/gdrive/MyDrive/UTD/yolov5':
  os.chdir('/content/gdrive/MyDrive/UTD/yolov5')

!python detect.py --source /content/gdrive/MyDrive/UTD/bali.mp4 --weights '/content/gdrive/MyDrive/UTD/yolov5/runs/train/utd_yolo5_t7/weights/best.pt' --img 480 --conf 0.1 --save-txt --project infer_video_t7


Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.1, device='', exist_ok=False, hide_conf=False, hide_labels=False, img_size=480, iou_thres=0.45, line_thickness=3, name='exp', nosave=False, project='infer_video_t7', save_conf=False, save_crop=False, save_txt=True, source='/content/gdrive/MyDrive/UTD/bali.mp4', update=False, view_img=False, weights=['/content/gdrive/MyDrive/UTD/yolov5/runs/train/utd_yolo5_t7/weights/best.pt'])
YOLOv5 🚀 v5.0-59-g31ee54c torch 1.8.1+cu101 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB)

Fusing layers... 
Model Summary: 476 layers, 87299629 parameters, 0 gradients, 217.4 GFLOPS
video 1/1 (1/4615) /content/gdrive/MyDrive/UTD/bali.mp4: 288x480 Done. (0.055s)
video 1/1 (2/4615) /content/gdrive/MyDrive/UTD/bali.mp4: 288x480 1 trash_etc, Done. (0.045s)
video 1/1 (3/4615) /content/gdrive/MyDrive/UTD/bali.mp4: 288x480 1 trash_etc, Done. (0.082s)
video 1/1 (4/4615) /content/gdrive/MyDrive/UTD/bali.mp4: 288x480 1 trash_etc, Done. (0.060s)
video 1/1

In [None]:
# genetic algorithm for hyper param tuning:

# Not feasible here: the command below would have to be repeated for 300 iterations, which would take about 150 hours on the GPU.

#!python train.py --epochs 10 --data '/content/gdrive/MyDrive/UTD/dataset.yaml' --weights yolov5x.pt --cache --evolve

In [None]:
# inference on validation images

if os.getcwd() != '/content/gdrive/MyDrive/UTD/yolov5':
  os.chdir('/content/gdrive/MyDrive/UTD/yolov5')

!python detect.py --source /content/gdrive/MyDrive/UTD/data/images/val/ --weights '/content/gdrive/MyDrive/UTD/yolov5/runs/train/utd_yolo5_t7/weights/best.pt' --img 480 --conf 0.5 --save-txt


Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.5, device='', exist_ok=False, hide_conf=False, hide_labels=False, img_size=480, iou_thres=0.45, line_thickness=3, name='exp', nosave=False, project='runs/detect', save_conf=False, save_crop=False, save_txt=True, source='/content/gdrive/MyDrive/UTD/data/images/val/', update=False, view_img=False, weights=['/content/gdrive/MyDrive/UTD/yolov5/runs/train/utd_yolo5_t7/weights/best.pt'])
YOLOv5 🚀 2021-5-1 torch 1.8.1+cu101 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB)

Fusing layers... 
Model Summary: 476 layers, 87299629 parameters, 0 gradients, 217.4 GFLOPS
image 1/1204 /content/gdrive/MyDrive/UTD/data/images/val/vid_000002_frame0000013.jpg: 288x480 1 trash_etc, Done. (0.084s)
image 2/1204 /content/gdrive/MyDrive/UTD/data/images/val/vid_000002_frame0000014.jpg: 288x480 1 trash_etc, Done. (0.046s)
image 3/1204 /content/gdrive/MyDrive/UTD/data/images/val/vid_000002_frame0000015.jpg: 288x480 1 trash_etc, Done. (0.047s)
image