# Install and import requirements

In [None]:
# Optional: upgrade the packaging toolchain first.
# !pip3 install -U pip
# !pip3 install -U setuptools wheel

# Install the proper version of PyTorch following https://pytorch.org/get-started/locally/
# !pip3 install torch==1.12.0+cu113 torchvision==0.13.0+cu113 torchtext==0.13.0 --extra-index-url https://download.pytorch.org/whl/cu113

# NOTE(review): autogluon is installed unpinned, while this notebook imports
# `autogluon.vision.ObjectDetector` — confirm the resolved release still ships that module.
!pip3 install autogluon

# For GPU users: the `mxnet_cu112` wheel is the CUDA 11.2 build of MXNet.
# NOTE(review): the PyTorch line above targets cu113 — confirm both match the runtime's CUDA.
!pip3 install "mxnet_cu112<2.0.0, >=1.7.0"

In [None]:
import random
import os

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import torch
import torchvision
from torchvision.io import read_image
from torchvision.utils import draw_bounding_boxes

import autogluon.core as ag
from autogluon.vision import ObjectDetector

%matplotlib inline

In [None]:
# Show the GPUs (and driver/CUDA versions) visible to this runtime.
!nvidia-smi

# Datasets

## Download

In [None]:
# Upload your Kaggle API token (kaggle.json) into the files panel (cwd), then run this cell.
# -f / -p keep the cell quiet and re-runnable whether or not ~/.kaggle already exists.
!rm -rf ~/.kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# The token is a credential: make it readable by the current user only
# (the kaggle CLI warns when the file is readable by other users).
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Start from an empty ./datasets directory.
# -f: no error on a fresh runtime where the directory does not exist yet.
!rm -rf datasets
!mkdir -p datasets

In [None]:
# Dataset page:
# https://www.kaggle.com/datasets/mbkinaci/fruit-images-for-object-detection?select=train_zip

# Download the dataset into ./datasets (-p) and extract the archive (--unzip).
# Requires the Kaggle API token to be installed under ~/.kaggle.
!kaggle datasets download -d mbkinaci/fruit-images-for-object-detection -p datasets --unzip

In [None]:
# remove corrupted files

def remove_files_by_stem(directory, stems):
    """Delete every file in `directory` whose name is `<stem>.<anything>`.

    This mirrors the shell pattern `rm <directory>/<stem>.*` for each stem, but is
    safe to re-run: a missing directory or an already-deleted file is not an error.

    Args:
        directory: folder to clean.
        stems: iterable of file names without extension (e.g. 'apple_17').

    Returns:
        Sorted list of the file names that were removed.
    """
    if not os.path.isdir(directory):
        return []

    wanted = set(stems)
    removed = []
    for file_name in sorted(os.listdir(directory)):
        # Split at the FIRST dot so 'apple_1' never matches 'apple_17.jpg'.
        stem, dot, _rest = file_name.partition('.')
        full_path = os.path.join(directory, file_name)
        if dot and stem in wanted and os.path.isfile(full_path):
            os.remove(full_path)
            removed.append(file_name)
    return removed


# Image/annotation pairs that are removed from each split, keyed by folder.
CORRUPTED_STEMS = {
    os.path.join('datasets', 'train_zip', 'train'): [
        'apple_17', 'apple_1', 'apple_20', 'apple_28', 'apple_37',
        'apple_42', 'apple_43', 'apple_44', 'apple_8',
        'banana_25', 'banana_35', 'banana_53', 'banana_57',
        'banana_60', 'banana_71', 'banana_72', 'banana_75',
        'mixed_16',
        'orange_11', 'orange_13', 'orange_18', 'orange_1', 'orange_22',
        'orange_30', 'orange_35', 'orange_42', 'orange_4', 'orange_50',
        'orange_51', 'orange_59', 'orange_64', 'orange_68', 'orange_70',
    ],
    os.path.join('datasets', 'test_zip', 'test'): [
        'apple_79', 'apple_92', 'banana_87', 'orange_94',
    ],
}

for directory, stems in CORRUPTED_STEMS.items():
    removed = remove_files_by_stem(directory, stems)
    print(f'{directory}: removed {len(removed)} file(s)')

In [None]:
# Print the docstring of the VOC dataset loader (reference only).
help(ObjectDetector.Dataset.from_voc)

In [None]:
# Recreate an empty Pascal-VOC style folder tree for the merged fruits dataset.
# -f: no error on the first run; -p: portable spelling of --parents.
!rm -rf datasets/fruits
!mkdir -p datasets/fruits/Annotations
!mkdir -p datasets/fruits/ImageSets/Main
!mkdir -p datasets/fruits/JPEGImages

In [None]:
# Write the split files: one image id per line, where the id is the .jpg file
# name with its directory (basename) and its `.jpg` suffix (${f%.jpg}) stripped.
# NOTE(review): `ls | xargs` splits on whitespace, so this assumes file names without spaces — confirm.
!ls datasets/test_zip/test/*.jpg  | xargs -n 1 basename | while read f; do echo ${f%.jpg}; done > datasets/fruits/ImageSets/Main/test.txt
!ls datasets/train_zip/train/*.jpg  | xargs -n 1 basename | while read f; do echo ${f%.jpg}; done > datasets/fruits/ImageSets/Main/train.txt

In [None]:
# Merge both splits into the single VOC tree: images go to JPEGImages, XML files to Annotations.
# NOTE(review): relies on shell brace expansion ({a,b}); presumably the notebook shell is bash — confirm.
!cp datasets/{test_zip/test,train_zip/train}/*.jpg datasets/fruits/JPEGImages
!cp datasets/{test_zip/test,train_zip/train}/*.xml datasets/fruits/Annotations

## Dataset objects

In [None]:
def show_bounding_boxes(image_path, rois):
    """Draw class-coloured bounding boxes on an image and return it as a PIL image.

    Args:
        image_path: path of the image file to annotate.
        rois: iterable of dicts with the keys 'xmin', 'ymin', 'xmax', 'ymax' and
            'class'. The coordinates are multiplied by the image width/height,
            i.e. they are expected as fractions of the image size.

    Returns:
        A PIL image with one rectangle per ROI. Classes missing from the colour
        map are drawn in white. With no ROIs the unannotated image is returned.
    """
    default_color = 'white'
    colors_map = {
        'motorbike': 'green',
        'person': 'red',
        'car': 'blue',
        'apple': 'red',
        'orange': 'orange',
        'banana': 'yellow',
    }

    # Decode the file once and force 3 channels, so the size can be read from the
    # tensor itself (C, H, W) regardless of whether the file is grayscale or RGB.
    annot_img = read_image(image_path, mode=torchvision.io.ImageReadMode.RGB)
    h, w = annot_img.shape[-2:]

    rois = list(rois)
    to_pil = torchvision.transforms.ToPILImage()
    if not rois:
        # Nothing to draw; avoid building a malformed (0,)-shaped box tensor.
        return to_pil(annot_img)

    # bounding boxes are xmin, ymin, xmax, ymax in absolute pixels
    abs_roi = [[
        int(roi['xmin'] * w),
        int(roi['ymin'] * h),
        int(roi['xmax'] * w),
        int(roi['ymax'] * h),
    ] for roi in rois]
    colors = [colors_map.get(roi['class'], default_color) for roi in rois]
    box = torch.tensor(abs_roi, dtype=torch.int)

    # draw the outlines only (no fill)
    annot_img = draw_bounding_boxes(
        annot_img, box, width=3,
        colors=colors, fill=False
    )

    return to_pil(annot_img)

In [None]:
class Dataset:
    """A named pair of VOC-format train/test object-detection datasets.

    Attributes are plain instance fields:
      name       -- label used in log messages
      train_data -- dataset loaded from the `train_split` image set
      test_data  -- dataset loaded from the `test_split` image set
    """

    def __init__(self, name, path, train_split='train', test_split='test'):
        """Load both splits from `path` with `ObjectDetector.Dataset.from_voc`.

        Args:
            name: label for this dataset.
            path: location passed straight to `from_voc`.
            train_split: image-set name used for training data.
            test_split: image-set name used for test data.
        """
        self.name = name

        self.train_data = ObjectDetector.Dataset.from_voc(path, splits=train_split)
        print(f'[{name}] Train data size: {len(self.train_data)}')
        self.test_data = ObjectDetector.Dataset.from_voc(path, splits=test_split)
        print(f'[{name}] Test data size: {len(self.test_data)}')

    def show_random_sample(self, split='train'):
        """Return a random image of the chosen split with its ground-truth boxes drawn.

        Args:
            split: 'train' selects the training data; any other value selects
                the test data.

        Raises:
            ValueError: if the selected split contains no rows.
        """
        data = self.train_data if split == 'train' else self.test_data
        if len(data) == 0:
            raise ValueError(f'[{self.name}] split {split!r} is empty')
        # randrange excludes the upper bound; randint(0, len(data)) could return
        # len(data) itself and make iloc raise IndexError.
        row = data.iloc[random.randrange(len(data))]
        return show_bounding_boxes(row['image'], row['rois'])

In [None]:
# Sources of the two benchmark datasets: the locally assembled fruits VOC tree
# and a small motorbike dataset referenced by URL.
fruits_dataset_path = os.path.join('datasets', 'fruits')
tiny_morotbike_url = 'https://autogluon.s3.amazonaws.com/datasets/tiny_motorbike.zip'

# Index 0 is fruits, index 1 is motorbike (which trains on its 'trainval' split).
datasets = [
    Dataset('fruits', fruits_dataset_path),
    Dataset('motorbike', tiny_morotbike_url, train_split='trainval'),
]

In [None]:
# Display a random annotated training image from the first dataset.
datasets[0].show_random_sample(split='train')

In [None]:
# Display a random annotated training image from the second dataset.
datasets[1].show_random_sample(split='train')

# Benchmarking

In [None]:
class AutoGluonObjectDetectionAML:
    """Thin benchmark wrapper around AutoGluon's `ObjectDetector` for one dataset.

    Usage order: `set_dataset` -> `fit` -> `evaluate_test` / `predict_on_random_sample`.
    """

    def set_dataset(self, dataset):
        """Attach the dataset (an object exposing `name`, `train_data`, `test_data`)."""
        self.dataset = dataset

    def fit(self,
            time_budget=60*15,    # seconds, i.e. at most 15 minutes
        ):
        """Train a detector on the attached dataset's training data.

        Args:
            time_budget: value forwarded as `time_limit` to `ObjectDetector.fit`.
        """
        print(
            f"""----------------------------
            start fitting on {self.dataset.name}
            ----------------------------""")
        # Each dataset gets its own output folder under ./outputs.
        self.aml = ObjectDetector(
            path=os.path.join('outputs', self.dataset.name)
        )

        # for demo: small fixed search so the notebook finishes quickly
        hyperparameters = {'epochs': 10, 'batch_size': 8}
        hyperparameter_tune_kwargs = {'num_trials': 5}
        ##########

        self.aml.fit(
            self.dataset.train_data,
            time_limit=time_budget,
            hyperparameters=hyperparameters,
            hyperparameter_tune_kwargs=hyperparameter_tune_kwargs,
            # presets='medium_quality_faster_train',
        )

        print('\nTrain summary:\n----------------------')
        print(self.aml.fit_summary())

    def evaluate_test(self):
        """Evaluate the fitted detector on the test data, print the mAP and return the raw result."""
        test_map = self.aml.evaluate(self.dataset.test_data)
        # The printed score is read from the last entry of the second element of the result.
        print("mAP on `{}` test dataset: {}".format(self.dataset.name, test_map[1][-1]))
        return test_map

    def predict_on_random_sample(self, confidence_threshold=0.7):
        """Run the detector on one random test image and draw the confident detections.

        Args:
            confidence_threshold: only predictions with `predict_score` strictly
                above this value are kept.

        Returns:
            Tuple `(annotated_image, result)` where `result` is the filtered
            prediction table.

        Raises:
            ValueError: if the test data is empty.
        """
        data = self.dataset.test_data
        if len(data) == 0:
            raise ValueError(f'[{self.dataset.name}] test data is empty')
        # randrange excludes the upper bound; randint(0, len(data)) could index
        # one past the end.
        image_path = data.iloc[random.randrange(len(data))]['image']
        result = self.aml.predict(image_path)
        result = result[result['predict_score'] > confidence_threshold]

        # Build new dicts instead of adding 'class' to the dicts stored inside
        # `result`, so the returned table is left untouched.
        predicted_rois = [
            {**row['predict_rois'], 'class': row['predict_class']}
            for _, row in result.iterrows()
        ]

        return show_bounding_boxes(image_path, predicted_rois), result

In [None]:
def _benchmark_for(dataset):
    """Create a benchmark wrapper and attach `dataset` to it."""
    benchmark = AutoGluonObjectDetectionAML()
    benchmark.set_dataset(dataset)
    return benchmark


# One benchmark wrapper per dataset, in the same order as `datasets`.
amls = [_benchmark_for(dataset) for dataset in datasets]

In [None]:
# fitting

# Start from a clean output directory; -f avoids an error on the first run,
# when ./outputs does not exist yet.
!rm -rf outputs

for aml in amls:
    aml.fit()

In [None]:
# Evaluate every fitted benchmark on its test data (each call prints the mAP).
for fitted in amls:
    fitted.evaluate_test()

In [None]:
# Predict on a random test image with the first benchmark, keeping
# detections scored above 0.5, and show the annotated image.
img, result = amls[0].predict_on_random_sample(confidence_threshold=0.5)
display(img)

In [None]:
# Same as above for the second benchmark.
img, result = amls[1].predict_on_random_sample(confidence_threshold=0.5)
display(img)