### Extract the data

In [None]:
from IPython.display import display,clear_output

In [None]:
import glob
import json
import math

import os
import random
import time
import zipfile

import cv2
import gluoncv
import matplotlib.pyplot as plt
import mxnet as mx
import numpy as np

In [None]:
# Notebook-wide Matplotlib defaults: large square figures and a larger font.
plt.rcParams.update({
    'figure.figsize': (15, 15),
    'font.size': 15,
})

# Import the images

In [None]:
# Folder holding the source .jpg pictures; also used to build the resize_meta keys.
data_dir = 'DataSet/'

In [None]:
# os.path.join keeps the pattern valid whether or not data_dir ends with a
# separator, and sorting makes the file order reproducible between runs
# (glob returns files in arbitrary, filesystem-dependent order).
train_images = sorted(glob.glob(os.path.join(data_dir, "*.jpg")))

In [None]:
# Sanity check: number of .jpg files found in data_dir.
print("We have {} images".format(len(train_images)))

### Resize the images

In [None]:
from PIL import Image


def resize(picfile, small_edge=512):
    """Rescale an image so that its shorter edge is ``small_edge`` pixels and save the copy.

    The aspect ratio is preserved. The image is always rescaled, so a picture
    whose shorter edge is below ``small_edge`` is enlarged. The result is
    written to ``ResizedPics/<basename of picfile>``; the source file is left
    untouched.

    Parameters
    ----------
    picfile : str
        Path of the image to resize.
    small_edge : int
        Target length, in pixels, of the shorter edge. Default 512.

    Returns
    -------
    float
        ``min(width, height) / small_edge`` for the source image. Dividing a
        pixel coordinate of the source image by this value maps it onto the
        resized image.
    """
    output_dir = 'ResizedPics'
    # Image.save does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    # The context manager releases the file handle held by Image.open.
    with Image.open(picfile) as im:
        width, height = im.size
        smallest = min(width, height)

        ratio = smallest / small_edge
        print('pic ' + picfile + ': applying ratio of ' + str(ratio))

        # Multiply before dividing and round, so float truncation cannot
        # leave the shorter edge one pixel short of small_edge.
        new_width = round(width * small_edge / smallest)
        new_height = round(height * small_edge / smallest)
        print(new_width, new_height)

        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        im2 = im.resize((new_width, new_height), Image.LANCZOS)
        im2.save(os.path.join(output_dir, os.path.basename(picfile)))

    return ratio

In [None]:
%%time

# Resize every training picture and record the ratio applied to each one,
# keyed by the path of the original file.
resize_meta = {
    pic: resize(pic)
    for pic in train_images
    if pic.lower().endswith('.jpg')
}

Let's see what they look like

In [None]:
# Preview the first n_images pictures on a grid.
n_images = 12
cols = int(math.sqrt(n_images)) * 2
# add_subplot needs integer grid dimensions; np.ceil returns a float, which
# recent Matplotlib releases reject.
rows = math.ceil(n_images / cols)
fig = plt.figure(figsize=(20, 5))
for n, image_path in enumerate(train_images[:n_images]):
    ax = fig.add_subplot(rows, cols, n + 1)
    ax.imshow(plt.imread(image_path))
    ax.axis('off')
fig.subplots_adjust(wspace=0.06, hspace=0.06)
plt.show()

### Bounding boxes

## Fine-tuning network for Trash detection

Now that we have explored the dataset, let's get to work to be able to fine-tune our object detection model on this novel dataset

### Create a Gluon Dataset 

We need to handle the data loading so that we can feed our network the images and the targets during training.

We inherit from the base `Dataset` class from Gluon and create our own custom dataset that returns our images together with their bounding box targets. We do an 80%, 15%, 5% split for training, validation and testing data.

In [None]:
import gluoncv as gcv
from gluoncv.utils import viz

from mxnet import gluon, nd, autograd

In [None]:
# Inspect the resize ratio recorded for each picture (keyed by original file path).
resize_meta

In [None]:
class GTDataset(gluon.data.Dataset):
    """
    Custom Dataset serving the images listed in ``output.manifest`` together
    with their bounding boxes.

    Every non-empty manifest line is parsed as one JSON record. Records whose
    ``['Unknown']['annotations']`` list is empty are dropped. The remaining
    records are split in file order: first 80% 'train', next 15% 'val',
    last 5% 'test'.
    """
    def __init__(self, split='train', data_path=data_dir):
        """
        Parameters
        ---------
        split: str, Which dataset split to request ('train', 'val' or 'test'), default 'train'
        data_path: str, Folder of the original images, used to build the
            ``resize_meta`` lookup key, default ``data_dir``

        """
        # Validate the argument before doing any file I/O.
        assert split in ['train', 'test', 'val']

        # Honour the argument (the global data_dir used to be stored instead).
        self.data_path = data_path
        self.image_info = []
        with open(os.path.join('.', 'output.manifest'), errors='ignore') as f:
            for line in f:
                # strip() rather than line[:-1]: the last line may have no
                # trailing newline, and blank lines must not reach json.loads.
                line = line.strip()
                if not line:
                    continue
                info = json.loads(line)
                if len(info['Unknown']['annotations']):
                    self.image_info.append(info)

        l = len(self.image_info)
        if split == 'train':
            self.image_info = self.image_info[:int(0.8*l)]
        elif split == 'val':
            self.image_info = self.image_info[int(0.8*l):int(0.95*l)]
        elif split == 'test':
            self.image_info = self.image_info[int(0.95*l):]

    def __getitem__(self, idx):
        """
        Parameters
        ---------
        idx: int, index requested

        Returns
        -------
        image: nd.NDArray
            The resized image read from the 'ResizedPics' folder
        label: np.ndarray bounding box labels of the form [[x1,y1, x2, y2, class], ...],
            with coordinates rescaled to the resized image
        """
        info = self.image_info[idx]
        imagename = info['source-ref'].split('/')[-1]
        image = mx.image.imread(os.path.join('ResizedPics', imagename))

        # Ratio recorded by resize() for this picture; the annotations are in
        # original-image pixels, so dividing by it maps them onto the resized image.
        ratio = resize_meta[os.path.join(self.data_path, imagename)]
        label = [
            [int(box['left'] / ratio),
             int(box['top'] / ratio),
             int((box['left'] + box['width']) / ratio),
             int((box['top'] + box['height']) / ratio),
             box['class_id']]
            for box in info['Unknown']['annotations']
        ]

        return image, np.array(label)

    def __len__(self):
        """Number of annotated images in this split."""
        return len(self.image_info)

We have only one class for now: the "palette" class

In [None]:
# Class names handed to the network, the mAP metric and the plotting helpers.
classes=["palette"]

We get the dataset for each of the splits. We will use the training split for training our model, the validation split to monitor our training for overfitting, and the testing split for the final qualitative evaluation

In [None]:
# First 80% of the annotated manifest records.
train_dataset = GTDataset(split='train')

In [None]:
# Next 15% of the annotated manifest records.
validation_dataset = GTDataset(split='val')

In [None]:
# Last 5% of the annotated manifest records.
test_dataset = GTDataset(split='test')

In [None]:
# Show the label array returned for the first training sample.
print("Example of bounding box label data [[x1,y1, x2, y2, class], ...] : {}".format(train_dataset[0][1]))

In [None]:
# Report the size of each split.
print("There is {} training images, {} validation images, {} testing images".format(len(train_dataset), len(validation_dataset), len(test_dataset)))

Because we now respect the same format as all other object detection datasets in GluonCV, we can take advantage of the visualization functions!

In [None]:
# Draw one randomly chosen test image with its ground-truth boxes.
sample_idx = random.randint(0, len(test_dataset) - 1)
image, label = test_dataset[sample_idx]
ax = viz.plot_bbox(
    image,
    bboxes=label[:, :4],
    labels=label[:, 4:5],
    class_names=classes,
)
plt.show()

# SSD:  Single Shot multibox Detector

SSD is a tried and tested model that gives us a good baseline for object detection. It is simple conceptually and fast and stable during training, that's why we pick it.
Refer to this [graph](https://gluon-cv.mxnet.io/model_zoo/detection.html) for a complete comparison of object detectors on accuracy / speed / memory consumption.

![](https://cdn-images-1.medium.com/max/1200/1*pPxrkm4Urz04Ez65mwWE9Q.png)

*source: [SSD: Single Shot MultiBox Detector](https://arxiv.org/abs/1512.02325), Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, Alexander C. Berg, 2015*

We import some useful functions from GluonCV:
- The SSD default transforms for training (a lot of data augmentation) and the validation transform for resizing and normalization
- The VOC07MApMetric to track the quality of the detection

In [None]:
from gluoncv.data.batchify import Tuple, Stack, Pad
from gluoncv.data.transforms.presets.ssd import SSDDefaultTrainTransform, SSDDefaultValTransform
from gluoncv.utils.metrics.voc_detection import VOC07MApMetric

In [None]:
# Number of images per batch in the training and validation loaders.
batch_size = 24
# Side length (pixels) given to the SSD transforms and to load_test.
image_size = 512
# Worker count passed to the DataLoaders.
num_workers = 8
# Number of passes over the training data.
num_epochs = 100
# Use the first GPU when MXNet reports at least one, otherwise the CPU.
ctx = [mx.gpu(0)] if mx.context.num_gpus() > 0 else [mx.cpu()]

In [None]:
# Show which compute context was selected.
ctx

In [None]:
# Record the GluonCV version in the notebook output.
gluoncv.__version__

### Network 

In [None]:
# Load the 'ssd_512_resnet50_v1_coco' model with pretrained weights from the
# GluonCV model zoo, then reset its output classes to our own class list.
net = gcv.model_zoo.get_model('ssd_512_resnet50_v1_coco', pretrained=True)
net.reset_class(classes)

We can generate the targets (the differences between the anchor boxes and the ground-truth bounding boxes) ahead of time; for that we need the anchor boxes

**Training data iterator**

In [None]:
# One dummy forward pass in training mode yields the anchor boxes, which the
# training transform takes as its third argument.
with autograd.train_mode():
    dummy_input = mx.nd.zeros((1, 3, image_size, image_size))
    _, _, anchors = net(dummy_input)
train_transform = SSDDefaultTrainTransform(image_size, image_size, anchors)
# Stack image, cls_targets and box_targets into batch arrays.
batchify_fn = Tuple(Stack(), Stack(), Stack())
train_data = gluon.data.DataLoader(
    train_dataset.transform(train_transform),
    batch_size=batch_size,
    shuffle=True,
    batchify_fn=batchify_fn,
    last_batch='rollover',
    num_workers=num_workers,
)

**Validation data iterator**

In [None]:
val_transform = SSDDefaultValTransform(image_size, image_size)
# Images are stacked; label arrays are padded with -1 before batching.
batchify_fn = Tuple(Stack(), Pad(pad_val=-1))
val_data = gluon.data.DataLoader(
    validation_dataset.transform(val_transform),
    batch_size=batch_size,
    shuffle=False,
    batchify_fn=batchify_fn,
    last_batch='keep',
    num_workers=num_workers,
)

### Training

We set a learning rate schedule that multiplies the learning rate by 0.15 after 50, 70, 80 and 90 epochs.

In [None]:
# Epochs after which the learning rate is multiplied by `factor`.
steps_epochs = [50, 70, 80, 90]
iterations_per_epoch = math.ceil(len(train_dataset) / batch_size)
# The scheduler counts iterations, so convert the epoch milestones.
steps_iterations = [
    epoch_milestone * iterations_per_epoch
    for epoch_milestone in steps_epochs
]
print("Learning rate drops after iterations: {}".format(steps_iterations))
schedule = mx.lr_scheduler.MultiFactorScheduler(
    step=steps_iterations,
    factor=0.15,
)

We move the network to the right compute context and set the trainer with the right optimizer and learning rate schedule

In [None]:
# Move the parameters to the selected context(s) before creating the trainer.
net.collect_params().reset_ctx(ctx)
optimizer_params = {
    'learning_rate': 0.0001,
    'wd': 0.0004,
    'momentum': 0.9,
    'lr_scheduler': schedule,
}
trainer = gluon.Trainer(net.collect_params(), 'sgd', optimizer_params)

The object detection task combines losses for box localization and class detection

In [None]:
# Combined SSD loss; the training loop unpacks its result as (sum, class, box) losses.
mbox_loss = gcv.loss.SSDMultiBoxLoss()
# Running averages of the two loss components, reset at the start of every epoch.
ce_metric = mx.metric.Loss('CrossEntropy')
smoothl1_metric = mx.metric.Loss('SmoothL1')

Measure the mean average precision, with the IoU (intersection over union) threshold of 0.5

In [None]:
def validate(net, val_data, ctx, classes, size):
    """
    Compute the VOC07 mAP (IoU threshold 0.5) of the network on the validation data.

    Parameters
    ----------
    net: detection network; calling it on an image batch must return
        (class ids, scores, bounding boxes)
    val_data: iterable of batches whose first element is the image batch and
        whose second element is the label batch; the last label axis holds
        [x1, y1, x2, y2, class]
    ctx: list of contexts the batches are split across
    classes: list of class names given to the metric
    size: unused; kept so existing callers keep working

    Returns
    -------
    The result of ``VOC07MApMetric.get()``.

    Note: this sets the network's NMS threshold to 0.2 as a side effect.
    """
    metric = VOC07MApMetric(iou_thresh=0.5, class_names=classes)
    net.set_nms(0.2)
    for batch in val_data:
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        det_bboxes, det_ids, det_scores = [], [], []
        gt_bboxes, gt_ids = [], []

        for x, y in zip(data, label):
            ids, scores, bboxes = net(x)
            det_ids.append(ids)
            det_scores.append(scores)
            # Clip the predicted boxes to the image extent.
            det_bboxes.append(bboxes.clip(0, batch[0].shape[2]))
            gt_ids.append(y.slice_axis(axis=-1, begin=4, end=5))
            gt_bboxes.append(y.slice_axis(axis=-1, begin=0, end=4))

        # Update once per batch, after every device slice has been collected,
        # and pass the full gt_ids list. Updating inside the loop with
        # gt_ids[0] re-counted earlier slices and only ever used the first
        # device's labels when more than one context was in use.
        metric.update(det_bboxes, det_ids, det_scores, gt_bboxes, gt_ids, None)
    return metric.get()

**main training loop**

In [None]:
%%time

# Per-epoch validation mAP history, used to draw the progress curve.
TabHistorymAP = []
TabEpoch = []

best_val = 0
for epoch in range(num_epochs):
    net.hybridize(static_alloc=True, static_shape=True)
    ce_metric.reset()
    smoothl1_metric.reset()
    tic = time.time()

    for batch in train_data:
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        cls_targets = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        box_targets = gluon.utils.split_and_load(batch[2], ctx_list=ctx, batch_axis=0)

        with autograd.record():
            cls_preds, box_preds = [], []
            for x in data:
                cls_pred, box_pred, _ = net(x)
                cls_preds.append(cls_pred)
                box_preds.append(box_pred)
            sum_loss, cls_loss, box_loss = mbox_loss(cls_preds, box_preds, cls_targets, box_targets)
            autograd.backward(sum_loss)

        trainer.step(1)
        ce_metric.update(0, [l * batch_size for l in cls_loss])
        smoothl1_metric.update(0, [l * batch_size for l in box_loss])

    name, val = validate(net, val_data, ctx, classes, image_size)

    # The metric reports one AP per class followed by their mean, so the mAP
    # is the last entry. With a single class it equals val[0]; reading
    # val[0] would be wrong as soon as a second class is added.
    meanAP = val[-1]

    # Store the per-epoch results.
    TabHistorymAP.append(meanAP)
    TabEpoch.append(epoch)

    # Redraw the mAP curve in place of the previous output.
    clear_output(wait=True)
    plt.plot(TabEpoch, TabHistorymAP, color='black', label='mAP')
    plt.xlabel('epoch')
    plt.ylabel('mAP')
    plt.legend()
    plt.show()

    print('[Epoch {}] Training cost: {:.3f}, Learning rate {}, mAP={:.3f}'.format(epoch, (time.time()-tic), trainer.learning_rate, meanAP))

    # If the validation mAP improves, save the parameters.
    if meanAP > best_val:
        net.save_parameters('ssd_resnet.palette.params')
        best_val = meanAP
        print("Saving the parameters, best mAP {}".format(best_val))


## Testing

Let's test our model on the set aside testing images!

In [None]:
# Restore the best checkpoint written by the training loop.
net.load_parameters('ssd_resnet.palette.params', ctx=ctx)

In [None]:
# Set NMS to 0.45 for the test predictions (validate() sets it to 0.2).
net.set_nms(0.45)

In [None]:
# Run the detector on every test picture and draw the detections that score
# at least 0.6.
load_test = gcv.data.transforms.presets.ssd.load_test
for info in test_dataset.image_info:
    image_name = info['source-ref'].split('/')[-1]
    test_path = os.path.join(data_dir, image_name)
    x, image = load_test(test_path, image_size)
    cid, score, bbox = net(x.as_in_context(ctx[0]))
    ax = viz.plot_bbox(
        image, bbox[0], score[0], cid[0],
        class_names=classes, thresh=0.6,
    )
    plt.axis('off')
    plt.show()