<a href="https://colab.research.google.com/github/96Asch/cv-bicycle-detection/blob/master/CV_Bike_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Bike detection using a Mask RCNN pretrained model
In this notebook we fine-tune a Mask RCNN model, pretrained on the MSCOCO dataset, on our own bike dataset. This dataset has already been pre-generated and is stored on a publicly accessible drive.

You can run the notebook cells in sequence, except for the step that downgrades Keras. More instructions on this are given below.

#Install required packages

Here we clone the Mask RCNN repo and install the requirements from the given requirements.txt file

In [None]:
# Change to the home directory so the repository is cloned there
# (later cells refer to it as ~/Mask_RCNN).
%cd
  
# Fetch the Matterport Mask R-CNN implementation.
!git clone --quiet https://github.com/matterport/Mask_RCNN.git

In [None]:
# Run the installs from inside the cloned repository.
%cd ~/Mask_RCNN

# PyDrive is used by later cells to download files from Google Drive.
!pip install -q PyDrive
# Dependencies listed by the Mask R-CNN repository.
!pip install -r requirements.txt
# Keep h5py below 3.0.0.
# NOTE(review): presumably required so this Keras version can read the .h5 weights — confirm.
!pip install 'h5py<3.0.0'
# Install the mrcnn package itself.
!python setup.py install

#Download and extract dataset
This downloads the COCO_Bikes dataset from Google Drive and extracts it into Mask_RCNN/dataset/

In [None]:
%cd ~/Mask_RCNN


# Google Drive file ID of the zipped COCO_Bikes dataset.
fileId = '14EAGfdRPSuIcrB3nmvXOfjtICFNALnMT'

import os
from zipfile import ZipFile
from shutil import copy
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# All datasets live in ./dataset; the working directory is switched to it so
# the archive is downloaded and extracted in place.
if not os.path.exists('./dataset'):
  os.makedirs('dataset')
os.chdir('dataset')

# Authenticate the Colab user and build a PyDrive client from the resulting
# application-default credentials.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Download and unpack only when the extracted folder is not there yet, so the
# cell can be re-run safely.
if not os.path.exists('./COCO_Bikes'):
  fileName = fileId + '.zip'
  downloaded = drive.CreateFile({'id': fileId})
  downloaded.GetContentFile(fileName)
  # The context manager closes the archive handle before the file is removed.
  with ZipFile(fileName) as ds:
    ds.extractall()
  os.remove(fileName)
  print('Extracted zip file ' + fileName)


# Download the pretrained weights
We fetch the MSCOCO pretrained weights of Mask RCNN


In [None]:
# Download into the home directory; the training cell reads the file from
# /root/mask_rcnn_coco.h5.
%cd ~

# COCO-pretrained Mask R-CNN weights from the repository's v2.0 release.
!wget https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5

#Configure the Config and Dataset 

The Config class handles the configuration of the training.
The Dataset class handles the import of our custom dataset

In [None]:
%cd ~/Mask_RCNN
%tensorflow_version 1.x
import os
import argparse
import tensorflow as tf
import datetime
import warnings

import numpy as np
from pycocotools.coco import COCO
import sys


from mrcnn.config import Config
from mrcnn import utils

sys.path.append(os.path.join("/root/Mask_RCNN", "samples/coco/"))  # To find local version
import coco

class DelftBikesConfig(Config):
    """Training configuration for fine-tuning Mask R-CNN on the bike dataset.

    Only the attributes listed here are overridden; every other setting is
    inherited from ``mrcnn.config.Config``.
    """

    # Name of this configuration.
    NAME = "delft-bikes"

    # A single GPU processing one image at a time.
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1

    # Two classes in total: background + bike.
    NUM_CLASSES = 2

    # Optimisation schedule.
    LEARNING_RATE = 0.001
    STEPS_PER_EPOCH = 100
  

class COCODelftBikes(coco.CocoDataset):
    """COCO-format bike dataset exposing a single foreground class.

    Images and annotations are registered under the ``"coco"`` source, and
    every instance mask returned by :meth:`load_mask` is labelled with
    class ID 1.
    """

    def load(self, dataset_dir: str, subset: str) -> None:
        """Register the class and the images of one subset.

        Args:
            dataset_dir: Directory that contains ``<subset>.json`` and the
                ``<subset>/`` image folder.
            subset: Name of the subset; used both as the annotation file
                stem and as the image folder name.
        """
        image_dir = os.path.join(dataset_dir, subset)
        annotation_file = os.path.join(dataset_dir, f"{subset}.json")

        # Named ``coco_api`` so the imported ``coco`` module is not shadowed.
        coco_api = COCO(annotation_file)
        class_ids = sorted(coco_api.getCatIds())

        # Collect the images of every category.
        image_ids = []
        for class_id in class_ids:
            image_ids.extend(list(coco_api.getImgIds(catIds=[class_id])))
        # Remove duplicates
        image_ids = list(set(image_ids))

        # Only category 1 of the annotation file is registered as a class.
        self.add_class("coco", 1, coco_api.loadCats(1)[0]["name"])

        # Add images together with their non-crowd annotations.
        for i in image_ids:
            self.add_image(
                "coco", image_id=i,
                path=os.path.join(image_dir, coco_api.imgs[i]['file_name']),
                width=coco_api.imgs[i]["width"],
                height=coco_api.imgs[i]["height"],
                annotations=coco_api.loadAnns(coco_api.getAnnIds(
                    imgIds=[i], catIds=class_ids, iscrowd=False)))

    def load_mask(self, image_id):
        """Load instance masks for the given image.

        Converts the stored annotations into one bitmap of shape
        [height, width, instances].

        Args:
            image_id: Index into ``self.image_info``.

        Returns:
            masks: A bool array of shape [height, width, instance count]
                with one mask per instance.
            class_ids: A 1D uint8 array of ones, one entry per instance
                mask (every instance is reported as class 1).
        """
        # If not a COCO image, delegate to parent class.
        image_info = self.image_info[image_id]
        if image_info["source"] != "coco":
            return super(coco.CocoDataset, self).load_mask(image_id)

        height = image_info["height"]
        width = image_info["width"]

        instance_masks = []
        class_ids = []
        # Build mask of shape [height, width, instance_count] and list
        # of class IDs that correspond to each channel of the mask.
        for annotation in image_info["annotations"]:
            class_id = annotation['category_id']
            if not class_id:
                continue
            m = self.annToMask(annotation, height, width)
            # Some objects are so small that they're less than 1 pixel area
            # and end up rounded out. Skip those objects.
            if m.max() < 1:
                continue
            # Is it a crowd? If so, use a negative class ID.
            if annotation['iscrowd']:
                class_id *= -1
                # For crowd masks, annToMask() sometimes returns a mask
                # smaller than the given dimensions. If so, replace it with
                # a full-image mask.
                if m.shape[0] != height or m.shape[1] != width:
                    m = np.ones([height, width], dtype=bool)
            instance_masks.append(m)
            class_ids.append(class_id)

        if not class_ids:
            # Call super class to return an empty mask
            return super(coco.CocoDataset, self).load_mask(image_id)

        # Pack instance masks into an array. The builtin ``bool`` is used
        # because the ``np.bool`` alias is deprecated and removed in newer
        # NumPy releases.
        mask = np.stack(instance_masks, axis=2).astype(bool)
        return mask, np.ones([mask.shape[-1]], dtype=np.uint8)

###  Check if the GPU is running

In [None]:
%tensorflow_version 1.x



import tensorflow as tf
# Use the first GPU when TensorFlow reports it, otherwise fall back to the CPU.
device_name = tf.test.gpu_device_name()
device_name = device_name if device_name == '/device:GPU:0' else '/CPU:0'

print(f'Using: {device_name}')

# Train the model
Here we begin training the model, by first enabling Tensorboard in order to visualize each loss later on.

In [None]:
# Enable the %tensorboard magic used later in the notebook.
%load_ext tensorboard

#### Re-install the correct version of Keras
For some reason, the built-in version of Keras gets reset when using the tensorflow version command. So we install the correct version: 2.2.5.
You must run the install below and then restart the runtime for the version change to take effect. Then re-run the install cell, go back to the config and dataset cell, and continue from there.

In [None]:
# Pin Keras to 2.2.5, then list the installed Keras packages to confirm the version.
!pip install keras==2.2.5
!pip list | grep Keras

We load the pretrained model and begin training using the dataset and config files specified before.

The number of epochs can be changed in the train call below. For our experiment we chose epochs=100; however, since the weights are saved after each epoch, you can stop training early if TensorBoard shows that the training and validation losses are diverging.

In [None]:
import warnings
# Silence the Future/Deprecation warnings so the training log stays readable.
warnings.filterwarnings("ignore", category=FutureWarning) 
warnings.filterwarnings("ignore", category=DeprecationWarning) 
import mrcnn.model as modellib

# Training configuration, output directory for logs/checkpoints, the
# COCO-pretrained weights and the bike dataset location.
config = DelftBikesConfig()
model_save_dir = os.path.join("/content", "exps")
pretrained_model_file = "/root/mask_rcnn_coco.h5"
data_set_dir = "/root/Mask_RCNN/dataset/COCO_Bikes"

# device_name comes from the GPU-check cell.
with tf.device(device_name):
    model = modellib.MaskRCNN(mode='training', config=config, model_dir=model_save_dir)

# Fail before loading if the pretrained weights were not downloaded.
assert os.path.exists(pretrained_model_file)
    
# Load the pretrained weights by layer name, skipping the listed head layers.
# NOTE(review): presumably these are excluded because their shapes depend on
# NUM_CLASSES, which differs from the pretrained model — confirm.
model.load_weights(pretrained_model_file, by_name=True, exclude=[ "mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"])

# Training split.
dataset_train = COCODelftBikes()
dataset_train.load(data_set_dir, 'train')
dataset_train.prepare()

# Validation split.
# NOTE(review): the 'test' subset is used as the validation set here — confirm this is intended.
dataset_val = COCODelftBikes()
dataset_val.load(data_set_dir, 'test')
dataset_val.prepare()

# Fine-tune all layers for 100 epochs at the configured learning rate.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=100,
            layers='all')

# Visualize the losses using TensorBoard
You should edit the path after --logdir so that it points to the folder created by Mask RCNN that contains the tfevents file.

In [None]:
# /content/exps is the model_dir passed to MaskRCNN in the training cell.
%tensorboard --logdir /content/exps

# Download the ValBikes Dataset


In [None]:
%cd ~/Mask_RCNN
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
from zipfile import ZipFile
from shutil import copy

# Google Drive file ID of the zipped ValBikes dataset.
fileId = "1rTPgMfAB9T33utZZ34tIuqaHaZEg0byV"

# All datasets live in ./dataset; the working directory is switched to it so
# the archive is downloaded and extracted in place.
if not os.path.exists('./dataset'):
  os.makedirs('dataset')
os.chdir('dataset')

# Authenticate the Colab user and build a PyDrive client from the resulting
# application-default credentials.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Download and unpack only when the extracted folder is not there yet, so the
# cell can be re-run safely.
if not os.path.exists('./ValBikes'):
  fileName = fileId + '.zip'
  downloaded = drive.CreateFile({'id': fileId})
  downloaded.GetContentFile(fileName)
  # The context manager closes the archive handle before the file is removed.
  with ZipFile(fileName) as ds:
    ds.extractall()
  os.remove(fileName)
  print('Extracted zip file ' + fileName)

# Download the two fine-tuned models
Here we will retrieve the models that were fine-tuned on our augmented dataset, one model was trained with the default LR=0.001 and the other with a LR=0.0001

In [None]:
%cd ~

# Google Drive file IDs of the two fine-tuned weight files.
maskLR0001_id = '1ClTIBdFRNMLb9utSMjOEGh7s9Uci76Y2'
maskLR00001_id = '11VD9mWNN9YK8ZI4Rm2myDLRVAC1EUv_z'

# Authenticate and create the Drive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Fetch each weight file unless it is already present in the working directory.
for weights_name, drive_id in (('maskLR0001.h5', maskLR0001_id),
                               ('maskLR00001.h5', maskLR00001_id)):
  if os.path.exists('./' + weights_name):
    continue
  downloaded = drive.CreateFile({'id': drive_id})
  downloaded.GetContentFile(weights_name)

# Run tests on the three models
For these tests, we will calculate the mean average precision (mAP) and the mean intersection over union (mIOU). Luckily, Mask RCNN already provides us with these metrics in the utils file.

In [None]:
class InferenceConfigAllClass(coco.CocoConfig):
    """Inference settings for the model that predicts all 81 classes.

    Overrides only the GPU/batch settings and the class count of
    ``coco.CocoConfig``; one image is processed per call on a single GPU.
    """

    NUM_CLASSES = 81
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1

class InferenceConfigBikeClass(coco.CocoConfig):
    """Inference settings for the two-class models.

    Overrides only the GPU/batch settings and the class count of
    ``coco.CocoConfig``; one image is processed per call on a single GPU.
    """

    NUM_CLASSES = 2
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1


class ValBikes(coco.CocoDataset):
    """COCO-format validation bike dataset exposing a single foreground class.

    Images and annotations are registered under the ``"coco"`` source, and
    every instance mask returned by :meth:`load_mask` is labelled with
    class ID 1.
    """

    def load(self, dataset_dir: str) -> None:
        """Register the class and the images of the validation set.

        Args:
            dataset_dir: Directory that contains ``valbikes.json`` and the
                ``val/`` image folder.
        """
        image_dir = os.path.join(dataset_dir, "val")
        annotation_file = os.path.join(dataset_dir, "valbikes.json")

        # Named ``coco_api`` so the imported ``coco`` module is not shadowed.
        coco_api = COCO(annotation_file)
        class_ids = sorted(coco_api.getCatIds())

        # Collect the images of every category.
        image_ids = []
        for class_id in class_ids:
            image_ids.extend(list(coco_api.getImgIds(catIds=[class_id])))
        # Remove duplicates
        image_ids = list(set(image_ids))

        # Only category 1 of the annotation file is registered as a class.
        self.add_class("coco", 1, coco_api.loadCats(1)[0]["name"])

        # Add images together with their annotations (crowd and non-crowd).
        for i in image_ids:
            self.add_image(
                "coco", image_id=i,
                path=os.path.join(image_dir, coco_api.imgs[i]['file_name']),
                width=coco_api.imgs[i]["width"],
                height=coco_api.imgs[i]["height"],
                annotations=coco_api.loadAnns(coco_api.getAnnIds(
                    imgIds=[i], catIds=class_ids, iscrowd=None)))

    def load_mask(self, image_id):
        """Load instance masks for the given image.

        Converts the stored annotations into one bitmap of shape
        [height, width, instances].

        Args:
            image_id: Index into ``self.image_info``.

        Returns:
            masks: A bool array of shape [height, width, instance count]
                with one mask per instance.
            class_ids: A 1D uint8 array of ones, one entry per instance
                mask (every instance is reported as class 1).
        """
        # If not a COCO image, delegate to parent class.
        image_info = self.image_info[image_id]
        if image_info["source"] != "coco":
            return super(coco.CocoDataset, self).load_mask(image_id)

        height = image_info["height"]
        width = image_info["width"]

        instance_masks = []
        class_ids = []
        # Build mask of shape [height, width, instance_count] and list
        # of class IDs that correspond to each channel of the mask.
        for annotation in image_info["annotations"]:
            class_id = annotation['category_id']
            if not class_id:
                continue
            m = self.annToMask(annotation, height, width)
            # Some objects are so small that they're less than 1 pixel area
            # and end up rounded out. Skip those objects.
            if m.max() < 1:
                continue
            # Is it a crowd? If so, use a negative class ID.
            if annotation['iscrowd']:
                class_id *= -1
                # For crowd masks, annToMask() sometimes returns a mask
                # smaller than the given dimensions. If so, replace it with
                # a full-image mask.
                if m.shape[0] != height or m.shape[1] != width:
                    m = np.ones([height, width], dtype=bool)
            instance_masks.append(m)
            class_ids.append(class_id)

        if not class_ids:
            # Call super class to return an empty mask
            print("empty mask")
            return super(coco.CocoDataset, self).load_mask(image_id)

        # Pack instance masks into an array. The builtin ``bool`` is used
        # because the ``np.bool`` alias is deprecated and removed in newer
        # NumPy releases.
        mask = np.stack(instance_masks, axis=2).astype(bool)
        return mask, np.ones([mask.shape[-1]], dtype=np.uint8)

In [None]:
%cd ~/Mask_RCNN
%tensorflow_version 1.x
import os
import tensorflow as tf
import warnings
import mrcnn.model as modellib
import numpy as np
from pycocotools.coco import COCO

# Directory handed to MaskRCNN as model_dir, and the three weight files
# downloaded by earlier cells.
model_save_dir = os.path.join('/content', 'model')
baseline_model_file = "/root/mask_rcnn_coco.h5"
lr0001_model_file = "/root/maskLR0001.h5"
lr00001_model_file = "/root/maskLR00001.h5"

# The baseline predicts all 81 classes; the fine-tuned models predict 2.
allClassConfig = InferenceConfigAllClass()
bikeClassConfig = InferenceConfigBikeClass()

# device_name comes from the GPU-check cell.
with tf.device(device_name):
    baseline = modellib.MaskRCNN(mode='inference', config=allClassConfig, model_dir=model_save_dir)
    model_LR0001 = modellib.MaskRCNN(mode='inference', config=bikeClassConfig, model_dir=model_save_dir)
    model_LR00001 = modellib.MaskRCNN(mode='inference', config=bikeClassConfig, model_dir=model_save_dir)

# Weights are matched to layers by name.
baseline.load_weights(baseline_model_file, by_name=True)
model_LR0001.load_weights(lr0001_model_file, by_name=True)
model_LR00001.load_weights(lr00001_model_file, by_name=True)    


In [None]:


def get_ax(rows=1, cols=1, size=8):
    """Create the Matplotlib Axes (or Axes array) used for visualizations.

    Having a single helper gives one place to control figure sizes across
    the notebook: each grid cell is ``size`` x ``size``, so the figure
    measures ``size * cols`` by ``size * rows``.

    NOTE(review): this relies on ``plt`` (presumably ``matplotlib.pyplot``)
    being in scope; no import of it is visible in this notebook — confirm.
    """
    figure_size = (size * cols, size * rows)
    axes = plt.subplots(rows, cols, figsize=figure_size)[1]
    return axes

# Display names for the COCO classes, with 'BG' (background) at index 0.
# NOTE(review): presumably ordered by the class IDs the COCO-pretrained model
# predicts (1 = person, 2 = bicycle, ...) — confirm.
coco_class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
               'bus', 'train', 'truck', 'boat', 'traffic light',
               'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
               'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
               'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
               'kite', 'baseball bat', 'baseball glove', 'skateboard',
               'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
               'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
               'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
               'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
               'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
               'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
               'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
               'teddy bear', 'hair drier', 'toothbrush']



In [None]:
from mrcnn import utils

# Location of the extracted ValBikes dataset.
data_set_dir = os.path.join("/root", "Mask_RCNN", "dataset", "ValBikes")

# Our own validation set.
valbikes = ValBikes()
valbikes.load(data_set_dir)
valbikes.prepare()

# COCO "minival" subset restricted to class ID 2.
# NOTE(review): auto_download is off and no cell in this notebook downloads
# dataset/COCO — it presumably has to be provided manually; confirm.
coco_dataset = coco.CocoDataset()
coco_dataset.load_coco(os.path.join("/root", "Mask_RCNN", "dataset", "COCO"), "minival", class_ids=[2], auto_download=False)
coco_dataset.prepare()

##Calculate the APs and IOUs
To change the test dataset, choose one of the datasets above to run evaluation

In [None]:
from tqdm import tqdm
from mrcnn import visualize 

# Choose either coco_dataset or valbikes to run evaluation
dataset = coco_dataset
config = InferenceConfigAllClass()

# Per-image AP values and detection/ground-truth overlap matrices, one list
# pair per model.
baseline_APs = []
baseline_IOUs = []

m1_APs = []
m1_IOUs = []

m2_APs = []
m2_IOUs = []

# Class ID of 'bicycle' in the 81-class baseline model, and the label the
# original code maps it to before comparing with the ground truth.
COCO_BICYCLE_ID = 2
BIKE_CLASS_ID = 1


def eval_model(model, image, is_bike_model=False):
  """Run detection on one image and score it against the ground truth.

  The ground truth is read from the module-level ``gt_bbox``,
  ``gt_class_id`` and ``gt_mask`` variables, which the evaluation loop sets
  for the current image before calling this function.

  Args:
    model: Mask R-CNN model in inference mode.
    image: The (already resized) image returned by ``load_image_gt``.
    is_bike_model: True for the 2-class fine-tuned models, whose class IDs
      are used as-is. False for the 81-class baseline, whose detections
      are reduced to bicycles and relabelled.

  Returns:
    AP: Average precision at IoU threshold 0.5.
    IOU: Overlap matrix between the kept detection boxes and ``gt_bbox``.
  """
  r = model.detect([image], verbose=0)[0]

  rois = r["rois"]
  class_ids = r["class_ids"]
  scores = r["scores"]
  masks = r["masks"]

  if not is_bike_model:
    # Keep only the baseline's bicycle detections and relabel them as bikes.
    # Relabelling 2 -> 1 without filtering would let 'person' detections
    # (COCO ID 1) count as bikes and would score every other class as a
    # false positive.
    keep = np.asarray(class_ids) == COCO_BICYCLE_ID
    rois = rois[keep]
    scores = scores[keep]
    masks = masks[:, :, keep]
    class_ids = np.full(int(keep.sum()), BIKE_CLASS_ID, dtype=np.int32)

  AP, _, _, _ = utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                                 rois, class_ids, scores, masks,
                                 iou_threshold=0.5)
  IOU = utils.compute_overlaps(rois, gt_bbox)
  return AP, IOU


for image_id in tqdm(dataset.image_ids):

  # Load image and ground truth data (eval_model reads the gt_* variables)
  image, _, gt_class_id, gt_bbox, gt_mask = \
      modellib.load_image_gt(dataset, config, image_id)

  b_AP, b_iou = eval_model(baseline, image)
  baseline_APs.append(b_AP)
  baseline_IOUs.append(b_iou)

  m1_AP, m1_iou = eval_model(model_LR0001, image, True)
  m1_APs.append(m1_AP)
  m1_IOUs.append(m1_iou)

  m2_AP, m2_iou = eval_model(model_LR00001, image, True)
  m2_APs.append(m2_AP)
  m2_IOUs.append(m2_iou)

print("Done")

Here we take the IoUs calculated in the last step and replace every empty IoU (where the detector could not find a bicycle) with 0. We then take the mean of the per-image mean IoUs to get our mIoU metric.

In [None]:
def iou_mean(IOUs):
  """Return the mean over images of each image's mean IoU.

  Args:
    IOUs: Sequence of NumPy arrays, one per image. An empty array
      contributes a value of 0.

  Returns:
    The average of the per-image values.
  """
  per_image = []
  for overlaps in IOUs:
    if overlaps.size:
      per_image.append(np.mean(overlaps))
    else:
      # Nothing to average for this image: count it as zero overlap.
      per_image.append(0)
  return np.mean(per_image)

# mIoU of the baseline and of the two fine-tuned models.
b_ious = iou_mean(baseline_IOUs)
m1_ious = iou_mean(m1_IOUs)
m2_ious = iou_mean(m2_IOUs)

In [None]:

import csv

# Column names of the metrics table.
header = ['Model', 'mAP', "mIOU"]

# newline='' lets the csv writer control line endings itself, as the csv
# module requires; without it some platforms emit blank rows.
with open('/content/val_metrics.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)

    # write the header
    writer.writerow(header)

    # write the data: one row per model; nanmean ignores images whose AP is NaN
    writer.writerow(['Baseline', np.nanmean(baseline_APs), b_ious])
    writer.writerow(['DelftBikes (LR=0.001, ep=100)', np.nanmean(m1_APs), m1_ious])
    writer.writerow(['DelftBikes (LR=0.0001, ep=100)', np.nanmean(m2_APs), m2_ious])


#Visual Examples

In [None]:
# Load image 10 of the ValBikes set together with its ground truth.
image, _, gt_class_id, gt_bbox, gt_mask = \
    modellib.load_image_gt(valbikes, config, 10)

# Detect with the model fine-tuned at LR=0.001.
r = model_LR0001.detect([image], verbose=1)[0]

# model_LR0001 is a 2-class model, so its class IDs index the dataset's own
# class names (filled in by valbikes.prepare()). Indexing the 81-entry COCO
# list would caption class 1 as 'person'.
visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                            valbikes.class_names, r['scores'])