## Configuration Script

In [None]:
# Mount Google Drive at /content/drive; later cells read the Mask R-CNN
# project and the dataset from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# 安装最新版本Keras
# https://keras.io/
#!pip install keras
#最新版 keras==2.3.0 2020.06.27
# 指定版本安装
#!pip install keras==2.0.8
!pip install keras==2.1.5

%tensorflow_version 1.x
!pip install imgaug==0.2.5
!pip install gast==0.2.2

# 安装 OpenCV
# https://opencv.org/
!apt-get -qq install -y libsm6 libxext6 && pip install -q -U opencv-python
# 安装 Pytorch
# http://pytorch.org/
!pip install --upgrade pip
!pip install folium==0.2.1
!pip install numpy --upgrade
!pip install torch==1.5.1
# 安装 XGBoost
# https://github.com/dmlc/xgboost
!pip install -q xgboost
# 安装 7Zip
!apt-get -qq install -y libarchive-dev && pip install -q -U libarchive
# 安装 GraphViz 和 PyDot
!apt-get -qq install -y graphviz && pip install -q pydot

#import tensorflow
#print(tensorflow.__version__)

!pip install --upgrade pip
!pip install folium==0.2.1
!pip install numpy --upgrade
!pip install torch==1.5.1

import os
#from google.colab import drive
#drive.mount('/content/drive',force_remount=True)
 
# Make the Mask R-CNN checkout on the mounted Drive the working directory so
# that the relative file names used by the two commands below resolve inside it.
path = "/content/drive/My Drive/Colab Notebooks/Mask_RCNN-master"
os.chdir(path)

# Install the packages listed in the checkout's requirements.txt.
!pip install -r requirements.txt

# Run the checkout's setup.py to install the project into the current environment.
!python setup.py install

#path = "/content/drive/My Drive/Colab Notebooks/coco-master/PythonAPI"
#os.chdir(path)

#!make

## Mask R-CNN

In [None]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import json
import datetime
import skimage.draw
import imgaug.augmenters as iaa

# Root directory of the project
# The relative path is resolved against the current working directory.
# NOTE(review): if the working directory is still the Mask_RCNN-master folder
# selected in the configuration cell, this resolves to "/content/drive/My Drive",
# not to the checkout itself - confirm that this is intended.
ROOT_DIR = os.path.abspath("../../") 
#ROOT_DIR = os.path.abspath("/content/drive/My Drive/Colab Notebooks/Mask_RCNN-master")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

%matplotlib inline 

# Directory to dataset
# Absolute path on the mounted Drive; the dataset classes expect one
# sub-directory per subset ("train", "val", "test") inside it.
dataset_dir = os.path.abspath("/content/drive/My Drive/Colab Notebooks/Mask_RCNN-master/samples/footpath/footpath dataset 2.0")

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path to pre-trained weights file (transfer learning)
# DIODE_DEPTH_MODEL_PATH is used by the "diode" weight-initialisation option,
# COCO_MODEL_PATH by the "coco" option.
DIODE_DEPTH_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_diode_depth.h5")
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Download COCO trained weights from Releases if needed
# (only the COCO file is fetched automatically; the DIODE file is not).
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

## Configurations

In [None]:
# Configurations
class FootpathConfig(Config):
    """Configuration for training on the footpath dataset.

    Derives from the base Config class and overrides values specific
    to the footpath dataset.
    """
    # Give the configuration a recognizable name
    NAME = "footpath"

    # Train on 1 GPU and 2 images per GPU.
    # Batch size is 2 (Batch size = GPU_COUNT * IMAGES_PER_GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 2 # use large RAM mode 12GB for two images

    # Number of classes (including background)
    NUM_CLASSES = 1 + 3  # background + 3 classes (footpath, highway, obstacle)

    STEPS_PER_EPOCH = 300

    #VALIDATION_STEPS = 10

    # Skip detections with < 90% confidence
    DETECTION_MIN_CONFIDENCE = 0.9

    IMAGE_CHANNEL_COUNT = 3

    # Image mean (RGB)
    # NOTE(review): neither of these two attributes is read anywhere in this
    # notebook; presumably they are consumed by the mrcnn package - confirm.
    DEPTH_MEAN_PIXEL = np.array([89.6, 149.9, 51.9])
    COLOR_MEAN_PIXEL = np.array([123.7, 116.8, 103.9])

# Instantiate the configuration used for training and print its values.
config = FootpathConfig()
config.display()

## Dataset


In [None]:
"""Color image frames dataset"""
# Dataset 
class _FootpathDatasetBase(utils.Dataset):
    """Loader shared by the colour and the depth footpath datasets.

    Images are registered from the VIA annotation file
    ``via_region_data_<subset>.json`` located in ``<dataset_dir>/<subset>``.
    Instance masks are loaded from ``.npy`` files stored next to the images.
    The two public subclasses differ only in which image file of a frame they
    load, selected through ``_IMAGE_SUFFIX``.
    """

    # Suffix that is replaced in the annotated 'filename' to obtain the name
    # of the image file to load.
    _ANNOTATED_SUFFIX = 'color.png'
    # Suffix of the image files loaded by this dataset (set by subclasses).
    _IMAGE_SUFFIX = 'color.png'
    # Suffix of the mask array file that belongs to an image.
    _MASK_SUFFIX = 'mask.npy'

    def load_footpath(self, dataset_dir, subset):
        """Register the classes and the annotated images of one subset.

        dataset_dir: root directory of the dataset.
        subset: "train", "val" or "test"; selects the sub-directory and the
            annotation file ``via_region_data_<subset>.json`` inside it.

        Only images with at least one region labelled with a known class are
        added. Every added image stores two extra fields:
        ``class_ids`` - ids of the regions whose label is a known class, and
        ``full_class_ids`` - one id per annotated region, in annotation
        order, with -1 for labels that are not a known class.
        """
        # Add classes.
        self.add_class("footpath", 1, "footpath")
        self.add_class("footpath", 2, "highway")
        self.add_class("footpath", 3, "obstacle")

        # Class name -> class id for every class except entry 0 of class_info
        # (presumably the background entry created by utils.Dataset).
        name_to_id = {c["name"]: c["id"] for c in self.class_info[1:]}
        known_names = list(name_to_id)

        # Train, validation or test dataset?
        assert subset in ["train", "val", "test"]
        files_dir = os.path.join(dataset_dir, subset)

        # Note: In VIA 2.0, regions was changed from a dict to a list.
        # VGG Image Annotator (up to version 2.0.10) saves each image in the form:
        # { 'filename': 'footpath_13.png',
        #   'size': 463211,
        #   'regions': [
        #          {
        #           'shape_attributes': {
        #               'name': 'polygon'
        #               'all_points_x': [...],
        #               'all_points_y': [...]},
        #           'region_attributes': {'footpath': 'footpath'}}
        #           ... more regions ...
        #          ],
        #   'file_attributes': {}
        # }
        annotation_path = os.path.join(files_dir, "via_region_data_" + subset + ".json")
        # The context manager closes the annotation file once it is parsed.
        with open(annotation_path) as annotation_file:
            annotations = json.load(annotation_file)

        # The dict keys are not needed, only the per-image records.
        for a in annotations.values():
            # Skip images without any annotated region.
            if not a['regions']:
                continue

            names = [r['region_attributes']['footpath'] for r in a['regions']]
            full_class_ids = [name_to_id[name] if name in known_names else -1
                              for name in names]
            class_ids = [class_id for class_id in full_class_ids if class_id != -1]
            # Skip images in which no region carries a known class label.
            if not class_ids:
                continue

            image_name = a['filename'].replace(self._ANNOTATED_SUFFIX, self._IMAGE_SUFFIX)
            image_path = os.path.join(files_dir, image_name)
            # The image is read here only to obtain its size.
            height, width = skimage.io.imread(image_path).shape[:2]
            self.add_image("footpath", image_id=image_name, path=image_path,
                           width=width, height=height,
                           class_ids=class_ids, full_class_ids=full_class_ids)

    def load_mask(self, image_id):
        """Generate instance masks for an image.

        Returns:
        masks: A float array (allocated with np.zeros) of shape
            [height, width, instance count] with one mask per instance.
        class_ids: a 1D int32 array of class IDs of the instance masks.
        """
        # If not a footpath dataset image, delegate to parent class.
        info = self.image_info[image_id]
        if info["source"] != "footpath":
            return super().load_mask(image_id)

        # The mask file lives next to the image and shares its name up to the suffix.
        mask_file_name = info["id"].replace(self._IMAGE_SUFFIX, self._MASK_SUFFIX)
        mask_file_path = os.path.join(os.path.dirname(info["path"]), mask_file_name)
        original_mask = np.load(mask_file_path)

        # Channel i of the stored mask corresponds to entry i of full_class_ids;
        # keep only the channels whose class id is registered in this dataset.
        full_class_ids = info['full_class_ids']
        registered_ids = {c['id'] for c in self.class_info}
        kept_channels = [i for i, class_id in enumerate(full_class_ids)
                         if class_id in registered_ids]

        mask = np.zeros([info["height"], info["width"], len(kept_channels)])
        for target, source in enumerate(kept_channels):
            mask[:, :, target] = original_mask[:, :, source]
        class_ids = np.array([full_class_ids[i] for i in kept_channels], dtype=np.int32)

        # Return mask, and array of class IDs of each instance.
        return mask, class_ids

    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "footpath":
            return info["path"]
        # Other sources are answered by the parent class.
        return super().image_reference(image_id)


############################################################################################
# Color image frames dataset
class RGB_FootpathDataset(_FootpathDatasetBase):
    """Footpath dataset that loads the image named in the annotation file."""
    _IMAGE_SUFFIX = 'color.png'


############################################################################################
# Depth image frames dataset
class Depth_FootpathDataset(_FootpathDatasetBase):
    """Footpath dataset that loads the 'depth_visualization.png' image of each frame."""
    _IMAGE_SUFFIX = 'depth_visualization.png'


In [None]:
"""Initialize color image frames dataset"""
def _load_footpath_splits(dataset_cls):
    """Return prepared (train, val, test) datasets of type ``dataset_cls``.

    dataset_cls: dataset class providing ``load_footpath(dataset_dir, subset)``
        and ``prepare()``. Images are read from the notebook-level ``dataset_dir``.
    """
    splits = []
    for subset in ("train", "val", "test"):
        dataset = dataset_cls()
        dataset.load_footpath(dataset_dir, subset)
        dataset.prepare()
        splits.append(dataset)
    return tuple(splits)


# Color image frames: training, validation and test datasets
rgb_dataset_train, rgb_dataset_val, rgb_dataset_test = \
    _load_footpath_splits(RGB_FootpathDataset)

######################################################
# Depth image frames: training, validation and test datasets
depth_dataset_train, depth_dataset_val, depth_dataset_test = \
    _load_footpath_splits(Depth_FootpathDataset)

######################################################
# The training, detection and evaluation cells refer to dataset_train,
# dataset_val and dataset_test. Bind those names here so the notebook runs
# top to bottom on a fresh kernel.
# NOTE(review): the colour frames are selected by default because the model is
# initialised from COCO weights; set DATASET_MODALITY to "depth" to use the
# depth frames instead - confirm which modality is intended.
DATASET_MODALITY = "rgb"  # "rgb" or "depth"

if DATASET_MODALITY == "rgb":
    dataset_train, dataset_val, dataset_test = \
        rgb_dataset_train, rgb_dataset_val, rgb_dataset_test
elif DATASET_MODALITY == "depth":
    dataset_train, dataset_val, dataset_test = \
        depth_dataset_train, depth_dataset_val, depth_dataset_test
else:
    raise ValueError("DATASET_MODALITY must be 'rgb' or 'depth', got %r" % (DATASET_MODALITY,))


## Create Model

In [None]:
# Create Model
# Create model in training mode
# The footpath configuration is used, and MODEL_DIR is the directory in which
# logs and trained weights are saved.
model = modellib.MaskRCNN(mode="training", config=config, model_dir=MODEL_DIR)

In [None]:
# Which weights to start with?
init_with = "coco"  # imagenet, coco, diode, or last

# Layers that are skipped when loading pre-trained weights because they
# differ due to the different number of classes.
CLASS_DEPENDENT_LAYERS = ["mrcnn_class_logits", "mrcnn_bbox_fc",
                          "mrcnn_bbox", "mrcnn_mask"]

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Load weights trained on MS COCO, but skip the class-dependent layers.
    # See README for instructions to download the COCO weights
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=CLASS_DEPENDENT_LAYERS)
elif init_with == "diode":
    # Load the weights stored at DIODE_DEPTH_MODEL_PATH, skipping the same layers.
    model.load_weights(DIODE_DEPTH_MODEL_PATH, by_name=True,
                       exclude=CLASS_DEPENDENT_LAYERS)
elif init_with == "last":
    # Load the last model you trained and continue training
    model.load_weights(model.find_last(), by_name=True)
else:
    # Fail loudly instead of silently training without any loaded weights.
    raise ValueError("Unknown init_with value %r; expected 'imagenet', 'coco', "
                     "'diode' or 'last'" % (init_with,))

## Training

Train in two stages:
1. Only the heads. Here we're freezing all the backbone layers and training only the randomly initialized layers (i.e. the ones that we didn't use pre-trained weights from MS COCO). To train only the head layers, pass `layers='heads'` to the `train()` function.

2. Fine-tune all layers. For this simple example it's not necessary, but we're including it to show the process. Simply pass `layers="all"` to train all layers.

In [None]:
# Data augmentation configuration
# Candidate geometric transforms: a shift of up to 20% along each axis, a
# rescale between 0.5x and 1.5x, a rotation of up to 10 degrees either way,
# and a horizontal flip.
_geometric_transforms = [
    iaa.Affine(translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}),
    iaa.Affine(scale=(0.5, 1.5)),
    iaa.Affine(rotate=(-10, 10)),
    iaa.Fliplr(1),
]
# With probability 0.9, apply between one and three of the candidates.
aug = iaa.Sometimes(0.9, iaa.SomeOf((1, 3), _geometric_transforms))


In [None]:
# Training
# Stage 1: train the head branches
# Passing layers="heads" freezes all layers except the head
# layers. You can also pass a regular expression to select
# which layers to train by name pattern.
# dataset_train / dataset_val must hold the prepared training and validation
# datasets; aug is the augmentation pipeline applied during training.
print("Training network heads")
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=50,
            layers='heads',
            augmentation=aug)


In [None]:
# Fine tune all layers
# Passing layers="all" trains all layers. You can also
# pass a regular expression to select which layers to
# train by name pattern.
#
# Mask R-CNN's train() treats `epochs` as the total number of epochs reached,
# not the number of additional epochs. A fixed epochs=10 after the 50-epoch
# heads stage would therefore train nothing, so the target is expressed
# relative to the epochs the model has already completed.
FINE_TUNE_EPOCHS = 10

print("Fine-tuning all layers")
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE / 10,  # lower rate for fine-tuning
            epochs=model.epoch + FINE_TUNE_EPOCHS,
            layers="all")

In [None]:
# Save weights
# Typically not needed because callbacks save after every epoch
# Uncomment to save manually
# model_path = os.path.join(MODEL_DIR, "mask_rcnn_diode_depth_pretrain.h5")
# model.keras_model.save_weights(model_path)

In [None]:
# Tensorboard
# Load the TensorBoard notebook extension and open it on one training run.
# NOTE(review): the log directory is hard-coded to the run
# "footpath20200814T2038"; change it to the run you want to inspect.
%load_ext tensorboard
%tensorboard --logdir '/content/drive/My Drive/logs/footpath20200814T2038'

## Detection

In [None]:
# Detection
class InferenceConfig(FootpathConfig):
    """Footpath configuration for inference: one GPU, one image per GPU."""
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode
# NOTE: this rebinds `model`; from here on it refers to the inference model,
# not to the training model created earlier.
model = modellib.MaskRCNN(mode="inference", config=inference_config, model_dir=MODEL_DIR)

# Get path to saved weights
# Either set a specific path or find last trained weights
# model_path = os.path.join(ROOT_DIR, ".h5 file name here")
model_path = model.find_last()

# Load trained weights
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

In [None]:
# Test on a random image
# Pick one image of the test dataset and load it together with its ground truth.
image_id = random.choice(dataset_test.image_ids)
#image_id = 10
original_image, image_meta, gt_class_id, gt_bbox, gt_mask =\
    modellib.load_image_gt(dataset_test, inference_config, image_id, use_mini_mask=False)

log("original_image", original_image)
log("image_meta", image_meta)
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)

# Display ground truth
# The class names are taken from the same dataset the image was loaded from.
visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id, dataset_test.class_names)


In [None]:
# Prediction(detection) results
# Run the inference model on the image loaded in the previous cell; detect()
# takes a list of images and returns one result per image.
results = model.detect([original_image], verbose=1)

# Display the detected boxes, masks, class ids and scores of that single image.
r = results[0]
visualize.display_instances(original_image, r['rois'], r['masks'], r['class_ids'], dataset_test.class_names, r['scores'])


## Evaluation

In [None]:
# Evaluation
# Compute VOC-Style mAP @ IoU=0.5
# Runs on a random sample of test images. Increase NUM_EVAL_IMAGES for better accuracy.
NUM_EVAL_IMAGES = 20

# Sample without replacement so that no image is counted twice, and never ask
# for more images than the test dataset contains.
image_ids = np.random.choice(dataset_test.image_ids,
                             size=min(NUM_EVAL_IMAGES, len(dataset_test.image_ids)),
                             replace=False)
APs = []
for image_id in image_ids:
    # Load image and ground truth data
    image, image_meta, gt_class_id, gt_bbox, gt_mask =\
        modellib.load_image_gt(dataset_test, inference_config,
                               image_id, use_mini_mask=False)
    # Run object detection
    results = model.detect([image], verbose=0)
    r = results[0]
    # Compute AP
    AP, precisions, recalls, overlaps =\
        utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                         r["rois"], r["class_ids"], r["scores"], r['masks'])
    APs.append(AP)

# Mean of the per-image average precisions.
print("mAP: ", np.mean(APs))