# Mask R-CNN - Train on Shapes Dataset


This notebook shows how to train Mask R-CNN on your own dataset. To keep things simple we use a synthetic dataset of shapes (squares, triangles, and circles) which enables fast training. You'd still need a GPU, though, because the network backbone is a ResNet101, which would be too slow to train on a CPU. On a GPU, you can start to get okay-ish results in a few minutes, and good results in less than an hour.

The code of the *Shapes* dataset is included below. It generates images on the fly, so it doesn't require downloading any data. And it can generate images of any size, so we pick a small image size to train faster. 

In [1]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt

# Root directory of the project
ROOT_DIR = os.path.abspath("../../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

%matplotlib inline 

# Training logs and model checkpoints are written below this directory
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# The COCO-trained weights file is kept in the project root; when it is not
# there yet, fetch it from the project's Releases
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


## Configurations

In [2]:
class ShapesConfig(Config):
    """Configuration for training on the single-class, grayscale dataset.
    Derives from the base Config class and overrides the values specific
    to this dataset.
    """
    # Give the configuration a recognizable name
    NAME = "shapes"

    # Train on 1 GPU with 1 image per GPU, so the batch size is 1
    # (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Number of classes (including background)
    NUM_CLASSES = 1 + 1  # background + 1 foreground class

    # Use small images for faster training. With both limits set to 128 the
    # resulting IMAGE_SHAPE is [128 128 1].
    # NOTE(review): 'crop' presumably feeds a 128x128 crop of each image
    # rather than a downscaled copy -- confirm against the mrcnn Config docs.
    IMAGE_RESIZE_MODE='crop'
    IMAGE_MIN_DIM = 128
    IMAGE_MAX_DIM = 128
    
    # Single-channel (grayscale) input, with one mean value for that channel
    IMAGE_CHANNEL_COUNT= 1
    MEAN_PIXEL= np.array([123.7])
    
    # Use smaller anchors because our image and objects are small
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)  # anchor side in pixels

    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 32

    # Use a small epoch since the data is simple
    STEPS_PER_EPOCH = 100

    # use small validation steps since the epoch is small
    VALIDATION_STEPS = 5
    
# Instantiate the configuration and print every setting
config = ShapesConfig()
config.display()


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_CHANNEL_COUNT            1
IMAGE_MAX_DIM                  128
IMAGE_META_SIZE                14
IMAGE_MIN_DIM                  128
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              crop
IMAGE_SHAPE                    [128 128   1]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE               

## Notebook Preferences

In [3]:
def get_ax(rows=1, cols=1, size=8):
    """Create a grid of Matplotlib Axes for the notebook's visualizations.

    Every plot in the notebook obtains its Axes here, so figure sizes are
    controlled from a single place.

    rows, cols: layout of the subplot grid.
    size: side length given to each grid cell; the figure is created with
        figsize (size * cols, size * rows). Change the default to control
        the size of rendered images.

    Returns the Axes object(s) produced by plt.subplots().
    """
    figure_size = (size * cols, size * rows)
    _figure, axes = plt.subplots(rows, cols, figsize=figure_size)
    return axes

## Dataset

Create a synthetic dataset

Extend the Dataset class and add a method to load the shapes dataset, `load_shapes()`, and override the following methods:

* load_image()
* load_mask()
* image_reference()

In [None]:
import datetime
import glob
import json
import os
import sys

import cv2
import numpy as np
import skimage.color
import skimage.draw
import skimage.io

from mrcnn.config import Config
from mrcnn import model as modellib, utils

# Directory to save logs and model checkpoints
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")

MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Root of the dataset. Layout read by ShapesDataset:
#   <DATASET_DIR>/<subset>/images/<name>.png|.jpg
#   <DATASET_DIR>/<subset>/masks/<name>/*.png
# Set the BUBBLES_DATASET_DIR environment variable to point the notebook at a
# different location without editing this cell; the path below is only the
# default.
DATASET_DIR = os.path.abspath(
    os.environ.get("BUBBLES_DATASET_DIR", 'C:/Programming/Bubbles/dataset_manga'))
print("Dataset directory:", DATASET_DIR)

class ShapesDataset(utils.Dataset):
    """Dataset of images stored on disk with one PNG mask file per instance.

    Layout, relative to ``DATASET_DIR``::

        <subset>/images/<name>.png|.jpg
        <subset>/masks/<name>/*.png

    Every instance belongs to the single foreground class registered by
    ``load_shapes()``.
    """

    # Image file extensions picked up by load_shapes(), compared lower-cased.
    _IMAGE_EXTENSIONS = (".png", ".jpg")

    def load_shapes(self, subset):
        """Register the class and every image of ``<DATASET_DIR>/<subset>/images``.

        subset: name of the dataset sub-directory to load, e.g. "train" or "val".
            It is remembered on the instance because load_mask() needs it to
            locate the mask directories.

        Files that cannot be read are reported and skipped.
        """
        self.subset = subset
        self.add_class("shapes", 1, "square")

        image_dir = os.path.join(DATASET_DIR, subset, 'images')
        # Sorted so that image ids are assigned in the same order on every run.
        for file in sorted(os.listdir(image_dir)):
            # Compare the real extension; a substring test would also accept
            # names such as "page.png.bak".
            if os.path.splitext(file)[1].lower() not in self._IMAGE_EXTENSIONS:
                continue
            image_path = os.path.join(image_dir, file)
            try:
                # The image is read only to learn its dimensions.
                image = skimage.io.imread(image_path)
            except Exception as e:
                print(image_path)
                print(e)
                # Without this, the code below would use an undefined `image`
                # (or the dimensions of the previously read file).
                continue
            height, width = image.shape[:2]

            self.add_image(
                "shapes",
                image_id=file,
                name=file,
                path=image_path,
                width=width, height=height)

    def load_mask(self, image_id):
        """Load the instance masks of an image.

        Returns:
            masks: bool array of shape [height, width, instance count], one
                channel per PNG file in the image's mask directory. When the
                directory holds no PNG file the instance count is 0.
            class_ids: int32 array of shape [instance count]. All ones,
                because there is a single foreground class.

        Raises:
            AssertionError: the mask directory of the image does not exist.
            IOError: a mask file exists but could not be decoded.
        """
        image_info = self.image_info[image_id]

        # The mask directory is named after the image file, minus its extension.
        stem = os.path.splitext(image_info["name"])[0]
        mask_dir = os.path.join(DATASET_DIR, self.subset, "masks", stem)
        assert os.path.exists(mask_dir), "Mask directory not found: " + mask_dir

        instance_masks = []
        # glob.escape() keeps characters such as "[" in the directory name from
        # being read as wildcards; sorting makes the instance order stable.
        pattern = os.path.join(glob.escape(mask_dir), "*.png")
        for mask_path in sorted(glob.glob(pattern)):
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                # cv2.imread signals an unreadable file by returning None.
                raise IOError("Could not read mask file: " + mask_path)
            instance_masks.append(mask.astype(bool))

        if not instance_masks:
            empty = np.zeros(
                (image_info["height"], image_info["width"], 0), dtype=bool)
            return empty, np.zeros(0, dtype=np.int32)

        # Stack once at the end instead of re-copying the array per instance.
        masks = np.stack(instance_masks, axis=-1)
        # Since we have one class ID only, we return an array of 1s.
        return masks, np.ones(masks.shape[-1], dtype=np.int32)

    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        # Images are registered under the "shapes" source in load_shapes().
        if info["source"] == "shapes":
            return info["path"]
        return super().image_reference(image_id)

    def load_image(self, image_id):
        """Load the specified image and return a [H,W,1] grayscale Numpy array.
        """
        image = skimage.io.imread(self.image_info[image_id]['path'])
        if image.ndim == 3:
            if image.shape[-1] >= 3:
                # Colour image: convert to grayscale, ignoring any alpha channel.
                # NOTE(review): rgb2gray returns floats (in [0, 1] for uint8
                # input per the scikit-image docs) while files that are already
                # grayscale keep their stored dtype and range, and
                # ShapesConfig.MEAN_PIXEL is 123.7 -- confirm which scale the
                # model is meant to see.
                image = skimage.color.rgb2gray(image[..., :3])
            else:
                # Gray (+ alpha) stored with an explicit channel axis.
                image = image[..., 0]
        # Add the channel axis expected for single-channel input.
        return image[..., np.newaxis]

In [1]:
def _load_subset(subset):
    """Create a ShapesDataset, load the given subset into it and prepare it."""
    dataset = ShapesDataset()
    dataset.load_shapes(subset)
    dataset.prepare()
    return dataset


# Training split
dataset_train = _load_subset("train")
print(len(dataset_train.image_ids), 'training images')

# Validation split
dataset_val = _load_subset("val")
print(len(dataset_val.image_ids), 'validation images')

NameError: name 'ShapesDataset' is not defined

In [2]:
# Draw two training image ids at random and show each image with its top mask
sample_count = 2
image_ids = np.random.choice(dataset_train.image_ids, sample_count)
for image_id in image_ids:
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)
    visualize.display_top_masks(
        image, mask, class_ids, dataset_train.class_names, limit=1)

NameError: name 'np' is not defined

## Create Model

In [6]:
# Build the Mask R-CNN model in training mode, using MODEL_DIR as its model directory
model = modellib.MaskRCNN(
    mode="training",
    config=config,
    model_dir=MODEL_DIR,
)







Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
box_ind is deprecated, use box_indices instead


In [7]:
#model.load_weights(model.find_last(), by_name=True)

## Training

Train in two stages:
1. Only the heads. Here we're freezing all the backbone layers and training only the randomly initialized layers (i.e. the ones that we didn't use pre-trained weights from MS COCO). To train only the head layers, pass `layers='heads'` to the `train()` function.

2. Fine-tune all layers. For this simple example it's not necessary, but we're including it to show the process. Simply pass `layers="all"` to train all layers.

In [8]:
# First training run: one epoch at the configured learning rate.
# layers='all' trains every layer of the network (no pre-trained weights are
# loaded in this notebook, the load_weights call above is commented out).
# Pass layers="heads" instead to freeze everything except the head layers,
# or a regular expression to select layers by name pattern.
model.train(
    dataset_train,
    dataset_val,
    learning_rate=config.LEARNING_RATE,
    epochs=1,
    layers='all',
)


Starting at epoch 0. LR=0.001

Checkpoint Path: C:\Programming\Bubbles\Mask_RCNN\logs\shapes20190907T1832\mask_rcnn_shapes_{epoch:04d}.h5
Selecting layers to train
conv1                  (Conv2D)
bn_conv1               (BatchNorm)
res2a_branch2a         (Conv2D)
bn2a_branch2a          (BatchNorm)
res2a_branch2b         (Conv2D)
bn2a_branch2b          (BatchNorm)
res2a_branch2c         (Conv2D)
res2a_branch1          (Conv2D)
bn2a_branch2c          (BatchNorm)
bn2a_branch1           (BatchNorm)
res2b_branch2a         (Conv2D)
bn2b_branch2a          (BatchNorm)
res2b_branch2b         (Conv2D)
bn2b_branch2b          (BatchNorm)
res2b_branch2c         (Conv2D)
bn2b_branch2c          (BatchNorm)
res2c_branch2a         (Conv2D)
bn2c_branch2a          (BatchNorm)
res2c_branch2b         (Conv2D)
bn2c_branch2b          (BatchNorm)
res2c_branch2c         (Conv2D)
bn2c_branch2c          (BatchNorm)
res3a_branch2a         (Conv2D)
bn3a_branch2a          (BatchNorm)
res3a_branch2b         (Conv2D)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "




Epoch 1/1
(1600, 1088, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_3_01_0177.jpg
(1600, 1104, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_1_01_0052.jpg
(1600, 1103, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_1_01_0030.jpg
  1/100 [..............................] - ETA: 41:41 - loss: 46.5800 - rpn_class_loss: 25.4504 - rpn_bbox_loss: 3.4218 - mrcnn_class_loss: 6.5100 - mrcnn_bbox_loss: 10.3371 - mrcnn_mask_loss: 0.8607(1600, 1109, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_4_02_0024.jpg
  2/100 [..............................] - ETA: 21:30 - loss: 44.1062 - rpn_class_loss: 23.7971 - rpn_bbox_loss: 4.2840 - mrcnn_class_loss: 5.4109 - mrcnn_bbox_loss: 9.6557 - mrcnn_mask_loss: 0.9585 (1600, 1099, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_3_01_0162.jpg
(1600, 1115, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_4_02_0020.jpg
(1600, 1097, 1)   C:\Programming\Bubbles\dataset_manga\train\i

(1600, 1102, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_3_01_0172.jpg
(1600, 1115, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_5_02_0043.jpg
(1600, 1111, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_5_02_0051.jpg
 10/100 [==>...........................] - ETA: 6:25 - loss: 21.6123 - rpn_class_loss: 9.8247 - rpn_bbox_loss: 4.9129 - mrcnn_class_loss: 1.7304 - mrcnn_bbox_loss: 4.3843 - mrcnn_mask_loss: 0.7600 (1600, 1109, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_4_02_0013.jpg
(1600, 1103, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_3_01_0182.jpg
(1600, 1103, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_3_01_0158.jpg
(1600, 1111, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_4_02_0033.jpg
(1600, 1103, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_1_01_0071.jpg
(1600, 1109, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_4_02_0023.jpg
(1600, 1110

 23/100 [=====>........................] - ETA: 3:26 - loss: 15.2423 - rpn_class_loss: 4.5435 - rpn_bbox_loss: 5.4085 - mrcnn_class_loss: 1.0576 - mrcnn_bbox_loss: 3.5909 - mrcnn_mask_loss: 0.6419(1600, 1106, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_1_01_0092.jpg
(1600, 1103, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_1_01_0016.jpg
(1600, 1114, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_4_02_0035.jpg
(1600, 1115, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_4_02_0034.jpg
(1600, 1113, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_5_02_0045.jpg
(1600, 1103, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_1_01_0080.jpg
(1600, 1102, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_1_01_0031.jpg
(1600, 1102, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_1_01_0038.jpg
(1600, 1105, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_1_01_0086.jpg
(1600, 1114,

(1600, 1111, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_5_02_0049.jpg
(1600, 1108, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_1_01_0094.jpg
(1600, 1102, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_1_01_0029.jpg
(1600, 1105, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_1_01_0024.jpg
(1600, 1105, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_1_01_0060.jpg
(1600, 1096, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_3_01_0175.jpg
(1600, 1088, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_3_01_0177.jpg
(1600, 1102, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_1_01_0022.jpg
(1600, 1104, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_1_01_0048.jpg
(1600, 1114, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_5_02_0064.jpg
(1600, 1112, 1)   C:\Programming\Bubbles\dataset_manga\train\images\asuka_5_02_0061.jpg
(1600, 1098, 1)   C:\Programming

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "c:\programming\bubbles\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3326, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-5add68873891>", line 8, in <module>
    layers='all')
  File "C:\Programming\Bubbles\Mask_RCNN\mrcnn\model.py", line 2374, in train
    use_multiprocessing=True,
  File "c:\programming\bubbles\venv\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "c:\programming\bubbles\venv\lib\site-packages\keras\engine\training.py", line 1658, in fit_generator
    initial_epoch=initial_epoch)
  File "c:\programming\bubbles\venv\lib\site-packages\keras\engine\training_generator.py", line 181, in fit_generator
    generator_output = next(output_generator)
  File "C:\Programming\Bubbles\Mask_RCNN\mrcnn\model.py", line 1709, in data_generator
    use_mini_mask=config.USE_MINI_MASK)
  File "C:\Programming\Bubbles\Mask_RCNN\mr

KeyboardInterrupt: 

In [None]:
# Fine-tuning run at a tenth of the configured learning rate.
# layers="all" trains all layers; a regular expression can be passed
# instead to select the layers to train by name pattern.
model.train(
    dataset_train,
    dataset_val,
    learning_rate=config.LEARNING_RATE / 10,
    epochs=2,
    layers="all",
)

In [None]:
# Save weights
# Typically not needed because callbacks save after every epoch
# Uncomment to save manually
# model_path = os.path.join(MODEL_DIR, "mask_rcnn_shapes.h5")
# model.keras_model.save_weights(model_path)

## Detection

In [None]:
class InferenceConfig(ShapesConfig):
    """ShapesConfig variant used for detection: one GPU, one image per GPU."""
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Rebuild the model, this time in inference mode
model = modellib.MaskRCNN(
    mode="inference",
    config=inference_config,
    model_dir=MODEL_DIR,
)

# Path to the saved weights: the last trained weights are used here.
# To load a specific file instead, set
# model_path = os.path.join(ROOT_DIR, ".h5 file name here")
model_path = model.find_last()

# Load the trained weights into the inference model
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

In [None]:
# Pick one validation image at random and load it with its ground truth
image_id = random.choice(dataset_val.image_ids)
(original_image, image_meta,
 gt_class_id, gt_bbox, gt_mask) = modellib.load_image_gt(
    dataset_val, inference_config, image_id, use_mini_mask=False)

# Pass each loaded array to mrcnn's log helper, in the order it was returned
for label, array in (
        ("original_image", original_image),
        ("image_meta", image_meta),
        ("gt_class_id", gt_class_id),
        ("gt_bbox", gt_bbox),
        ("gt_mask", gt_mask)):
    log(label, array)

# Draw the ground-truth boxes and masks on the image
visualize.display_instances(
    original_image, gt_bbox, gt_mask, gt_class_id,
    dataset_train.class_names, figsize=(8, 8))

In [None]:
# Run detection on the single image loaded in the previous cell
results = model.detect([original_image], verbose=1)

# One image went in, so the first entry of the result list is its detections
r = results[0]
visualize.display_instances(
    original_image, r['rois'], r['masks'], r['class_ids'],
    dataset_val.class_names, r['scores'], ax=get_ax())

## Evaluation

In [None]:
# Compute VOC-Style mAP @ IoU=0.5
# Running on up to 10 distinct validation images. Increase for better accuracy.
# Sampling without replacement keeps any image from being counted twice, and
# capping the sample size keeps the call valid for small validation sets.
num_eval_images = min(10, len(dataset_val.image_ids))
image_ids = np.random.choice(dataset_val.image_ids, num_eval_images, replace=False)
APs = []
for image_id in image_ids:
    # Load image and ground truth data
    image, image_meta, gt_class_id, gt_bbox, gt_mask =\
        modellib.load_image_gt(dataset_val, inference_config,
                               image_id, use_mini_mask=False)
    # Run object detection
    results = model.detect([image], verbose=0)
    r = results[0]
    # Compute AP
    AP, precisions, recalls, overlaps =\
        utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                         r["rois"], r["class_ids"], r["scores"], r['masks'])
    APs.append(AP)

# With no validation images there is nothing to average; report NaN
# explicitly instead of triggering NumPy's empty-mean warning.
print("mAP: ", np.mean(APs) if APs else float("nan"))