# Mask R-CNN - Train on Shapes Dataset


This notebook shows how to train Mask R-CNN on your own dataset. To keep things simple we use a synthetic dataset of shapes (squares, triangles, and circles) which enables fast training. You'd still need a GPU, though, because the network backbone is a Resnet101, which would be too slow to train on a CPU. On a GPU, you can start to get okay-ish results in a few minutes, and good results in less than an hour.

The code of the *Shapes* dataset is included below. It generates images on the fly, so it doesn't require downloading any data. And it can generate images of any size, so we pick a small image size to train faster. 

In [1]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import PIL.Image as Image
import matplotlib
import matplotlib.pyplot as plt
import yaml
# Root directory of the project
#ROOT_DIR = os.path.abspath("../../")
ROOT_DIR = os.getcwd()
# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

#%matplotlib inline 

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Download COCO trained weights from Releases if needed
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

Using TensorFlow backend.


## Configurations

In [2]:
class ShapesConfig(Config):
    """Configuration for training on the toy shapes dataset.
    Derives from the base Config class and overrides values specific
    to the toy shapes dataset.
    """
    # Give the configuration a recognizable name
    NAME = "1116"

    # Train on 1 GPU and 8 images per GPU. We can put multiple images on each
    # GPU because the images are small. Batch size is 8 (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 8

    # Number of classes (including background)
    NUM_CLASSES = 1 + 15  # background + 3 shapes

    # Use small images for faster training. Set the limits of the small side
    # the large side, and that determines the image shape.
    IMAGE_MIN_DIM = 384
    IMAGE_MAX_DIM = 384

    # Use smaller anchors because our image and objects are small
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)  # anchor side in pixels

    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 32

    # Use a small epoch since the data is simple
    STEPS_PER_EPOCH = 100

    # use small validation steps since the epoch is small
    VALIDATION_STEPS = 5
    
config = ShapesConfig()
config.display()


Configurations:
BACKBONE                       resnet50
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     8
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 8
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  384
IMAGE_META_SIZE                28
IMAGE_MIN_DIM                  384
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [384 384   3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE              

## Notebook Preferences

In [3]:
def get_ax(rows=1, cols=1, size=8):
    """Return a Matplotlib Axes array to be used in
    all visualizations in the notebook. Provide a
    central point to control graph sizes.
    
    Change the default size attribute to control the size
    of rendered images
    """
    _, ax = plt.subplots(rows, cols, figsize=(size*cols, size*rows))
    return ax

## Dataset

Create a synthetic dataset

Extend the Dataset class and add a method to load the shapes dataset, `load_shapes()`, and override the following methods:

* load_image()
* load_mask()
* image_reference()

In [4]:
class ShapesDataset(utils.Dataset):
    """Generates the shapes synthetic dataset. The dataset consists of simple
    shapes (triangles, squares, circles) placed randomly on a blank surface.
    The images are generated on the fly. No file access required.
    """

    #获取该图中有多少个实例
    def get_obj_index(self,image):
        n=np.max(image)
        
        return n
    
    #解析yaml文件
    def from_yaml_get_class(self,image_id):
        info=self.image_info[image_id]
        with open(info['yaml_path']) as f:
            temp=yaml.load(f.read())
            labels=temp['labelname']
            del labels[0]
        #print(labels)
        return labels
    
    
    def load_shapes(self, count, height, width,img_floder,mask_floder,imglist,dataset_root_path):
        """Generate the requested number of synthetic images.
        count: number of images to generate.
        height, width: the size of the generated images.
        """
        # Add classes
        
        self.add_class("1116", 1, "0/200/0")
        self.add_class("1116", 2, "150/250/0")
        self.add_class("1116", 3, "150/200/150")
        self.add_class("1116", 4, "200/0/150")
        self.add_class("1116", 5, "150/0/250")
        self.add_class("1116", 6, "150/150/250")
        self.add_class("1116", 7, "250/200/0")
        self.add_class("1116", 8, "200/200/0")
        self.add_class("1116", 9, "200/0/0")
        self.add_class("1116", 10, "250/0/150")
        self.add_class("1116", 11, "200/150/150")
        self.add_class("1116", 12, "250/150/150")
        self.add_class("1116", 13, "0/0/200")
        self.add_class("1116", 14, "0/150/200")
        self.add_class("1116", 15, "0/200/250")
        

        # Add images
        # Generate random specifications of images (i.e. color and
        # list of shapes sizes and locations). This is more compact than
        # actual images. Images are generated on the fly in load_image().
        for i in range(count):
            #bg_color, shapes = self.random_image(height, width)
            
            filestr=imglist[i].split('.')[0]
            #filestr=filestr.split('_')[1]
            mask_path=mask_floder+"/"+filestr+".tif"
            yaml_path=dataset_root_path+"yaml/"+filestr+".yaml"
            
            #修改
            self.add_image("1116", image_id=i, path=img_floder+"/"+imglist[i],
                           width=width, height=height,
                           mask_path=mask_path,yaml_path=yaml_path)

    

    
    #重写
    def load_mask(self, image_id):
        """Generate instance masks for shapes of the given image ID.
        """
        global iter_num
        
        info = self.image_info[image_id]
        print(info)
        #shapes = info['source']
       
        #count = len(shapes)
        
        # Map class names to class IDs.
        labels=[]
        labels=self.from_yaml_get_class(image_id)
        labels_form=[]
        
        for i in range(len(labels)):
            if labels[i].find("0/200/0") !=-1:
                #print "box"
                labels_form.append("0/200/0")
            elif labels[i].find("150/250/0") !=-1:
                #print "column"
                labels_form.append("150/250/0")
            elif labels[i].find("150/200/150") !=-1:
                #print "package"
                labels_form.append("150/200/150")
            elif labels[i].find("200/0/150") !=-1:
                #print "fruit"
                labels_form.append("200/0/150")
            elif labels[i].find("150/0/250") !=-1:
                #print "fruit"
                labels_form.append("150/0/250")
            elif labels[i].find("150/150/250") !=-1:
                #print "fruit"
                labels_form.append("150/150/250")
            elif labels[i].find("250/200/0") !=-1:
                #print "fruit"
                labels_form.append("250/200/0")
            elif labels[i].find("200/200/0") !=-1:
                #print "fruit"
                labels_form.append("200/200/0")
            elif labels[i].find("200/0/0")!=-1:
                #print "fruit"
                labels_form.append("200/0/0")
            elif labels[i].find("250/0/150") !=-1:
                #print "fruit"
                labels_form.append("250/0/150")
            elif labels[i].find("200/150/150") !=-1:
                #print "fruit"
                labels_form.append("200/150/150")
            elif labels[i].find("250/150/150")!=-1:
                #print "fruit"
                labels_form.append("250_150_150")
            elif labels[i].find("0_0_200")!=-1:
                #print "fruit"
                labels_form.append("0/0/200")
            elif labels[i].find("0/150/200") !=-1:
                #print "fruit"
                labels_form.append("0/150/200")
            elif labels[i].find("0/200/250") !=-1:
                #print "fruit"
                labels_form.append("0/200/250")
        
        count=len(labels_form)
        img=Image.open(info['mask_path'])
        num_obj=self.get_obj_index(img)
        mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8)
        mask=self.draw_mask(num_obj,mask,img,image_id)
        
        #for i, (shape, _, dims) in enumerate(info['1116']):
            #mask[:, :, i:i+1] = self.draw_shape(mask[:, :, i:i+1].copy(),shape, dims, 1)
        # Handle occlusions
        
        occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
        for i in range(count-2, -1, -1):
            mask[:, :, i] = mask[:, :, i] * occlusion
            occlusion = np.logical_and(occlusion, np.logical_not(mask[:, :, i]))
        
        class_ids = np.array([self.class_names.index(s) for s in labels_form])
        print(class_ids)
        #print(mask)
        return mask, class_ids.astype(np.int32)

    #draw_mask
    def draw_mask(self, num_obj,mask, image, image_id):
        info = self.image_info[image_id]
        for index in range(num_obj):
            for i in range(info['width']):
                for j in range(info['height']):
                    at_pixel = image.getpixel((i, j))
                    if at_pixel == index + 1:
                        mask[j, i, index] =1
        return mask

    
    

    

    

In [5]:
#基础设置
dataset_root_path="F:/DataSet/tes/train/"
img_floder=dataset_root_path+"image"
mask_floder=dataset_root_path+"label"
imglist=os.listdir(img_floder)
imglist.sort()
count=len(imglist)
width=384
height=384


# Training dataset
dataset_train = ShapesDataset()
dataset_train.load_shapes(count, 384, 384,img_floder,mask_floder,imglist,dataset_root_path)
dataset_train.prepare()

# Validation dataset
dataset_val = ShapesDataset()
dataset_val.load_shapes(2, 384, 384,img_floder,mask_floder,imglist,dataset_root_path)
dataset_val.prepare()

In [39]:
# Load and display random samples
#image_ids = np.random.choice(dataset_train.image_ids, 4)
#for image_id in image_ids:
#    image = dataset_train.load_image(image_id)
#    mask, class_ids = dataset_train.load_mask(image_id)
#    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)

## Create Model

In [6]:
# Create model in training mode
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

W0801 14:51:39.370477   824 deprecation_wrapper.py:119] From E:\Anaconda\envs\tfenv\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0801 14:51:39.420450   824 deprecation_wrapper.py:119] From E:\Anaconda\envs\tfenv\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0801 14:51:39.449455   824 deprecation_wrapper.py:119] From E:\Anaconda\envs\tfenv\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0801 14:51:39.481437   824 deprecation_wrapper.py:119] From E:\Anaconda\envs\tfenv\lib\site-packages\keras\backend\tensorflow_backend.py:1919: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.

W0801 14:51:39.484434   824 deprecation_wrapper.py:119] From E:

In [7]:
# Which weights to start with?
init_with = "coco"  # imagenet, coco, or last

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Load weights trained on MS COCO, but skip layers that
    # are different due to the different number of classes
    # See README for instructions to download the COCO weights
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", 
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training
    model.load_weights(model.find_last()[1], by_name=True)

## Training

Train in two stages:
1. Only the heads. Here we're freezing all the backbone layers and training only the randomly initialized layers (i.e. the ones that we didn't use pre-trained weights from MS COCO). To train only the head layers, pass `layers='heads'` to the `train()` function.

2. Fine-tune all layers. For this simple example it's not necessary, but we're including it to show the process. Simply pass `layers="all` to train all layers.

In [None]:
# Train the head branches
# Passing layers="heads" freezes all layers except the head
# layers. You can also pass a regular expression to select
# which layers to train by name pattern.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE, 
            epochs=1,
            layers='heads')


Starting at epoch 0. LR=0.001

Checkpoint Path: F:\python\Mask_RCNN-master\logs\111620190801T1451\mask_rcnn_1116_{epoch:04d}.h5
Selecting layers to train
fpn_c5p5               (Conv2D)
fpn_c4p4               (Conv2D)
fpn_c3p3               (Conv2D)
fpn_c2p2               (Conv2D)
fpn_p5                 (Conv2D)
fpn_p2                 (Conv2D)
fpn_p3                 (Conv2D)
fpn_p4                 (Conv2D)
In model:  rpn_model
    rpn_conv_shared        (Conv2D)
    rpn_class_raw          (Conv2D)
    rpn_bbox_pred          (Conv2D)
mrcnn_mask_conv1       (TimeDistributed)
mrcnn_mask_bn1         (TimeDistributed)
mrcnn_mask_conv2       (TimeDistributed)
mrcnn_mask_bn2         (TimeDistributed)
mrcnn_class_conv1      (TimeDistributed)
mrcnn_class_bn1        (TimeDistributed)
mrcnn_mask_conv3       (TimeDistributed)
mrcnn_mask_bn3         (TimeDistributed)
mrcnn_class_conv2      (TimeDistributed)
mrcnn_class_bn2        (TimeDistributed)
mrcnn_mask_conv4       (TimeDistributed)
mrcnn_mas

W0801 14:52:11.094628   824 deprecation_wrapper.py:119] From E:\Anaconda\envs\tfenv\lib\site-packages\keras\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
W0801 14:52:18.876106   824 deprecation_wrapper.py:119] From E:\Anaconda\envs\tfenv\lib\site-packages\keras\callbacks.py:850: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.

W0801 14:52:18.877103   824 deprecation_wrapper.py:119] From E:\Anaconda\envs\tfenv\lib\site-packages\keras\callbacks.py:853: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.



Epoch 1/1
{'id': 256, 'source': '1116', 'path': 'F:/DataSet/tes/train/image/1_329.tif', 'width': 384, 'height': 384, 'mask_path': 'F:/DataSet/tes/train/label/1_329.tif', 'yaml_path': 'F:/DataSet/tes/train/yaml/1_329.yaml'}


  app.launch_new_instance()


[2]
{'id': 133, 'source': '1116', 'path': 'F:/DataSet/tes/train/image/1_218.tif', 'width': 384, 'height': 384, 'mask_path': 'F:/DataSet/tes/train/label/1_218.tif', 'yaml_path': 'F:/DataSet/tes/train/yaml/1_218.yaml'}


  app.launch_new_instance()


[11]
{'id': 294, 'source': '1116', 'path': 'F:/DataSet/tes/train/image/1_56.tif', 'width': 384, 'height': 384, 'mask_path': 'F:/DataSet/tes/train/label/1_56.tif', 'yaml_path': 'F:/DataSet/tes/train/yaml/1_56.yaml'}


  app.launch_new_instance()


[ 2 11]
{'id': 114, 'source': '1116', 'path': 'F:/DataSet/tes/train/image/1_200.tif', 'width': 384, 'height': 384, 'mask_path': 'F:/DataSet/tes/train/label/1_200.tif', 'yaml_path': 'F:/DataSet/tes/train/yaml/1_200.yaml'}


  app.launch_new_instance()


[ 2 11]
{'id': 198, 'source': '1116', 'path': 'F:/DataSet/tes/train/image/1_277.tif', 'width': 384, 'height': 384, 'mask_path': 'F:/DataSet/tes/train/label/1_277.tif', 'yaml_path': 'F:/DataSet/tes/train/yaml/1_277.yaml'}


  app.launch_new_instance()


In [None]:
# Fine tune all layers
# Passing layers="all" trains all layers. You can also 
# pass a regular expression to select which layers to
# train by name pattern.
model.train(dataset_train, dataset_val, 
            learning_rate=config.LEARNING_RATE / 10,
            epochs=2, 
            layers="all")

In [None]:
# Save weights
# Typically not needed because callbacks save after every epoch
# Uncomment to save manually
# model_path = os.path.join(MODEL_DIR, "mask_rcnn_shapes.h5")
# model.keras_model.save_weights(model_path)

## Detection

In [None]:
class InferenceConfig(ShapesConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=MODEL_DIR)

# Get path to saved weights
# Either set a specific path or find last trained weights
# model_path = os.path.join(ROOT_DIR, ".h5 file name here")
model_path = model.find_last()

# Load trained weights
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

In [None]:
# Test on a random image
image_id = random.choice(dataset_val.image_ids)
original_image, image_meta, gt_class_id, gt_bbox, gt_mask =\
    modellib.load_image_gt(dataset_val, inference_config, 
                           image_id, use_mini_mask=False)

log("original_image", original_image)
log("image_meta", image_meta)
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)

visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id, 
                            dataset_train.class_names, figsize=(8, 8))

In [None]:
results = model.detect([original_image], verbose=1)

r = results[0]
visualize.display_instances(original_image, r['rois'], r['masks'], r['class_ids'], 
                            dataset_val.class_names, r['scores'], ax=get_ax())

## Evaluation

In [None]:
# Compute VOC-Style mAP @ IoU=0.5
# Running on 10 images. Increase for better accuracy.
image_ids = np.random.choice(dataset_val.image_ids, 10)
APs = []
for image_id in image_ids:
    # Load image and ground truth data
    image, image_meta, gt_class_id, gt_bbox, gt_mask =\
        modellib.load_image_gt(dataset_val, inference_config,
                               image_id, use_mini_mask=False)
    molded_images = np.expand_dims(modellib.mold_image(image, inference_config), 0)
    # Run object detection
    results = model.detect([image], verbose=0)
    r = results[0]
    # Compute AP
    AP, precisions, recalls, overlaps =\
        utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                         r["rois"], r["class_ids"], r["scores"], r['masks'])
    APs.append(AP)
    
print("mAP: ", np.mean(APs))