## Import Dependencies

In [3]:
# COCO related libraries
import coco

# MaskRCNN libraries
from mrcnn.config import Config
import mrcnn.utils as utils
from mrcnn import visualize
import mrcnn.model as modellib

# Misc
import os
import sys
import json
import numpy as np
import time
from PIL import Image, ImageDraw

Using TensorFlow backend.


## Constants

In [4]:
# Number of object classes in the dataset, not counting the background class
# (TrainConfig adds 1 for the background). Must be of type integer.
NUM_CLASSES = 3

# Path to an .h5 weights file to start from, or None to skip loading weights
WEIGHTS_FILE = None

# Path to the training annotations JSON file (placeholder; set before running)
TRAIN_ANNOTATIONS_FILE = '/path/to/annotations/.json'

# Directory holding the images referenced by the training annotations file
TRAIN_ANNOTATION_IMAGE_DIR = "/path/to/annotation/image/dir"

# Path to the validation annotations JSON file (placeholder; set before running)
VALIDATION_ANNOTATIONS_FILE = '/path/to/annotations/.json'

# Directory holding the images referenced by the validation annotations file
VALIDATION_ANNOTATION_IMAGE_DIR = "/path/to/annotation/image/dir"

# Number of epochs to train for
NUM_EPOCHS = 80

# Name given to the training configuration (used as TrainConfig.NAME)
MODEL_NAME = 'Model 1'

## Additional setup

In [5]:
# ROOT_DIR is taken from the current working directory, so run this from the
# root directory of the Mask_RCNN git repo
ROOT_DIR = os.getcwd()

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Select which GPU to use: order devices by PCI bus ID and expose only device 0
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

## Declare training configuration

In [None]:
class TrainConfig(coco.CocoConfig):
    """Training configuration for this notebook, derived from coco.CocoConfig.

    Originally described as a configuration "where MRCNN has two mask layers".
    NOTE(review): nothing in this class configures mask layers - confirm
    whether that description still applies.
    """
    # Give the configuration a recognizable name
    NAME = MODEL_NAME

    # Train on 1 GPU with 2 images per GPU. Batch size is 2 (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 2

    # Number of classes (including background). The right-hand NUM_CLASSES is
    # the module-level constant; the class attribute shadows it from here on.
    NUM_CLASSES = 1 + NUM_CLASSES

    # Min and max image dimensions
    IMAGE_MIN_DIM = 1152
    IMAGE_MAX_DIM = 1280

    # You can experiment with this number to see if it improves training
    STEPS_PER_EPOCH = 180

    # NOTE(review): in Matterport's Config this is the number of validation
    # steps run at the end of each training epoch, not how often validation
    # runs or how often models are saved - confirm against mrcnn/config.py.
    VALIDATION_STEPS = 50

    # Backbone network. NOTE(review): an earlier comment said this had been
    # downsized from resnet101 to fit on the graphics card, but the value is
    # still 'resnet101' - confirm which backbone is intended.
    BACKBONE = 'resnet101'

    # Anchor scales for the region proposal network.
    # Presumably anchor side lengths in pixels - TODO confirm against
    # mrcnn/config.py.
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # NOTE(review): an earlier comment said this was changed to 512 to match
    # the original Mask R-CNN paper, but the value is 200 - confirm intent.
    TRAIN_ROIS_PER_IMAGE = 200
    # Remaining limits use Matterport's attribute names; judging by the names
    # they cap ground-truth instances per image and the ROIs kept after
    # non-max suppression at inference/training time - TODO confirm.
    MAX_GT_INSTANCES = 114
    POST_NMS_ROIS_INFERENCE = 1000
    POST_NMS_ROIS_TRAINING = 2000

    # Detection limits: presumably the maximum number of detections returned
    # per image and the minimum confidence to keep one - TODO confirm.
    DETECTION_MAX_INSTANCES = 114
    DETECTION_MIN_CONFIDENCE = 0.1

## Display configuration

In [None]:
# Instantiate the configuration once and keep it: the model-building and
# training cells below reference `config`, which was previously never defined.
config = TrainConfig()
config.display()

## Create class to load dataset

In [9]:
class CocoLikeDataset(utils.Dataset):
    """ Generates a COCO-like dataset, i.e. an image dataset annotated in the style of the COCO dataset.
        See http://cocodataset.org/#home for more information.

        Only polygon segmentations (lists of flat [x1, y1, x2, y2, ...] point
        lists) are handled by load_mask.
    """
    def load_data(self, annotation_json, images_dir):
        """ Load the coco-like dataset from json
        Registers every category as a class and every image (with its
        annotations) through the base-class add_class / add_image methods.
        Images with a duplicate id or a missing 'file_name', 'width' or
        'height' key are skipped with a printed warning. Images that have no
        annotations are added with an empty annotation list.
        If a category id is less than 1, an error is printed and loading stops
        (classes registered before that point stay registered).
        Args:
            annotation_json: The path to the coco annotations json file
            images_dir: The directory holding the images referred to by the json file
        """
        # Load json from file; the context manager closes it even if parsing fails
        with open(annotation_json, encoding="utf-8") as json_file:
            coco_json = json.load(json_file)

        # Add the class names using the base method from utils.Dataset
        source_name = "coco_like"
        for category in coco_json['categories']:
            class_id = category['id']
            class_name = category['name']
            if class_id < 1:
                print('Error: Class id for "{}" cannot be less than one. (0 is reserved for the background)'.format(class_name))
                return

            self.add_class(source_name, class_id, class_name)

        # Group all annotations by the id of the image they belong to
        annotations = {}
        for annotation in coco_json['annotations']:
            annotations.setdefault(annotation['image_id'], []).append(annotation)

        # Get all images and add them to the dataset
        seen_image_ids = set()
        for image in coco_json['images']:
            image_id = image['id']
            if image_id in seen_image_ids:
                print("Warning: Skipping duplicate image id: {}".format(image))
                continue
            seen_image_ids.add(image_id)

            try:
                image_file_name = image['file_name']
                image_width = image['width']
                image_height = image['height']
            except KeyError as key:
                print("Warning: Skipping image (id: {}) with missing key: {}".format(image_id, key))
                # Without this the code below would run with undefined (or
                # stale, from a previous image) file name and dimensions.
                continue

            # Add the image using the base method from utils.Dataset
            self.add_image(
                source=source_name,
                image_id=image_id,
                path=os.path.abspath(os.path.join(images_dir, image_file_name)),
                width=image_width,
                height=image_height,
                # An image without any annotation gets an empty list rather
                # than raising KeyError.
                annotations=annotations.get(image_id, []),
            )

    def load_mask(self, image_id):
        """ Load instance masks for the given image.
        MaskRCNN expects masks in the form of a bitmap [height, width, instances].
        Each annotation yields exactly one instance: all of its polygons are
        drawn into the same mask. Annotations with no polygons are skipped.
        Args:
            image_id: Index into self.image_info of the image to load masks for
        Returns:
            masks: A bool array of shape [height, width, instance count] with
                one mask per instance.
            class_ids: a 1D array of class IDs of the instance masks.
        """
        image_info = self.image_info[image_id]
        width = image_info['width']
        height = image_info['height']
        instance_masks = []
        class_ids = []

        for annotation in image_info['annotations']:
            polygons = annotation['segmentation']
            if not polygons:
                # Nothing to draw, so there is no instance to report
                continue

            # PIL sizes are (width, height); the resulting array is [height, width]
            mask = Image.new('1', (width, height))
            mask_draw = ImageDraw.ImageDraw(mask, '1')
            for segmentation in polygons:
                mask_draw.polygon(segmentation, fill=1)

            # Append once per annotation, after every polygon has been drawn,
            # so a multi-part object stays a single instance.
            instance_masks.append(np.array(mask) > 0)
            class_ids.append(annotation['category_id'])

        if not instance_masks:
            # np.dstack raises on an empty list; return correctly shaped empties
            return np.zeros((height, width, 0), dtype=bool), np.zeros((0,), dtype=np.int32)

        mask = np.dstack(instance_masks)
        class_ids = np.array(class_ids, dtype=np.int32)

        return mask, class_ids

## Load train and validation datasets

In [None]:
# Create one dataset object per split
dataset_train = CocoLikeDataset()
dataset_val = CocoLikeDataset()

# Read each split's annotations together with its image directory
dataset_train.load_data(TRAIN_ANNOTATIONS_FILE, TRAIN_ANNOTATION_IMAGE_DIR)
dataset_val.load_data(VALIDATION_ANNOTATIONS_FILE, VALIDATION_ANNOTATION_IMAGE_DIR)

# prepare() must be called on each dataset after loading and before use
dataset_train.prepare()
dataset_val.prepare()

## Build MaskRCNN Model

In [None]:
# Create the Mask R-CNN model in training mode, using MODEL_DIR as its model directory
model = modellib.MaskRCNN(
    mode="training",
    config=config,
    model_dir=MODEL_DIR,
)

## Load weights into model if weights file is not None
### Use this when fine-tuning from a set of preexisting weights

In [None]:
# Only load weights when a weights file has been configured (WEIGHTS_FILE is
# None by default, which leaves the model's initial weights untouched here)
if WEIGHTS_FILE is not None:
    model.load_weights(WEIGHTS_FILE, by_name=True)

## Train model
### The model after each epoch will be saved in the logs folder

In [None]:
# Train every layer for NUM_EPOCHS epochs and report the wall-clock duration
start_train = time.time()
model.train(
    dataset_train,
    dataset_val,
    learning_rate=config.LEARNING_RATE,
    epochs=NUM_EPOCHS,
    layers='all',
)
end_train = time.time()

# Elapsed seconds converted to minutes, rounded to two decimals
minutes = round((end_train - start_train) / 60, 2)
print(f'Training took {minutes} minutes')

## Write description about model

In [None]:
# Free-text summary of this training run. The epoch and step counts in the
# text are written by hand; keep them in sync with NUM_EPOCHS and
# TrainConfig.STEPS_PER_EPOCH when those change.
description = "Model trained with 3 mask layers. The first layer has resolution of 128, then 256, and then 512. Model trained with 80 epochs and 180 steps per epoch"

# The target directory is not created anywhere else in this file, so make
# sure it exists before writing.
description_dir = os.path.join(MODEL_DIR, MODEL_NAME)
os.makedirs(description_dir, exist_ok=True)

# Open in write mode: the default mode is read-only, which made write() fail.
with open(os.path.join(description_dir, "description.txt"), "w") as outfile:
    outfile.write(description)