

---
# Document Mask R-CNN training script


## Initializations
Set up the specific versions of the dependencies that the library needs.

In [None]:
# Announce the purpose of the notebook in the cell output.
print("DOCUMENT RECOGNITION Training")
# Colab magic: select the TensorFlow 1.x runtime.
%tensorflow_version 1.x
# Pin Keras to version 2.0.8.
!pip install keras==2.0.8

Clone the library repository and install it; after that, we work inside the library directory.

In [None]:
# Set up the library and its dependencies.
# Clone the Mask R-CNN repository into the current directory.
!git clone https://github.com/matterport/Mask_RCNN.git
# Make the cloned repository the working directory for the following cells.
%cd Mask_RCNN
# Install the library itself, then the packages listed in its requirements file.
!python setup.py install
!pip install -r requirements.txt

Mount my Google Drive root folder, where the dataset is located.

In [None]:
# Mount Google Drive under /content/drive so the dataset can be read from it.
from google.colab import drive 
drive.mount('/content/drive')

Download the MS COCO pretrained Mask R-CNN weights.

In [None]:
# Download mask_rcnn_coco.h5 (v2.0 release asset) into the current directory.
!wget https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5

## CODE

Initialize the script with the imports and the other significant variable and class definitions.

In [4]:
import os
import sys
import json
import datetime
import numpy as np
import skimage.draw
import cv2
from mrcnn.visualize import display_instances
import matplotlib.pyplot as plt

# Root directory of the project: the current working directory, made absolute
ROOT_DIR = os.path.abspath("./")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import model as modellib, utils

# Path to the COCO pre-trained weights file, expected directly inside ROOT_DIR
COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

Using TensorFlow backend.


In [5]:
############################################################
#  Configurations
############################################################


class CustomConfig(Config):
    """Training configuration for the document dataset provided from Google Drive.

    Subclasses the base ``Config`` class and overrides only the values
    listed below.
    """
    # Recognizable name for this configuration
    NAME = "document"

    # Total number of classes, background included:
    # background + the two document classes
    NUM_CLASSES = 1 + 2

    # Two images fit on a GPU with 12GB of memory;
    # lower this value when using a smaller GPU.
    IMAGES_PER_GPU = 2

    # Training steps run in every epoch
    STEPS_PER_EPOCH = 150

    VALIDATION_STEPS = 25

    LEARNING_RATE = 0.006

    # Detections with less than 90% confidence are skipped
    DETECTION_MIN_CONFIDENCE = 0.9

`CustomDataset` inherits from the library's `Dataset` class and overrides the important `load_mask` method, which is necessary to load the annotation data of the training images. The mask is the region of the image that represents the objects the network should recognize.

In [17]:
############################################################
#  Dataset
############################################################

class CustomDataset(utils.Dataset):
    """Dataset of document images annotated with the VGG Image Annotator (VIA).

    Every sub-folder of the training/validation directory is expected to hold
    the images together with a ``via_regions.json`` file describing, for each
    image, the polygon outline and the class name of every annotated document.
    """

    def load_custom(self, dataset_dir, is_train=True):
        """Register the classes and the annotated images of one dataset split.

        Parameters:
        dataset_dir: path of the root folder of the dataset; it must contain
            the ``training/`` and ``validating/`` sub-folders.
        is_train: when True the ``training/`` split is loaded, otherwise the
            ``validating/`` split.

        Raises:
        OSError: if the split folder, or the ``via_regions.json`` file of one
            of its sub-folders, cannot be opened.
        """
        # TODO: add modularity for different kinds of documents.

        # Function-scope import: the file only imports ``skimage.draw``, so
        # ``skimage.io`` is not guaranteed to be loaded otherwise.
        import skimage.io

        subset = "training/" if is_train else "validating/"
        dataset_dir = os.path.join(dataset_dir, subset)

        # Add classes.
        self.add_class("dataset", 1, "tesserino")
        self.add_class("dataset", 2, "patente")

        # Load annotations
        # VGG Image Annotator saves each image in the form:
        # { 'filename': '28503151_5b5b7ec140_b.jpg',
        #   'regions': [
        #       {
        #           'region_attributes': {'name': 'tesserino'},
        #           'shape_attributes': {
        #               'all_points_x': [...],
        #               'all_points_y': [...],
        #               'name': 'polygon'}},
        #       ... more regions ...
        #   ],
        #   'size': 100202
        # }
        # Older VIA versions store 'regions' as a dict keyed by '0', '1', ...
        # instead of a list; both layouts are accepted below.
        # We care about the class name and the x and y coordinates of each
        # region.

        # Report every entry of the split folder and keep only the folders.
        dir_list = []
        for dir_ in os.listdir(dataset_dir):
            is_dir = os.path.isdir(os.path.join(dataset_dir, dir_))
            print("{0}: {1}".format(dir_, is_dir))
            if is_dir:
                dir_list.append(dir_)

        for dir_ in dir_list:
            iter_dir = os.path.join(dataset_dir, dir_ + "/")

            print("iterating: {0} ...".format(iter_dir))

            # ``with`` closes the annotation file as soon as it is parsed.
            with open(os.path.join(iter_dir, "via_regions.json")) as annotations_file:
                # The dict keys are not needed, only the per-image records.
                annotations = list(json.load(annotations_file).values())

            # The VIA tool saves images in the JSON even if they don't have
            # any annotations. Skip unannotated images.
            annotations = [a for a in annotations if a['regions']]

            # Add images
            for a in annotations:
                regions = a['regions']
                if isinstance(regions, dict):
                    regions = regions.values()

                # Keep, for every object instance, its attributes (class
                # name) and the points of the polygon that outlines it.
                polygons = [(r['region_attributes'], r['shape_attributes'])
                            for r in regions]

                # load_mask() needs the image size to convert polygons to
                # masks. VIA doesn't include it in the JSON, so the image
                # must be read. This is only manageable since the dataset
                # is tiny.
                image_path = os.path.join(iter_dir, a['filename'])
                image = skimage.io.imread(image_path)
                height, width = image.shape[:2]

                self.add_image(
                    "dataset",
                    image_id=a['filename'],  # use file name as a unique image id
                    path=image_path,
                    width=width, height=height,
                    polygons=polygons)

    def load_mask(self, image_id):
        """Generate instance masks for an image.

        Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D int32 array of class IDs of the instance masks.
        """
        info = self.image_info[image_id]

        # If the image does not come from this dataset, delegate to the
        # parent class. Zero-argument super() is used because
        # super(self.__class__, self) recurses forever in a subclass.
        if info["source"] != "dataset":
            return super().load_mask(image_id)

        # Convert polygons to a bitmap mask of shape
        # [height, width, instance_count]
        height, width = info["height"], info["width"]
        mask = np.zeros([height, width, len(info["polygons"])],
                        dtype=np.uint8)

        class_ids = []
        for i, (region_attributes, shape_attributes) in enumerate(info["polygons"]):
            # Get indexes of pixels inside the polygon and set them to 1.
            # ``shape`` clips the polygon to the image, so points annotated
            # on or beyond the border do not raise an IndexError.
            rr, cc = skimage.draw.polygon(shape_attributes['all_points_y'],
                                          shape_attributes['all_points_x'],
                                          shape=(height, width))
            mask[rr, cc, i] = 1
            # The class of the instance is looked up by the name stored in
            # the region attributes.
            class_ids.append(self.class_names.index(region_attributes['name']))

        # Return the mask and the array of class IDs of each instance.
        # The builtin ``bool`` replaces the removed ``np.bool`` alias.
        return mask.astype(bool), np.asarray(class_ids, dtype='int32')

    def image_reference(self, image_id):
        """Return the path of the image.

        For images that do not come from this dataset the answer of the
        parent class is returned.
        """
        info = self.image_info[image_id]
        print("reference...")
        if info["source"] == "dataset":
            print(info["path"])
            return info["path"]
        return super().image_reference(image_id)


In [7]:
# Utilities

# Class names of the training dataset; filled in by train() and read later
# when the detections are displayed.
classes_ = None


def train(model,
          dataset_dir="/content/drive/My Drive/Ing_sw_testing/document_dataset/",
          epochs=10, layers='heads', learning_rate=None):
    """Train the model on the training and validation splits of a dataset.

    As a side effect the global ``classes_`` is set to the class names of
    the training dataset.

    Parameters:
    model: the Mask R-CNN model (created in training mode) to train.
    dataset_dir: root folder of the dataset, used for both splits.
    epochs: value passed as ``epochs`` to ``model.train``.
    layers: value passed as ``layers`` to ``model.train``.
    learning_rate: learning rate to train with; when None the
        ``LEARNING_RATE`` of the model's own configuration is used.
    """
    global classes_

    # Training dataset.
    dataset_train = CustomDataset()
    dataset_train.load_custom(dataset_dir, is_train=True)
    dataset_train.prepare()

    classes_ = dataset_train.class_names

    # Validation dataset
    dataset_val = CustomDataset()
    dataset_val.load_custom(dataset_dir, is_train=False)
    dataset_val.prepare()

    # Read the learning rate from the model's configuration rather than from
    # a global ``config`` name that other notebook cells may rebind.
    if learning_rate is None:
        learning_rate = model.config.LEARNING_RATE

    # *** This training schedule is an example. Update to your needs ***
    # Since we're using a very small dataset, and starting from
    # COCO trained weights, we don't need to train too long. Also,
    # no need to train all layers, just the heads should do it.
    if layers == 'heads':
        print("Training network heads")
    else:
        print("Training network layers: {0}".format(layers))
    model.train(dataset_train, dataset_val,
                learning_rate=learning_rate,
                epochs=epochs,
                layers=layers)


Instantiate the configuration defined above to train our model.

In [None]:
# Instantiate the training configuration and print its values.
config = CustomConfig()
config.display()

In [None]:
# TRAINING
model = modellib.MaskRCNN(mode="training", config=config,
                                  model_dir="./")
weights_path = COCO_WEIGHTS_PATH
model.load_weights(weights_path, by_name=True, exclude=[
            "mrcnn_class_logits", "mrcnn_bbox_fc",
            "mrcnn_bbox", "mrcnn_mask"])

train(model)


Save the weights trained on my dataset.

In [None]:
import time

# The current timestamp is embedded in the file name of the saved weights.
model_path = ''.join(['mask_rcnn_', '.', str(time.time()), '.h5'])

# Write the trained weights to that file and show where they went.
model.keras_model.save_weights(model_path)
print(model_path)


Download the trained weights.

In [None]:
# Ask Colab to download the saved weights file to the local machine.
from google.colab import files
files.download(model_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Define a new config class that inherits from the training one, used only for detection.

In [None]:
class InferenceConfig(CustomConfig):
    """Configuration used for detection: one image per batch."""
    # Inference runs on a single image at a time, so the batch size
    # (GPU_COUNT * IMAGES_PER_GPU) is brought down to 1.
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1

    # Detections with less than 90% confidence are skipped
    DETECTION_MIN_CONFIDENCE = 0.9

In [None]:
# Rebind the global `config` name to the inference configuration and print
# its values.
config = InferenceConfig()
config.display()

Create the model in inference mode for testing.


In [None]:
# Rebuild the network in inference mode with the inference configuration.
model = modellib.MaskRCNN(mode="inference", config=config,  model_dir="./")

# Load the weights saved after training, matched by layer name.
model.load_weights(model_path, by_name=True)

Run detection on a test image the network has never seen.

In [None]:
# Run object detection on a single test image read from Google Drive
image = skimage.io.imread("/content/drive/My Drive/Ing_sw_testing/document_dataset/testing/test1.jpg")
results = model.detect([image], verbose=1)

# Display results: one image was passed in, so only the first result is used
r = results[0]

# Class names collected by train(); still None if train() has not been run
print(classes_)

# Draw the detected boxes, masks, class names and scores over the image
display_instances(image, r['rois'], r['masks'], r['class_ids'], 
                            classes_, r['scores'], 
                            title="Predictions")

# Print the 'rois' entry of the detection result
print(r['rois'])
