# Mask R-CNN Demo

A quick intro to using the pre-trained model to detect and segment objects.

In [1]:
import datetime
import math
import os
import random
import sys
import time

import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy
import skimage.io
%matplotlib inline

# Parent of the current working directory, resolved to an absolute path when
# this cell runs.
ROOT_DIR = os.path.abspath("../")

# Import Mask RCNN
# ROOT_DIR must be on sys.path before the `mrcnn` / `samples` imports below,
# so the order of these statements matters.
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize

# Provides BlisterConfig, the base class of InferenceConfig defined further down.
from samples.blister import blister_mul_class

# Import COCO config
# NOTE(review): `coco` is not referenced by any cell visible in this notebook,
# and only ROOT_DIR is added to sys.path above -- confirm this import is still
# needed and resolvable.
import coco

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path to trained weights file.
# Relative to the current working directory (unlike MODEL_DIR, which is
# anchored at the absolute ROOT_DIR).
BLISTER_MODEL_PATH = "../mask_rcnn_blister_real.h5"

# import ROS
import rospy
import geometry_msgs.msg
import roslib
import actionlib
from std_msgs.msg import String
from sensor_msgs.msg import Image
from thin_obj_bin_picking.msg import blister_pose
from cv_bridge import CvBridge, CvBridgeError

Using TensorFlow backend.


## Configurations

We'll be using a model trained on the blister dataset. The configuration of this model is defined by the `BlisterConfig` class in the `samples.blister.blister_mul_class` module.

For inference, modify the configuration slightly to fit the task. To do so, sub-class the `BlisterConfig` class and override the attributes you need to change.

In [2]:
class InferenceConfig(blister_mul_class.BlisterConfig):
    """BlisterConfig variant for single-image inference.

    Both overrides are set to 1; per the Mask R-CNN convention
    batch size = GPU_COUNT * IMAGES_PER_GPU, so detection runs on one image
    at a time.
    """

    IMAGES_PER_GPU = 1
    GPU_COUNT = 1


# Instantiate the inference configuration and print its settings.
config = InferenceConfig()
config.display()


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.9
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                15
IMAGE_MIN_DIM                  800
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [1024 1024    3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE                     [28, 28]
MAX_GT_INSTA

## Create Model and Load Trained Weights

In [3]:
# Build the Mask R-CNN model object in inference mode; MODEL_DIR is handed
# over as the model directory.
model = modellib.MaskRCNN(
    mode="inference",
    model_dir=MODEL_DIR,
    config=config,
)

# Load the trained weights stored at BLISTER_MODEL_PATH, matching by name.
model.load_weights(BLISTER_MODEL_PATH, by_name=True)

## Run Object Detection

In [4]:
def max_area(area):
    """Return the largest value in ``area`` together with its position.

    Scanning starts from a best value of 0 at position 0 and only a strictly
    greater entry replaces it, so the first occurrence of the maximum wins and
    an empty or all-non-positive input yields ``(0, 0)``.

    Args:
        area: Indexable sequence of comparable numbers (e.g. mask areas).

    Returns:
        Tuple ``(largest_value, position_of_largest_value)``.
    """
    best_value, best_pos = 0, 0
    for pos in range(len(area)):
        candidate = area[pos]
        if candidate > best_value:
            best_value, best_pos = candidate, pos
    return best_value, best_pos

In [5]:
def mask_rank(img, mask):
    """Select the largest instance mask and fit a rotated rectangle to it.

    Every instance mask is displayed and its pixel count printed. The mask
    with the greatest pixel count (as chosen by ``max_area``) is then
    processed: the contour with the most points is taken, its convex hull is
    wrapped in a minimum-area rectangle, and that rectangle is drawn on a copy
    of ``img`` and displayed.

    Args:
        img: Image the rectangle is drawn on. It is copied; the caller's
            array is not modified.
        mask: Array indexed as ``mask[:, :, i]`` for instance ``i``; it is
            converted to ``uint8`` before use.

    Returns:
        Tuple ``(selected_mask, box)`` where ``selected_mask`` is a ``uint8``
        copy of the chosen instance mask and ``box`` holds the four rectangle
        corners from ``cv2.boxPoints`` cast to ``np.intp``.

    Raises:
        ValueError: If ``mask`` holds no instances, or the selected mask
            yields no contour (e.g. it is entirely zero).
    """
    mask_int = mask.astype(np.uint8)
    num_instances = mask_int.shape[2]
    if num_instances == 0:
        raise ValueError("mask_rank: no instance masks to rank")

    # Builtin ``int`` replaces the ``np.int`` alias removed from recent NumPy.
    area = np.zeros([num_instances], dtype=int)
    img_copy = img.copy()
    for i in range(num_instances):
        mask_copy = mask_int[:, :, i].copy()
        plt.imshow(mask_copy)
        plt.show()
        area[i] = mask_copy.sum()
        print(area[i])

    _, index = max_area(area)

    # cv2.findContours returns (image, contours, hierarchy) in OpenCV 3 but
    # (contours, hierarchy) in OpenCV 4; element [-2] is the contour list in
    # both. RETR_LIST / CHAIN_APPROX_SIMPLE are the named forms of the
    # original literal arguments 1 and 2.
    contours = cv2.findContours(
        mask_int[:, :, index].copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    if len(contours) == 0:
        raise ValueError("mask_rank: selected mask contains no contour")

    # Keep the contour with the most points (first one wins on ties).
    # NOTE(review): point count is only a proxy for size under
    # CHAIN_APPROX_SIMPLE; cv2.contourArea may be the intended criterion.
    cnt = max(contours, key=len)

    hull = cv2.convexHull(cnt, returnPoints=True)
    rect = cv2.minAreaRect(hull)
    box = cv2.boxPoints(rect)
    # np.intp is the type the removed ``np.int0`` alias referred to.
    box = box.astype(np.intp)
    cv2.drawContours(img_copy, [box], 0, (0, 0, 255), 2)
    plt.imshow(img_copy)
    plt.show()
    print(box)
    return mask_int[:, :, index].copy(), box

In [6]:
def get_mask_pose(max_boxx):
    """Compute the centre and in-plane rotation of a four-corner box.

    The centre is the midpoint of corners 0 and 2. The rotation is the
    ``math.atan2`` angle (radians) of the edge corner1->corner2 when edge
    0-1 is strictly longer than edge 1-2, otherwise of the edge
    corner0->corner1.

    Args:
        max_boxx: Array of corner points; rows 0, 1 and 2 are read, each as
            an (x, y) pair supporting element-wise arithmetic.

    Returns:
        Dict with keys ``'x'``, ``'y'`` (centre) and ``'rz'`` (rotation).
    """
    corner_a, corner_b, corner_c = max_boxx[0], max_boxx[1], max_boxx[2]
    centre = (corner_a + corner_c) / 2

    # Original note: counterclockwise is the positive direction.
    first_edge_len = np.linalg.norm(corner_a - corner_b)
    second_edge_len = np.linalg.norm(corner_b - corner_c)
    if first_edge_len > second_edge_len:
        direction = corner_c - corner_b
    else:
        direction = corner_b - corner_a
    angle = math.atan2(direction[1], direction[0])

    return {'x': centre[0], 'y': centre[1], 'rz': angle}

In [7]:
# State shared between the subscriber callback and the polling loop.
img_index = 0
is_detect = 0


def img_index_callback(data):
    """Store the received image index and raise the detection flag.

    Args:
        data: Incoming message; its ``data`` attribute is stored in the
            module-level ``img_index``.

    Side effects:
        Sets the module-level ``is_detect`` to 1 after updating ``img_index``.
    """
    global img_index, is_detect

    img_index = data.data
    is_detect = 1


In [None]:
# Register this notebook as the ROS node 'img_segmentation'.
rospy.init_node('img_segmentation')

# Route every String message on '/img_index' to img_index_callback.
rospy.Subscriber(
    name='/img_index',
    data_class=String,
    callback=img_index_callback,
)

Unable to register with master node [http://localhost:11311]: master may not be running yet. Will keep trying.


In [None]:
# Values from camera calibration.
# NOTE(review): presumably principal point (cx, cy) and focal lengths (fx, fy)
# in pixels -- confirm. fx and fy are not used by the conversion below.
cx = 320.37
cy = 236.51
fx = 508.6722
fy = 511.8043

# Directory the requested '<index>.jpeg' images are read from.
IMAGE_DIR = '/home/zhekai/catkin_ws/src/thin_obj_bin_picking/image'
# Class labels passed to the visualiser.
CLASS_NAMES = ['BG', 'head', 'tail']
# Scale applied to pixel offsets from (cx, cy).
# NOTE(review): looks like a mm-per-pixel factor (102 over 226.1) followed by
# a mm -> m conversion -- confirm the units.
PIXEL_SCALE = 102 / 226.1
UNIT_SCALE = 0.001
# Idle delay between polls of the request flag, in seconds.
POLL_INTERVAL_S = 0.05

# Create the publisher once, up front: a publisher created immediately before
# publish() may not have any subscriber connected yet.
pose_pub = rospy.Publisher('/blister_pose', blister_pose, queue_size=10)

while not rospy.is_shutdown():

    if is_detect != 1:
        # Nothing requested; yield the CPU instead of spinning.
        time.sleep(POLL_INTERVAL_S)
        continue

    # Snapshot the request and clear the flag first, so a request that
    # arrives while this one is being processed is not discarded.
    requested_index = img_index
    is_detect = 0

    image_path = os.path.join(IMAGE_DIR, requested_index + '.jpeg')
    # skimage.io.imread replaces scipy.misc.imread, which no longer exists in
    # current SciPy releases.
    image = skimage.io.imread(image_path)

    # Run detection
    results = model.detect([image], verbose=1)

    # Visualize results
    r = results[0]
    visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                                CLASS_NAMES, r['scores'])

    if r['masks'].shape[-1] == 0:
        # No instance found: nothing to rank or publish for this request.
        print('No instance detected in {}'.format(image_path))
        continue

    # Grayscale copy of the same image, used only for drawing the fitted box.
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    mask_max, box_max = mask_rank(img, r['masks'])

    plt.imshow(mask_max)
    plt.show()

    pose = get_mask_pose(box_max)
    angle_deg = pose['rz'] * 180 / math.pi
    print(pose['x'], pose['y'])
    print(angle_deg)

    blister_pose_msg = blister_pose()
    # Transfer pixel position to camera frame coordinates.
    blister_pose_msg.x = (pose['x'] - cx) * PIXEL_SCALE * UNIT_SCALE
    blister_pose_msg.y = (pose['y'] - cy) * PIXEL_SCALE * UNIT_SCALE
    blister_pose_msg.angle = angle_deg
    pose_pub.publish(blister_pose_msg)
    print(blister_pose_msg.x, " ", blister_pose_msg.y)