In [1]:
import os
import sys
import itertools
import math
import logging
import json
import re
import random
import cv2
from collections import OrderedDict
import numpy as np
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.lines as lines
from matplotlib.patches import Polygon

# Root directory of the project
ROOT_DIR = os.path.abspath("./Mask_RCNN/")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

Using TensorFlow backend.


In [2]:
import glob

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

In [3]:
def get_ax(rows=1, cols=1, size=8):
    """Return a Matplotlib Axes array to be used in
    all visualizations in the notebook. Provide a
    central point to control graph sizes.
    
    Change the default size attribute to control the size
    of rendered images
    """
    _, ax = plt.subplots(rows, cols, figsize=(size*cols, size*rows))
    return ax

In [4]:
class SupercategoryConfig(Config):
    """Configuration for training on the toy shapes dataset.
    Derives from the base Config class and overrides values specific
    to the toy shapes dataset.
    """
    # Give the configuration a recognizable name
    NAME = "supercategory"
    
    BACKBONE = "resnet50"

    # Train on 1 GPU and 8 images per GPU. We can put multiple images on each
    # GPU because the images are small. Batch size is 8 (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Number of classes (including background)
    NUM_CLASSES = 1 + 20  # background + 20

    # the large side, and that determines the image shape.
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512

    # Use smaller anchors because our image and objects are small
    RPN_ANCHOR_SCALES = (16, 32, 64, 128, 256)  # anchor side in pixels

    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 64

    # Use a small epoch since the data is simple
    STEPS_PER_EPOCH = 100

    # use small validation steps since the epoch is small
    VALIDATION_STEPS = 5
    
config = SupercategoryConfig()
config.display()


Configurations:
BACKBONE                       resnet50
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  512
IMAGE_META_SIZE                33
IMAGE_MIN_DIM                  512
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [512 512   3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE              

In [5]:
class InferenceConfig(SupercategoryConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=MODEL_DIR)

# Get path to saved weights
# Either set a specific path or find last trained weights
# model_path = os.path.join(ROOT_DIR, ".h5 file name here")
model_path = model.find_last()

# Load trained weights
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)








Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
box_ind is deprecated, use box_indices instead


Instructions for updating:
Use `tf.cast` instead.
Loading weights from  D:\Program\98semester\VRDL\HW4\Mask_RCNN\logs\supercategory20191218T1406\mask_rcnn_supercategory_0020.h5






Re-starting from epoch 20


In [6]:
cocoGt = COCO("test.json")

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [7]:
from itertools import groupby
from pycocotools import mask as maskutil

def binary_mask_to_rle(binary_mask):
    rle = {'counts': [], 'size': list(binary_mask.shape)}
    counts = rle.get('counts')
    for i, (value, elements) in enumerate(groupby(binary_mask.ravel(order='F'))):
        if i == 0 and value == 1:
            counts.append(0)
        counts.append(len(list(elements)))
    compressed_rle = maskutil.frPyObjects(rle, rle.get('size')[0], rle.get('size')[1])
    compressed_rle['counts'] = str(compressed_rle['counts'], encoding='utf-8')
    return compressed_rle

In [8]:
cocoGt.cats

{1: {'supercategory': 'aeroplane', 'name': 'aeroplane', 'id': 1},
 2: {'supercategory': 'bicycle', 'name': 'bicycle', 'id': 2},
 3: {'supercategory': 'bird', 'name': 'bird', 'id': 3},
 4: {'supercategory': 'boat', 'name': 'boat', 'id': 4},
 5: {'supercategory': 'bottle', 'name': 'bottle', 'id': 5},
 6: {'supercategory': 'bus', 'name': 'bus', 'id': 6},
 7: {'supercategory': 'car', 'name': 'car', 'id': 7},
 8: {'supercategory': 'cat', 'name': 'cat', 'id': 8},
 9: {'supercategory': 'chair', 'name': 'chair', 'id': 9},
 10: {'supercategory': 'cow', 'name': 'cow', 'id': 10},
 11: {'supercategory': 'diningtable', 'name': 'diningtable', 'id': 11},
 12: {'supercategory': 'dog', 'name': 'dog', 'id': 12},
 13: {'supercategory': 'horse', 'name': 'horse', 'id': 13},
 14: {'supercategory': 'motorbike', 'name': 'motorbike', 'id': 14},
 15: {'supercategory': 'person', 'name': 'person', 'id': 15},
 16: {'supercategory': 'pottedplant', 'name': 'pottedplant', 'id': 16},
 17: {'supercategory': 'sheep', 'n

In [9]:
img_keys = list(cocoGt.imgs.keys())
print(img_keys)

[914, 1005, 1417, 940, 1310, 413, 967, 673, 1214, 305, 978, 395, 180, 125, 447, 1181, 969, 573, 950, 1278, 804, 17, 633, 176, 1264, 693, 34, 874, 846, 92, 1443, 468, 536, 270, 411, 332, 378, 1309, 94, 1405, 792, 1171, 101, 886, 223, 733, 35, 1400, 682, 1249, 834, 707, 1399, 593, 89, 160, 1114, 561, 714, 771, 368, 1220, 1324, 58, 1159, 1273, 524, 458, 219, 815, 288, 1070, 482, 867, 1048, 1412, 728, 748, 283, 947, 1206, 1315, 1112, 91, 152, 1042, 401, 769, 698, 539, 983, 1175, 1425, 1378, 904, 875, 691, 1243, 629, 1189]


In [10]:
imgIds = img_keys[0] # Use the key above to retrieve information of the image
img_info = cocoGt.loadImgs(ids=imgIds)
print(img_info)

[{'file_name': '2009_004886.jpg', 'id': 914, 'height': 333, 'width': 500}]


In [11]:
TEST_PATH = "test_images/"
test_img = cv2.imread(TEST_PATH + img_info[0]['file_name'])[:,:,::-1]
plt.imshow(test_img)

<matplotlib.image.AxesImage at 0x19aab22ef60>

In [12]:
class_ids = sorted(cocoGt.getCatIds())
class_names = []
# Add classes
class_names.append('BG')
for i in class_ids:
    class_names.append(cocoGt.loadCats(i)[0]["name"])
print(class_names)

['BG', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']


In [13]:
results = model.detect([test_img], verbose=1)

r = results[0]
print(r)
masks = r['masks']

Processing 1 images
image                    shape: (333, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 512, 512, 3)      min: -123.70000  max:  150.10000  float64
image_metas              shape: (1, 33)               min:    0.00000  max:  512.00000  float64
anchors                  shape: (1, 65472, 4)         min:   -0.35425  max:    1.22900  float32
{'rois': array([[ 46, 211, 281, 324],
       [ 77, 141, 221, 282],
       [139, 432, 306, 483],
       [139, 223, 240, 273],
       [ 89, 284, 195, 320],
       [  0,  32, 205, 373]]), 'class_ids': array([ 3,  3, 16,  3,  3,  3]), 'scores': array([0.9998529 , 0.9921715 , 0.9230392 , 0.8902262 , 0.78168714,
       0.71022284], dtype=float32), 'masks': array([[[False, False, False, False, False, False],
        [False, False, False, False, False, False],
        [False, False, False, False, False, False],
        ...,
        [False, False, False, False, False, False],
        [False, False, F

In [14]:
plt.subplots(nrows=2, ncols=3, figsize=(18, 12))
for i in range(len(r['class_ids'])):
    mask = masks[:,:,i]
    cate = r['class_ids'][i]
    plt.subplot(2, 3, i+1)
    plt.title("Instance {}, category={}".format(i+1, class_names[cate]))
    new_mask = mask * 255
    plt.imshow(new_mask)
    cv2.imwrite(str(i) + '_output.jpg', new_mask)

In [15]:
coco_dt = []
k = 0
for imgid in img_keys:
    print(k,":",imgid)
    k += 1
    image = cv2.imread(TEST_PATH + cocoGt.loadImgs(ids=imgid)[0]['file_name'])[:,:,::-1] # load image
    results = model.detect([image], verbose=1)
    masks, categories, scores = results[0]['masks'], results[0]['class_ids'], results[0]['scores']  
    print(categories)
    n_instances = len(scores)    
    if len(categories) > 0: # If any objects are detected in this image
        for i in range(n_instances): # Loop all instances
            # save information of the instance in a dictionary then append on coco_dt list
            pred = {}
            pred['image_id'] = imgid # this imgid must be same as the key of test.json
            pred['category_id'] = int(categories[i])
            pred['segmentation'] = binary_mask_to_rle(masks[:,:,i]) # save binary mask to RLE, e.g. 512x512 -> rle
            pred['score'] = float(scores[i])
            coco_dt.append(pred)

0 : 914
Processing 1 images
image                    shape: (333, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 512, 512, 3)      min: -123.70000  max:  150.10000  float64
image_metas              shape: (1, 33)               min:    0.00000  max:  512.00000  float64
anchors                  shape: (1, 65472, 4)         min:   -0.35425  max:    1.22900  float32
[ 3  3 16  3  3  3]
1 : 1005
Processing 1 images
image                    shape: (375, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 512, 512, 3)      min: -123.70000  max:  151.10000  float64
image_metas              shape: (1, 33)               min:    0.00000  max:  512.00000  float64
anchors                  shape: (1, 65472, 4)         min:   -0.35425  max:    1.22900  float32
[18 18  9]
2 : 1417
Processing 1 images
image                    shape: (262, 350, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            

[15 15 14 15 15]
20 : 804
Processing 1 images
image                    shape: (375, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 512, 512, 3)      min: -123.70000  max:  151.10000  float64
image_metas              shape: (1, 33)               min:    0.00000  max:  512.00000  float64
anchors                  shape: (1, 65472, 4)         min:   -0.35425  max:    1.22900  float32
[19 19 19]
21 : 17
Processing 1 images
image                    shape: (230, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 512, 512, 3)      min: -123.70000  max:  151.10000  float64
image_metas              shape: (1, 33)               min:    0.00000  max:  512.00000  float64
anchors                  shape: (1, 65472, 4)         min:   -0.35425  max:    1.22900  float32
[19 19  6  6]
22 : 633
Processing 1 images
image                    shape: (375, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_images 

[18  9  9 16  9  9]
40 : 792
Processing 1 images
image                    shape: (327, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 512, 512, 3)      min: -123.70000  max:  150.10000  float64
image_metas              shape: (1, 33)               min:    0.00000  max:  512.00000  float64
anchors                  shape: (1, 65472, 4)         min:   -0.35425  max:    1.22900  float32
[4]
41 : 1171
Processing 1 images
image                    shape: (375, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 512, 512, 3)      min: -123.70000  max:  151.10000  float64
image_metas              shape: (1, 33)               min:    0.00000  max:  512.00000  float64
anchors                  shape: (1, 65472, 4)         min:   -0.35425  max:    1.22900  float32
[]
42 : 101
Processing 1 images
image                    shape: (375, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            sh

[9 9 9]
60 : 368
Processing 1 images
image                    shape: (333, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 512, 512, 3)      min: -123.70000  max:  151.10000  float64
image_metas              shape: (1, 33)               min:    0.00000  max:  512.00000  float64
anchors                  shape: (1, 65472, 4)         min:   -0.35425  max:    1.22900  float32
[3 3 3 3 3 3 3 3 3 3 3]
61 : 1220
Processing 1 images
image                    shape: (347, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 512, 512, 3)      min: -123.70000  max:  151.10000  float64
image_metas              shape: (1, 33)               min:    0.00000  max:  512.00000  float64
anchors                  shape: (1, 65472, 4)         min:   -0.35425  max:    1.22900  float32
[7 7 7 7]
62 : 1324
Processing 1 images
image                    shape: (376, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_imag

[8 8]
80 : 1206
Processing 1 images
image                    shape: (375, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 512, 512, 3)      min: -123.70000  max:  151.10000  float64
image_metas              shape: (1, 33)               min:    0.00000  max:  512.00000  float64
anchors                  shape: (1, 65472, 4)         min:   -0.35425  max:    1.22900  float32
[13]
81 : 1315
Processing 1 images
image                    shape: (378, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 512, 512, 3)      min: -123.70000  max:  151.10000  float64
image_metas              shape: (1, 33)               min:    0.00000  max:  512.00000  float64
anchors                  shape: (1, 65472, 4)         min:   -0.35425  max:    1.22900  float32
[10 10]
82 : 1112
Processing 1 images
image                    shape: (375, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (

[13 15]


In [17]:
with open("0856066.json", "w") as f:
    json.dump(coco_dt, f)