In [None]:
#import Mask-RCNN
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize

# Import COCO config
from Mask_RCNN.samples.coco import coco

# --- Paths used by the rest of the notebook ---------------------------------
# Where Mask R-CNN writes its logs and any trained checkpoints.
MODEL_DIR = "logs"

# Pre-trained MS-COCO weights file (expected inside the local Mask_RCNN folder).
COCO_MODEL_PATH = "./Mask_RCNN/mask_rcnn_coco.h5"

In [None]:
class InferenceConfig(coco.CocoConfig):
    """COCO configuration specialised for one-image-at-a-time inference.

    The batch size is GPU_COUNT * IMAGES_PER_GPU, so both factors are pinned
    to 1 here instead of assigning BATCH_SIZE directly.
    """

    IMAGES_PER_GPU = 1
    GPU_COUNT = 1

# Instantiate the inference configuration and print every setting it holds.
config = InferenceConfig()
config.display()

# Build the Mask R-CNN network in inference mode; logs go to MODEL_DIR.
model = modellib.MaskRCNN(
    config=config,
    model_dir=MODEL_DIR,
    mode="inference",
)

# Populate the network with the MS-COCO pre-trained weights, matching
# layers by name.
model.load_weights(COCO_MODEL_PATH, by_name=True)
# COCO class names, indexed by the class ID the model reports in
# r['class_ids'] (index 0 is the background class 'BG'). 81 entries in total.
# The list order must not change: the detection loop looks labels up by index.
class_names = [
    'BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
    'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
    'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
    'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard',
    'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
    'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
    'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
    'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',  # was misspelled 'nouse'
    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
    'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
    'teddy bear', 'hair drier', 'toothbrush',
]


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                93
IMAGE_MIN_DIM                  800
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              none
IMAGE_SHAPE                    [1024 1024    3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE           

In [None]:
import cv2
import time
import numpy as np

# Live webcam detection: grab frames from camera 0, run Mask R-CNN on each,
# draw a box and class label per detection plus a smoothed FPS counter, and
# show the result in an OpenCV window until 'q' is pressed.
font = cv2.FONT_HERSHEY_SIMPLEX
WINDOW_NAME = 'detCam'

cam = cv2.VideoCapture(0)
timeStamp = time.time()
fpsFilt = 0  # exponentially smoothed frames-per-second estimate

try:
    if not cam.isOpened():
        raise RuntimeError("Could not open webcam (device index 0)")

    # Create and position the window once; repositioning on every frame
    # would snap it back to (0, 0) whenever the user drags it.
    cv2.namedWindow(WINDOW_NAME)
    cv2.moveWindow(WINDOW_NAME, 0, 0)

    while True:
        ok, frame = cam.read()
        if not ok or frame is None:
            # Frame grab failed (camera unplugged / stream ended): stop
            # cleanly instead of crashing on a None frame.
            break

        # OpenCV delivers BGR frames, but the COCO weights expect RGB input.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        r = model.detect([rgb_frame])[0]

        for roi, classID in zip(r['rois'], r['class_ids']):
            # Mask R-CNN boxes are (y1, x1, y2, x2); OpenCV points are (x, y).
            # Cast to plain ints because some OpenCV builds reject NumPy ints.
            y1, x1, y2, x2 = (int(v) for v in roi)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 1)
            cv2.putText(frame, class_names[classID], (x1, y1),
                        font, .75, (0, 0, 255), 2)

        # Low-pass filter the instantaneous FPS so the readout is stable.
        now = time.time()
        dt = now - timeStamp
        timeStamp = now
        if dt > 0:  # guard against a zero-length interval
            fpsFilt = .9 * fpsFilt + .1 * (1 / dt)
        cv2.putText(frame, str(round(fpsFilt, 1)) + ' fps ', (0, 30),
                    font, 1, (0, 0, 255), 2)

        cv2.imshow(WINDOW_NAME, frame)
        # Mask to the low byte: some platforms set high bits in the key code.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even after an exception.
    cam.release()
    cv2.destroyAllWindows()


# References
1. https://towardsdatascience.com/webcam-object-detection-with-mask-r-cnn-on-google-colab-b3b012053ed1
2. https://www.linkedin.com/pulse/ai-programming-face-detection-your-webcam-offer-sadey/
3. https://towardsdatascience.com/live-object-detection-26cd50cceffd
4. https://github.com/noxouille/rt-mrcnn
5. https://toarches.medium.com/image-video-and-real-time-webcam-object-detection-and-instance-segmentation-with-mask-r-cnn-37a4675dcb49