# Mask R-CNN demo

This notebook illustrates one possible way of using `maskrcnn_benchmark` for computing predictions on images from an arbitrary URL.

Let's start with a few standard imports

In [None]:
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab

import requests
from io import BytesIO
from PIL import Image
import numpy as np
import os, sys
import cv2
# Make the parent of the current working directory importable.
sys.path.append(os.path.dirname(os.getcwd()))

In [None]:
# this makes our figures bigger: sets matplotlib's default figure size for every plot below
pylab.rcParams['figure.figsize'] = 20, 12
import torch
# show the version of the imported torch package
print(torch.__version__)

Those are the relevant imports for the detection model

In [None]:
from maskrcnn_benchmark.config import cfg
from predictor import COCODemo

We provide a helper class `COCODemo`, which loads a model from the config file, and performs pre-processing, model prediction and post-processing for us.

We can configure several model options by overriding the config options.
Here, we make the model run on the CPU.

In [None]:
# Model configuration file; the path is relative, so it depends on the working directory.
config_file = "../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml"

# update the config options with the config file
cfg.merge_from_file(config_file)
# manually override some options: here, select the CPU as the device
cfg.merge_from_list(["MODEL.DEVICE", "cpu"])

Now we create the `COCODemo` object. It contains a few extra options for convenience, such as the confidence threshold for detections to be shown.

In [5]:
# Build the demo helper from the merged config.
coco_demo = COCODemo(
    cfg,
    min_image_size=800,  # presumably the size images are resized to before inference -- TODO confirm
    confidence_threshold=0.6,  # confidence threshold for detections to be shown
)

Let's define a few helper functions for loading images from a URL

In [6]:
def load(url, timeout=30):
    """
    Download an image from a URL and return it as an OpenCV-style array.

    Args:
        url: address of the image to download.
        timeout: number of seconds to wait for the server before giving up.

    Returns:
        A numpy array holding the decoded image with 3 channels in BGR
        order (the PIL image is converted to RGB, then the channels are
        reversed).

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    # fail loudly on 4xx/5xx instead of handing an error page to PIL
    response.raise_for_status()
    pil_image = Image.open(BytesIO(response.content)).convert("RGB")
    # convert to BGR format
    image = np.array(pil_image)[:, :, [2, 1, 0]]
    return image

def imshow(img):
    """
    Show an image with matplotlib, without axes.

    The channels are reordered as (2, 1, 0) before plotting, which turns a
    BGR image (as produced by `load` or OpenCV) into the RGB order that
    matplotlib displays.
    """
    channel_order = [2, 1, 0]
    reordered = img[:, :, channel_order]
    plt.imshow(reordered)
    plt.axis("off")

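Let's now load an image. The original COCO example (its reference is in the comment) is kept as a commented-out alternative; the cell below reads a local image with OpenCV instead.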

In [7]:
# Original COCO example, from http://cocodataset.org/#explore?id=345434
# image = load("http://farm3.staticflickr.com/2469/3915380994_2e611b1779_z.jpg")

# Local alternative: read one training image with OpenCV (loaded in BGR order).
# NOTE(review): this is an absolute, machine-specific path -- adjust it to your setup.
path = "/home/amine/Documents/3A MVA/Semestre 1/Object Recognition and Computer Vision/HW3/bird_dataset/train_images/021.Eastern_Towhee"
image = cv2.imread(os.path.join(path, "Eastern_Towhee_0117_22741.jpg"))
# cv2.imread returns None instead of raising when the file cannot be read,
# so fail here with a clear message rather than deep inside the model.
if image is None:
    raise FileNotFoundError(
        "Could not read 'Eastern_Towhee_0117_22741.jpg' in " + path
    )

### Computing the predictions

We provide a `run_on_opencv_image` function, which takes an image as it was loaded by OpenCV (in `BGR` format) and computes the predictions on it, returning a copy of the image with the predictions overlaid.

In [15]:
# compute predictions
coco_demo = COCODemo(
    cfg,
    min_image_size=800,
    confidence_threshold=0.7,
)
# predictions = coco_demo.run_on_opencv_image(image)
# imshow(predictions)
# plt.show()
cropImage(image, coco_demo)

tensor(15)


TypeError: eq() received an invalid combination of arguments - got (str), but expected one of:
 * (Tensor other)
      didn't match because some of the arguments have invalid types: ([31;1mstr[0m)
 * (Number other)
      didn't match because some of the arguments have invalid types: ([31;1mstr[0m)


In [14]:
def cropImage(image, cocomodel, show=True, min_size=50):
    """
    Crop the first sufficiently large detected object out of an image.

    The model's top predictions are computed, the contours of every predicted
    mask are drawn on a copy of the image, and the bounding rectangle of the
    first contour (of the last processed mask) that is larger than
    `min_size` in both dimensions is cropped from that annotated copy.

    Args:
        image: image array as loaded by OpenCV, or None (e.g. a failed
            `cv2.imread`).
        cocomodel: object providing `compute_prediction`,
            `select_top_predictions` and `compute_colors_for_labels`
            (a `COCODemo` instance in this notebook).
        show: when True (default, the historical behaviour) the crop is
            displayed in an OpenCV window and the call blocks until a key is
            pressed. Pass False for batch processing or headless runs.
        min_size: a contour is kept only if both the width and the height of
            its bounding rectangle are strictly greater than this value.

    Returns:
        The cropped region as an array, or None when `image` is None, when
        there is no predicted mask, or when no contour is large enough.
    """
    # cv2.imread signals an unreadable file by returning None
    if image is None:
        return None

    predictions = cocomodel.compute_prediction(image)
    top_predictions = cocomodel.select_top_predictions(predictions)
    result = image.copy()
    masks = top_predictions.get_field("mask").numpy()
    labels = top_predictions.get_field("labels")
    colors = cocomodel.compute_colors_for_labels(labels).tolist()

    contours = None
    for mask, color in zip(masks, colors):
        thresh = mask[0, :, :, None]
        # findContours returns (image, contours, hierarchy) on OpenCV 3 but
        # (contours, hierarchy) on OpenCV 2 and 4; [-2] picks the contours
        # in every case.
        contours = cv2.findContours(
            thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
        )[-2]
        result = cv2.drawContours(result, contours, -1, color, 3)

    # no mask was predicted at all
    if contours is None:
        return None

    # NOTE(review): only the contours of the last mask are considered, and the
    # crop is taken from the copy with contour lines drawn on it -- confirm
    # that both are intended.
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        if w > min_size and h > min_size:
            new_img = result[y:y + h, x:x + w]
            if show:
                cv2.imshow('Mask R-CNN example', new_img)
                cv2.waitKey(0)
                cv2.destroyAllWindows()
            return new_img
    return None
    
#             cv2.imshow('image', new_img)
#             cv2.waitKey(0)
#             cv2.destroyAllWindows()
# from http://cocodataset.org/#explore?id=345434
# path = "/home/amine/Documents/3A MVA/Semestre 1/Object Recognition and Computer Vision/HW3/bird_dataset/train_images/021.Eastern_Towhee"
# # image = load("http://farm3.staticflickr.com/2469/3915380994_2e611b1779_z.jpg")
# image = cv2.imread(path+"/Eastern_Towhee_0117_22741.jpg")
# # imshow(image)
# # plt.show()
# cropImage(image, coco_demo)

In [None]:
# NOTE: os is already imported in the first cell; this re-import is redundant but harmless.
import os
# Root directory (relative path) that is walked recursively for images to crop.
rootDir = "../../../bird_dataset/"

def list_files(dir):
    """
    Collect the paths of all files located under `dir`, recursively.

    Each returned path is the walked directory joined with the file name, in
    the order produced by `os.walk`. Directories themselves are not listed.
    """
    return [
        os.path.join(parent, filename)
        for parent, _subdirs, filenames in os.walk(dir)
        for filename in filenames
    ]

directories = list_files(rootDir)

# Generating cropped images for each image in directories
# NOTE(review): cropImage displays each crop in an OpenCV window and waits for
# a key press, so this loop blocks on every image that yields a crop.
for i, p in enumerate(directories):
    # progress, in percent, every 100 files
    if i % 100 == 0:
        print(i*100/len(directories))
    # skip crops written by a previous run so that re-running is idempotent
    if p.endswith('_cropped.jpg'):
        continue
    image = cv2.imread(p)
    # cv2.imread returns None for files it cannot decode (non-image files)
    if image is None:
        continue
    new_img = cropImage(image, coco_demo)
    # `new_img != None` on a numpy array is evaluated element-wise and makes
    # the `if` raise ValueError; an identity test is what is meant here.
    if new_img is not None:
        # splitext handles extensions of any length (e.g. ".jpeg")
        cv2.imwrite(os.path.splitext(p)[0] + '_cropped.jpg', new_img)
#     print(p[:len(p)-4]+'Z.jpg')
#     imshow(image)
#     plt.show()
#     predictions = coco_demo.compute_prediction(image)
#     top_predictions = coco_demo.select_top_predictions(predictions)
#     result = image.copy()
#     # if self.show_mask_heatmaps:
#     #     return self.create_mask_montage(result, top_predictions)
#     # result = self.overlay_boxes(result, top_predictions)
#     if coco_demo.cfg.MODEL.MASK_ON:
#         image = result
#         predictions = top_predictions
#         masks = predictions.get_field("mask").numpy()
#         labels = predictions.get_field("labels")

#         colors = coco_demo.compute_colors_for_labels(labels).tolist()

#         for mask, color in zip(masks, colors):
#             thresh = mask[0, :, :, None]
#             _, contours, hierarchy = cv2.findContours(
#                 thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
#             )
#             # image = cv2.drawContours(image, contours, -1, color, 3)

#     composite = image
#     plt.imshow(composite)
#     plt.show()
    
#     cv2.imwrite(p[:len(p)-4]+'Z.jpg',composite)
#     print(p[:len(p)-4]+'Z.jpg')
    # result = self.overlay_class_names(result, top_predictions)