# Imports

In [None]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# Download the object detection code from https://github.com/tensorflow/models/tree/master/research/object_detection
# and place it under the paths that are appended to sys.path below.
# The sys.path entries are needed so that `object_detection` and its `utils` modules can be imported from this notebook.
sys.path.append('/tensorflow/models/research/object_detection')
sys.path.append('/tensorflow/models/research/')
from object_detection.utils import ops as utils_ops

# Compare the numeric (major, minor) components. A plain string comparison
# orders versions lexicographically, so e.g. '1.10.0' < '1.4.0' is True and a
# newer TensorFlow would be rejected.
_tf_major_minor = tuple(int(part) for part in tf.__version__.split('.')[:2])
if _tf_major_minor < (1, 4):
    raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')

# This is needed to display the images.
%matplotlib inline

## Object detection imports
## Here are the imports from the object detection module.
from utils import label_map_util
from utils import visualization_utils as vis_util

import glob
import scipy.ndimage
import skimage
from skimage.transform import rescale, resize
import math
from IPython.display import clear_output

from numpy import linalg
from tensorflow.core.protobuf import config_pb2

# Model preparation 

In [2]:
# Name of the model archive to download. Other names that were previously
# selected here: 'mask_rcnn_inception_v2_coco_2018_01_28',
# 'ssd_mobilenet_v1_coco_2017_11_17'.
MODEL_NAME = 'mask_rcnn_inception_resnet_v2_atrous_coco_2018_01_28'

# Remote location of the archive and the local file name it is saved under.
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
MODEL_FILE = MODEL_NAME + '.tar.gz'

# Frozen detection graph inside the extracted archive; this is the actual
# model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

# Label map holding the strings used to label each detected box, and the
# maximum number of classes read from it.
PATH_TO_LABELS = os.path.join(
    '/notebooks/tensorflow/models/research/object_detection/',
    'data',
    'mscoco_label_map.pbtxt'
)
NUM_CLASSES = 90

## Download Model
# Fetch the model archive into the working directory. urlretrieve replaces the
# deprecated URLopener class.
# NOTE(review): the archive is downloaded again on every run of this cell.
urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)

# Extract only the frozen inference graph. The context manager closes the
# archive even if extraction raises.
with tarfile.open(MODEL_FILE) as tar_file:
    for member in tar_file.getmembers():
        file_name = os.path.basename(member.name)
        if 'frozen_inference_graph.pb' in file_name:
            tar_file.extract(member, os.getcwd())

## Load a (frozen) Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    # Read the serialized GraphDef bytes, then release the file handle before
    # parsing and importing.
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()

    od_graph_def = tf.GraphDef()
    od_graph_def.ParseFromString(serialized_graph)

    # name='' imports the nodes without a name prefix, so tensors can later be
    # looked up by names such as 'image_tensor:0'.
    tf.import_graph_def(od_graph_def, name='')

## Loading label map
## Label maps map class indices to category names, so that when the detection network predicts `5`, we know that this corresponds to `airplane`.
## Internal utility functions are used here, but anything that returns a dictionary mapping integers to appropriate string labels would work.
# Parse the label map file referenced by PATH_TO_LABELS.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
# Turn it into a list of categories, limited to NUM_CLASSES entries and using display names.
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
# Index used by the visualization code to label each detection.
category_index = label_map_util.create_category_index(categories)

## Helper code

In [3]:
def load_image_into_numpy_array(image):
    """Convert a PIL-style image into a ``(height, width, 3)`` uint8 array.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and
            ``getdata()``. If it reports a ``mode`` other than ``'RGB'`` it
            is first converted with ``image.convert('RGB')``.

    Returns:
        ``numpy.ndarray`` of shape ``(height, width, 3)`` and dtype ``uint8``.
    """
    # Grayscale, palette or RGBA images do not carry exactly three values per
    # pixel, so the reshape below would raise for them without this conversion.
    if getattr(image, 'mode', 'RGB') != 'RGB':
        image = image.convert('RGB')
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8)

# Detection
def run_inference_for_batch(images, sess):
    """Run the detection graph on a whole batch with one ``sess.run`` call.

    Tensors are looked up in the current default graph, so ``sess`` must be a
    session on that graph.

    Args:
        images: Array fed to ``image_tensor:0``. ``images.shape[0]`` is used
            as the batch size and ``images.shape[1]``/``images.shape[2]`` as
            the target size for the reframed masks.
        sess: Session used to evaluate the tensors.

    Returns:
        dict holding whichever of ``'num_detections'``, ``'detection_boxes'``,
        ``'detection_scores'``, ``'detection_classes'`` and
        ``'detection_masks'`` exist in the graph. ``'num_detections'`` is
        cast to int and ``'detection_classes'`` to uint8. When present,
        ``'detection_masks'`` is a list with one uint8 mask array per image.
    """
    # this is faster than doing for single image
    # Get handles to input and output tensors
    graph = tf.get_default_graph()
    all_tensor_names = {output.name for op in graph.get_operations() for output in op.outputs}
    tensor_dict = {}
    for key in [
        'num_detections', 'detection_boxes', 'detection_scores',
        'detection_classes', 'detection_masks'
    ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
            tensor_dict[key] = graph.get_tensor_by_name(tensor_name)
    if 'detection_masks' in tensor_dict:
        # NOTE(review): these slice/reframe/cast ops are added to the graph on
        # every call, so the graph grows with each batch - consider building
        # them once per input shape.
        masks = []
        for i in range(images.shape[0]):
            detection_boxes = tensor_dict['detection_boxes'][i]
            detection_masks = tensor_dict['detection_masks'][i]
            # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
            real_num_detection = tf.cast(tensor_dict['num_detections'][i], tf.int32)
            detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
            detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, images.shape[1], images.shape[2])
            # Binarize the masks at 0.5.
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            masks.append(detection_masks_reframed)
        tensor_dict['detection_masks'] = masks

    image_tensor = graph.get_tensor_by_name('image_tensor:0')

    # Run inference
    output_dict = sess.run(tensor_dict,
                           feed_dict={image_tensor: images},
                           options=config_pb2.RunOptions(report_tensor_allocations_upon_oom=True))

    # Convert count and class outputs to integer types. The builtin `int` is
    # used because the `np.int` alias no longer exists in current NumPy.
    output_dict['num_detections'] = output_dict['num_detections'].astype(int)
    output_dict['detection_classes'] = output_dict[
        'detection_classes'].astype(np.uint8)
    return output_dict

# Detection
def run_inference_for_single_image(image, graph):
    """Run the detection graph on one image and return un-batched results.

    Args:
        image: Image array. ``image.shape[0]``/``image.shape[1]`` are passed
            as the target size for the reframed masks, and a leading batch
            axis is added before feeding it to ``image_tensor:0``.
        graph: ``tf.Graph`` containing the detection model. A session is
            opened on this graph for the duration of the call.

    Returns:
        dict holding whichever of ``'num_detections'``, ``'detection_boxes'``,
        ``'detection_scores'``, ``'detection_classes'`` and
        ``'detection_masks'`` exist in the graph, each with the batch axis
        removed. ``'num_detections'`` is an ``int`` and
        ``'detection_classes'`` is uint8.
    """
    # Use the graph that was passed in and a session owned by this call,
    # instead of relying on a global `sess` and the ambient default graph.
    with graph.as_default():
        with tf.Session() as sess:
            # Get handles to input and output tensors
            ops = graph.get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = graph.get_tensor_by_name(tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0], image.shape[1])
                # Binarize the masks at 0.5.
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(detection_masks_reframed, 0)
            image_tensor = graph.get_tensor_by_name('image_tensor:0')

            # Run inference
            output_dict = sess.run(tensor_dict,
                                   feed_dict={image_tensor: np.expand_dims(image, 0)})

    # Strip the batch axis and convert count/class outputs to integer types.
    output_dict['num_detections'] = int(output_dict['num_detections'][0])
    output_dict['detection_classes'] = output_dict[
        'detection_classes'][0].astype(np.uint8)
    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
    output_dict['detection_scores'] = output_dict['detection_scores'][0]
    if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict

def setupFolder(clipname):
    """Create the output folders for one clip if they do not exist yet.

    A sub-folder named ``clipname`` is created under ``ANNOTATIONS_DIR`` and
    under ``DICTIONARY_DIR``.

    Args:
        clipname: Folder name appended to each of the two base directories.
    """
    for base_dir in (ANNOTATIONS_DIR, DICTIONARY_DIR):
        clip_dir = os.path.join(base_dir, clipname)
        if os.path.exists(clip_dir):
            continue
        os.makedirs(clip_dir)

In [6]:
# Input/output configuration for the frame images extracted from the videos.
root_folder ='/notebooks/'  # NOTE(review): not referenced by the visible cells - confirm it is still needed
movies_name = ['01Brooklyn.mp4']  # one sub-folder per entry is expected under JPEG_DIR
JPEG_DIR = 'FrameImages' # frame images are read from <JPEG_DIR>/<movie name>/*.jpg
ANNOTATIONS_DIR = 'Annotations' # save the segmented and annotated images in this folder
DICTIONARY_DIR = 'DictOutput' # save the segmented numerical results in this folder
resize_width = 500 # frames are first downsized to this width (in pixels) to speed up inference.
# human_threshold = 0.7
# Make sure the per-movie output folders exist before processing starts.
for movie_name in movies_name:
    setupFolder(movie_name)

In [None]:
# Run batched detection over every frame of every movie. For each frame an
# annotated JPEG is written under ANNOTATIONS_DIR and the raw detections are
# written as a compressed .npz under DICTIONARY_DIR.
with detection_graph.as_default():
    for movie_id, movie_name in enumerate(movies_name):
        batch_size = 6 # if tensorflow gives out error message, use a smaller batch size.

        ## determine start index
        # Frames that already have an annotated JPEG. Only relevant if resuming
        # from the last finished frame is re-enabled; processing currently
        # always starts from the first frame.
        finished_file_list = sorted(glob.glob(os.path.join(ANNOTATIONS_DIR, movie_name,'*.jpg')))
        start_index = 0

        ## determine end index
        file_list = sorted(glob.glob(os.path.join(JPEG_DIR, movie_name,'*.jpg')))
        total_num = len(file_list)
        if total_num == 0:
            # Without this guard file_list[-1] below raises IndexError.
            print('Processing movie ' + movie_name + ' :no frame images found, skipping')
            continue
        end_index = total_num
        num_batch = int(np.ceil((end_index - start_index)/float(batch_size)))

        ## get image dimension for resizing
        # Only the size of one frame is needed; the target height keeps the
        # aspect ratio for the configured resize_width.
        with Image.open(file_list[-1]) as image:
            image_width, image_height = image.size
        resize_height = float(image_height)/float(image_width)*float(resize_width)
        resize_shape = (int(resize_height), resize_width)

        for batch_id in range(num_batch):
            with tf.Session() as sess:
                batch_start = int(start_index + batch_id*batch_size)
                # The last batch may hold fewer than batch_size frames.
                batch_end = min(int(start_index + (batch_id+1)*batch_size), end_index)

                images = []
                images_name = []
                for image_path in file_list[batch_start:batch_end]:
                    # The five characters before the extension are used as the frame name.
                    image_name = image_path[-9:-4]
                    with Image.open(image_path) as image:
                        image_np = load_image_into_numpy_array(image)

                    ## resizing images
                    image_resized = resize(image_np, resize_shape, anti_aliasing=True)
                    # resize() yields floats (expected in [0, 1] for uint8
                    # input). Scaling by 256 made 1.0 overflow uint8 and wrap
                    # to 0, so scale by 255, round and clip instead.
                    image_resized = np.uint8(np.clip(np.rint(255 * image_resized), 0, 255))
                    images.append(image_resized)
                    images_name.append(image_name)
                images = np.stack(images, axis=0)

                # Actual detection.
                output_dict = run_inference_for_batch(images,sess)
                if np.remainder(batch_id,20)== 0:
                    clear_output()
                print('Processing movie '+ movie_name + ' :finished runing batch ' + str(batch_id) + '/' + str(num_batch))

                # Masks are only returned when the loaded graph provides them.
                batch_masks = output_dict.get('detection_masks')

                for image_out_id in range(output_dict['detection_boxes'].shape[0]):
                    image_out = images[image_out_id]
                    image_name = images_name[image_out_id]
                    boxes = output_dict['detection_boxes'][image_out_id]
                    classes = output_dict['detection_classes'][image_out_id]
                    scores = output_dict['detection_scores'][image_out_id]
                    masks = batch_masks[image_out_id] if batch_masks is not None else None

                    # Visualization of the results of a detection.
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_out,
                        boxes,
                        classes,
                        scores,
                        category_index,
                        instance_masks=masks,
                        use_normalized_coordinates=True,
                        line_thickness=3)

                    # Write a temporary PNG, re-encode it as an RGB JPEG, then
                    # delete the PNG. The PNG handle is closed before removal.
                    png_path = os.path.join(ANNOTATIONS_DIR, movie_name, image_name + '.png')
                    jpg_path = os.path.join(ANNOTATIONS_DIR, movie_name, image_name + '.jpg')
                    plt.imsave(png_path, image_out)
                    with Image.open(png_path) as im:
                        im.convert('RGB').save(jpg_path)
                    os.remove(png_path)

                    # NOTE(review): every value is stored wrapped in a 1-tuple,
                    # so each saved array has an extra leading axis of length 1.
                    # This reproduces the trailing commas of the earlier
                    # assignments (probably accidental) and is kept so existing
                    # .npz files and their readers stay compatible - confirm
                    # with downstream code before removing the wrapper.
                    save_dict = {
                        'detection_boxes': (boxes,),
                        'detection_classes': (classes,),
                        'detection_scores': (scores,),
                    }
                    if masks is not None:
                        save_dict['detection_masks'] = (masks,)
                    np.savez_compressed(os.path.join(DICTIONARY_DIR, movie_name, image_name), **save_dict)

Processing movie 73MyDaughteraskedmetoadopther_short :finished runing batch 0/183
Processing movie 73MyDaughteraskedmetoadopther_short :finished runing batch 1/183
Processing movie 73MyDaughteraskedmetoadopther_short :finished runing batch 2/183
Processing movie 73MyDaughteraskedmetoadopther_short :finished runing batch 3/183
Processing movie 73MyDaughteraskedmetoadopther_short :finished runing batch 4/183
Processing movie 73MyDaughteraskedmetoadopther_short :finished runing batch 5/183
Processing movie 73MyDaughteraskedmetoadopther_short :finished runing batch 6/183
Processing movie 73MyDaughteraskedmetoadopther_short :finished runing batch 7/183
Processing movie 73MyDaughteraskedmetoadopther_short :finished runing batch 8/183
Processing movie 73MyDaughteraskedmetoadopther_short :finished runing batch 9/183
Processing movie 73MyDaughteraskedmetoadopther_short :finished runing batch 10/183
Processing movie 73MyDaughteraskedmetoadopther_short :finished runing batch 11/183
Processing mov