In [2]:
# this script was first developed by Zhimin Chen and later maintained by Yitian Ma

import matplotlib
%matplotlib tk

import numpy as np
import os
import sys
import math
import glob
from matplotlib import pyplot as plt
import skimage.transform
import skimage.io
import skimage.filters
import skimage.util
import glob
import scipy.ndimage
import math
from skimage import color
from numpy import linalg
from PIL import Image
from IPython.display import clear_output
from matplotlib.widgets import Button

# FIXME: set movie_name to the movie being processed
movie_name = '99StrangerOnBus'
JPEG_DIR = 'FrameImages'
ANNOTATIONS_DIR = 'Annotations'
DICTIONARY_DIR = 'DictOutput'
BLURRED_DIR = 'BluredImages'
MOVIEDIR = 'MovieOutput'
NEWMASKDIR = 'NewMask'
CONTEXT_DIR = 'ContextOccluded'

# Create every output directory up front. exist_ok=True makes the cell safe to
# re-run and avoids the race between checking for a directory and creating it.
for required_dir in (
    os.path.join(BLURRED_DIR, movie_name),
    MOVIEDIR,
    os.path.join(NEWMASKDIR, movie_name),
    os.path.join(BLURRED_DIR, movie_name, CONTEXT_DIR),
):
    os.makedirs(required_dir, exist_ok=True)

# Kept for backward compatibility: the original cell left `outdir` pointing at
# the context-occluded output directory.
outdir = os.path.join(BLURRED_DIR, movie_name, CONTEXT_DIR)

In [14]:
# Helper functions

def Remove(duplicate):
    """Return the members of ``duplicate`` with repeated members dropped.

    The first occurrence of each member is kept and the original order is
    preserved. Membership is tested with ``in`` against the result list, so
    members only need to support equality comparison.
    """
    unique_members = []
    for member in duplicate:
        if member in unique_members:
            continue
        unique_members.append(member)
    return unique_members

def zoom(image,center,zoom_factor):
    """Zoom ``image`` about ``center`` by ``zoom_factor``.

    ``center`` is indexed as ``(row, column)``. The image is rescaled with
    ``skimage.transform.rescale`` and then cropped on each side by the
    distance from ``center`` to that side multiplied by
    ``abs(1 - zoom_factor)``.

    When ``zoom_factor >= 1`` the crop is taken from the rescaled image.
    Otherwise the crop is taken from the ORIGINAL image and the rescaled copy
    is not used.

    The crop widths are generally fractional; any rounding is left to
    ``skimage.util.crop``.
    """
    image_resized = skimage.transform.rescale(image, zoom_factor, preserve_range = True)

    # Fraction of each centre-to-edge distance that is trimmed away
    trim_fraction = abs(1-zoom_factor)
    height, width = image.shape[0], image.shape[1]
    crop_width = [
        (center[0]*trim_fraction, (height-center[0])*trim_fraction),
        (center[1]*trim_fraction, (width-center[1])*trim_fraction),
    ]

    source = image_resized if zoom_factor >= 1 else image
    return skimage.util.crop(source, crop_width)

def applyBlur(image, mask, sigma=50.0, feather=20):
    """Blur the masked region of ``image`` and leave the rest sharp.

    Parameters:
        image: frame to process; it is converted to float and resized to
            ``mask.shape`` before blending.
        mask: 2-D array marking the region to blur (larger value = more blur).
        sigma: Gaussian sigma used to blur the image. Increase it for a
            stronger blur.
        feather: Gaussian sigma used to soften the edge of the mask.

    Returns:
        The blend ``soft_mask * blurred + (1 - soft_mask) * image``.
    """
    image = skimage.util.img_as_float(image)
    image = skimage.transform.resize(image,mask.shape, preserve_range = True)
    # The frame keeps multichannel=True so colours are not mixed across channels.
    blurred = skimage.filters.gaussian(image, sigma=sigma, multichannel = True)
    # The mask is a plain 2-D image. Passing multichannel=True here would treat
    # its last (column) axis as channels and feather along rows only, so the
    # flag is deliberately omitted to feather in both directions.
    blurredmask = skimage.filters.gaussian(skimage.util.img_as_float(mask), sigma=feather)
    # Add a trailing axis so the mask broadcasts over the colour channels
    blurredmask = np.expand_dims(blurredmask, 2)
    inverted_mask =np.ones([blurredmask.shape[0],blurredmask.shape[1],1])-blurredmask
    blended = blurredmask*blurred + inverted_mask*image
    return blended

def applyBlurContext(image, mask, sigma=50.0, feather=30):
    """Blur everything in ``image`` EXCEPT the masked region.

    This is the complement of ``applyBlur``: the masked region stays sharp and
    the surrounding context is blurred.

    Parameters:
        image: frame to process; it is converted to float and resized to
            ``mask.shape`` before blending.
        mask: 2-D array marking the region to keep sharp.
        sigma: Gaussian sigma used to blur the image.
        feather: Gaussian sigma used to soften the edge of the mask.

    Returns:
        The blend ``(1 - soft_mask) * blurred + soft_mask * image``.
    """
    image = skimage.util.img_as_float(image)
    image = skimage.transform.resize(image,mask.shape, preserve_range = True)
    # The frame keeps multichannel=True so colours are not mixed across channels.
    blurred = skimage.filters.gaussian(image, sigma=sigma, multichannel=True)
    # The mask is a plain 2-D image. Passing multichannel=True here would treat
    # its last (column) axis as channels and feather along rows only, so the
    # flag is deliberately omitted to feather in both directions.
    blurredmask = skimage.filters.gaussian(skimage.util.img_as_float(mask), sigma=feather)
    # Add a trailing axis so the mask broadcasts over the colour channels
    blurredmask = np.expand_dims(blurredmask, 2)
    inverted_mask =np.ones([blurredmask.shape[0],blurredmask.shape[1],1])- blurredmask
    blended = inverted_mask*blurred + blurredmask*image
    return blended

def hasSimilarity(pre_mask, mask, IOU_THRESHOLD = 0.6):
    """Check whether two masks are similar.

    Similarity = intersection(Mask1, Mask2) / union(Mask1, Mask2), where the
    union is every position whose summed mask value is above 0 and the
    intersection is every position whose summed mask value is above 1.

    Parameters:
        pre_mask: mask from the previous frame (an array, or the scalar 0 when
            there is no previous mask).
        mask: mask from the current frame.
        IOU_THRESHOLD: the masks count as similar when the similarity is
            strictly above this value.

    Returns:
        True if the masks are similar. False otherwise, including when both
        masks are empty (empty union), matching ``hasSimilarityBox``.
    """
    combined = pre_mask + mask
    union_area = np.sum(combined > 0)  # union area
    join_area = np.sum(combined > 1)  # intersection area
    # Two empty masks have no union; treat them as dissimilar instead of
    # dividing by zero.
    if union_area == 0:
        return False
    return bool(float(join_area)/float(union_area) > IOU_THRESHOLD)

def flatten( array, needToFlattenTwice=False ):
    """Strip one dummy outer level from a nested array.

    This handles user-provided (image) data that is wrapped in a redundant
    outer array, e.g. flatten( [[ A, B ]] ) -> [ A, B ].

    When ``needToFlattenTwice`` is False, an array with exactly three
    dimensions is returned untouched. In every other case the outer level is
    removed only if it holds a single element.
    """
    # A 3-dimensional array is already in its final form unless the caller
    # explicitly asks for the outer level to be stripped.
    if not needToFlattenTwice and len(array.shape) == 3:
        return array
    if len(array) == 1:
        return array[0]
    return array

def satisfyThreshold(scores, classes, human_threshold):
    """Count the detections that qualify as human.

    A detection qualifies when its score is strictly above
    ``human_threshold`` and its class equals 1.

    Returns two values [ shouldCheck, num_target_category ]:
        shouldCheck: True when at least one detection qualifies, meaning the
            frame contains a human and should be processed / checked.
        num_target_category: the number of qualifying detections.
    """
    num_target_category = sum(
        1
        for position in range(len(scores))
        if scores[position] > human_threshold and classes[position] == 1
    )
    return num_target_category > 0, num_target_category

def hasSimilarityBox(box1, box2, threshold = 0.7):
    """Check how much two rectangles overlap.

    It is used to decide whether the target character in two consecutive
    frames is the same, so that the target can be tracked semi-automatically
    as long as it stays relatively still.

    Each box is indexed as [top, left, bottom, right]. The boxes are similar
    when intersection area / union area is at least ``threshold``. A union
    area of zero is reported as not similar.
    """
    # Width and height of the intersection rectangle (zero when disjoint)
    shared_width = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    shared_height = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    overlap_area = shared_width * shared_height

    first_area = (box1[3] - box1[1]) * (box1[2] - box1[0])
    second_area = (box2[3] - box2[1]) * (box2[2] - box2[0])
    union_area = first_area + second_area - overlap_area

    if union_area == 0:
        return False

    percentage = overlap_area / float(union_area)
    assert 0 <= percentage <= 1

    return percentage >= threshold
    

In [15]:
# Collect the per-movie input files, each list sorted by file name:
# annotated images, detection dictionaries (.npz) and the raw frames.
image_list = glob.glob(os.path.join(ANNOTATIONS_DIR, movie_name,'*.jpg'))
image_list.sort()
dict_list = glob.glob(os.path.join(DICTIONARY_DIR, movie_name,'*.npz'))
dict_list.sort()
frame_list = glob.glob(os.path.join(JPEG_DIR, movie_name,'*.jpg'))
frame_list.sort()

In [16]:
# ClipInfo class
# The video is broken down into multiple clips
# For each clip, the user should specify whether they want the program to blur the clip or simply skip it.
# The logic behind it is that for some frames, the target character is not there.
# So it is unnecessary for the program to process those frames.
# Note: finding the target character requires both user input (a click on the target character's box) and a hefty amount of
# computational resources, as the program needs to iterate through all the detected masks, whether or not those masks
# actually belong to the target characters.

# Usage:
# E.g. 
# To specify that frames #30 to #40 don't require blurring, add the following line
# clipInfo.addInfo( start_index=30, end_index=40, shouldSkip=True)
class ClipInfo:
    """Describes how a video is split into clips and which clips to skip.

    Clips are entered with ``addInfo`` using INCLUSIVE frame indices. They are
    stored in ``intervals`` as ``[start, end_exclusive]`` pairs so they can be
    used directly as slice bounds.
    """

    def __init__(self):
        # [start_index, end_index + 1] for every clip, in insertion order
        self.intervals = []
        # shouldSkip flag of every clip, parallel to self.intervals
        self.flags = []
        self.num_intervals = 0

    def sanityCheck(self):
        """Assert that the entered clips are basically correct.

        The first clip must start at frame 0, every later clip must start on
        the frame right after the previous clip ends, and every clip must
        cover at least one frame.

        Raises:
            AssertionError: if any of these conditions is violated.
        """
        for i in range(self.num_intervals):
            start, end_exclusive = self.intervals[i]
            if i == 0:
                assert start == 0, "the first clip must start at frame 0"
            else:
                # addInfo stores an exclusive end, so a contiguous clip starts
                # exactly at the previous clip's stored end.
                assert start == self.intervals[i-1][1], \
                    "clip #%d does not start right after clip #%d" % (i, i-1)
            # The entered (inclusive) end must not precede the start
            assert end_exclusive > start, "clip #%d ends before it starts" % i

    def addInfo(self, start_index, end_index, shouldSkip):
        """Register a clip covering frames ``start_index``..``end_index`` inclusive.

        ``shouldSkip`` is True when the clip needs no blurring / user input.
        """
        self.intervals.append([start_index, end_index+1])
        self.flags.append(shouldSkip)
        self.num_intervals += 1

    def getInfo(self, index):
        """Return ``(start_index, end_index_exclusive, shouldSkip)`` for a clip.

        Returns ``(-1, -1, False)`` when ``index`` is past the last clip.
        """
        if (index >= self.num_intervals):
            return -1, -1, False
        return self.intervals[index][0], self.intervals[index][1], self.flags[index]
    

In [20]:
import ctypes
def process(clipInfo, startClipIndex, IOU_THRESHOLD=0.6, human_threshold=0.7):
    """Blur the target character(s) in every frame of the configured clips.

    Clips from ``startClipIndex`` onwards are processed in order. For each
    frame the detection results are loaded, the human masks are matched
    against the masks chosen in the previous frame, and, when nothing could be
    matched automatically, the user is asked to click on the wanted / unwanted
    characters in a matplotlib window.

    For every frame three files are written:
      * the merged wanted mask (``NEWMASKDIR/movie_name/<name>.npz``),
      * the frame with the wanted mask blurred
        (``BLURRED_DIR/movie_name/<name>.jpg``),
      * the frame with everything except the wanted mask blurred
        (``BLURRED_DIR/movie_name/CONTEXT_DIR/<name>.jpg``).

    Relies on the module-level ``frame_list``, ``image_list`` and
    ``dict_list``, which must be index-aligned.

    Parameters:
        clipInfo: ClipInfo describing the clips and their skip flags.
        startClipIndex: index of the first clip to process.
        IOU_THRESHOLD: similarity threshold passed to ``hasSimilarity``.
        human_threshold: minimum detection score for a human detection.
    """
    # Merged masks are enlarged by this factor around their box centre
    zoom_factor = 1.1

    # The first frame fixes the resolution used for every mask and output frame
    image_first = plt.imread(frame_list[0])
    images_shape = image_first.shape

    # Process all the clips
    for i in range(startClipIndex, clipInfo.num_intervals):
        print("~~~~~~~~~Start working on clip #%d ~~~~~~~~~~~~" % i)
        # get info for this clip (end_index is exclusive)
        start_index, end_index, macroSkipFlag = clipInfo.getInfo(i)
        # Masks / box carried over from the previous frame; reset per clip
        pre_mask = 0
        pre_unwanted_mask = 0
        pre_box = [0,0,0,0]
        # for all the frames within this clip
        for image_id, image_path in enumerate(frame_list[start_index:end_index]):

            # Get all the images, mask, and annotations for the related image
            # FIXME
            # image_name extraction might differ based on the image naming convention
            image_name = image_path[-9:-4]
            original_frame = plt.imread(image_path)
            annotated_image = plt.imread(image_list[image_id+start_index])

            # Get the masks and other related fields for this frame's annotation.
            # The archive is opened in a `with` block so its file handle is
            # released after each frame instead of leaking one per frame.
            with np.load(dict_list[image_id+start_index],encoding = 'latin1',fix_imports = True) as annotated_dict:
                masks = flatten( annotated_dict['detection_masks'] )
                boxes = flatten( annotated_dict['detection_boxes'], True )
                classes = flatten( annotated_dict['detection_classes'] )
                scores = flatten( annotated_dict['detection_scores'] )

            # mask_selected stores all the wanted masks ( target character(s) )
            mask_selected=[]
            # unwanted_mask_selected stores all the unwanted masks.
            # Those unwanted masks will be excluded from the blurring process.
            unwanted_mask_selected = []

            box_selected = [0,0,0,0]
            unwanted_box_selected = [0,0,0,0]

            skipFlag = macroSkipFlag

            # "shouldCheck" is True when the annotation of the given frame contains
            # at least one human character above the score threshold, False otherwise.
            # "num_target_category" is the number of such human characters.
            shouldCheck, num_target_category = satisfyThreshold(scores, classes, human_threshold)
            num_unwanted_images = 0

            # We examine the frame if and only if the frame is worth checking and
            # the skipFlag ( our manual input from the clipInfo ) is set to False
            if shouldCheck and (not skipFlag):
                # check all the masks
                for idx in range(len(masks)):
                    # we are only interested in the masks that belong to human category
                    if (classes[idx] != 1):
                        continue
                    # we are not interested in low precision masks
                    # NOTE(review): stopping here presumably relies on the scores
                    # being sorted in descending order - confirm with the detector.
                    if (scores[idx] < human_threshold):
                        break

                    mask = masks[idx,:,:]
                    mask = skimage.transform.resize(mask,(images_shape[0],images_shape[1]), preserve_range = True)

                    # If the image contains a mask similar to the wanted mask of
                    # the previous frame, append its index to mask_selected.
                    if (hasSimilarity(pre_mask, mask, IOU_THRESHOLD)):
                        mask_selected.append(idx)
                        # if box_selected has not been set yet
                        if (all(x == 0 for x in box_selected)):
                            box_selected = boxes[idx]

                    # If the image contains an unwanted mask, increment the
                    # num_unwanted_images counter.
                    if (hasSimilarity(pre_unwanted_mask, mask, IOU_THRESHOLD)):
                        unwanted_mask_selected.append(idx)
                        num_unwanted_images += 1

                # Remove duplicate masks from both unwanted_mask_selected & mask_selected
                unwanted_mask_selected = Remove(unwanted_mask_selected)
                mask_selected = Remove(mask_selected)

            # It only contains the unwanted images, skip it as there is nothing valuable
            if (num_unwanted_images == num_target_category):
                skipFlag = True

            # The position of the targeted mask only moves a little bit, skip it as the figure is just moving
            if (hasSimilarityBox(box_selected, pre_box)):
                skipFlag = True

            # As the selected mask is not empty, we dont require user input.
            # In other words, if the program is able to recognize the target character in this frame,
            # no user input is necessary
            if (mask_selected):
                skipFlag = True

            # Ask for user input if and only if we should check the frame and the skipFlag is set to False
            if shouldCheck and (not skipFlag):
                # Audible cue that a click is needed (uses the `say` command)
                os.system('say "click"')

                hasAddedWanted = False
                hasAddedUnwanted = False
                # Keep prompting until both a wanted ('a') and an unwanted ('d')
                # round have been completed.
                while not (hasAddedWanted and hasAddedUnwanted):
                    user_input = input("press 'a' to add wanted character, press 'd' to add unwanted character: ")
                    plt.imshow(annotated_image)

                    mouse_clicks=[]
                    mouse_clicks = np.array(plt.ginput(-1, timeout=0))
                    temp_mask_selected = []

                    if len(mouse_clicks):
                        # Find the masks that were selected
                        for mouse in range(mouse_clicks.shape[0]):
                            mouse_x = mouse_clicks[mouse,0]
                            mouse_y = mouse_clicks[mouse,1]
                            for idx in range(masks.shape[0]):
                                # For those human masks that have scores high enough,
                                # append the masks to the selected masks.
                                if (scores[idx] >= human_threshold and classes[idx] == 1):
                                    mask = masks[idx,:,:]
                                    # assumes the click coordinates and the masks share
                                    # the same resolution - TODO confirm
                                    if mask[mouse_y.astype(int),mouse_x.astype(int)]:
                                        temp_mask_selected.append(idx)

                        # Categorize wanted and unwanted masks
                        if (user_input == 'a'):
                            mask_selected = Remove(temp_mask_selected)
                        elif (user_input == 'd'):
                            unwanted_mask_selected = Remove(temp_mask_selected)

                    if (user_input == 'a'):
                        hasAddedWanted = True
                    elif (user_input == 'd'):
                        hasAddedUnwanted = True
                    plt.close()

            # join all selected masks
            if len(mask_selected):
                new_mask = masks[mask_selected[0],:,:]
                for ind in mask_selected[1:]:
                    new_mask = np.logical_or(new_mask,masks[ind,:,:])
                # Find the box the wanted masks are in
                box_selected = boxes[mask_selected[0]]
                # Box centre, scaled from box coordinates to mask pixels
                center_selected = []
                center_selected.append((box_selected[2]+box_selected[0])/2*new_mask.shape[0])
                center_selected.append((box_selected[3]+box_selected[1])/2*new_mask.shape[1])
                new_mask = zoom(new_mask, center_selected,zoom_factor)
            else:
                new_mask = np.zeros((images_shape[0],images_shape[1]))

            # join all unwanted selected masks
            if len(unwanted_mask_selected):
                new_unwanted_mask = masks[unwanted_mask_selected[0],:,:]
                for ind in unwanted_mask_selected[1:]:
                    new_unwanted_mask = np.logical_or(new_unwanted_mask,masks[ind,:,:])
                # Find the box the unwanted masks are in
                unwanted_box_selected = boxes[unwanted_mask_selected[0]]
                center_selected = []
                center_selected.append((unwanted_box_selected[2]+unwanted_box_selected[0])/2*new_unwanted_mask.shape[0])
                center_selected.append((unwanted_box_selected[3]+unwanted_box_selected[1])/2*new_unwanted_mask.shape[1])
                new_unwanted_mask = zoom(new_unwanted_mask, center_selected,zoom_factor)
            else:
                new_unwanted_mask = np.zeros((images_shape[0],images_shape[1]))

            # Bring wanted and unwanted masks to the frame resolution
            new_mask = skimage.transform.resize(new_mask,(images_shape[0],images_shape[1]), preserve_range = True)
            new_unwanted_mask = skimage.transform.resize(new_unwanted_mask,(images_shape[0],images_shape[1]), preserve_range = True)
            # Write the wanted masks to the file
            np.savez_compressed(os.path.join(NEWMASKDIR, movie_name, image_name), new_mask) # save mask

            # Blur the frame based on the wanted masks
            blurred_frame = applyBlur(original_frame, new_mask)
            blurred_frame = skimage.transform.resize(blurred_frame,(images_shape[0],images_shape[1]), preserve_range = True)
            # Save the blurred image for the latter pipeline
            plt.imsave(os.path.join(BLURRED_DIR,movie_name, image_name + '.jpg'), blurred_frame)

            # Save context occluded blurred image
            blurred_frame_context = applyBlurContext(original_frame, new_mask)
            blurred_frame_context = skimage.transform.resize(blurred_frame_context,(images_shape[0],images_shape[1]), preserve_range = True)
            # Save the blurred image for the latter pipeline
            plt.imsave(os.path.join(BLURRED_DIR,movie_name,CONTEXT_DIR, image_name + '.jpg'), blurred_frame_context)

            # Report progress (the cell output is cleared every 20 frames)
            if np.remainder(image_id,20)== 0:
                clear_output()
            print('Processing '+ 'image ' + str(image_id+start_index) + '/'+ str(end_index-1))

            # set previous frame's wanted masks, unwanted masks, and boxes
            pre_mask = new_mask
            pre_unwanted_mask = new_unwanted_mask
            pre_box = box_selected

    # Audible cue that all clips are finished (uses the `say` command)
    os.system('say "done"')

In [23]:
# Describe the clips of the movie, then run the blurring pipeline on them.
clipInfo = ClipInfo()
# shouldSkip=True  -> the clip is skipped (no blurring, no clicks)
# shouldSkip=False -> the clip needs clicks on the target character
# clipInfo.addInfo(0, 50,False)
# clipInfo.addInfo(51, 100,True)
# clipInfo.addInfo(101, 150,False)
clipInfo.addInfo(start_index=151, end_index=200, shouldSkip=True)
process(clipInfo, startClipIndex=0)

Processing image 190/200
Processing image 191/200
Processing image 192/200
Processing image 193/200
Processing image 194/200
Processing image 195/200
Processing image 196/200
Processing image 197/200
Processing image 198/200
Processing image 199/200
Processing image 200/200


In [8]:
# DONT REMOVE
# Handle edge case
# Properly process the frame within the gap between two consecutive clips
import shutil

blur = glob.glob(os.path.join(BLURRED_DIR, movie_name,'*.jpg'))
blur.sort()

# Walk the blurred frames in order; whenever a frame number does not directly
# follow the previous one, record the frame number just before the jump.
missing = []
prev = -1
for blurred_path in blur:
    index = int(blurred_path[-9: -4])
    if (index - prev != 1):
        missing.append(index-1)
    prev = index

# Fill each recorded frame by copying the outputs of the frame that follows it:
# blurred image, context-occluded image and mask, in that order.
for missing_index in missing:
    image_name = frame_list[missing_index][-9:-4]
    pre_image_name = frame_list[missing_index+1][-9:-4]
    for directory, extension in (
        (os.path.join(BLURRED_DIR,movie_name), '.jpg'),
        (os.path.join(BLURRED_DIR,movie_name, CONTEXT_DIR), '.jpg'),
        (os.path.join(NEWMASKDIR,movie_name), '.npz'),
    ):
        shutil.copyfile(os.path.join(directory, pre_image_name + extension),
                        os.path.join(directory, image_name + extension))