In [None]:
!pip install protobuf==3.20.*

## Import libraries

_Note: In Google Colab, you need to restart the runtime so that the modules you installed in the previous section are reloaded; otherwise, you will get an error. You can do so by selecting `Runtime > Restart Runtime` in the menu bar. **Please do not run the cell below without restarting.**_

In [None]:
# Clone the tensorflow models repository
!git clone --depth 1 https://github.com/tensorflow/models

In [None]:
# Compile the Object Detection API protocol buffers and install the necessary packages.
# The steps are chained with && so that a failure stops the remaining ones:
#   1. cd into models/research/ (created by the clone cell)
#   2. protoc generates Python modules for object_detection/protos/*.proto in place
#   3. the TF2 setup.py is copied into the current directory
#   4. pip installs the package from that directory
!cd models/research/ && protoc object_detection/protos/*.proto --python_out=. && cp object_detection/packages/tf2/setup.py . && python -m pip install .

In [None]:
import tensorflow as tf
import tensorflow_hub as hub

import matplotlib
import matplotlib.pyplot as plt

import numpy as np
from six import BytesIO
from PIL import Image
from six.moves.urllib.request import urlopen

from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_ops

from google.colab .patches import cv2_imshow
import cv2
import numpy as np
import os
tf.get_logger().setLevel('ERROR')


%matplotlib inline

In [None]:
# Segmentation model used by this notebook and its TensorFlow Hub location
model_display_name = 'Mask R-CNN Inception ResNet V2 1024x1024'
model_handle = 'https://tfhub.dev/tensorflow/mask_rcnn/inception_resnet_v2_1024x1024/1'

print(f'Selected model:{model_display_name}')
print(f'Model Handle at TensorFlow Hub: {model_handle}')

In [None]:
# Load the selected model from TensorFlow Hub.
# Expect this cell to need 10 to 15 minutes to finish.
print('loading model...')
hub_model = hub.load(handle=model_handle)
print('model loaded!')

All the images we deal with will be resized to 400 x 400 pixels

In [None]:
# Target size, in pixels, that the images in this notebook are resized to
HEIGHT, WIDTH = 400, 400

Create two folders, one for the background images and one for the foreground images. The folder **back** will contain all the indoor backgrounds. The folder **front** will contain all the pedestrian images.

In [None]:
!mkdir front
!mkdir back

## Mask Extraction

We obtain the masks of the objects detected in the outdoor image from the Mask R-CNN Inception ResNet V2 model. From all the objects detected, we only take the masks of class 1, which represents "humans". Any mask whose bounding box covers less than 9% of the total image area is discarded. If there are more than two human masks, we only keep the first two and discard the rest.

In [None]:
def extract_multiple_mask(image):
  """Return up to two person masks found by ``hub_model`` in ``image``.

  Args:
    image: batched model input; ``image.shape[1]`` and ``image.shape[2]`` are
      used as the image height and width when the box masks are reframed
      (the dataset loop passes a (1, HEIGHT, WIDTH, 3) uint8 array).

  Returns:
    A list with one or two uint8 arrays of shape (HEIGHT, WIDTH) in which the
    detected person is 255 and everything else is 0. When nothing usable is
    found, the list holds a single all-zero mask.
  """
  # A single all-zero mask is the "nothing usable" result
  blank = [np.zeros((HEIGHT, WIDTH), dtype=np.uint8)]

  # run inference
  results = hub_model(image)
  if 'detection_masks' not in results:
    # Previously this path fell through and returned None
    print("No proper mask found .. skipping this image")
    return blank

  # Only the first two detections are considered
  # (presumably the model lists them best-first -- TODO confirm)
  classes = results['detection_classes'][0][:2].numpy()
  detection_masks = results['detection_masks'][0][:2]
  detection_boxes = results['detection_boxes'][0][:2]

  # reframe the bounding box masks to the image size
  detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
      detection_masks, detection_boxes, image.shape[1], image.shape[2])

  # keep mask pixels whose value is above the threshold, as a 0/1 uint8 array
  detection_masks_reframed = tf.cast(
      detection_masks_reframed > 0.6, tf.uint8).numpy()

  # cls == 1 represents the "human" class.
  # The mask is looked up with the detection's own index; a counter that only
  # advances on humans would pick the wrong mask for classes such as [3, 1].
  masks = []
  for idx, cls in enumerate(classes):
    if cls == 1:
      mask = detection_masks_reframed[idx] * 255
      masks.append(cv2.resize(mask, (WIDTH, HEIGHT)))

  # Removing any masks whose bounding box is <9% of the image area
  proper_masks = []
  for mask in masks:
    _, _, w, h = cv2.boundingRect(mask)
    if (w * h) / (HEIGHT * WIDTH) >= 0.09:
      proper_masks.append(mask)

  if not proper_masks:
    print("No proper mask found .. skipping this image")
    return blank

  # only the first two masks are returned
  return proper_masks[:2]

In [None]:
# Merging the masks
def merge_mask(masks):
    """Combine the individual person masks into one soft mask.

    Args:
        masks: iterable of HEIGHT x WIDTH masks (grayscale, or 3-channel which
            is converted to grayscale first).

    Returns:
        (blurred, bounding_boxes): ``blurred`` is the union of the binarised
        masks after a 5x5 Gaussian blur (float array, values in 0..255);
        ``bounding_boxes`` holds one ``(x, y, w, h)`` tuple per input mask.
    """
    # numpy shapes are (rows, cols) == (HEIGHT, WIDTH)
    full = np.zeros((HEIGHT, WIDTH))
    bounding_boxes = []

    for mask in masks:
        # Greyscaling the mask if it isn't already
        if len(mask.shape) == 3:
            mask = cv2.cvtColor(mask, cv2.COLOR_RGB2GRAY)
        # Convert the mask into binary format
        _, mask = cv2.threshold(mask, 128, 255, cv2.THRESH_BINARY)
        mask = mask.astype(np.uint8)
        # Get the bounding box for the mask
        bounding_boxes.append(cv2.boundingRect(mask))

        # Union rather than sum: where two masks overlap, a sum would reach
        # 510 and give a blend weight of 2.0 once the mask is divided by 255
        full = np.maximum(full, mask)

    blurred = cv2.GaussianBlur(full, (5, 5), 0)
    return blurred, bounding_boxes

In [None]:
def load_input_image(path, height, width):
  """Read an image and prepare both the working copy and the model input.

  Args:
    path: location of the image file.
    height: target number of rows.
    width: target number of columns.

  Returns:
    (image, batch): ``image`` is the resized picture as loaded by OpenCV
    (BGR channel order, shape (height, width, 3)); ``batch`` is the same
    picture in RGB order with a leading batch axis, shape
    (1, height, width, 3), dtype uint8.

  Raises:
    FileNotFoundError: if OpenCV cannot read ``path``.
  """
  image = cv2.imread(path)
  if image is None:
    # cv2.imread signals failure by returning None instead of raising
    raise FileNotFoundError(f"Could not read image: {path}")

  # cv2.resize expects dsize as (width, height)
  image = cv2.resize(image, (width, height))

  # OpenCV loads BGR; TensorFlow detection models are fed RGB images
  rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

  return image, rgb[np.newaxis, ...].astype(np.uint8)

## Simple Cut and Paste

In [None]:
def get_simple_cutouts(input_image, target_image, final_mask, bounding_box):
    """Paste the masked part of ``input_image`` onto ``target_image``.

    ``final_mask`` is divided by 255 and used as the per-pixel weight of the
    foreground. Returns ``(blended, blended_box)``, where ``blended_box`` is a
    copy of the composite with a green rectangle drawn for every
    ``(x, y, w, h)`` entry of ``bounding_box``.
    """
    # Three-channel weight map built from the single-channel mask
    alpha = cv2.merge([final_mask] * 3) / 255

    # Weighted mix of foreground and background
    blended = input_image * alpha + target_image * (1 - alpha)

    # Second copy annotated with one box per individual human mask
    blended_box = blended.copy()
    for x, y, w, h in bounding_box:
        cv2.rectangle(blended_box, (x, y), (x + w, y + h), (0, 255, 0), 2)

    return blended, blended_box

## Poisson Blending

In [None]:
def get_poisson_image(input_image, target_image, final_mask, bounding_box):
    """Blend the masked foreground into the background with Poisson cloning.

    Args:
        input_image: foreground image passed to ``cv2.seamlessClone`` as source.
        target_image: background image passed as destination.
        final_mask: merged mask; cast to uint8 before use.
        bounding_box: iterable of ``(x, y, w, h)`` boxes to draw.

    Returns:
        (blended, blended_box): the cloned image and a copy of it with a green
        rectangle per bounding box. When the mask has no non-zero pixel, the
        background is returned unchanged.
    """
    final_mask = final_mask.astype(np.uint8)

    # Bounding rectangle of the whole (possibly multi-person) mask
    x_, y_, w_, h_ = cv2.boundingRect(final_mask)

    if w_ == 0 or h_ == 0:
        # Empty mask: there is nothing to clone, keep the plain background
        blended = target_image.copy()
    else:
        # Centering on the mask's own rectangle keeps the people where they
        # were in the source image
        center = (x_ + w_ // 2, y_ + h_ // 2)
        blended = cv2.seamlessClone(
            input_image, target_image, final_mask, center, cv2.NORMAL_CLONE)

    blended_box = blended.copy()

    # Adding individual boxes for each mask
    for x, y, w, h in bounding_box:
        cv2.rectangle(blended_box, (x, y), (x + w, y + h), (0, 255, 0), 2)

    return blended, blended_box



## Laplacian Blending

In [None]:
# Function to generate a Gaussian pyramid
def generate_gaussian_pyramid(image, levels):
    """Return ``[image, pyrDown(image), ...]`` with ``levels`` entries.

    The original image is always the first entry; each further entry is the
    previous one passed through ``cv2.pyrDown``.
    """
    pyramid = [image]
    for _ in range(1, levels):
        pyramid.append(cv2.pyrDown(pyramid[-1]))
    return pyramid

# Function to generate a Laplacian pyramid
def generate_laplacian_pyramid(gaussian_pyramid):
    """Turn a Gaussian pyramid into a Laplacian pyramid.

    Args:
        gaussian_pyramid: list of images, finest level first.

    Returns:
        A list of the same length: each entry but the last is the difference
        between a level and the upsampled next-coarser level; the last entry
        is the coarsest Gaussian level. Integer inputs are returned as float32.
    """
    def _as_float(level):
        # cv2.subtract saturates on uint8, so every negative difference would
        # be clamped to 0; a floating dtype keeps the sign of the detail
        level = np.asarray(level)
        if np.issubdtype(level.dtype, np.floating):
            return level
        return level.astype(np.float32)

    levels = [_as_float(level) for level in gaussian_pyramid]

    laplacian_pyramid = []
    for fine, coarse in zip(levels, levels[1:]):
        # An explicit dstsize makes the upsampled level match ``fine`` even
        # when its dimensions are odd
        rows, cols = fine.shape[:2]
        upsampled = cv2.pyrUp(coarse, dstsize=(cols, rows))
        laplacian_pyramid.append(cv2.subtract(fine, upsampled))
    laplacian_pyramid.append(levels[-1])
    return laplacian_pyramid

# Function to reconstruct the image from the Laplacian pyramid
def reconstruct_image_from_pyramid(laplacian_pyramid):
    """Collapse a Laplacian pyramid back into a single image.

    Starts from the last (coarsest) entry and, walking towards the first,
    upsamples the running image and adds the detail level to it.
    """
    image = laplacian_pyramid[-1]
    for detail in reversed(laplacian_pyramid[:-1]):
        # Upsample to exactly the detail level's size; plain pyrUp doubles the
        # size, which does not match levels with odd dimensions
        rows, cols = detail.shape[:2]
        image = cv2.pyrUp(image, dstsize=(cols, rows))
        image = cv2.add(image, detail)
    return image

# Function to perform Laplacian blending of foreground (source) and background (target) images
def get_laplacian_image(input_image, target_image, final_mask, bounding_box, levels=3):
    """Blend foreground and background level by level in pyramid space.

    Laplacian pyramids of both images are mixed using the Gaussian pyramid of
    ``final_mask`` (scaled by 1/255) as weight, then collapsed and clipped to
    0..255. Returns ``(blended_image, blended_box)``, the latter with a green
    rectangle drawn for every ``(x, y, w, h)`` in ``bounding_box``.
    """
    def _mix(source_band, target_band, mask_level):
        # Three-channel weight in 0..1 derived from the mask at this level
        weight = cv2.merge([mask_level] * 3) / 255
        return source_band * weight + target_band * (1 - weight)

    # Laplacian pyramids of both pictures, Gaussian pyramid of the mask
    source_laplacian = generate_laplacian_pyramid(
        generate_gaussian_pyramid(input_image, levels))
    target_laplacian = generate_laplacian_pyramid(
        generate_gaussian_pyramid(target_image, levels))
    mask_pyramid = generate_gaussian_pyramid(final_mask, levels)

    # One blended band per pyramid level
    blended_laplacian = [
        _mix(source_band, target_band, mask_level)
        for source_band, target_band, mask_level
        in zip(source_laplacian, target_laplacian, mask_pyramid)
    ]

    # Collapse the blended pyramid and keep the values in the displayable range
    collapsed = reconstruct_image_from_pyramid(blended_laplacian)
    blended_image = np.clip(collapsed, 0, 255)

    blended_box = blended_image.copy()
    for x, y, w, h in bounding_box:
        cv2.rectangle(blended_box, (x, y), (x + w, y + h), (0, 255, 0), 2)

    return blended_image, blended_box


## Telea Inpaint Blending

In [None]:
def get_inpaint_cutout(input_image, target_image, masks, pr=0.05):
    """Cut-and-paste the masked people, then inpaint the seam with Telea.

    Args:
        input_image: foreground image.
        target_image: background image.
        masks: iterable of HEIGHT x WIDTH person masks (grayscale or 3-channel).
        pr: fraction of each mask's bounding-box width used as the side of the
            square dilation/erosion kernel that defines the seam band.

    Returns:
        (blended_image, blended_box): the inpainted composite and a copy of it
        with a green rectangle per mask bounding box.
    """
    # numpy shapes are (rows, cols) == (HEIGHT, WIDTH)
    full = np.zeros((HEIGHT, WIDTH))
    full_trimap = np.zeros((HEIGHT, WIDTH))
    bounding_boxes = []

    for mask in masks:
        # Greyscaling the mask if it isn't already
        if len(mask.shape) == 3:
            mask = cv2.cvtColor(mask, cv2.COLOR_RGB2GRAY)
        # Convert the mask into binary format
        _, mask = cv2.threshold(mask, 128, 255, cv2.THRESH_BINARY)
        mask = mask.astype(np.uint8)
        x, y, w, h = cv2.boundingRect(mask)
        bounding_boxes.append((x, y, w, h))

        # Seam band: pixels gained by dilating the mask or lost by eroding it
        kernel = np.ones((int(pr * w), int(pr * w)), np.uint8)
        back = cv2.dilate(mask, kernel, iterations=1)
        fore = cv2.erode(mask, kernel, iterations=1)
        seam = np.zeros_like(mask, dtype=np.uint8)
        seam[(back - fore) == 255] = 255

        # Union rather than sum: overlapping people would otherwise reach 510,
        # i.e. a paste weight of 2.0 and a wrapped-around inpaint mask
        full = np.maximum(full, mask)
        full_trimap = np.maximum(full_trimap, seam)

    # Soften the seam band; non-zero pixels are the ones to be inpainted
    blurred = cv2.GaussianBlur(full_trimap, (5, 5), 0).astype(np.uint8)
    mask_3d = cv2.merge([full, full, full]) / 255

    # Hard cut-and-paste composite that the inpainting then repairs
    cutpaste = mask_3d * input_image + (1 - mask_3d) * target_image
    cutpaste = cutpaste.astype(np.uint8)

    blended_image = cv2.inpaint(cutpaste, blurred, 5, flags=cv2.INPAINT_TELEA)
    blended_box = blended_image.copy()

    # Adding individual boxes for each mask
    for x, y, w, h in bounding_boxes:
        cv2.rectangle(blended_box, (x, y), (x + w, y + h), (0, 255, 0), 2)

    return blended_image, blended_box


# Dataset Generation (Multiple Techniques)

In [None]:
!mkdir poisson
!mkdir laplacian
!mkdir cutpaste
!mkdir inpaint

In [None]:
# blending - "all", "cutpaste", "poisson", "laplacian", "inpaint"
def generate_dataset(input_folder="front/", output_folder="back/", blending="all"):
    """Composite every foreground image onto every background image.

    Args:
        input_folder: folder with the foreground images; only file names
            containing ".png" are used.
        output_folder: folder with the background images; only file names
            containing ".jpg" are used. Despite its name, nothing is written
            to this folder.
        blending: "all", or one of "cutpaste", "poisson", "laplacian",
            "inpaint".

    Raises:
        ValueError: if ``blending`` is not one of the supported options.

    For each selected technique the results are written to a folder of the
    same name as ``<foreground>_<background>.jpg`` plus a ``_box.jpg`` variant
    with the bounding boxes drawn.
    """
    techniques = ("cutpaste", "poisson", "laplacian", "inpaint")
    if blending != "all" and blending not in techniques:
        # exit() would shut down the notebook kernel; report the error instead
        raise ValueError(
            f"Sorry we don't have this blending option. Got {blending!r}")
    selected = techniques if blending == "all" else (blending,)

    # Make sure the destination folders exist before anything is written
    for technique in selected:
        os.makedirs(technique, exist_ok=True)

    # Getting all the images
    foregrounds = sorted(n for n in os.listdir(input_folder) if ".png" in n)
    backgrounds = sorted(n for n in os.listdir(output_folder) if ".jpg" in n)

    for fg_name in foregrounds:
        input_image, model_input = load_input_image(
            os.path.join(input_folder, fg_name), HEIGHT, WIDTH)
        input_image = input_image.astype(np.uint8)

        # The masks depend only on the foreground, so the (slow) model
        # inference runs once per foreground instead of once per pair
        masks = extract_multiple_mask(model_input)
        if masks is None or len(masks) == 0:
            print(f"Skipping {fg_name}: no mask returned")
            continue
        final_mask, bounding_box = merge_mask(masks)
        if not np.any(final_mask):
            # Blank mask means no usable person was found: really skip it
            print(f"Skipping {fg_name}: no usable person mask")
            continue

        # splitext keeps dots inside the name ("a.b.png" -> "a.b")
        fg_stem = os.path.splitext(fg_name)[0]

        for bg_name in backgrounds:
            print(f"Processing {fg_name} and {bg_name}")
            target_image = cv2.imread(os.path.join(output_folder, bg_name))
            if target_image is None:
                print(f"Could not read {bg_name} .. skipping this background")
                continue
            # cv2.resize expects dsize as (width, height)
            target_image = cv2.resize(target_image, (WIDTH, HEIGHT)).astype(np.uint8)

            out_stem = f"{fg_stem}_{os.path.splitext(bg_name)[0]}"

            for technique in selected:
                if technique == "cutpaste":
                    # Simple cut and paste
                    plain, boxed = get_simple_cutouts(
                        input_image, target_image, final_mask, bounding_box)
                elif technique == "poisson":
                    # Poisson Blending
                    plain, boxed = get_poisson_image(
                        input_image, target_image, final_mask, bounding_box)
                elif technique == "laplacian":
                    # Laplacian Blending
                    plain, boxed = get_laplacian_image(
                        input_image, target_image, final_mask, bounding_box)
                else:
                    # Telea Inpainting Blending (works on the individual masks)
                    plain, boxed = get_inpaint_cutout(
                        input_image, target_image, masks)

                cv2.imwrite(f"{technique}/{out_stem}.jpg", plain)
                cv2.imwrite(f"{technique}/{out_stem}_box.jpg", boxed)


Generating all datasets

In [None]:
generate_dataset(blending="all")  # every blending technique is generated

Downloading zip files for all datasets

In [None]:
import shutil
from google.colab import files

# Zip each dataset folder into <folder>.zip and hand it to the browser
for dataset_dir in ("inpaint", "poisson", "laplacian", "cutpaste"):
    shutil.make_archive(dataset_dir, 'zip', dataset_dir)
    zip_filename = f"{dataset_dir}.zip"
    files.download(zip_filename)