### <img src="https://img.icons8.com/dusk/64/000000/python.png" style="height:50px;display:inline"> Import Python Libraries


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import time
import json
from PIL import Image
from skimage.io import imread

# pytorch
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.autograd import Variable

# define device
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Report the installed PyTorch version (the value printed is torch.__version__)
print('PyTorch',torch.__version__)

#### Part 1 - Classic Vs. Deep Learning-based Semantic Segmentation
---
In this part you are going to compare classic methods for segmentation to deep learning-based methods. 


1. Load the images in the `./data/frogs` and `./data/horses` folders and display them.

In [None]:
def load_images(images_path):
  '''
  Collects the paths of the image files found in the given folder
  -----------------------------------------------------------------
  Inputs:
  image_path     |     Path to the folder containing the images
                 |      (with or without a trailing path separator)
  -----------------------------------------------------------------
  Output:
  images         |     List of file paths (strings), sorted by file name.
                 |      The files are NOT opened here; show_images opens
                 |      string entries itself.
  '''
  images = []
  # os.listdir returns entries in arbitrary order; sort them so that the
  # position-based labels ('Frog 1', 'Frog 2', ...) always match the same file
  for name in sorted(os.listdir(images_path)):
    image = os.path.join(images_path, name)
    # Skip hidden entries and sub-folders (e.g. '.ipynb_checkpoints', '.DS_Store')
    if name.startswith('.') or not os.path.isfile(image):
      continue
    images.append(image)

  return images

def show_images(images_path, labels=None):
  '''
  Show images side by side with their labels (if they have any)
  -----------------------------------------------------------------
  Inputs:
  image_path     |     Sequence of images to plot. Every entry is either
                 |      a path string (opened with PIL) or an object that
                 |      imshow can draw directly (PIL image, array).
                 |      A single path string is also accepted.
  labels         |     Labels of the images (sequence with one entry per
                 |      image, or a single string for a single image)
  -----------------------------------------------------------------
  Output:        
  Plot the images with their labels     
  '''
  # Accept a single path as well as a sequence of paths / images
  if isinstance(images_path, str):
    images_path = [images_path]
  images = list(images_path)

  # Nothing to draw - plt.subplots cannot create a grid with zero columns
  if len(images) == 0:
    return

  # A bare string is treated as the label of a single image
  if isinstance(labels, str):
    labels = [labels]

  # squeeze=False keeps `axes` two-dimensional even for a single image,
  # so one image and many images go through exactly the same code path
  fig, axes = plt.subplots(1, len(images), figsize=(14,6), squeeze=False)

  for idx, (ax, item) in enumerate(zip(axes[0], images)):
    # Path strings are opened here; anything else is assumed to be plottable
    image = Image.open(item) if isinstance(item, str) else item
    ax.imshow(image)

    # Only title the axes that actually have a label
    if labels is not None and idx < len(labels):
      ax.set_title(labels[idx])

    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis('off')
	  

In [None]:
# Folders that hold the sample images
frogs_path = '../data/frogs/'
horses_path = '../data/horses/'

# Collect the image paths found in each folder (frogs first, then horses)
frogs_images, horses_images = (load_images(folder) for folder in (frogs_path, horses_path))


# Titles for the plots, one per image
frogs_labels = ['Frog 1', 'Frog 2']
horses_labels = ['Horse 1', 'Horse 2']

# Draw every group in its own figure
for group_images, group_labels in ((frogs_images, frogs_labels), (horses_images, horses_labels)):
  show_images(group_images, group_labels)



2. Pick 1 classic method for segmentation and 1 deep learning-based method and segment the given images. Display the results.
    * **Briefly** summarize each method you picked and discuss the advantages and disadvantages of each method. In your answer, relate to the results you received in this section.
    * You can use a ready implementation from the internet or OpenCV, no need to implement it yourselves.
    * Note: the classic method **must not** use any neural network.

**Classic method - GrabCut**

GrabCut is an image segmentation method based on graph cuts. 

([Here](https://docs.opencv.org/3.4/d8/d83/tutorial_py_grabcut.html) is a good explanation of the algorithm)

In [None]:
def grabCut(image, rect, iter_count=5):
  '''
  Segment the foreground object inside a rectangle with OpenCV's GrabCut
  -----------------------------------------------------------------
  Inputs:
  image          |     Input image as a NumPy array (8-bit). 3-channel
                 |      images are used as they are; an alpha channel is
                 |      dropped and a 2-D grayscale image is replicated to
                 |      3 channels, because cv2.grabCut only accepts
                 |      8-bit 3-channel input
  rect           |     Coordinates of a rectangle that includes the foreground object
                 |      x coordinates - Top left corner
                 |      y coordinates - Top left corner
                 |      w - Width of the rectangle
                 |      h - Height of the rectangle
  iter_count     |     Number of GrabCut iterations (default: 5)
  -----------------------------------------------------------------
  Output: 
  masked         |     Segmented image: a 3-channel copy of the input in
                 |      which every pixel classified as background is 0
  '''
  image = np.asarray(image)
  if image.ndim == 2:
    # Grayscale -> replicate the single channel three times
    image = np.stack([image] * 3, axis=-1)
  elif image.ndim == 3 and image.shape[2] == 4:
    # RGBA / BGRA -> drop the alpha channel
    image = image[:, :, :3]
  # OpenCV needs a contiguous buffer (slicing above may produce a strided view)
  image = np.ascontiguousarray(image)

  # Label map filled in by GrabCut (one GC_* class value per pixel)
  mask = np.zeros(image.shape[:2],np.uint8)

  # These arrays used by the algorithm internally.
  bgdModel = np.zeros((1,65),np.float64)
  fgdModel = np.zeros((1,65),np.float64)

  cv2.grabCut(image,mask,rect,bgdModel,fgdModel,iter_count,cv2.GC_INIT_WITH_RECT)

  # Sure background (GC_BGD == 0) and probable background (GC_PR_BGD == 2)
  # are removed; sure / probable foreground pixels are kept
  mask2 = np.where((mask==cv2.GC_PR_BGD)|(mask==cv2.GC_BGD),0,1).astype('uint8') 
  image = image*mask2[:,:,np.newaxis]
  
  return image

Pick the RoI (region of interest) of each image (this needs to be done manually):

In [None]:
# Choose the image

# img = frogs_images[0]
# img = frogs_images[1]
# img = horses_images[0]
# img = horses_images[1]

#image = cv2.imread(img)

#roi= cv2.selectROI("select the area", image)
#print(roi)

# Frog 1 RoI - (117, 91, 281, 253)
# Frog 2 RoI - (163, 145, 185, 154)
# Horse 1 RoI - (30, 3, 865, 573)
# Horse 2 RoI - (611, 191, 401, 499)

In [None]:

# Open the four test images: the two frogs first, then the two horses
frog_1, frog_2, horse_1, horse_2 = (
    Image.open(image_file)
    for image_file in (
        '../data/frogs/frog1.jpg',
        '../data/frogs/frog2.jpg',
        '../data/horses/horse1.png',
        '../data/horses/horse2.jpg',
    )
)

# Keep them together so they can be processed in one loop
images = [frog_1, frog_2, horse_1, horse_2]

In [None]:
# Using classic method - GrabCut

#x,y,w,h coordinates of the images - from the picked RoI (before)
# NOTE(review): the RoI-selection cell records Frog 1 as (117, 91, 281, 253)
# while y=90 is used here - confirm which value is intended
frogs_rects = [(117, 90, 281, 253), (163, 145, 185, 154)]
horses_rects = [(30, 3, 865, 573), (611, 191, 401, 499)]

labels = frogs_labels + horses_labels
rects = frogs_rects + horses_rects

fig = plt.figure(figsize=(10,8))

for i,img in enumerate(images):

  # cv2.grabCut only accepts 8-bit 3-channel input, while PIL may open a PNG
  # as RGBA / palette / grayscale - force RGB before converting to NumPy
  npImage = np.array(img.convert('RGB'))  # Numpy array (H, W, 3)
  im_crop = grabCut(npImage,rects[i]) # Use grabcut algorithm
  
  # Plot results
  ax = fig.add_subplot(2,2,i+1)
  ax.imshow(im_crop)
  ax.set_title(labels[i])
  ax.set_axis_off()



In [None]:
def deprocess(image):
    '''
    Converts a pre-processed (normalized) image batch to a plotable image
    -----------------------------------------------------------------
    Inputs:
    image     |     Processed image batch to deprocess; only the first
              |      element (image[0]) is used.
              |      Assumes a torch tensor laid out as
              |      (batch, 3, height, width) - TODO confirm with callers
    -----------------------------------------------------------------
    Output:
    NumPy array with the channel axis moved last (height, width, 3)
    and all values clipped to the range [0, 1]
    '''
    # Per-channel statistics shaped (3, 1, 1) so they broadcast over H and W;
    # same values as the Normalize step used for the DeepLab pre-processing
    channel_std = np.array([[[0.229, 0.224, 0.225]]]).T
    channel_mean = np.array([[[0.485, 0.456, 0.406]]]).T

    # Undo the normalization: x * std + mean
    first_sample = image[0].cpu().numpy()
    restored = first_sample * channel_std + channel_mean

    # Channels-last layout for plotting, limited to the valid [0, 1] range
    return np.clip(restored.transpose(1, 2, 0), 0, 1)

In [None]:
def deeplabv3_segmentation(images_path):
  """
  Performs semantic segmentation with a pre-trained DeepLabV3 (ResNet-101)
  on every image file found in a folder.
  input
    images_path: path of the folder that contains the images
  output:
    segmented: list of NumPy arrays (H, W, 3) - the input images with every
               pixel predicted as class 0 (background) set to black
    segmented_colors: list of PIL palette images - the predicted class map,
               one color per class (21 classes)
  Notes:
    * torch.hub.load fetches the model definition / weights on first use,
      so this needs network access or a populated hub cache.
    * Uses the module-level `device` for inference.
  """
  # Download and load the pre-trained model, move it to the device once
  model = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet101', pretrained=True)
  model = model.to(device)
  model.eval() # Inference mode

  # Define the pre-processing steps
  preprocess = transforms.Compose([
      transforms.ToTensor(), # Image to tensor
      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), # Normalization
  ])

  # Create a color palette, selecting a color for each of the 21 classes
  # (loop-invariant, so it is built only once)
  palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
  colors = torch.as_tensor([i for i in range(21)])[:, None] * palette
  colors = (colors % 255).numpy().astype("uint8")

  segmented = [] # Will contain the black-background output
  segmented_colors = [] # Will contain the color-palette output
  for frame in sorted(os.listdir(images_path)):
    frame_path = os.path.join(images_path, frame)
    # Skip hidden entries and sub-folders (e.g. '.ipynb_checkpoints')
    if frame.startswith('.') or not os.path.isfile(frame_path):
      continue

    # The normalization above has 3 channels, so force RGB
    # (a PNG may otherwise be opened as RGBA / palette / grayscale)
    input_image = Image.open(frame_path).convert('RGB')
    # Writable copy of the pixels, same size as the prediction map
    input_image_array = np.array(input_image)

    # Pre-processing: create a mini-batch of size 1 as expected by the model
    input_batch = preprocess(input_image).unsqueeze(0).to(device)

    # Forward pass
    with torch.no_grad():
      output = model(input_batch)['out'][0]
    # Per-pixel class index with the highest score
    output_predictions = output.argmax(0).byte().cpu().numpy()

    # Create a mask to delete the background (class index 0).
    # The builtin bool is used: the np.bool alias was removed from NumPy
    foreground = output_predictions.astype(bool)
    input_image_array[~foreground] = 0

    # Plot the semantic segmentation predictions of 21 classes in each color
    r = Image.fromarray(output_predictions).resize(input_image.size)
    r.putpalette(colors)

    segmented_colors.append(r)
    segmented.append(input_image_array)
  return segmented, segmented_colors

In [None]:
# Segment both folders; only the color-coded class maps (second return value)
# are kept, the black-background cut-outs are discarded
_, dl_seg_f = deeplabv3_segmentation(frogs_path)
_, dl_seg_h = deeplabv3_segmentation(horses_path)

# One figure per folder
for class_maps in (dl_seg_f, dl_seg_h):
  show_images(class_maps)

3. Pick 3 images (download from the internet or take them yourself) that satisfy the following, and display them:
    * One image of a living being (human, animal,...).
    * One image of a commonly-used object (car, chair, smartphone, glasses,...).
    * One image of a not-so-commonly-used object (fire extinguisher, satellite,... **BE CREATIVE**).

In [None]:
# Open the three self-chosen pictures: a living being, a common object
# and an uncommon object
animal_image, object_image, uncommon_image = (
    Image.open(image_file)
    for image_file in (
        '../data/my_data/Humpback_Whale_underwater_shot.jpg',
        '../data/my_data/Credit_card_samp.jpg',
        '../data/my_data/Nic_Cage_Pillow.jpg',
    )
)

my_images = [animal_image,object_image,uncommon_image] 
my_labels = ['Whale', 'Credit Cards','Nick Cage Pillow']

fig = plt.figure(figsize=(10,8))

# Draw the pictures in a single row, each one titled with its label
for position, (picture, caption) in enumerate(zip(my_images, my_labels), start=1):
  ax = fig.add_subplot(1, 3, position)
  ax.imshow(picture)
  ax.set_title(caption)
  ax.set_axis_off()

4. Apply each method (one classic and one deep learning-based) on the 3 images. Display the results (mask and segmented image).
    * Which method performed better on each image? Describe your thoughts on why one method is better than the other.
    * For the classic method you can change parameters per-image, document them in the report.
    * You can add manual post-processing to get a mask if needed. If you do that, document in your report "how hard" you had to work in the post-processing stage, as it's an indication of the quality of the method.

5. As you probably have noticed, segmentation can be rough around the edges, i.e., the mask is not perfect and may be noisy around the edges. What can be done to fix or at least alleviate this problem? Your suggestions can be in pre-processing, inside the segmentation algorithm or in post-processing.