# Saliency Mapper

> A tool to generate the saliency maps of images using a variety of techniques

Code was written by Nicholas M. Synovic, Oscar Yanek, and Rohan Sethi

## Optimal Performance

For optimal performance, open the *Runtime* tab in Google Colab, click *Change runtime type*, then choose **GPU** from the *Hardware Accelerator* dropdown.

## Upgrade Python `pip` Tool

Upgrade the Python `pip` tool to the latest version

In [None]:
%pip install --upgrade pip

## Install Python libraries via `pip`

Installed libraries are:

- opencv-contrib-python
- torch
- torchvision
- torchaudio
- pandas
- progress
- timm
- ipywidgets

In [None]:
%pip install opencv-contrib-python torch torchvision torchaudio pandas progress timm ipywidgets

## Import Dependencies 

In [None]:
from os import listdir
from os import makedirs
from os.path import isfile
from os.path import join
from pathlib import PurePath

import cv2
import numpy
import torch
#from google.colab import drive
from numpy import ndarray
from progress.bar import Bar

## Allow Data to be Loaded From Google Drive



In [None]:
#drive.mount('/content/gdrive')

## Main Application

**Note**: Work is being done to better document the steps taken throughout this prototype, as well as to remove code duplication.

In [None]:
# Identifiers of the MiDaS depth-estimation models; main() hands them to
# estimateDepth(), which forwards them to ``torch.hub.load``.
depth_MiDaSsmall: str = "MiDaS_small"
depth_DPTHybrid: str = "DPT_Hybrid"
depth_DPTLarge: str = "DPT_Large"

# Static saliency detectors from OpenCV's ``saliency`` module. They are created
# once at module level and shared by the compute*Saliency functions.
fineGrainSaliency = cv2.saliency.StaticSaliencyFineGrained_create()
spectralSaliency = cv2.saliency.StaticSaliencySpectralResidual_create()


def estimateDepth(imagePaths: list, modelType: str, outputFolder: str = "data") -> None:
    """Estimate a depth map for each image with a MiDaS model and save it as a JPEG.

    The model and its input transforms are fetched from the ``intel-isl/MiDaS``
    repository through ``torch.hub``. Inference runs on CUDA when available and
    on the CPU otherwise. Every result is written to ``outputFolder`` as
    ``<image stem>_<modelType with "_" replaced by "-">.jpg``.

    Args:
        imagePaths: Paths of the images to process.
        modelType: MiDaS model identifier, e.g. ``"DPT_Large"``,
            ``"DPT_Hybrid"`` or ``"MiDaS_small"``.
        outputFolder: Directory that receives the depth maps. It is created
            when it does not exist yet.

    Raises:
        FileNotFoundError: If OpenCV cannot read one of the images.
    """
    midas = torch.hub.load("intel-isl/MiDaS", modelType)
    midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    midas.to(device)
    # Inference only: put layers such as batch norm into evaluation mode.
    midas.eval()

    if modelType in ("DPT_Large", "DPT_Hybrid"):
        transform = midas_transforms.dpt_transform
    else:
        transform = midas_transforms.small_transform

    # cv2.imwrite does not create missing directories; it only returns False.
    if outputFolder:
        makedirs(outputFolder, exist_ok=True)

    with Bar(f"Estimating depth with {modelType}...", max=(len(imagePaths))) as bar:
        imagePath: str
        for imagePath in imagePaths:
            imageName: str = (
                PurePath(imagePath).with_suffix("").name
                + f'_{modelType.replace("_", "-")}.jpg'
            )
            outputPath: str = join(outputFolder, imageName)

            image: ndarray = cv2.imread(imagePath)
            # cv2.imread reports failure by returning None instead of raising.
            if image is None:
                raise FileNotFoundError(f"Could not read image: {imagePath}")
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            input_batch = transform(image).to(device)

            with torch.no_grad():
                prediction = midas(input_batch)

                # Resize the prediction back to the resolution of the input image.
                prediction = torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=image.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                ).squeeze()

            output = prediction.cpu().numpy()
            # The raw prediction is a float map with an arbitrary range. Writing
            # it directly would saturate it to 8 bits, so stretch it to 0-255
            # first to keep the relative depth visible in the JPEG.
            output = cv2.normalize(output, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)
            cv2.imwrite(outputPath, output)
            bar.next()


def readDirectory(dir: str) -> list:
    """Return the paths of the regular files directly inside ``dir``.

    Sub-directories are skipped so that every returned path can be opened as a
    file, and the result is sorted by file name so the processing order is
    deterministic (``listdir`` returns entries in arbitrary order).

    Args:
        dir: Directory to list. The name shadows the builtin but is kept
            because callers may pass it by keyword.

    Returns:
        A list of ``join(dir, name)`` paths, one per regular file.
    """
    candidates = (join(dir, name) for name in sorted(listdir(dir)))
    return [path for path in candidates if isfile(path)]


def computeSpectralSaliency(imagePath: str, outputFolder: str = "data") -> None:
    """Compute the spectral-residual saliency map of an image and save it as a JPEG.

    The map is written to ``outputFolder`` as
    ``<image stem>_spectralResidual.jpg``.

    Args:
        imagePath: Path of the image to analyse.
        outputFolder: Directory that receives the saliency map. It is created
            when it does not exist yet.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image.
        RuntimeError: If the saliency detector reports failure.
        OSError: If the saliency map cannot be written.
    """
    image: ndarray = cv2.imread(imagePath)
    # cv2.imread reports failure by returning None instead of raising.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {imagePath}")

    (success, saliencyMap) = spectralSaliency.computeSaliency(image)
    if not success:
        raise RuntimeError(f"Spectral residual saliency failed for: {imagePath}")

    imageName: str = PurePath(imagePath).with_suffix("").name + "_spectralResidual.jpg"
    outputPath: str = join(outputFolder, imageName)

    # cv2.imwrite does not create missing directories; it only returns False.
    if outputFolder:
        makedirs(outputFolder, exist_ok=True)

    # Scale the detector output by 255 and store it as an 8-bit image.
    scaledMap: ndarray = (saliencyMap * 255).astype("uint8")
    if not cv2.imwrite(outputPath, scaledMap):
        raise OSError(f"Could not write saliency map: {outputPath}")


def computeFineGrainSaliency(imagePath: str, outputFolder: str = "data") -> None:
    """Compute the fine-grained saliency map of an image and save it as a JPEG.

    The map is written to ``outputFolder`` as ``<image stem>_fineGrain.jpg``.

    Args:
        imagePath: Path of the image to analyse.
        outputFolder: Directory that receives the saliency map. It is created
            when it does not exist yet.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image.
        RuntimeError: If the saliency detector reports failure.
        OSError: If the saliency map cannot be written.
    """
    image: ndarray = cv2.imread(imagePath)
    # cv2.imread reports failure by returning None instead of raising.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {imagePath}")

    (success, saliencyMap) = fineGrainSaliency.computeSaliency(image)
    if not success:
        raise RuntimeError(f"Fine-grained saliency failed for: {imagePath}")

    imageName: str = PurePath(imagePath).with_suffix("").name + "_fineGrain.jpg"
    outputPath: str = join(outputFolder, imageName)

    # cv2.imwrite does not create missing directories; it only returns False.
    if outputFolder:
        makedirs(outputFolder, exist_ok=True)

    # Scale the detector output by 255 and store it as an 8-bit image.
    scaledMap: ndarray = (saliencyMap * 255).astype("uint8")
    if not cv2.imwrite(outputPath, scaledMap):
        raise OSError(f"Could not write saliency map: {outputPath}")


def writeImage(image: ndarray, imagePath: str) -> None:
    """Save ``image`` to ``imagePath`` through ``cv2.imwrite``.

    The status value returned by OpenCV is discarded.
    """
    cv2.imwrite(imagePath, image)
    return None


def main() -> None:
    """Generate saliency maps and depth maps for the configured images.

    For every image the spectral-residual and fine-grained saliency maps are
    computed first; afterwards a depth map is estimated with each of the three
    MiDaS model variants. All results are written to the same output folder.
    """
    # NOTE: the image list and output folder are hard-coded for now; the
    # interactive variant is kept below for reference.
    # dirImage = input("image dir path is: ")
    # dirOut = input("output image dir path is: ")
    # imagePaths: list = readDirectory(dir=dirImage)
    imagePaths: list = ["test.jpg"]
    dirOut = "data"

    # cv2.imwrite fails silently when the target directory is missing.
    makedirs(dirOut, exist_ok=True)

    with Bar(
        "Creating saliency maps of PascalVOC images...", max=len(imagePaths)
    ) as bar:
        imagePath: str
        for imagePath in imagePaths:
            # Pass dirOut explicitly so saliency and depth maps always land in
            # the same folder, even if dirOut stops matching the default.
            computeSpectralSaliency(imagePath, dirOut)
            computeFineGrainSaliency(imagePath, dirOut)
            bar.next()

    for modelType in (depth_DPTHybrid, depth_DPTLarge, depth_MiDaSsmall):
        estimateDepth(imagePaths, modelType, dirOut)


if __name__ == "__main__":
    main()


In [None]:

def showimage(myimage):
    """Display an image in a 10x10 inch matplotlib figure without axis ticks.

    Multi-channel images are assumed to use OpenCV's channel order (BGR or
    BGRA) and are reordered to RGB / RGBA for display. Two-dimensional images
    are shown with the grey colour map.

    Args:
        myimage: Image array to display.
    """
    # NOTE(review): `plt` is presumably matplotlib.pyplot; no import of it is
    # visible in this notebook, so confirm it is in scope before running.
    if myimage.ndim > 2:
        if myimage.shape[2] == 4:
            # BGRA -> RGBA: swap only the colour channels and keep alpha last.
            # Reversing all four channels would yield ARGB instead.
            myimage = myimage[:, :, [2, 1, 0, 3]]
        else:
            # BGR -> RGB.
            myimage = myimage[:, :, ::-1]

    _, ax = plt.subplots(figsize=[10, 10])
    ax.imshow(myimage, cmap="gray", interpolation="bicubic")
    # Hide the tick values on the X and Y axes.
    plt.xticks([])
    plt.yticks([])
    plt.show()

def bgremove1(myimage):
    """Replace the bright background of an image with white via Otsu thresholding.

    The image is blurred, quantised per channel and converted to greyscale.
    Otsu's threshold then splits it: pixels above the threshold become white,
    the remaining pixels keep their (blurred, quantised) colour.

    Args:
        myimage: 3-channel image; assumed to be 8-bit BGR as returned by
            ``cv2.imread`` (the code converts with ``COLOR_BGR2GRAY``).

    Returns:
        The image with the thresholded background painted white.
    """
    # Blur the image to reduce noise.
    myimage = cv2.GaussianBlur(myimage, (5, 5), 0)

    # Quantise every channel. For 8-bit input digitize yields a bin index in
    # 0..5, so the stored values are the multiples of 51 between 0 and 255.
    bins = numpy.array([0, 51, 102, 153, 204, 255])
    myimage[:, :, :] = numpy.digitize(myimage[:, :, :], bins, right=True) * 51

    # Create a single-channel greyscale image for thresholding.
    myimage_grey = cv2.cvtColor(myimage, cv2.COLOR_BGR2GRAY)

    # Otsu thresholding with THRESH_BINARY: pixels above the threshold become
    # 255, which gives the all-white background.
    ret, background = cv2.threshold(
        myimage_grey, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # Convert the black-and-white mask back into a 3-channel image.
    background = cv2.cvtColor(background, cv2.COLOR_GRAY2BGR)

    # Otsu thresholding with THRESH_TOZERO_INV keeps the grey values at or
    # below the threshold; at this point `foreground` is only a mask.
    ret, foreground = cv2.threshold(
        myimage_grey, 0, 255, cv2.THRESH_TOZERO_INV + cv2.THRESH_OTSU
    )
    # Apply the mask to the colour image to extract the real foreground.
    foreground = cv2.bitwise_and(myimage, myimage, mask=foreground)

    # The two parts cover disjoint pixels, so adding them combines them.
    finalimage = background + foreground

    return finalimage

def bgremove2(myimage):
    """Paint the bright pixels of an image white using fixed thresholds.

    The greyscale version of the image is truncated at 127 and then split at
    126: pixels above that level become white, all others keep their original
    colour.

    Args:
        myimage: 3-channel image; converted with ``COLOR_BGR2GRAY``, so BGR
            channel order is assumed.

    Returns:
        The sum of the white background and the masked original pixels.
    """
    grey = cv2.cvtColor(myimage, cv2.COLOR_BGR2GRAY)

    # Cap every grey value above 127 at 127.
    _, capped = cv2.threshold(grey, 127, 255, cv2.THRESH_TRUNC)

    # Complementary masks from the capped image: pixels above 126 form the
    # background, everything else the foreground.
    _, foreground_mask = cv2.threshold(capped, 126, 255, cv2.THRESH_BINARY_INV)
    _, background_mask = cv2.threshold(capped, 126, 255, cv2.THRESH_BINARY)

    # White 3-channel background and the original pixels under the mask.
    white_background = cv2.cvtColor(background_mask, cv2.COLOR_GRAY2BGR)
    kept_pixels = cv2.bitwise_and(myimage, myimage, mask=foreground_mask)

    return white_background + kept_pixels

def bgremove3(myimage):
    """Paint the background of an image white using saturation and value masks.

    A pixel counts as foreground when its HSV saturation is at least 127 or
    when its shifted brightness passes the value test below. All other pixels
    become white.

    Args:
        myimage: 3-channel image; converted with ``COLOR_BGR2HSV``, so BGR
            channel order is assumed.

    Returns:
        The image with the background pixels painted white.
    """
    myimage_hsv = cv2.cvtColor(myimage, cv2.COLOR_BGR2HSV)

    # Saturation mask: 1 where S is at least 127, 0 below that.
    s = myimage_hsv[:, :, 1]
    s = numpy.where(s < 127, 0, 1)

    # Value mask: shift the brightness by 127, take it modulo 255 and keep the
    # pixels whose result exceeds 127. For 8-bit input the addition wraps
    # around, and the test selects original V values in 1..127.
    v = (myimage_hsv[:, :, 2] + 127) % 255
    v = numpy.where(v > 127, 1, 0)

    # Combine the S and V masks into one foreground mask, cast back to 8-bit.
    foreground = numpy.where(s + v > 0, 1, 0).astype(numpy.uint8)

    # Invert the foreground to get a white background, then expand it to
    # three channels.
    background = numpy.where(foreground == 0, 255, 0).astype(numpy.uint8)
    background = cv2.cvtColor(background, cv2.COLOR_GRAY2BGR)

    # Apply the foreground mask to the original image.
    foreground = cv2.bitwise_and(myimage, myimage, mask=foreground)

    # The two parts cover disjoint pixels, so adding them combines them.
    finalimage = background + foreground

    return finalimage

def main() -> None:
    """Prompt for an image path and display the output of each background remover.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from the entered path.
    """
    imagePath: str = input("Enter path: ")
    image: ndarray = cv2.imread(imagePath)
    # cv2.imread reports failure by returning None instead of raising; stop
    # here with a clear message rather than failing inside the first filter.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {imagePath}")

    for remover in (bgremove1, bgremove2, bgremove3):
        showimage(remover(image))


if __name__ == "__main__":
    main()