# Saliency Mapper

> A tool to generate the saliency maps of images using a variety of techniques

Code was written by Nicholas M. Synovic, Oscar Yanek, and Rohan Sethi

## Optimal Performance

For optimal performance, in the *Runtime* tab of Google Colab, click *Change runtime type*, then choose **GPU** from the *Hardware Accelerator* dropdown.

## Upgrade Python `pip` Tool

Upgrade the Python `pip` tool to the latest version

In [None]:
%pip install --upgrade pip

## Install Python libraries via `pip`

Installed libraries are:

- opencv-contrib-python
- torch
- torchvision
- torchaudio
- pandas
- progress
- timm

In [None]:
%pip install opencv-contrib-python torch torchvision torchaudio pandas progress timm

## Import Dependencies 

In [None]:
from os import listdir
from os.path import join
from pathlib import PurePath

import cv2
import numpy
import torch
from numpy import ndarray
from progress.bar import Bar

## Allow Data to be Loaded From Google Drive



In [None]:
#from google.colab import drive
#drive.mount('/content/gdrive')

## Main Application

Initialize variables for program scope 

In [None]:
# Static saliency detectors, created once at module level and reused by
# computeSpectralSaliency / computeFineGrainSaliency.
spectralSaliency = cv2.saliency.StaticSaliencySpectralResidual_create()
fineGrainSaliency = cv2.saliency.StaticSaliencyFineGrained_create()
# MiDaS model names; main() passes these to estimateDepth, which forwards them
# to torch.hub.load as the model entry point.
depth_DPTLarge: str = "DPT_Large"
depth_DPTHybrid: str = "DPT_Hybrid"
depth_MiDaSsmall: str = "MiDaS_small"

## Simple Directory Reader

Lists the path of every file in a directory. Used when giving the program a dataset (a directory of images) instead of a single image.

In [None]:
def readDirectory(dir: str) -> list:
    """Return the path of every entry found directly inside ``dir``.

    Entries come from ``os.listdir`` (no recursion, no filtering, order as the
    OS reports it); each name is prefixed with ``dir`` via ``os.path.join``.
    """
    return [join(dir, entry) for entry in listdir(dir)]

## EstimateDepth Main Logic

Selects the pre-trained MiDaS model and its matching input transform from the arguments. Iterates over the `imagePaths` list behind a progress bar, runs depth estimation on each image, and writes the result to the output folder.

In [None]:
def estimateDepth(imagePaths: list, modelType: str, outputFolder: str = "data") -> None:
    """Estimate a depth map for each image with a MiDaS model and save it as JPEG.

    Args:
        imagePaths: Paths of the images to process.
        modelType: MiDaS entry-point name handed to ``torch.hub.load``.
            "DPT_Large" and "DPT_Hybrid" use the DPT transform; any other
            name uses the small-model transform.
        outputFolder: Folder that receives one file per image, named
            ``<image stem>_<modelType with "_" replaced by "-">.jpg``.
    """
    midas = torch.hub.load("intel-isl/MiDaS", modelType)
    midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    midas.to(device)
    # Inference only: take the network out of training mode so layers such as
    # batch-norm/dropout behave deterministically.
    midas.eval()

    if modelType in ("DPT_Large", "DPT_Hybrid"):
        transform = midas_transforms.dpt_transform
    else:
        transform = midas_transforms.small_transform

    outputSuffix: str = f'_{modelType.replace("_", "-")}.jpg'

    with Bar(f"Estimating depth with {modelType}...", max=(len(imagePaths))) as bar:
        imagePath: str
        for imagePath in imagePaths:
            imageName: str = PurePath(imagePath).with_suffix("").name + outputSuffix
            outputPath: str = join(outputFolder, imageName)

            image: ndarray = cv2.imread(imagePath)
            # OpenCV loads BGR; the MiDaS transforms are fed RGB here.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            input_batch = transform(image).to(device)

            with torch.no_grad():
                prediction = midas(input_batch)

                # Resample the prediction back to the source image's height/width.
                prediction = torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=image.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                ).squeeze()

            output = prediction.cpu().numpy()
            # The prediction is a float map that is not limited to 0-255; stretch
            # it to the full 8-bit range so the JPEG is not clipped on write.
            output = cv2.normalize(
                output, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U
            )
            cv2.imwrite(outputPath, output)
            bar.next()

## ComputeSpectralSaliency Main Logic

Builds the output path, reads the image at `imagePath`, and creates a saliency map for it using the spectral residual approach. Starting from the principle of natural image statistics, this method simulates the behavior of pre-attentive visual search. The algorithm analyzes the log spectrum of each image and obtains the spectral residual. It then transforms the spectral residual to the spatial domain to obtain the saliency map, which suggests the positions of proto-objects.

In [None]:
def computeSpectralSaliency(imagePath: str, outputFolder: str = "data") -> None:
    """Write the spectral-residual saliency map of one image as a JPEG.

    The map is saved to ``outputFolder`` as ``<image stem>_spectralResidual.jpg``.
    """
    stem: str = PurePath(imagePath).with_suffix("").name
    destination: str = join(outputFolder, stem + "_spectralResidual.jpg")

    source: ndarray = cv2.imread(imagePath)

    # computeSaliency returns (status flag, map); only the map is used.
    rawMap = spectralSaliency.computeSaliency(source)[1]

    # Multiply by 255 and cast to uint8 before writing
    # (assumes the detector yields values in [0, 1] — TODO confirm).
    scaledMap: ndarray = (rawMap * 255).astype("uint8")
    cv2.imwrite(destination, scaledMap)

## ComputeFineGrainSaliency Main Logic

Builds the output path, reads the image at `imagePath`, and creates a saliency map for it using the fine-grained approach. This method calculates saliency based on center-surround differences. High-resolution saliency maps are generated in real time by using integral images.

In [None]:
def computeFineGrainSaliency(imagePath: str, outputFolder: str = "data") -> None:
    """Write the fine-grained saliency map of one image as a JPEG.

    The map is saved to ``outputFolder`` as ``<image stem>_fineGrain.jpg``.
    """
    stem: str = PurePath(imagePath).with_suffix("").name
    destination: str = join(outputFolder, stem + "_fineGrain.jpg")

    source: ndarray = cv2.imread(imagePath)

    # computeSaliency returns (status flag, map); only the map is used.
    rawMap = fineGrainSaliency.computeSaliency(source)[1]

    # Multiply by 255 and cast to uint8 before writing
    # (assumes the detector yields values in [0, 1] — TODO confirm).
    scaledMap: ndarray = (rawMap * 255).astype("uint8")
    cv2.imwrite(destination, scaledMap)

## Triangle Threshold Background Removal

Blurs image to reduce noise. Bins the pixels between 1-5. Creates a single channel greyscale for thresholding. Performs Otsu Thresholding and extracts the background. Uses binary threshold to create an all white background. Converts black and white back into 3 channel greyscale. Performs Triangle thresholding and extracts the foreground. Uses TOZERO_INV to keep some detail of the foreground. Combines the background and foreground to make final image. Returns final image.

In [None]:
def triangleBackgroundRemoval(imagePath: str, outputFolder: str = "data") -> None:
    """Write a black/white mask of one image derived with triangle thresholding.

    The image is blurred, quantized to six intensity levels, converted to
    grayscale and thresholded (TOZERO_INV combined with TRIANGLE); every
    non-zero pixel of the result becomes 255. The mask is saved to
    ``outputFolder`` as ``<image stem>_triangleBackgroundRemoval.jpg``.
    """
    stem: str = PurePath(imagePath).with_suffix("").name
    destination: str = join(outputFolder, stem + "_triangleBackgroundRemoval.jpg")
    levels: ndarray = numpy.array([0, 51, 102, 153, 204, 255])

    smoothed: ndarray = cv2.GaussianBlur(cv2.imread(imagePath), (5, 5), 0)

    # digitize maps each pixel to a bin index 0-5; * 51 spreads the indices
    # back over 0-255. Written in place so the array keeps its dtype.
    smoothed[...] = numpy.digitize(smoothed, levels, right=True) * 51
    graySmoothed: ndarray = cv2.cvtColor(smoothed, cv2.COLOR_BGR2GRAY)

    # Threshold value 0 is a placeholder: the TRIANGLE flag picks it automatically.
    kept: ndarray = cv2.threshold(
        graySmoothed, 0, 255, cv2.THRESH_TOZERO_INV | cv2.THRESH_TRIANGLE
    )[1]

    # Collapse the surviving gray values to pure white.
    mask: ndarray = numpy.where(kept > 0, 255, 0).astype(kept.dtype)

    cv2.imwrite(destination, mask)

## Otsu Threshold Background Removal

Blurs image to reduce noise. Bins the pixels between 1-5. Creates a single channel greyscale for thresholding. Performs Otsu Thresholding and extracts the background. Uses binary threshold to create an all white background. Converts black and white back into 3 channel greyscale. Performs Otsu thresholding and extracts the foreground. Uses TOZERO_INV to keep some detail of the foreground. Combines the background and foreground to make final image. Returns final image.

In [None]:
def otsuThreshold(imagePath: str, outputFolder: str = "data") -> None:
    """Write a black/white mask of one image derived with Otsu thresholding.

    The image is blurred, quantized to six intensity levels, converted to
    grayscale and thresholded (TOZERO_INV combined with OTSU); every non-zero
    pixel of the result becomes 255. The mask is saved to ``outputFolder`` as
    ``<image stem>_otsu.jpg``.
    """
    stem: str = PurePath(imagePath).with_suffix("").name
    destination: str = join(outputFolder, stem + "_otsu.jpg")
    levels: ndarray = numpy.array([0, 51, 102, 153, 204, 255])

    smoothed: ndarray = cv2.GaussianBlur(cv2.imread(imagePath), (5, 5), 0)

    # digitize maps each pixel to a bin index 0-5; * 51 spreads the indices
    # back over 0-255. Written in place so the array keeps its dtype.
    smoothed[...] = numpy.digitize(smoothed, levels, right=True) * 51
    graySmoothed: ndarray = cv2.cvtColor(smoothed, cv2.COLOR_BGR2GRAY)

    # Threshold value 0 is a placeholder: the OTSU flag picks it automatically.
    kept: ndarray = cv2.threshold(
        graySmoothed, 0, 255, cv2.THRESH_TOZERO_INV | cv2.THRESH_OTSU
    )[1]

    # Collapse the surviving gray values to pure white.
    mask: ndarray = numpy.where(kept > 0, 255, 0).astype(kept.dtype)

    cv2.imwrite(destination, mask)

## Basic Thresholding Background Removal

First converts to greyscale. Performs truncate threshold to get baseline. Extracts binary threshold using the baseline for the background and foreground. Updates foreground with bitwise_and to extract real foreground. Converts black and white back into 3 channel greyscale. Combines the background and foreground to obtain our final image.

In [None]:
def basicBackgroundRemoval(imagePath: str, outputFolder: str = "data") -> None:
    """Write a black/white mask of one image using fixed-value thresholds.

    The grayscale image is truncated at 127, then inverse-binary thresholded
    at 126, and the result is saved to ``outputFolder`` as
    ``<image stem>_basicBackgroundRemoval.jpg``.
    """
    stem: str = PurePath(imagePath).with_suffix("").name
    destination: str = join(outputFolder, stem + "_basicBackgroundRemoval.jpg")

    gray: ndarray = cv2.cvtColor(cv2.imread(imagePath), cv2.COLOR_BGR2GRAY)

    # Step 1: cap every pixel at 127 (THRESH_TRUNC).
    _, capped = cv2.threshold(gray, 127, 255, cv2.THRESH_TRUNC)
    # Step 2: pixels at or below 126 become 255, the rest 0 (THRESH_BINARY_INV).
    _, mask = cv2.threshold(capped, 126, 255, cv2.THRESH_BINARY_INV)

    # Force any remaining non-zero value to pure white.
    mask[mask > 0] = 255

    cv2.imwrite(destination, mask)

## Hue Saturation Value

Converts image from BGR to HSV. Takes saturation and removes any values that are less than half, creating the saturation mask. Increases the brightness of the image and then takes the result modulo 255. Extracts any value above 127 to be a part of the value mask. Combines the two masks into a unified foreground. Casts back into 8-bit integer. Inverts foreground to get background in uint8. Converts background back into BGR. Applies foreground map to original image. Combines foreground and background.

Documentation: https://docs.opencv.org/4.x/df/d9d/tutorial_py_colorspaces.html

In [None]:
def hsvBackgroundRemoval(imagePath: str, outputFolder: str = "data") -> None:
    """Write a black/white mask of one image built from its HSV channels.

    A pixel is white (255) when its saturation is at least 127 or when its
    shifted value channel, ``(V + 127) % 255``, exceeds 127; otherwise it is
    black. The mask is saved to ``outputFolder`` as
    ``<image stem>_hsvBackgroundRemoval.jpg``.
    """
    stem: str = PurePath(imagePath).with_suffix("").name
    destination: str = join(outputFolder, stem + "_hsvBackgroundRemoval.jpg")

    hsv: ndarray = cv2.cvtColor(cv2.imread(imagePath), cv2.COLOR_BGR2HSV)

    saturationMask: ndarray = hsv[:, :, 1] >= 127

    # NOTE(review): hsv is presumably uint8, so "+ 127" wraps modulo 256 before
    # the "% 255" is applied — confirm this is the intended brightness shift.
    shiftedValue: ndarray = (hsv[:, :, 2] + 127) % 255
    valueMask: ndarray = shiftedValue > 127

    # Union of both masks, expressed directly as 0 / 255 in uint8.
    mask: ndarray = numpy.where(saturationMask | valueMask, 255, 0).astype(numpy.uint8)

    cv2.imwrite(destination, mask)

## Combined Main Method

In [None]:
def main() -> None:
    """Prompt for one image path and run every analysis in this file on it.

    For the entered image this writes, into the ``data`` folder, both saliency
    maps, the four background-removal masks, and one depth map per MiDaS model
    name defined at module level.
    """
    # Function-scope import: only main() needs to create the output folder.
    from os import makedirs

    # dirImage = input("image dir path is: ")
    # dirOut = input("output image dir path is: ")
    # imagePaths: list = readDirectory(dir=dirImage)
    imagePath: str = input("Enter path: ")
    imagePaths: list = [imagePath]
    dirOut: str = "data"

    # cv2.imwrite does not create missing directories, so make sure the
    # output folder exists before any function tries to write into it.
    makedirs(dirOut, exist_ok=True)

    with Bar(
        "Creating saliency maps of PascalVOC images...", max=len(imagePaths)
    ) as bar:
        for imagePath in imagePaths:
            computeSpectralSaliency(imagePath, dirOut)
            computeFineGrainSaliency(imagePath, dirOut)
            bar.next()

    # Run the background-removal functions defined in this file; each one
    # writes its mask to dirOut.
    for imagePath in imagePaths:
        triangleBackgroundRemoval(imagePath, dirOut)
        otsuThreshold(imagePath, dirOut)
        basicBackgroundRemoval(imagePath, dirOut)
        hsvBackgroundRemoval(imagePath, dirOut)

    for modelName in (depth_DPTHybrid, depth_DPTLarge, depth_MiDaSsmall):
        estimateDepth(imagePaths, modelName, dirOut)


if __name__ == "__main__":
    main()
