#### **SET D - Image Segmentation**

---------

In [None]:
!pip install transformers
!pip install gradio
!pip install timm
!pip install torchvision

In [None]:
# Set the transformers logger to error level so warning messages are suppressed

from transformers.utils import logging
logging.set_verbosity_error()

**Mask Generation with SAM (Segment Anything Model by Meta AI)**

In [None]:
from transformers import pipeline

In [None]:
sam_pipe = pipeline("mask-generation", "./models/Zigeng/SlimSAM-uniform-77")

In [None]:
from PIL import Image

In [None]:
# The image below may not be available; replace the path with the path to your own image.
# The resize result is not assigned to anything, so it is only shown as the
# cell output; raw_image keeps its original size for the cells that follow.
raw_image = Image.open('meta_llamas.jpg')
raw_image.resize((720, 375))

The following cell takes a while to run. Adjust the *points_per_batch* argument according to your hardware capability.

In [None]:
output = sam_pipe(raw_image, points_per_batch=16)

In [None]:
from helper import show_pipe_masks_on_image

In [None]:
show_pipe_masks_on_image(raw_image, output)

Faster Inference - Inferring on an image based on a **single point**

In [None]:
from transformers import SamModel, SamProcessor

In [None]:
model = SamModel.from_pretrained(
    "./models/Zigeng/SlimSAM-uniform-77")

processor = SamProcessor.from_pretrained(
    "./models/Zigeng/SlimSAM-uniform-77")

Again, the next two cells depend on the image you uploaded and the object you want to mask (select).

In [None]:
raw_image.resize((720, 375))

In [None]:
input_points = [[[1600, 700]]]

`return_tensors="pt"` tells the processor to return PyTorch tensors.

In [None]:
inputs = processor(raw_image,
                 input_points=input_points,
                 return_tensors="pt")

In [None]:
import torch

In [None]:
with torch.no_grad():
    outputs = model(**inputs)

In [None]:
predicted_masks = processor.image_processor.post_process_masks(
    outputs.pred_masks,
    inputs["original_sizes"],
    inputs["reshaped_input_sizes"]
)

In [None]:
# Only the last expression of a cell is displayed; execute these separately
# if you want to see the value each of them holds.

# Number of entries returned by post_process_masks
len(predicted_masks)

# Keep the first entry (used by the cells below) and look at its shape
predicted_mask = predicted_masks[0]
predicted_mask.shape

# The iou_scores field of the model outputs
outputs.iou_scores

In [None]:
from helper import show_mask_on_image

In [None]:
# predicted_mask[:, i] selects the i-th candidate along dimension 1, so iterate
# over however many candidates are present instead of assuming exactly 3.
for i in range(predicted_mask.shape[1]):
    show_mask_on_image(raw_image, predicted_mask[:, i])

**Depth Estimation with DPT**

In [None]:
depth_estimator = pipeline(task="depth-estimation",
                        model="./models/Intel/dpt-hybrid-midas")

In [None]:
raw_image = Image.open('gradio_tamagochi_vienna.png')
raw_image.resize((806, 621))

In [None]:
# Only the last expression of a cell is displayed; execute these separately
# if you want to see the value each of them holds.
output = depth_estimator(raw_image)

# Full result returned by the depth-estimation pipeline
output

# Shape of the raw predicted depth tensor
output["predicted_depth"].shape

# Shape after inserting a size-1 dimension at index 1 (the form passed to interpolate below)
output["predicted_depth"].unsqueeze(1).shape

In [None]:
prediction = torch.nn.functional.interpolate(
    output["predicted_depth"].unsqueeze(1),
    size=raw_image.size[::-1],
    mode="bicubic",
    align_corners=False,
)

In [None]:
# Only the last expression of a cell is displayed; execute these separately
# if you want to see each value.

# Shape of the interpolated prediction
prediction.shape

# Target size passed to interpolate: PIL's (width, height) reversed
raw_image.size[::-1]

prediction

In [None]:
import numpy as np 

Normalize the predicted tensors (between 0 and 255) so that they can be displayed.

In [None]:
# Use a dedicated name instead of rebinding `output`: `output` still holds the
# depth_estimator result that the interpolation cell reads via
# output["predicted_depth"], so overwriting it would break re-running that cell.
depth_array = prediction.squeeze().numpy()

# Scale so the largest value maps to 255. Guard against a map whose maximum is
# not positive, which would otherwise divide by zero or flip the sign.
max_depth = np.max(depth_array)
if max_depth > 0:
    scaled = depth_array * 255 / max_depth
else:
    scaled = np.zeros_like(depth_array)

# Bicubic interpolation can overshoot and produce slightly negative values;
# clip before the uint8 cast so they become 0 instead of wrapping around.
formatted = np.clip(scaled, 0, 255).astype("uint8")
depth = Image.fromarray(formatted)

In [None]:
depth

**Finally, Demo using Gradio**

In [None]:
import os
import gradio as gr
from transformers import pipeline

In [None]:
def launch(input_image):
    """Estimate a depth map for ``input_image`` and return it as an image.

    Args:
        input_image: PIL image supplied by the Gradio input component, or
            ``None`` when the form is submitted without an image.

    Returns:
        A PIL image built from the normalized uint8 depth array, resized to
        the height and width of ``input_image``; ``None`` when no image was
        supplied.
    """
    # Gradio passes None when the user submits without providing an image.
    if input_image is None:
        return None

    out = depth_estimator(input_image)

    # resize the prediction to the input's (height, width);
    # PIL reports size as (width, height), hence the reversal
    prediction = torch.nn.functional.interpolate(
        out["predicted_depth"].unsqueeze(1),
        size=input_image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )

    # normalize the prediction so the largest value maps to 255
    depth_array = prediction.squeeze().numpy()
    max_depth = np.max(depth_array)
    if max_depth > 0:
        scaled = depth_array * 255 / max_depth
    else:
        # A non-positive maximum would divide by zero or flip the sign.
        scaled = np.zeros_like(depth_array)

    # Bicubic interpolation can overshoot below zero; clip before the uint8
    # cast so those pixels become 0 instead of wrapping around.
    formatted = np.clip(scaled, 0, 255).astype("uint8")
    return Image.fromarray(formatted)

In [None]:
iface = gr.Interface(launch, 
                     inputs=gr.Image(type='pil'), 
                     outputs=gr.Image(type='pil'))

In [None]:
# PORT1 is only set in some hosted environments. When it is missing or empty,
# pass None so Gradio searches for an available port itself instead of the
# cell failing with a KeyError.
server_port = os.environ.get("PORT1")
iface.launch(share=True, server_port=int(server_port) if server_port else None)

In [None]:
iface.close()

-----------