<a href="https://colab.research.google.com/github/AdityaDhaulakhandi/cv_tasks/blob/main/task1_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from torchvision.io.image import read_image
from torchvision.utils import draw_segmentation_masks
from torchvision.transforms.functional import to_pil_image
import torch
import numpy as np

In [None]:
# Decode the ten test images, stored as 1.jpg ... 10.jpg in the working
# directory, into a list of image tensors (one per file, in numeric order).
test_list = [read_image(str(image_number) + '.jpg') for image_number in range(1, 11)]

In [None]:
# Load pretrained weights for semantic segmentation and generate results on the 10 test images (512x512)

from torchvision.models.segmentation import deeplabv3_mobilenet_v3_large, DeepLabV3_MobileNet_V3_Large_Weights
# from torchvision.models.segmentation import DeepLabV3_ResNet50_Weights,deeplabv3_resnet50
# from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights
# from torchvision.models.segmentation import lraspp_mobilenet_v3_large, LRASPP_MobileNet_V3_Large_Weights

In [None]:
# Pick the default pretrained weight set for DeepLabV3 with a MobileNetV3-Large
# backbone and build the model from it.
weights = DeepLabV3_MobileNet_V3_Large_Weights.DEFAULT
model = deeplabv3_mobilenet_v3_large(weights=weights)
# Switch to evaluation mode before running inference; as the last expression
# of the cell this also displays the module structure.
model.eval()

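Measuring the inference time of the deep network. Two GPU-specific effects have to be handled for the timing to be meaningful:
1. GPU warm-up: the first few forward passes include one-time initialisation cost, so a few untimed passes are run first.
2. Asynchronous execution: CUDA kernels are launched asynchronously, so the GPU must be synchronised before the elapsed time is read.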

In [None]:
# Preprocessing pipeline that ships with the pretrained weights.
# resize_size=None is passed so the images are not resized (TODO confirm against
# the torchvision preset docs); the test images are expected to be 512x512.
transforms = weights.transforms(resize_size=None)

# Preprocess every test image, then stack them into a single batch tensor.
preprocessed_images = [transforms(image) for image in test_list]
batch = torch.stack(preprocessed_images)

In [None]:
# Put the model and all inputs on the GPU up front so that host-to-device
# transfer time is excluded from the inference-time calculation.
device = torch.device("cuda")  # the default CUDA device

# nn.Module.to moves the parameters in place and returns the same module,
# so from here on the model expects CUDA inputs.
model = model.to(device)

# Random input used only for the GPU warm-up passes.
dummy_input = torch.randn((1, 3, 224, 224), dtype=torch.float32).to(device)

# GPU copy of the preprocessed test batch; `batch` itself stays on the CPU.
batch_gpu = batch.to(device)

In [None]:
# Benchmark the latency of one forward pass over the whole test batch.
#
# CUDA events are used for timing because kernels are launched asynchronously:
# the events are recorded on the GPU stream, and elapsed_time() (milliseconds)
# is only read after an explicit synchronize.
starter = torch.cuda.Event(enable_timing=True)
ender = torch.cuda.Event(enable_timing=True)

# One timing entry per repetition.
repetitions = 300
timings = np.zeros((repetitions, 1))

with torch.no_grad():
    # GPU warm-up. Run under no_grad as well, so the warm-up does not build
    # autograd graphs that the timed passes never build.
    for _ in range(10):
        _ = model(dummy_input)
    # Make sure all warm-up work has finished before the first measurement.
    torch.cuda.synchronize()

    # Measure performance on the test data batch.
    for rep in range(repetitions):
        starter.record()
        _ = model(batch_gpu)  # forward pass on the GPU
        ender.record()
        # Wait for the GPU to finish so both events have completed.
        torch.cuda.synchronize()
        timings[rep] = starter.elapsed_time(ender)

# Mean and standard deviation of the per-batch latency, in milliseconds.
mean_syn = np.mean(timings)
std_syn = np.std(timings)
print(mean_syn," millisec")

85.66366861979166  millisec


In [None]:
import math

# Mean latency in milliseconds of one forward pass over the 10 test images,
# one entry per benchmarked model. The first value is the
# deeplabv3_mobilenet_v3_large result printed by the timing cell; the others
# were presumably recorded by re-running the notebook with the alternative
# models -- TODO confirm which entry belongs to which model.
batch_latency_ms = (85.66366861979166, 903.0307497151692, 646.3992173258464, 43.98113770802816)

# Latency for a single test image.
model_inference_time = [latency / 10.0 for latency in batch_latency_ms]

# Whole frames processed per second (1000 ms divided by per-image latency, floored).
model_fps = [math.floor(1000.0 / per_image_ms) for per_image_ms in model_inference_time]
model_fps

[116, 11, 15, 227]

In [None]:
# Throughput: images processed per second of GPU time, measured over repeated
# forward passes on the test batch.
repetitions = 100
# Take the image count from the batch itself rather than hard-coding it, so
# the figure stays correct if the number of test images changes.
images_per_batch = batch_gpu.shape[0]
total_time = 0

# One pair of timing events is enough; they are re-recorded every iteration.
starter = torch.cuda.Event(enable_timing=True)
ender = torch.cuda.Event(enable_timing=True)

with torch.no_grad():
    for rep in range(repetitions):
        starter.record()
        _ = model(batch_gpu)
        ender.record()
        torch.cuda.synchronize()  # wait until the GPU completes
        curr_time = starter.elapsed_time(ender) / 1000  # milliseconds -> seconds
        total_time += curr_time

Throughput = (repetitions * images_per_batch) / total_time
print('Final Throughput:',Throughput)

Final Throughput: 114.68034474294737


In [None]:
# Run the segmentation forward pass on whichever device the model currently
# lives on. Once the model has been moved to the GPU, feeding it the CPU
# `batch` raises a device-mismatch error, so the input is moved to the model's
# device first. no_grad avoids keeping an autograd graph for the large output,
# and the result is brought back to the CPU for the mask post-processing.
with torch.no_grad():
    model_device = next(model.parameters()).device
    pred = model(batch.to(model_device))["out"].cpu()

In [None]:
# Sanity check on the raw model output: its shape plus the smallest and
# largest values it contains.
print(pred.shape,pred.min().item(),pred.max().item())
# Shape layout: (test images, classes, H, W)
# print(weights.meta["categories"])

torch.Size([10, 21, 512, 512]) -8.665416717529297 25.846508026123047


In [None]:
# Softmax over the class dimension (dim 1) maps the raw scores to values in
# [0, 1] that sum to 1 per pixel, i.e. a per-pixel probability for each class.
normalized_masks = torch.softmax(pred, dim=1)

In [None]:
# Lookup table from category name to its index in the model's class dimension,
# built from the category list stored in the weights' metadata.
class_to_idx = dict(
    (category, position)
    for position, category in enumerate(weights.meta["categories"])
)

In [None]:
class_dim = 1  # dimension of normalized_masks that indexes the classes

# Most probable class index for every pixel of every test image.
predicted_class = normalized_masks.argmax(class_dim)

# Class indices shaped (num_classes, 1, 1, 1) so that the comparison below
# broadcasts into one boolean mask per class.
class_indices = torch.arange(normalized_masks.shape[1])[:, None, None, None]
per_class_masks = predicted_class == class_indices

# The comparison puts the class dimension first; swap it with the image
# dimension so the masks can be iterated image by image.
all_classes_masks = per_class_masks.swapaxes(0, 1)

In [None]:
# Overlay each test image with its per-class boolean masks (alpha=0.7),
# pairing images and masks by position.
image_with_masks = []
for img, mask in zip(test_list, all_classes_masks):
    overlay = draw_segmentation_masks(img, masks=mask, alpha=0.7)
    image_with_masks.append(overlay)

In [None]:
# for i in range(10):
#   to_pil_image(image_with_masks[i]).show()