# Segmentation on CPU

torchvision ships many pretrained models for different tasks: classification, segmentation, object detection, and more. The full list is here:
https://pytorch.org/vision/stable/models.html#object-detection-instance-segmentation-and-person-keypoint-detection

In [14]:
import torch
from torchvision.models import segmentation
from torchvision.io.image import read_image
from torchvision.transforms.functional import to_pil_image
from torchvision import transforms
from PIL import Image
from matplotlib.pyplot import show
from cpu import utils
import cv2
import numpy as np

# PyTorch Model

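Download the pretrained model and switch it to evaluation mode. Note that `model.eval()` only makes layers such as batch normalization and dropout use their inference behavior; it does not disable gradient tracking by itself. Gradient tracking is turned off separately with `torch.no_grad()` when we run inference below.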

In [2]:
# Build DeepLabV3 (ResNet-50 backbone) with pretrained weights and put it in
# evaluation mode in one step; `Module.eval()` returns the module itself.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version before changing it.
model = segmentation.deeplabv3_resnet50(pretrained=True).eval()

# Display the architecture as the cell output.
model



DeepLabV3(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Se

Let's read an image

In [3]:
# Load the photo and force three RGB channels. Without the conversion a
# grayscale, palette, or RGBA file would reach `Normalize` with a channel count
# that does not match the 3-element mean/std below and raise an error.
input_image = Image.open("road.jpg").convert("RGB")

# Preprocessing pipeline applied to the PIL image:
#   1. ToTensor  -> float tensor, channels first, values scaled to [0, 1]
#   2. Resize    -> shorter side resized to 512 px, aspect ratio preserved
#   3. Normalize -> per-channel standardisation with the mean/std that the
#                   torchvision pretrained models are documented to expect
preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(512),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

input_tensor = preprocess(input_image)
# The model consumes batches, so add a leading batch axis of size 1.
input_batch = input_tensor.unsqueeze(0)

Let's run inference on it

In [4]:
# Inference only: run the forward pass without recording the autograd graph.
with torch.no_grad():
    # The model returns a dict; the 'out' entry is the one used in this notebook.
    batch_logits = model(input_batch)['out']

# The batch holds a single image, so take element 0 to drop the batch axis.
output = batch_logits[0]

In [5]:
# `output` no longer has a batch axis (it was indexed with [0] after the forward
# pass), so its layout is (classes, height, width) and the class scores live on
# axis 0. Softmax must therefore run over dim=0 to give per-pixel class
# probabilities; dim=1 would normalise along the image height instead.
normalized_masks = torch.nn.functional.softmax(output, dim=0)

In [6]:
# Inspect the tensor's dimensions; expected layout is (classes, height, width)
# because the batch axis was already removed from `output`.
normalized_masks.shape

torch.Size([21, 512, 767])

In [20]:
# Turn the raw model output into a colour image via the project helper.
# NOTE(review): presumably this colour-codes the per-pixel arg-max class —
# confirm in `cpu/utils`.
segmented_image = utils.draw_segmentation_map(output)

# Resize the original photo to the segmentation map's size instead of using a
# hard-coded (767, 512), so the cell keeps working for other images.
# `cv2.resize` takes the target size as (width, height), the reverse of the
# (height, width) order of an array's shape.
mask_height, mask_width = segmented_image.shape[:2]
input_image_resized = cv2.resize(np.array(input_image), (mask_width, mask_height))

# Blend the photo with the segmentation map (project helper).
image_to_plot = utils.image_overlay(input_image_resized, segmented_image)

array([[[ 1,  3, 13],
        [ 0,  5, 14],
        [ 4, 11, 17],
        ...,
        [ 6,  7, 17],
        [ 6,  8, 16],
        [ 2,  6,  7]],

       [[ 3,  5, 15],
        [12,  7, 18],
        [ 2,  4, 15],
        ...,
        [ 5,  6, 16],
        [ 8, 10, 18],
        [ 5,  9, 10]],

       [[ 0,  0,  5],
        [ 0,  0,  4],
        [ 4,  7, 12],
        ...,
        [ 5,  8, 18],
        [ 7,  8, 18],
        [ 6,  7, 17]],

       ...,

       [[34, 27, 32],
        [38, 33, 33],
        [39, 35, 34],
        ...,
        [ 1,  0,  4],
        [ 1,  0,  4],
        [ 1,  0,  4]],

       [[36, 27, 33],
        [47, 36, 38],
        [24, 25, 23],
        ...,
        [ 1,  0,  2],
        [ 1,  0,  2],
        [ 1,  0,  2]],

       [[37, 30, 35],
        [25, 28, 28],
        [12, 13, 11],
        ...,
        [ 1,  0,  2],
        [ 1,  0,  2],
        [ 1,  0,  2]]], dtype=uint8)

In [21]:
# Check the resized photo's (height, width, channels); it is passed to the
# overlay helper together with the segmentation map.
input_image_resized.shape

(512, 767, 3)

In [22]:
# Check the segmentation map's (height, width, channels) for comparison with
# the resized photo.
segmented_image.shape

(512, 767, 3)