In [1]:
# This notebook should be used inside the repository (https://github.com/VainF/DeepLabV3Plus-Pytorch)
# This repository should be downloaded and all requirements should be installed.

  import skimage.viewer as V


In [69]:
from tqdm import tqdm
import network
import utils
import os
import random
import argparse
import numpy as np
from collections import namedtuple

from torch.utils import data
from datasets import VOCSegmentation, Cityscapes
from utils import ext_transforms as et
from metrics import StreamSegMetrics

import torch
import torch.nn as nn
from torchvision import transforms

from utils.visualizer import Visualizer

from PIL import Image
import matplotlib
import matplotlib.pyplot as plt

In [3]:
!pwd

/home/ghadeer/Projects/KAMAZ/DeepLabV3Plus-Pytorch


In [135]:
# Parameters
seed = 0
num_classes = 19
output_stride = 8
batch_size = 4
crop_size = 513

In [5]:
def validate(opts, model, loader, device, metrics, ret_samples_ids=None):
    """Do validation and return specified samples"""
    metrics.reset()
    ret_samples = []
    if opts.save_val_results:
        if not os.path.exists('results'):
            os.mkdir('results')
        denorm = utils.Denormalize(mean=[0.485, 0.456, 0.406], 
                                   std=[0.229, 0.224, 0.225])
        img_id = 0

    with torch.no_grad():
        for i, (images, labels) in tqdm(enumerate(loader)):
            
            images = images.to(device, dtype=torch.float32)
            labels = labels.to(device, dtype=torch.long)

            outputs = model(images)
            preds = outputs.detach().max(dim=1)[1].cpu().numpy()
            targets = labels.cpu().numpy()

            metrics.update(targets, preds)
            if ret_samples_ids is not None and i in ret_samples_ids:  # get vis samples
                ret_samples.append(
                    (images[0].detach().cpu().numpy(), targets[0], preds[0]))

            if opts.save_val_results:
                for i in range(len(images)):
                    image = images[i].detach().cpu().numpy()
                    target = targets[i]
                    pred = preds[i]

                    image = (denorm(image) * 255).transpose(1, 2, 0).astype(np.uint8)
                    target = loader.dataset.decode_target(target).astype(np.uint8)
                    pred = loader.dataset.decode_target(pred).astype(np.uint8)

                    Image.fromarray(image).save('results/%d_image.png' % img_id)
                    Image.fromarray(target).save('results/%d_target.png' % img_id)
                    Image.fromarray(pred).save('results/%d_pred.png' % img_id)

                    fig = plt.figure()
                    plt.imshow(image)
                    plt.axis('off')
                    plt.imshow(pred, alpha=0.7)
                    ax = plt.gca()
                    ax.xaxis.set_major_locator(matplotlib.ticker.NullLocator())
                    ax.yaxis.set_major_locator(matplotlib.ticker.NullLocator())
                    plt.savefig('results/%d_overlay.png' % img_id, bbox_inches='tight', pad_inches=0)
                    plt.close()
                    img_id += 1

        score = metrics.get_results()
    return score, ret_samples

Defining the encoding for the result of the network according to the pretrained model dataset

In [113]:
CityscapesClass = namedtuple('CityscapesClass', ['name', 'id', 'train_id', 'category', 'category_id',
                                                 'has_instances', 'ignore_in_eval', 'color'])
classes = [
    CityscapesClass('unlabeled',            0, 255, 'void', 0, False, True, (0, 0, 0)),
    CityscapesClass('ego vehicle',          1, 255, 'void', 0, False, True, (0, 0, 0)),
    CityscapesClass('rectification border', 2, 255, 'void', 0, False, True, (0, 0, 0)),
    CityscapesClass('out of roi',           3, 255, 'void', 0, False, True, (0, 0, 0)),
    CityscapesClass('static',               4, 255, 'void', 0, False, True, (0, 0, 0)),
    CityscapesClass('dynamic',              5, 255, 'void', 0, False, True, (111, 74, 0)),
    CityscapesClass('ground',               6, 255, 'void', 0, False, True, (81, 0, 81)),
    CityscapesClass('road',                 7, 0, 'flat', 1, False, False, (128, 64, 128)),
    CityscapesClass('sidewalk',             8, 1, 'flat', 1, False, False, (244, 35, 232)),
    CityscapesClass('parking',              9, 255, 'flat', 1, False, True, (250, 170, 160)),
    CityscapesClass('rail track',           10, 255, 'flat', 1, False, True, (230, 150, 140)),
    CityscapesClass('building',             11, 2, 'construction', 2, False, False, (70, 70, 70)),
    CityscapesClass('wall',                 12, 3, 'construction', 2, False, False, (102, 102, 156)),
    CityscapesClass('fence',                13, 4, 'construction', 2, False, False, (190, 153, 153)),
    CityscapesClass('guard rail',           14, 255, 'construction', 2, False, True, (180, 165, 180)),
    CityscapesClass('bridge',               15, 255, 'construction', 2, False, True, (150, 100, 100)),
    CityscapesClass('tunnel',               16, 255, 'construction', 2, False, True, (150, 120, 90)),
    CityscapesClass('pole',                 17, 5, 'object', 3, False, False, (153, 153, 153)),
    CityscapesClass('polegroup',            18, 255, 'object', 3, False, True, (153, 153, 153)),
    CityscapesClass('traffic light',        19, 6, 'object', 3, False, False, (250, 170, 30)),
    CityscapesClass('traffic sign',         20, 7, 'object', 3, False, False, (220, 220, 0)),
    CityscapesClass('vegetation',           21, 8, 'nature', 4, False, False, (107, 142, 35)),
    CityscapesClass('terrain',              22, 9, 'nature', 4, False, False, (152, 251, 152)),
    CityscapesClass('sky',                  23, 10, 'sky', 5, False, False, (70, 130, 180)),
    CityscapesClass('person',               24, 11, 'human', 6, True, False, (220, 20, 60)),
    CityscapesClass('rider',                25, 12, 'human', 6, True, False, (255, 0, 0)),
    CityscapesClass('car',                  26, 13, 'vehicle', 7, True, False, (0, 0, 142)),
    CityscapesClass('truck',                27, 14, 'vehicle', 7, True, False, (0, 0, 70)),
    CityscapesClass('bus',                  28, 15, 'vehicle', 7, True, False, (0, 60, 100)),
    CityscapesClass('caravan',              29, 255, 'vehicle', 7, True, True, (0, 0, 90)),
    CityscapesClass('trailer',              30, 255, 'vehicle', 7, True, True, (0, 0, 110)),
    CityscapesClass('train',                31, 16, 'vehicle', 7, True, False, (0, 80, 100)),
    CityscapesClass('motorcycle',           32, 17, 'vehicle', 7, True, False, (0, 0, 230)),
    CityscapesClass('bicycle',              33, 18, 'vehicle', 7, True, False, (119, 11, 32)),
    CityscapesClass('license plate',        -1, 255, 'vehicle', 7, False, True, (0, 0, 142)),
]

train_id_to_color = [c.color for c in classes if (c.train_id != -1 and c.train_id != 255)]
train_id_to_color.append([0, 0, 0])
train_id_to_color = np.array(train_id_to_color)
id_to_train_id = np.array([c.train_id for c in classes])


def decode_result(res):
    return train_id_to_color[res]

In [6]:
os.environ['CUDA_VISIBLE_DEVICES'] = "0"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device: %s" % device)

Device: cuda


In [7]:
# Setup random seed
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [10]:
!ls data/RGB

0000000.png  0000071.png  0000142.png  0000213.png  0000284.png  0000355.png
0000001.png  0000072.png  0000143.png  0000214.png  0000285.png  0000356.png
0000002.png  0000073.png  0000144.png  0000215.png  0000286.png  0000357.png
0000003.png  0000074.png  0000145.png  0000216.png  0000287.png  0000358.png
0000004.png  0000075.png  0000146.png  0000217.png  0000288.png  0000359.png
0000005.png  0000076.png  0000147.png  0000218.png  0000289.png  0000360.png
0000006.png  0000077.png  0000148.png  0000219.png  0000290.png  0000361.png
0000007.png  0000078.png  0000149.png  0000220.png  0000291.png  0000362.png
0000008.png  0000079.png  0000150.png  0000221.png  0000292.png  0000363.png
0000009.png  0000080.png  0000151.png  0000222.png  0000293.png  0000364.png
0000010.png  0000081.png  0000152.png  0000223.png  0000294.png  0000365.png
0000011.png  0000082.png  0000153.png  0000224.png  0000295.png  0000366.png
0000012.png  0000083.png  0000154.png  0000225.png  0000296.png 

In [48]:
# Define the model architecture
model = network.deeplabv3plus_mobilenet(num_classes=num_classes, output_stride=output_stride)

In [56]:
# Load pretrained model
checkpoint = torch.load("checkpoints/best_deeplabv3plus_mobilenet_cityscapes_os16.pth", map_location=torch.device('cpu'))
model.load_state_dict(checkpoint["model_state"])
model.to(device)

DeepLabV3(
  (backbone): IntermediateLayerGetter(
    (low_level_features): Sequential(
      (0): ConvBNReLU(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): InvertedResidual(
        (conv): Sequential(
          (0): ConvBNReLU(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): ReLU6(inplace=True)
          )
          (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (2): InvertedResidual(
        (conv): Sequential(
          (0): ConvBNReLU(
            (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        

In [57]:
model.eval()

DeepLabV3(
  (backbone): IntermediateLayerGetter(
    (low_level_features): Sequential(
      (0): ConvBNReLU(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): InvertedResidual(
        (conv): Sequential(
          (0): ConvBNReLU(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): ReLU6(inplace=True)
          )
          (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (2): InvertedResidual(
        (conv): Sequential(
          (0): ConvBNReLU(
            (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        

In [58]:
# Transform the Image to be the same as the training data
img_transform = transforms.Compose([
    transforms.Resize((crop_size-1, crop_size-1)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

denorm = utils.Denormalize(mean=[0.485, 0.456, 0.406], 
                            std=[0.229, 0.224, 0.225])

In [127]:
img = Image.open("data/RGB/0000011.png").convert('RGB')
# img.show()
img = img_transform(img).unsqueeze(0)


In [128]:
img = img.to(device, dtype=torch.float32)

In [129]:
outputs = model(img)
preds = outputs.detach().max(dim=1)[1].cpu().numpy()

In [130]:
image = img[0].detach().cpu().numpy()
image = (denorm(image) * 255).transpose(1, 2, 0).astype(np.uint8)
Image.fromarray(image).show() 

In [131]:
pred = preds[0]
decoded_preds = decode_result(preds).astype(np.uint8)
Image.fromarray(decoded_preds[0]).show()

In [136]:
train_transform = et.ExtCompose([
    #et.ExtResize( 512 ),
    et.ExtRandomCrop(size=(crop_size, crop_size)),
    et.ExtColorJitter( brightness=0.5, contrast=0.5, saturation=0.5 ),
    et.ExtRandomHorizontalFlip(),
    et.ExtToTensor(),
    et.ExtNormalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
])

In [162]:
class Loader(data.Dataset):
    # Add the dictionary of colors to id 
    color_to_id =   {(0, 0, 0)    :0 ,
                     (128,64,128) :1 ,
                     (244,35,232) :2 ,
                     (70,70,70)   :3 ,
                     (102,102,156):4 ,
                     (153,153,153):5 ,
                     (250,170,30) :6 ,
                     (220,220,0)  :7 ,
                     (107,142,35) :8 ,
                     (152,251,152):9 ,
                     (70,130,180) :10,
                     (220,20,60)  :11,
                     (255,0,0)    :12,
                     (0,0,142)    :13,
                     (0,0,70)     :14,
                     (0,60,100)   :15,
                     (0,80,100)   :16,
                     (0,0,230)    :17,
                     (119,11,32)  :18,
                     (157.234,50) :1 ,
                     (72,0,98)    :0 ,
                     (167,106,29) :1
                    }
    def __init__(self, root, split='train', transform=None):
        self.root = os.path.expanduser(root)
        self.images_dir = os.path.join(self.root, 'data', split, 'RGB')
        self.targets_dir = os.path.join(self.root, 'data', split, 'GTDebug')
        self.transform = transform
        self.images = []
        self.targets = []
        
        if split not in ['train', 'test', 'val']:
            raise ValueError('Invalid split for mode! Please use split="train", split="test"'
                             ' or split="val"')
        for file_name in os.listdir(self.images_dir):
            self.images.append(os.path.join(self.images_dir,file_name))
            self.targets.append(os.path.join(self.targets_dir,file_name))

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (image, target) where target is a tuple of all target types if target_type is a list with more
            than one item. Otherwise target is a json object if target_type="polygon", else the image segmentation.
        """
        image = Image.open(self.images[index]).convert('RGB')
        target = Image.open(self.targets[index])
        if self.transform:
            image, target = self.transform(image, target)
        #target = self.encode_target(target)
        return image, target
    
    def __len__(self):
        return len(self.images)
    
    @classmethod
    def encode_target(cls, target):
        pass


In [153]:
loader = Loader("", transform=train_transform)

In [154]:
x = loader[1]

In [157]:
target = x [1]

In [182]:
np_target = target.detach().cpu().numpy()
arr_size = np_target.shape[:2]

In [186]:
new_target = np.zeros((arr_size))
for i in range(arr_size[0]):
    for j in range(arr_size[1]):
        new_target[i][j] = color_to_id[tuple(np_target[i][j][:3])]


In [188]:
color_to_id =   {(0, 0, 0)    :0 ,
                 (128,64,128) :0 ,
                 (244,35,232) :1 ,
                 (70,70,70)   :2 ,
                 (102,102,156):3 ,
                 (190,153,153):4 ,
                 (153,153,153):5 ,
                 (250,170,30) :6 ,
                 (220,220,0)  :7 ,
                 (107,142,35) :8 ,
                 (152,251,152):9 ,
                 (70,130,180) :10,
                 (220,20,60)  :11,
                 (255,0,0)    :12,
                 (0,0,142)    :13,
                 (0,0,70)     :14,
                 (0,60,100)   :15,
                 (0,80,100)   :16,
                 (0,0,230)    :17,
                 (119,11,32)  :18,
                 (157,234,50) :1 ,
                 (72,0,98)    :0 ,
                 (167,106,29) :1 ,
                 (72,0,97)    :0
                }

SyntaxError: invalid syntax (<ipython-input-188-b2e6c8cbb3e7>, line 7)

In [187]:
print(new_target)

[[0. 0. 0. ... 3. 3. 3.]
 [0. 0. 0. ... 3. 3. 3.]
 [0. 0. 0. ... 3. 3. 3.]
 ...
 [2. 2. 2. ... 1. 1. 1.]
 [2. 2. 2. ... 1. 1. 1.]
 [2. 2. 2. ... 1. 1. 1.]]
