In [None]:
# -*- coding: utf-8 -*-
"""
Created on Tue Mar  9 13:55:26 2021

@author: remco
"""

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

class CNN(nn.Module):
    """Encoder-decoder segmentation network with a VGG16-shaped encoder.

    The encoder is the 13 convolution layers of VGG16 (each followed by batch
    normalisation and ReLU) grouped into five stages that end in a 2x2 max-pool.
    The decoder mirrors the encoder: every stage starts by un-pooling with the
    indices recorded by the matching encoder pool and then applies transposed
    convolutions. The encoder convolutions are initialised from the ImageNet
    pretrained VGG16 shipped with torchvision.

    Args:
        input_channels: number of channels of the input images. The pretrained
            weights of the first layer are only used when this is 3.
        output_channels: number of channels (classes) of the prediction.
    """

    # (suffix of the ``encoder_conv_*`` attribute, index of the matching
    # convolution inside ``VGG16.features``)
    _VGG16_CONV_INDEX = (
        ("11", 0), ("12", 2),
        ("21", 5), ("22", 7),
        ("31", 10), ("32", 12), ("33", 14),
        ("41", 17), ("42", 19), ("43", 21),
        ("51", 24), ("52", 26), ("53", 28),
    )

    def __init__(self,input_channels,output_channels):
        super(CNN, self).__init__()

        self.input_channels = input_channels
        self.output_channels = output_channels

        # Kept as an attribute so that state_dict keys stay compatible with
        # checkpoints saved by earlier versions of this class.
        self.VGG16 = models.vgg16(pretrained=True)

        ### Encoder
        # First stage
        self.encoder_conv_11    = nn.Conv2d(input_channels,64,kernel_size=3,padding=1)
        self.encoder_bn_11      = nn.BatchNorm2d(64)
        self.encoder_conv_12    = nn.Conv2d(64,64,kernel_size=3,padding=1)
        self.encoder_bn_12      = nn.BatchNorm2d(64)

        # Second stage
        self.encoder_conv_21    = nn.Conv2d(64,128,kernel_size=3,padding=1)
        self.encoder_bn_21      = nn.BatchNorm2d(128)
        self.encoder_conv_22    = nn.Conv2d(128,128,kernel_size=3,padding=1)
        self.encoder_bn_22      = nn.BatchNorm2d(128)

        # Third stage
        self.encoder_conv_31    = nn.Conv2d(128,256,kernel_size=3,padding=1)
        self.encoder_bn_31      = nn.BatchNorm2d(256)
        self.encoder_conv_32    = nn.Conv2d(256,256,kernel_size=3,padding=1)
        self.encoder_bn_32      = nn.BatchNorm2d(256)
        self.encoder_conv_33    = nn.Conv2d(256,256,kernel_size=3,padding=1)
        self.encoder_bn_33      = nn.BatchNorm2d(256)

        # Fourth stage
        self.encoder_conv_41    = nn.Conv2d(256,512,kernel_size=3,padding=1)
        self.encoder_bn_41      = nn.BatchNorm2d(512)
        # 512 output channels (was a 5126 typo that only worked because the
        # weights were overwritten by the VGG16 tensors afterwards).
        self.encoder_conv_42    = nn.Conv2d(512,512,kernel_size=3,padding=1)
        self.encoder_bn_42      = nn.BatchNorm2d(512)
        self.encoder_conv_43    = nn.Conv2d(512,512,kernel_size=3,padding=1)
        self.encoder_bn_43      = nn.BatchNorm2d(512)

        # Fifth stage
        self.encoder_conv_51    = nn.Conv2d(512,512,kernel_size=3,padding=1)
        self.encoder_bn_51      = nn.BatchNorm2d(512)
        self.encoder_conv_52    = nn.Conv2d(512,512,kernel_size=3,padding=1)
        self.encoder_bn_52      = nn.BatchNorm2d(512)
        self.encoder_conv_53    = nn.Conv2d(512,512,kernel_size=3,padding=1)
        self.encoder_bn_53      = nn.BatchNorm2d(512)

        self.set_encoder_params()

        ### Decoder
        # Fifth stage
        self.decoder_conv_53    = nn.ConvTranspose2d(512,512,kernel_size=3,padding=1)
        self.decoder_bn_53      = nn.BatchNorm2d(512)
        self.decoder_conv_52    = nn.ConvTranspose2d(512,512,kernel_size=3,padding=1)
        self.decoder_bn_52      = nn.BatchNorm2d(512)
        self.decoder_conv_51    = nn.ConvTranspose2d(512,512,kernel_size=3,padding=1)
        self.decoder_bn_51      = nn.BatchNorm2d(512)

        # Fourth stage
        self.decoder_conv_43    = nn.ConvTranspose2d(512,512,kernel_size=3,padding=1)
        self.decoder_bn_43      = nn.BatchNorm2d(512)
        self.decoder_conv_42    = nn.ConvTranspose2d(512,512,kernel_size=3,padding=1)
        self.decoder_bn_42      = nn.BatchNorm2d(512)
        self.decoder_conv_41    = nn.ConvTranspose2d(in_channels=512,out_channels=256,kernel_size=3,padding=1)
        self.decoder_bn_41      = nn.BatchNorm2d(256)

        # Third stage
        self.decoder_conv_33    = nn.ConvTranspose2d(256,256,kernel_size=3,padding=1)
        self.decoder_bn_33      = nn.BatchNorm2d(256)
        self.decoder_conv_32    = nn.ConvTranspose2d(256,256,kernel_size=3,padding=1)
        self.decoder_bn_32      = nn.BatchNorm2d(256)
        self.decoder_conv_31    = nn.ConvTranspose2d(256,128,kernel_size=3,padding=1)
        self.decoder_bn_31      = nn.BatchNorm2d(128)

        # Second stage
        self.decoder_conv_22    = nn.ConvTranspose2d(128,128,kernel_size=3,padding=1)
        self.decoder_bn_22      = nn.BatchNorm2d(128)
        self.decoder_conv_21    = nn.ConvTranspose2d(128,64,kernel_size=3,padding=1)
        self.decoder_bn_21      = nn.BatchNorm2d(64)

        # First stage (the last layer has no batch norm / ReLU: it emits logits)
        self.decoder_conv_12    = nn.ConvTranspose2d(64,64,kernel_size=3,padding=1)
        self.decoder_bn_12      = nn.BatchNorm2d(64)
        self.decoder_conv_11    = nn.ConvTranspose2d(64,output_channels,kernel_size=3,padding=1)

    def set_encoder_params(self):
        """Initialise the encoder convolutions from the pretrained VGG16.

        Weights and biases are cloned, so the encoder does not share storage
        with ``self.VGG16``. A layer whose weight shape differs from its VGG16
        counterpart (the first layer when ``input_channels != 3``) keeps its
        default initialisation instead of being silently reshaped.
        """
        for suffix, vgg_index in self._VGG16_CONV_INDEX:
            target = getattr(self, "encoder_conv_" + suffix)
            source = self.VGG16.features[vgg_index]
            if target.weight.shape != source.weight.shape:
                continue
            target.weight.data = source.weight.data.clone()
            target.bias.data = source.bias.data.clone()

    def forward(self,input_image):
        """Run the network on a batch of images.

        Args:
            input_image: image batch in the channel-first layout expected by
                ``nn.Conv2d`` with ``input_channels`` channels.

        Returns:
            tuple ``(logits, probabilities)``: the raw output of the last
            decoder convolution and its softmax over the channel dimension
            (``dim=1``). Both have the spatial size of ``input_image``.
        """
        ### Encoder
        # The size before every pool is remembered so the matching un-pool can
        # restore it exactly, also when a dimension is odd.

        # First stage
        size_1 = input_image.size()
        x = F.relu(self.encoder_bn_11(self.encoder_conv_11(input_image)))
        x = F.relu(self.encoder_bn_12(self.encoder_conv_12(x)))
        x, idx1 = F.max_pool2d(x,kernel_size=2,stride=2,return_indices=True)

        # Second stage
        size_2 = x.size()
        x = F.relu(self.encoder_bn_21(self.encoder_conv_21(x)))
        x = F.relu(self.encoder_bn_22(self.encoder_conv_22(x)))
        x, idx2 = F.max_pool2d(x,kernel_size=2,stride=2,return_indices=True)

        # Third stage
        size_3 = x.size()
        x = F.relu(self.encoder_bn_31(self.encoder_conv_31(x)))
        x = F.relu(self.encoder_bn_32(self.encoder_conv_32(x)))
        x = F.relu(self.encoder_bn_33(self.encoder_conv_33(x)))
        x, idx3 = F.max_pool2d(x,kernel_size=2,stride=2,return_indices=True)

        # Fourth stage
        size_4 = x.size()
        x = F.relu(self.encoder_bn_41(self.encoder_conv_41(x)))
        x = F.relu(self.encoder_bn_42(self.encoder_conv_42(x)))
        x = F.relu(self.encoder_bn_43(self.encoder_conv_43(x)))
        x, idx4 = F.max_pool2d(x,kernel_size=2,stride=2,return_indices=True)

        # Fifth stage
        size_5 = x.size()
        x = F.relu(self.encoder_bn_51(self.encoder_conv_51(x)))
        x = F.relu(self.encoder_bn_52(self.encoder_conv_52(x)))
        x = F.relu(self.encoder_bn_53(self.encoder_conv_53(x)))
        x, idx5 = F.max_pool2d(x,kernel_size=2,stride=2,return_indices=True)

        ### Decoder
        # Fifth stage
        x = F.max_unpool2d(x, idx5, kernel_size=2,stride=2,output_size=size_5)
        x = F.relu(self.decoder_bn_53(self.decoder_conv_53(x)))
        x = F.relu(self.decoder_bn_52(self.decoder_conv_52(x)))
        x = F.relu(self.decoder_bn_51(self.decoder_conv_51(x)))

        # Fourth stage
        x = F.max_unpool2d(x, idx4, kernel_size=2,stride=2,output_size=size_4)
        x = F.relu(self.decoder_bn_43(self.decoder_conv_43(x)))
        x = F.relu(self.decoder_bn_42(self.decoder_conv_42(x)))
        x = F.relu(self.decoder_bn_41(self.decoder_conv_41(x)))

        # Third stage
        x = F.max_unpool2d(x, idx3, kernel_size=2,stride=2,output_size=size_3)
        x = F.relu(self.decoder_bn_33(self.decoder_conv_33(x)))
        x = F.relu(self.decoder_bn_32(self.decoder_conv_32(x)))
        x = F.relu(self.decoder_bn_31(self.decoder_conv_31(x)))

        # Second stage
        x = F.max_unpool2d(x, idx2, kernel_size=2,stride=2,output_size=size_2)
        x = F.relu(self.decoder_bn_22(self.decoder_conv_22(x)))
        x = F.relu(self.decoder_bn_21(self.decoder_conv_21(x)))

        # First stage
        x = F.max_unpool2d(x, idx1, kernel_size=2,stride=2,output_size=size_1)
        x = F.relu(self.decoder_bn_12(self.decoder_conv_12(x)))
        x = self.decoder_conv_11(x)
        x_softmax = F.softmax(x,dim=1)
        return x, x_softmax




In [None]:
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 24 10:58:21 2021

@author: remco
"""
import json
import os
from collections import namedtuple
from typing import Any, Callable, Dict, List, Optional, Union, Tuple
import torchvision.transforms as transforms
import torchvision
from torchvision.datasets.vision import VisionDataset
from PIL import Image
import matplotlib.pyplot as plt
import torch
import numpy as np

class CamVid(VisionDataset):
    """CamVid semantic-segmentation dataset with colour-coded label images.

    The images of a split are read from ``<root>/<split>/images`` and the label
    of ``<name>.<ext>`` is expected at ``<root>/<split>/labels/<name>_L.png``.
    Every label pixel is an RGB colour listed in :attr:`classes`.

    ``self.colours`` lists ``(name, colour)`` pairs: first the evaluated classes
    (``train_id != 255``), then the ignored ones. The index of a pair in that
    list is the class index used by the masks this dataset returns.

    Args:
        root (string): Root directory that contains one sub-directory per split.
        split (string, optional): Name of the split directory. For ``train`` and
            ``train_small`` the per-class pixel counts are computed on
            construction (see :meth:`get_class_probability`).
        target_type (string or list, optional): Stored on the instance only;
            the visible code always returns the colour mask.
        transform (callable, optional): Transform applied to the PIL image.
        target_transform (callable, optional): Transform applied to the PIL
            label image. It must not blend colours (use nearest-neighbour
            resampling), otherwise pixels stop matching any class colour.
        transforms (callable, optional): Joint transform taking
            ``(image, target)``.

    Example:
        .. code-block:: python
            dataset = CamVid('./data/camvid', split='train')
            image, mask = dataset[0]
    """

    # Based on https://github.com/mcordts/cityscapesScripts
    CamVidClass = namedtuple('Class', ['name', 'id', 'train_id', 'category', 'category_id',
                                                     'has_instances', 'ignore_in_eval', 'color'])

    classes = [
        CamVidClass('Void', 0, 255, 'void', 0, False, True, (0, 0, 0)),
        
        CamVidClass('Archway', 2, 255, 'void', 0, True, True, (192, 0, 128)),
        CamVidClass('Animal', 1, 255, 'void', 0, True, True, (64, 128, 64)),
        CamVidClass('Bicyclist', 3, 0, 'Bicyclist', 1, True, False, (0, 128, 192)),  #
        CamVidClass('Bridge', 4, 255, 'void', 0, True, True, (0, 128, 64)),
        CamVidClass('Building', 5, 1, 'Building', 2, True, False, (128,0,0)), #
        CamVidClass('Car', 6, 2, 'Car', 3, True, False, (64,0,128)), #
        CamVidClass('CartLuggagePram', 7, 255, 'void', 0, False, True, (64,0,192)),
        CamVidClass('Child', 8, 255, 'void', 0, False, True, (192,128,64)),
        CamVidClass('Column_pole', 9, 3, 'Pole', 4, True, False, (192,192,128)), #
        CamVidClass('Fence', 10, 4, 'Fence', 5, True, False, (64,64,128)), #
        CamVidClass('LaneMkgsDriv', 11, 255, 'void', 0, True, True, (128,0,192)),
        CamVidClass('LaneMkgsNonDriv', 12, 255, 'void', 0, True, True, (192,0,64)),
        CamVidClass('Misc_Text', 13, 255, 'void', 0, False, True, (128,128,64)),
        CamVidClass('MotorcycleScooter', 14, 255, 'void', 0, True, True, (192,0,192)),
        CamVidClass('OtherMoving', 15, 255, 'void', 0, True, True, (128,64,64)),
        CamVidClass('ParkingBlock', 16, 255, 'void', 0, True, True, (64,192,128)),
        CamVidClass('Pedestrian', 17, 5, 'Pedestrian', 6, True, False,  (64,64,0)), #
        CamVidClass('Road', 18, 6, 'Road', 7, True, False, (128,64,128)), #
        CamVidClass('RoadShoulder', 19, 255, 'void', 0, False, True, (128,128,192)),
        CamVidClass('Sidewalk', 20, 7, 'Sidewalk', 8, True, False, (0,0,192)), #
        CamVidClass('SignSymbol', 21, 8, 'SignSymbol', 9, True, False, (192,128,128)), #
        CamVidClass('Sky', 22, 9, 'Sky', 10, True, False, (128,128,128)), #
        CamVidClass('SUVPickupTruck', 23, 255, 'void', 0, False, True, (64,128,192)),
        CamVidClass('TrafficCone', 24, 255, 'void', 0, True, True, (0,0,64)),
        CamVidClass('TrafficLight', 25, 255, 'void', 0, True, True, (0,64,64)),
        CamVidClass('Train', 26, 255, 'void', 0, True, True, (192,64,128)),
        CamVidClass('Tree', 27, 10, 'Tree', 11, True, False, (128,128,0)), #
        CamVidClass('Truck_Bus', 28, 255, 'void', 0, True, True, (192,128,192)),
        CamVidClass('Tunnel', 29, 255, 'void', 0, True, True, (64,0,64)),
        CamVidClass('VegetationMisc', 30, 255, 'void', 0, True, True, (192,192,0)),
        CamVidClass('Wall', 31, 255, 'void', 0, True, True, (64,192,0)),
        
    ]

    # PIL ``(width, height)`` the labels are reduced to before the class
    # frequencies are counted.
    STATS_SIZE = (480, 360)

    def __init__(
            self,
            root: str,
            split: str = "train",
            target_type: Union[List[str], str] = "instance",
            transform: Optional[Callable] = None,
            target_transform: Optional[Callable] = None,
            transforms: Optional[Callable] = None,
    ) -> None:
        super(CamVid, self).__init__(root, transforms, transform, target_transform)
        self.images_dir = os.path.join(self.root, split, 'images')
        self.targets_dir = os.path.join(self.root, split, 'labels')
        self.target_type = target_type
        self.split = split
        self.images = []
        self.targets = []
        self.colours = []
        self.colour_names = []
        self.only_colours = []
        self.correct_classes = 0
        self.ignore_classes= 0

        # Evaluated classes first, so that their mask indices are
        # 0 .. correct_classes - 1 ...
        for cls in self.classes:
            if cls.train_id != 255:
                self.colours.append((cls.name, cls.color))
                self.only_colours.append(cls.color)
                self.colour_names.append(cls.name)
                self.correct_classes += 1

        # ... followed by every ignored class.
        for cls in self.classes:
            if cls.train_id == 255:
                self.colours.append((cls.name, cls.color))

        # Sorted so that the index -> sample mapping is the same on every run.
        for file_name in sorted(os.listdir(self.images_dir)):
            stem = os.path.splitext(file_name)[0]
            label_name = "_".join([stem, "L.png"])
            self.images.append(os.path.join(self.images_dir, file_name))
            self.targets.append(os.path.join(self.targets_dir, label_name))

        self.NUM_CLASSES = len(self.classes)
        # Class frequencies are only needed (and only computed) for training splits.
        if split in ("train", "train_small"):
            self.counts = self.__compute_class_probability()

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """Load one sample.

        Args:
            index (int): Index into ``self.images`` / ``self.targets``.

        Returns:
            tuple: ``(image, target)``. ``image`` is the tensor produced by
            ``to_tensor`` after :meth:`LocalContrastNorm`; ``target`` is the
            tensor produced by ``to_tensor`` from the boolean mask of
            :meth:`make_mask`, i.e. one plane per entry of ``self.colours``.
        """
        # The context managers release the file handles as soon as the pixel
        # data has been copied by ``convert``.
        with Image.open(self.images[index]) as image_file:
            image = image_file.convert('RGB')
        with Image.open(self.targets[index]) as label_file:
            target = label_file.convert('RGB')

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        target = self.make_mask(target)
        image = torchvision.transforms.functional.to_tensor(image)
        target = torchvision.transforms.functional.to_tensor(target)

        image = self.LocalContrastNorm(image,9) #if rgb image is used
        #image = Transform_Maddern(image,HS=True)  #Maddern (if HS=True: Hue Saturated)

        return image, target

    def __len__(self) -> int:
        return len(self.images)

    def extra_repr(self) -> str:
        # Only attributes that exist on the instance may be referenced here;
        # a missing one makes ``repr(dataset)`` raise KeyError.
        lines = ["Split: {split}", "Type: {target_type}"]
        return '\n'.join(lines).format(**self.__dict__)

    def _get_target_suffix(self, target_type: str) -> str:
        """Map a target type to a file-name pattern (not called by the visible code)."""
        if target_type == 'instance':
            return '{}_instanceIds.png'
        elif target_type == 'semantic':
            return '{}_labelIds.png'
        elif target_type == 'color':
            return '{}_color.png'
        else:
            return '{}_polygons.json'

    @staticmethod
    def _gaussian_kernel(channels, radius, sigma=2.0):
        """Return a ``(1, channels, radius, radius)`` Gaussian that sums to one."""
        offsets = np.arange(radius, dtype='float64') - radius // 2
        yy, xx = np.meshgrid(offsets, offsets, indexing='ij')
        plane = np.exp(-(xx ** 2 + yy ** 2) / (2.0 * sigma ** 2))
        kernel = np.broadcast_to(plane, (1, channels, radius, radius))
        # Normalising over all channels makes the filter a weighted mean across
        # the whole neighbourhood of every channel.
        return kernel / kernel.sum()

    def LocalContrastNorm(self, image, radius=9):
        """Apply local contrast normalisation to one image.

        The Gaussian-weighted local mean (taken over all channels) is subtracted
        from the image, then the result is divided by the larger of the local
        standard deviation and its image-wide mean (never less than 1e-4).

        Args:
            image: torch.Tensor of shape ``(channels, height, width)``.
            radius: side length of the Gaussian filter; an even value is
                increased by one.

        Returns:
            torch.Tensor with the shape of ``image``.
        """
        if radius % 2 == 0:
            radius += 1
        half = radius // 2

        batch = image.unsqueeze(0).float()
        kernel = torch.as_tensor(
            self._gaussian_kernel(batch.shape[1], radius), dtype=torch.float32)

        ### Subtractive normalisation
        # padding=half keeps the spatial size, so no cropping is needed.
        local_mean = torch.nn.functional.conv2d(batch, kernel, padding=half)
        centered_image = batch - local_mean

        ## Local standard deviation
        local_var = torch.nn.functional.conv2d(centered_image.pow(2), kernel, padding=half)
        s_deviation = local_var.sqrt()
        per_img_mean = s_deviation.mean()

        ## Divisive normalisation
        divisor = torch.max(s_deviation, per_img_mean).clamp(min=1e-4)
        return (centered_image / divisor)[0]

    def __compute_class_probability(self):
        """Count the label pixels of every class over the whole split.

        Returns:
            dict mapping the class indices ``0 .. correct_classes`` (the
            evaluated classes plus the first ignored one) to their pixel count.
            A pixel whose colour matches no class is counted as index 0,
            which is also what ``argmax`` over the returned masks yields.
        """
        n_counted = self.correct_classes + 1
        totals = np.zeros(n_counted, dtype=np.int64)
        for mask_path in self.targets:
            with Image.open(mask_path) as label_file:
                # Nearest-neighbour keeps the exact class colours; a smoothing
                # filter would create colours that belong to no class.
                label = label_file.convert('RGB').resize(self.STATS_SIZE, Image.NEAREST)
            class_index = np.argmax(self.make_mask(label), axis=-1)
            per_class = np.bincount(class_index.ravel(), minlength=len(self.colours))
            totals += per_class[:n_counted]
        return dict((i, totals[i]) for i in range(n_counted))

    def get_class_probability(self):
        """Return the relative pixel frequency of every counted class as a tensor.

        Only available when the per-class counts were computed in ``__init__``
        (splits ``train`` and ``train_small``).
        """
        values = np.array(list(self.counts.values()))
        p_values = values/np.sum(values)
        return torch.Tensor(p_values)

    def make_mask(self,mask):
        """Turn a colour-coded label into a boolean one-hot array.

        Args:
            mask: label image (PIL image or array) whose last axis is the colour.

        Returns:
            numpy bool array with one plane per entry of ``self.colours`` on the
            last axis; a plane is True where the pixel equals that colour.
        """
        pixels = np.asarray(mask)
        semantic_map = [
            np.all(np.equal(pixels, colour), axis=-1)
            for _, colour in self.colours
        ]
        return np.stack(semantic_map, axis=-1)


In [None]:
def Transform_Maddern(image, a=0.48, HS=False):
    """Build a three-plane image whose first plane is a log-chromaticity map.

    The first plane is ``0.5 + log(image[1]) - a*log(image[2]) - (1-a)*log(image[0])``
    evaluated per pixel; a channel that is not strictly positive at a pixel
    contributes nothing there. (Presumably ``image`` is in R, G, B channel
    order - confirm against the caller.)

    Args:
        image: channel-first tensor with at least three 2-D planes.
        a: weight of the ``image[2]`` log term; ``image[0]`` gets ``1 - a``.
        HS: if True, planes two and three are the hue and saturation returned
            by ``RGB2HSV``; otherwise they are zero.

    Returns:
        Tensor of shape ``(3, H, W)``.
    """
    plane_shape = image[0].shape
    illum = torch.full(plane_shape, 0.5)

    # (channel, factor applied to its logarithm), accumulated in this order.
    for channel, factor in ((1, None), (2, -a), (0, -(1 - a))):
        positive = image[channel] > 0
        log_term = torch.log(image[channel][positive])
        if factor is None:
            illum[positive] += log_term
        else:
            illum[positive] += factor * log_term

    if not HS:
        result = torch.zeros(3, plane_shape[0], plane_shape[1])
        result[0, :, :] = illum
        return result

    hue, sat, _ = RGB2HSV(image)
    stacked = torch.dstack((illum, hue, sat))
    return stacked.permute(2, 0, 1)

def RGB2HSV(image):
    """Compute hue, saturation and value planes of a channel-first image.

    Planes 0, 1 and 2 of ``image`` are read as red, green and blue.

    Returns:
        tuple ``(hue, sat, value)``: hue in degrees (0 where all channels are
        equal), saturation (0 where the largest channel is 0) and the
        per-pixel channel maximum.
    """
    peak_vals, peak_channel = torch.max(image, dim=0)
    floor_vals = torch.min(image, dim=0).values
    spread = peak_vals - floor_vals

    red, green, blue = image[0], image[1], image[2]
    hue = torch.zeros(red.shape)
    sat = torch.zeros(red.shape)

    # Pixels with zero spread are grey: their hue stays 0.
    has_colour = spread != 0

    red_is_max = (peak_channel == 0) & has_colour
    hue[red_is_max] = ((green[red_is_max] - blue[red_is_max]) / spread[red_is_max]) % 6

    green_is_max = (peak_channel == 1) & has_colour
    hue[green_is_max] = 2 + (blue[green_is_max] - red[green_is_max]) / spread[green_is_max]

    blue_is_max = (peak_channel == 2) & has_colour
    hue[blue_is_max] = 4 + (red[blue_is_max] - green[blue_is_max]) / spread[blue_is_max]

    # Black pixels (maximum 0) keep saturation 0.
    lit = peak_vals != 0
    sat[lit] = spread[lit] / peak_vals[lit]

    return 60 * hue, sat, peak_vals

In [None]:
import torchvision.transforms as transforms
import torchvision
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import torch
import os
import time 
from google.colab import drive

# Root of the CamVid data on the mounted Google Drive (one sub-directory per split).
read_path = '/content/drive/MyDrive/ColabNotebooks/ProjectDL/CamVidData_2'

batch_size = 12
num_epoch = 150

# Train / evaluate on the CUDA device GPU_ID when True.
USE_GPU = True

GPU_ID = 0


# Every image is resized to (height, width) = (360, 480).
transform = transforms.Compose(
    [transforms.Resize((360,480)), 
     ])

# Labels are colour coded, so they must be resized with nearest-neighbour
# sampling: the default bilinear filter blends neighbouring class colours into
# colours that belong to no class.
target_transform = transforms.Compose(
    [transforms.Resize((360,480), interpolation=transforms.InterpolationMode.NEAREST),
     ])

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 24 15:46:21 2021

@author: remco
"""

        
camvid_train = CamVid(read_path,'train',transform=transform,target_transform=target_transform)
camvid_val = CamVid(read_path,'val',transform=transform,target_transform=target_transform)

dataloader_train = DataLoader(camvid_train, batch_size=batch_size, shuffle=True)
# Not shuffled: the validation loss is a sum of per-batch losses, so a fixed
# batch composition keeps it comparable between epochs.
dataloader_val = DataLoader(camvid_val, batch_size=batch_size, shuffle=False)

# Inverse class frequency, so that rare classes weigh more in the loss.
class_weights = 1.0/camvid_train.get_class_probability()
n_classes = len(camvid_train.colours)

# The network predicts the evaluated classes plus one extra channel (the first
# ignored class of CamVid.colours). Every higher class index is clipped onto
# `ignore_index`, which the loss skips.
num_outputs = camvid_train.correct_classes + 1
ignore_index = num_outputs

device = torch.device("cuda", GPU_ID) if USE_GPU else torch.device("cpu")

model = CNN(3, num_outputs).to(device)
criterion = nn.CrossEntropyLoss(weight=class_weights, ignore_index=ignore_index).to(device)

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=5*10**(-4))

# Lowest summed validation loss seen so far; decides when a checkpoint is saved.
prev_loss = float('inf')

for epoch in range(num_epoch):
    model.train()
    t_start = time.time()
    loss_val = 0
    for images, labels in dataloader_train:
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()

        predicted, softmaxed = model(images)
        # One-hot mask -> class index per pixel, ignored classes -> ignore_index.
        labels = torch.clip(torch.max(labels,1)[1],0,ignore_index)
        loss = criterion(predicted,labels)
        loss.backward()
        optimizer.step()
        if USE_GPU:
            torch.cuda.empty_cache()

    model.eval()
    with torch.no_grad():
        for images, labels in dataloader_val:
            images = images.to(device)
            labels = labels.to(device)
            predicted, softmaxed = model(images)
            labels = torch.clip(torch.max(labels,1)[1],0,ignore_index)
            los_enumerate = criterion(predicted,labels)
            loss_val += los_enumerate.item()
            if USE_GPU:
                torch.cuda.empty_cache()

    delta = time.time() - t_start
    print("Epoch #{}\tvalidationLoss: {:.8f}\t Time: {:2f}s".format(epoch+1, loss_val
                                                          , delta))

    # Keep the weights of the epoch with the lowest validation loss.
    if loss_val < prev_loss:
      print('#', epoch+1 , " is currently the best epoch on evaluation data")
      prev_loss = loss_val
      model_save_name = 'classifier_maddern_test.pt'
      path = F"/content/drive/MyDrive/ColabNotebooks/ProjectDL/CamVidData_2/{model_save_name}" 
      torch.save(model.state_dict(), path)

print("finished")

tensor([   7.3657,    4.4705,   31.8390,  884.2386,   75.8206,  284.2722,
           3.7967,   16.4691, 1546.2291,    7.0343,   10.0539,   40.1144])
Epoch #1	validationLoss: 23.82055593	 Time: 445.504700s
# 1  is currently the best epoch on evaluation data


KeyboardInterrupt: ignored

In [None]:
from sklearn.metrics import confusion_matrix
import itertools

camvid_test = CamVid(read_path,'test',transform=transform,target_transform=target_transform)
dataloader_test = DataLoader(camvid_test, batch_size=batch_size, shuffle=False)

# Only the evaluated classes (indices 0 .. correct_classes - 1 of
# CamVid.colours) enter the confusion matrix; these are also the classes whose
# names label the axes below.
n_eval_classes = camvid_test.correct_classes
eval_labels = np.arange(n_eval_classes)
confmat = np.zeros((n_eval_classes, n_eval_classes))

only_test = True

if only_test:
  print("loading model")
  model = CNN(3,camvid_test.correct_classes+1).cuda(GPU_ID)
  model_save_name = 'classifier_maddern_normal.pt'
  path = F"/content/drive/MyDrive/ColabNotebooks/ProjectDL/CamVidData_2/{model_save_name}" 
  model.load_state_dict(torch.load(path))

# A freshly constructed module is in training mode; batch norm must use its
# running statistics during evaluation.
model.eval()

with torch.no_grad():
    for inputs, labels in dataloader_test:
      inputs = inputs.cuda(GPU_ID)
      _, softmaxed = model(inputs)

      pred_int = torch.argmax(softmaxed,dim=1)
      true_int = torch.clip(torch.max(labels,1)[1],0,camvid_test.correct_classes+1)

      # Pixels whose true or predicted index is outside `eval_labels` are
      # left out by confusion_matrix.
      conf_new = confusion_matrix(true_int.cpu().data.numpy().flatten(),
                                  pred_int.cpu().data.numpy().flatten(),
                                  labels=eval_labels)
      confmat += conf_new

# Row-normalised matrix: entry (i, j) is the share of class-i pixels predicted as j.
confmat_chance = confmat / confmat.sum(axis=1)[:, np.newaxis]

plt.figure(figsize=(12,9))
plt.imshow(confmat_chance, interpolation='nearest', cmap=plt.cm.Blues)
colour_names = []
for i in range(n_eval_classes):
  colour_names.append(camvid_test.colours[i][0])

plt.xticks(eval_labels,colour_names, rotation=90)
plt.yticks(eval_labels,colour_names)

for i, j in itertools.product(range(confmat_chance.shape[0]), range(confmat_chance.shape[1])):
          plt.text(j, i, "{:0.2f}".format(confmat_chance[i, j]),
                     horizontalalignment="center",
                     color="white" if confmat_chance[i, j] > 0.5 else "black")

plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.colorbar()
plt.show()

class_accuracy = np.diagonal(confmat_chance)
class_average = np.mean(class_accuracy)
global_accuracy = np.trace(confmat)/np.sum(confmat)

sum_pred = np.sum(confmat, axis=0)
sum_true = np.sum(confmat, axis=1)
true_pos = np.diagonal(confmat)

precision = np.mean(true_pos /sum_pred)
recall = np.mean(true_pos / sum_true)

# Intersection over union per class: TP / (predicted + true - TP).
mIoU = np.mean(true_pos/(sum_pred + sum_true - true_pos))

print("Class accuracy: ")
for i in range(n_eval_classes):
  print(camvid_test.colours[i][0], " : ", class_accuracy[i])

print("Class average accuracy: ", class_average)
print("global accuracy: ", global_accuracy)
print("precision: ", precision)
print("recall: ", recall)
print("mIoU: ", mIoU)


In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)


from psutil import virtual_memory
# Total memory of the runtime in gigabytes (10**9 bytes).
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the threshold this cell uses to tell a high-RAM runtime apart.
if ram_gb >= 20:
  print('You are using a high-RAM runtime!')
else:
  print('To enable a high-RAM runtime, select the Runtime > "Change runtime type"')
  print('menu, and then select High-RAM in the Runtime shape dropdown. Then, ')
  print('re-execute this cell.')

In [None]:
from google.colab import drive 
# Mount Google Drive at /content/drive.
# NOTE(review): `read_path` and the checkpoint paths used by the cells above
# live under this mount point, so on a fresh runtime this cell has to be
# executed before them.
drive.mount('/content/drive')