In [1]:
# Mount Google Drive in the Colab runtime so the dataset stored there can be read
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [2]:
cd "/content/drive/MyDrive/NN_Project"

/content/drive/MyDrive/NN_Project


In [3]:
"""
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
!tar -xvf VOCtrainval_11-May-2012.tar
"""

'\n!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar\n!tar -xvf VOCtrainval_11-May-2012.tar\n'

In [4]:
!pip install scipy==1.1.0

Collecting scipy==1.1.0
[?25l  Downloading https://files.pythonhosted.org/packages/40/de/0c22c6754370ba6b1fa8e53bd6e514d4a41a181125d405a501c215cbdbd6/scipy-1.1.0-cp37-cp37m-manylinux1_x86_64.whl (31.2MB)
[K     |████████████████████████████████| 31.2MB 103kB/s 
[31mERROR: plotnine 0.6.0 has requirement scipy>=1.2.0, but you'll have scipy 1.1.0 which is incompatible.[0m
[31mERROR: albumentations 0.1.12 has requirement imgaug<0.2.7,>=0.2.5, but you'll have imgaug 0.2.9 which is incompatible.[0m
Installing collected packages: scipy
  Found existing installation: scipy 1.4.1
    Uninstalling scipy-1.4.1:
      Successfully uninstalled scipy-1.4.1
Successfully installed scipy-1.1.0


In [5]:
import os
from os.path import join as pjoin
import collections
import json
import torch
import imageio
import numpy as np
import scipy.misc as m
import scipy.io as io
import matplotlib.pyplot as plt
import glob
import torch.nn as nn
from PIL import Image
from tqdm import tqdm
from torch.utils import data
from torchvision import transforms
import torchvision
import torch
import torch.nn.functional as F
from sklearn import metrics
import torch.nn.functional as fun
import sklearn.metrics as metric
from matplotlib import colors


class pascalVOCDataset(data.Dataset):
    """Dataset for the Pascal VOC 2012 semantic segmentation data.

    The colour-coded annotation images in ``SegmentationClass`` are converted
    once into single-channel class-index masks (the `label_mask` format) and
    written as PNG files to ``SegmentationClass/pre_encoded``.  Each mask is an
    (M, N) array of integer class indices from 0 to 20, where 0 is the
    background class.  Colours that are not part of the palette returned by
    `get_pascal_labels` are encoded as 0 as well.

    Splits selectable through ``split``:
        train: ids listed in ``ImageSets/Segmentation/train.txt``
        val: ids listed in ``ImageSets/Segmentation/val.txt``
        trainval: ids listed in ``ImageSets/Segmentation/trainval.txt``
        train_aug: the unique ids of ``train``, in file order.  NOTE: despite
                   the name, this implementation does not merge the SBD
                   (Berkeley) annotations; ``sbd_path`` is stored but never
                   read.
        train_aug_val: the ids of ``val`` that are not in ``train_aug``
                   (sorted).

    Args:
        root (str): VOC2012 directory containing ``JPEGImages``,
            ``SegmentationClass`` and ``ImageSets``.
        sbd_path (str, optional): stored on the instance; unused here.
        split (str): which of the splits above `__len__`/`__getitem__` use.
        is_transform (bool): when True, samples are resized and converted to
            tensors by `transform`.
        img_size (int | tuple | "same"): target size for resizing; ``"same"``
            keeps the original image size.
        augmentations (callable, optional): called as
            ``augmentations(img, lbl)`` and must return the augmented pair.
        img_norm (bool): stored on the instance; unused here.
        test_mode (bool): when True the split files are not read and no
            annotations are prepared, so every split is empty.
    """

    def __init__(
        self,
        root,
        sbd_path=None,
        split="train_aug",
        is_transform=False,
        img_size=512,
        augmentations=None,
        img_norm=True,
        test_mode=False,
    ):
        self.root = root
        self.sbd_path = sbd_path
        self.split = split
        self.is_transform = is_transform
        self.augmentations = augmentations
        self.img_norm = img_norm
        self.test_mode = test_mode
        self.n_classes = 21
        self.mean = np.array([104.00699, 116.66877, 122.67892])
        self.files = collections.defaultdict(list)
        self.img_size = img_size if isinstance(img_size, tuple) else (img_size, img_size)

        if not self.test_mode:
            for split_name in ["train", "val", "trainval"]:
                path = pjoin(self.root, "ImageSets/Segmentation", split_name + ".txt")
                # Context manager closes the file handle; blank lines are
                # skipped so they cannot turn into empty image ids.
                with open(path, "r") as split_file:
                    self.files[split_name] = [
                        id_.rstrip() for id_ in split_file if id_.strip()
                    ]
            self.setup_annotations()

        self.tf = transforms.Compose(
            [
                # add more transformations as you see fit
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ]
        )

    def __len__(self):
        """Return the number of image ids in the selected split."""
        return len(self.files[self.split])

    def __getitem__(self, index):
        """Load one (image, label) pair of the selected split.

        Returns:
            tuple: ``(im, lbl)``.  With ``is_transform=True`` both are tensors
            and the label is clamped to at most 20.  Without it the (possibly
            augmented) objects are returned as loaded, because clamping is
            only defined for tensors.
        """
        im_name = self.files[self.split][index]
        im_path = pjoin(self.root, "JPEGImages", im_name + ".jpg")
        lbl_path = pjoin(self.root, "SegmentationClass/pre_encoded", im_name + ".png")
        # Force three channels so the 3-channel Normalize in self.tf always applies.
        im = Image.open(im_path).convert("RGB")
        lbl = Image.open(lbl_path)
        if self.augmentations is not None:
            im, lbl = self.augmentations(im, lbl)
        if self.is_transform:
            im, lbl = self.transform(im, lbl)
        if torch.is_tensor(lbl):
            lbl = torch.clamp(lbl, max=20)
        return im, lbl

    def transform(self, img, lbl):
        """Resize the pair (unless ``img_size`` is "same") and convert to tensors.

        Args:
            img: PIL image; passed through ``self.tf``.
            lbl: PIL label mask (anything ``np.array`` accepts when no resize
                is requested).

        Returns:
            tuple: ``(img, lbl)`` where ``img`` is the output of ``self.tf``
            and ``lbl`` is a ``torch.long`` tensor in which the value 255 has
            been replaced by 0.
        """
        if self.img_size != ("same", "same"):
            size = (self.img_size[0], self.img_size[1])
            img = img.resize(size)  # uint8 with RGB mode
            # Class indices must never be interpolated: nearest-neighbour keeps
            # every resized pixel equal to an existing label value.
            lbl = lbl.resize(size, resample=Image.NEAREST)
        img = self.tf(img)
        lbl = torch.from_numpy(np.array(lbl)).long()
        lbl[lbl == 255] = 0
        return img, lbl

    def get_pascal_labels(self):
        """Load the mapping that associates pascal classes with label colors

        Returns:
            np.ndarray with dimensions (21, 3)
        """
        return np.asarray(
            [
                [0, 0, 0],
                [128, 0, 0],
                [0, 128, 0],
                [128, 128, 0],
                [0, 0, 128],
                [128, 0, 128],
                [0, 128, 128],
                [128, 128, 128],
                [64, 0, 0],
                [192, 0, 0],
                [64, 128, 0],
                [192, 128, 0],
                [64, 0, 128],
                [192, 0, 128],
                [64, 128, 128],
                [192, 128, 128],
                [0, 64, 0],
                [128, 64, 0],
                [0, 192, 0],
                [128, 192, 0],
                [0, 64, 128],
            ]
        )

    def encode_segmap(self, mask):
        """Encode segmentation label images as pascal classes

        Args:
            mask (np.ndarray): raw segmentation label image of dimension
              (M, N, 3), in which the Pascal classes are encoded as colours.
              Channels beyond the first three (e.g. alpha) are ignored.

        Returns:
            (np.ndarray): class map with dimensions (M,N), where the value at
            a given location is the integer denoting the class index.  Pixels
            whose colour is not in the palette stay 0.
        """
        mask = mask[..., :3].astype(int)
        label_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.int16)
        for ii, label in enumerate(self.get_pascal_labels()):
            label_mask[np.all(mask == label, axis=-1)] = ii
        return label_mask.astype(int)

    def decode_segmap(self, label_mask, plot=False):
        """Decode segmentation class labels into a color image

        Args:
            label_mask (np.ndarray): an (M,N) array of integer values denoting
              the class label at each spatial location.
            plot (bool, optional): whether to show the resulting color image
              in a figure.

        Returns:
            (np.ndarray, optional): the resulting decoded color image with
            channel values scaled to [0, 1]; nothing is returned when
            ``plot`` is True.
        """
        label_colours = self.get_pascal_labels()
        r = label_mask.copy()
        g = label_mask.copy()
        b = label_mask.copy()
        for ll in range(0, self.n_classes):
            # Always index with the untouched label_mask so colours written
            # for earlier classes are not mistaken for class indices.
            selected = label_mask == ll
            r[selected] = label_colours[ll, 0]
            g[selected] = label_colours[ll, 1]
            b[selected] = label_colours[ll, 2]
        rgb = np.zeros((label_mask.shape[0], label_mask.shape[1], 3))
        rgb[:, :, 0] = r / 255.0
        rgb[:, :, 1] = g / 255.0
        rgb[:, :, 2] = b / 255.0
        if plot:
            plt.imshow(rgb)
            plt.show()
        else:
            return rgb

    def setup_annotations(self):
        """Define the derived splits and pre-encode the segmentation labels.

        Builds ``train_aug`` (unique ids of ``train``) and ``train_aug_val``
        (ids of ``val`` not in ``train_aug``), then converts every
        ``trainval`` annotation into the label_mask format under
        ``SegmentationClass/pre_encoded`` unless the expected number of PNG
        files is already there.

        Raises:
            AssertionError: if ``train_aug`` and ``val`` together do not
                contain exactly 2913 unique ids.
        """
        target_path = pjoin(self.root, "SegmentationClass/pre_encoded")
        os.makedirs(target_path, exist_ok=True)

        # keep unique elements (stable: first occurrence wins)
        train_aug = list(dict.fromkeys(self.files["train"]))
        self.files["train_aug"] = train_aug
        # remove overlap; sorted so the split order is reproducible across runs
        self.files["train_aug_val"] = sorted(set(self.files["val"]) - set(train_aug))

        pre_encoded = glob.glob(pjoin(target_path, "*.png"))
        expected = len(set(self.files["train_aug"] + self.files["val"]))

        if len(pre_encoded) != expected:
            print("Pre-encoding segmentation masks...")

            # These scipy.misc image helpers are the reason the notebook pins
            # scipy==1.1.0.
            for ii in tqdm(self.files["trainval"]):
                fname = ii + ".png"
                lbl_path = pjoin(self.root, "SegmentationClass", fname)
                lbl = self.encode_segmap(m.imread(lbl_path))
                # high/low set to the data range so class indices are stored unscaled
                lbl = m.toimage(lbl, high=lbl.max(), low=lbl.min())
                m.imsave(pjoin(target_path, fname), lbl)

        assert expected == 2913, "unexpected dataset sizes"

In [6]:
# Model structure
# A pretrained VGG16, loaded through torch.hub, is used as the backbone of the model.

# `segment` is the class that defines our model.
class segment(nn.Module):
    """Segmentation network built on the pretrained VGG-16 feature extractor.

    The convolutional ``features`` part of torchvision's VGG-16 is followed by
    two 3x3 convolutions with 4096 channels (each with ReLU and dropout) and a
    final 3x3 convolution that produces one score map per class.  None of the
    added convolutions uses padding, so each one trims the feature map by two
    pixels per spatial dimension; the score maps are then bilinearly resized
    back to the input resolution.

    Args:
        num_classes (int): number of output score maps (classes).
    """

    def __init__(self, num_classes):
        super(segment, self).__init__()

        # Pretrained VGG-16 backbone; only the convolutional part is kept.
        self.features = torch.hub.load('pytorch/vision:v0.9.0', 'vgg16', pretrained=True).features
        # Convolutional head added on top of the backbone for segmentation.
        self.last_layer = nn.Sequential(
            nn.Conv2d(512, 4096, 3),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5, inplace=False),
            nn.Conv2d(4096, 4096, 3),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5, inplace=False),
        )

        self.out = nn.Conv2d(4096, num_classes, 3)

    def forward(self, x):
        """Return per-class score maps with the same height/width as ``x``.

        Args:
            x (torch.Tensor): input batch; its last two dimensions define the
                output resolution.

        Returns:
            torch.Tensor: scores of shape (batch, num_classes, H, W).
        """
        f1 = self.features(x)
        f2 = self.last_layer(f1)
        f3 = self.out(f2)

        # F.upsample_bilinear is deprecated; this call is its documented
        # equivalent (bilinear interpolation with align_corners=True).
        return F.interpolate(f3, size=x.size()[2:], mode="bilinear", align_corners=True)

    def classify(self, x):
        """Return the index of the highest-scoring class for every pixel."""
        return self(x).argmax(dim=1)



In [13]:
# Select the compute device: the GPU when CUDA is usable, the CPU otherwise.
CUDA = torch.cuda.is_available()
train_on_GPU = CUDA
device = torch.device('cuda' if train_on_GPU else 'cpu')

# Report the choice first, then build the 21-class model on that device.
print("CUDA is available" if CUDA else "CUDA not available, model will be on CPU")
model = segment(21)
if CUDA:
    model = model.cuda()

print(model)

CUDA is available


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.9.0


segment(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilati

In [8]:
#load model 
#model= segment(21).cuda()
#model

In [9]:
# Experiment configuration - adjust to your setup.
local_path = 'VOCdevkit/VOC2012/'  # dataset root, relative to the working directory
bs, epochs = 10, 20                # mini-batch size and number of training epochs (change later)
learning_rate = 1e-4               # optimizer step size (change later)

In [10]:
import scipy.misc
from scipy.misc.pilutil import imread

# Training dataset: default split, with resizing and tensor conversion enabled.
dst = pascalVOCDataset(root=local_path, is_transform=True)
quantity_train = len(dst)  # number of training samples

# Shuffled mini-batch loader over the training dataset.
trainloader = torch.utils.data.DataLoader(dataset=dst, batch_size=bs, shuffle=True)

In [11]:
# Training objective: cross-entropy between the per-pixel class scores and the labels.
loss_f = nn.CrossEntropyLoss()

# Adam updates every model parameter with the configured learning rate.
opt = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Model training
# Directory that receives the saved model ("" = current working directory).
# Named `save_dir` so the builtin `dir` is not shadowed.
save_dir = ""
for epoch in range(epochs):
    # Make sure dropout is active even if a previous cell switched the model to eval mode.
    model.train()
    running_loss = 0.0
    num_batches = 0

    # The loop variable is called `batch` (not `data`) so it does not overwrite the
    # `torch.utils.data` module that the import cell binds to the name `data`.
    for i, batch in enumerate(trainloader, 0):
        # each batch is a pair [inputs, labels]
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients before computing the new ones
        opt.zero_grad()

        # forward + backward + optimize (calling the module keeps hooks working)
        outputs = model(inputs)
        loss = loss_f(outputs, labels)
        loss.backward()
        opt.step()

        # statistics
        running_loss += loss.item()
        num_batches += 1
        if i % 10 == 0:    # print every 10 iterations
            print('epoch{}, iter{}, loss: {}'.format(epoch, i, loss.item()))

    if num_batches:
        print('epoch{}, mean loss: {}'.format(epoch, running_loss / num_batches))

    # To save the weights of every epoch instead:
    # torch.save(model.state_dict(), os.path.join(save_dir, 'models/epoch-{}.pt'.format(epoch)))
    # Save the whole model; the file is overwritten after every epoch.
    torch.save(model, os.path.join(save_dir, 'Model.pt'))
print('Finished Training')




epoch0, iter0, loss: 3.152629852294922
epoch0, iter10, loss: 3.1488473415374756
epoch0, iter20, loss: 3.165856122970581
epoch0, iter30, loss: 3.1882784366607666


In [None]:
#Evaluation metrics calculation
def evaluate(ground_truth, predictions):
    ground_truth = torch.flatten(ground_truth, start_dim = 0, end_dim = 2)
    pred = torch.argmax(predictions, dim=1)
    pred = torch.flatten(pred, start_dim = 0, end_dim = 2)
    ground_truth = ground_truth.cpu().numpy()
    pred = pred.cpu().numpy()

    f1_score = metrics.f1_score(ground_truth, pred,average = 'micro')
    dice_coeficient = metrics.jaccard_score(ground_truth,pred, average = 'weighted')

    return f1_score,dice_coeficient

In [None]:
"""
This will evaluate the metrics for number of epochs provided
"""
F1_Score = []
Dice_Score = []
F1=0
dice=0
count=0
epochs=10

for epoch in range(epochs):     
    for i, data in enumerate(trainloader, 0):
        #getting the inputs:data is a list of [inputs, labels]
        inputs, labels = data
        inputs=inputs.to(device)
        labels=labels.to(device)

        outputs = model(inputs)
        loss = loss_f(outputs, labels)
        a,b= evaluate(labels,outputs)
        F1=F1+a
        dice=dice+b
        counter=counter+1
        if i % 10 == 0:# print every 10 iteration
          print('epoch{}, iter{}, loss: {}'.format(epoch,i,loss.data))
    #Averaging the F1 score and Dice Score for number of epochs used       
    F1_Score.append(F1/count)
    Dice_Score.append(dice/count)

In [None]:
# Score the most recent batch only: `labels` and `outputs` are whatever the last
# executed loop iteration left behind, so this is not a dataset-wide result.
F1, dice = evaluate(labels, outputs)
for metric_name, metric_value in (("F1 Score: ", F1), ("Dice Score: ", dice)):
    print(metric_name + str(metric_value))

In [None]:
#Plotting the evaluation metrics

titles = ['F1 Score', 'Dice Score']
figures, axis = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
current = [F1_Score, Dice_Score]
# One panel per metric, each drawn exactly once. The x axis is derived from the
# number of recorded epochs instead of a hard-coded 1..10, so the cell keeps
# working when the number of evaluation epochs changes.
for axs, title, scores in zip(axis.flatten(), titles, current):
    e = list(range(1, len(scores) + 1))
    axs.plot(e, scores)
    axs.set_title(title)
    axs.set_xlabel('Epoch')
    axs.set_ylabel('Score')

plt.show()

In [None]:
# Plot the input image, the ground truth and the model prediction for up to 10 random samples

# Seed before the loader is built so the shuffled sample order is reproducible
torch.manual_seed(4444)
data_loader = torch.utils.data.DataLoader(dst, batch_size=1, shuffle=True)

# Colormap with one colour per class, taken from the Pascal palette
color_map = colors.ListedColormap(dst.get_pascal_labels() / 255)
# Bin edges 0..21 map each integer class index to its own colour
Range = list(range(22))
norm_instance = colors.BoundaryNorm(Range, color_map.N)

# Constants of the transforms.Normalize step in pascalVOCDataset; used to undo
# the normalisation so the input image is displayed with its real colours.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

# Predict with dropout disabled; the previous mode is restored afterwards.
was_training = model.training
model.eval()
# zip with range(10) stops after 10 samples, or earlier if the dataset is smaller
for _, (image, solution) in zip(range(10), data_loader):

    # subplot for the original image
    Fig = plt.figure(figsize=(10, 5))
    org = Fig.add_subplot(131)
    original = (image[0] * norm_std + norm_mean).clamp(0, 1)
    org.imshow(original.permute(1, 2, 0).numpy())  # (C, H, W) -> (H, W, C)
    org.title.set_text("Original")
    org.axis("off")

    # subplot for the ground truth
    grd_truth = Fig.add_subplot(132)
    grd_truth.imshow(solution[0].numpy().astype('uint8'), cmap=color_map, norm=norm_instance)
    grd_truth.title.set_text("GroundTruth")
    grd_truth.axis("off")

    # subplot for the model output
    out_img = Fig.add_subplot(133)
    with torch.no_grad():
        model_output = model.classify(image.to(device)).squeeze().cpu()
    out_img.imshow(model_output.numpy().astype('uint8'), cmap=color_map, norm=norm_instance)
    out_img.title.set_text("Output Image")
    out_img.axis("off")
    plt.show()
    # release the figure so repeated runs do not accumulate open figures
    plt.close(Fig)
model.train(was_training)
  
    