<a href="https://colab.research.google.com/github/Sagarsud93/Computer-Vision_Object_Proposals_OpenCV/blob/master/fcn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [33]:
from __future__ import print_function

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

import numpy as np
import time
import sys
import os

from glob import glob
import re


import cv2
import json

from skimage import io, transform
import matplotlib.pyplot as plt

np.random.seed(1234)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)



cuda


In [34]:
# Load the Drive helper and mount
from google.colab import drive

# This will prompt for authorization.
drive.mount('/content/drive')

!ls "/content/drive/My Drive/data_road"
!cd "/content/drive/My Drive/data_road"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
testing  training


In [35]:
def normalize(img, mean, std):
    img = img/255.0
    img[0] = (img[0] - mean[0]) / std[0]
    img[1] = (img[1] - mean[1]) / std[1]
    img[2] = (img[2] - mean[2]) / std[2]
    img = np.clip(img, 0.0, 1.0)

    return img

def denormalize(img, mean, std):
    img[0] = (img[0] * std[0]) + mean[0]
    img[1] = (img[1] * std[1]) + mean[1]
    img[2] = (img[2] * std[2]) + mean[2]
    img = img * 255

    img = np.clip(img, 0, 255)
    return img


def get_label_paths(label_path):
    label_paths = {re.sub(r'_(lane|road)_', '_', os.path.basename(path)): path
                   for path in glob(os.path.join(label_path, '*_road_*.png'))}

    return label_paths

def get_test_paths(test_path):
    test_paths = [os.path.basename(path)
                      for path in glob(os.path.join(test_path, '*.png'))]

    return test_paths

def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v

    return nn.Sequential(*layers)

def gen_test_output(n_class, testloader, model, test_folder):
    model.eval();
    with torch.no_grad():
        for i, data in enumerate(testloader):
            sample = data
            images = sample['image']
            images = images.float()
            images = Variable(images.to(device))

            output = model(images)
            output = torch.sigmoid(output)
            output = output.cpu()
            N, c, h, w = output.shape
            pred = np.squeeze(output.detach().cpu().numpy(), axis=0)

            pred = pred.transpose((1, 2, 0))
            pred = pred.argmax(axis=2)
            pred = (pred > 0.5)

            pred = pred.reshape(*pred.shape, 1)
            pred = np.concatenate((pred, np.invert(pred)), axis=2).astype('float')
            pred = np.concatenate((pred, np.zeros((*pred[:,:,0].shape, 1))), axis=2).astype('float')

            pred[pred == 1.0] = 127.0
            # images = images.cpu().detach().numpy()
            # images = np.squeeze(images)
            # images = images.transpose((1, 2, 0))

            # images = denormalize(images, mean=[0.485, 0.456, 0.406],
            #                         std=[0.229, 0.224, 0.225])
            test_paths = get_test_paths(test_folder)
            output = resize_label(os.path.join(test_folder, test_paths[i]), pred)
            output = output/127.0
            output = np.clip(output, 0.0, 1.0)
            yield test_paths[i], output

def save_inference_samples(output_dir, testloader, model, test_folder):
    print('Training Finished. Saving test images to: {}'.format(output_dir))
    image_outputs = gen_test_output(2, testloader, model, test_folder)
    for name, image in image_outputs:
        plt.imsave(os.path.join(output_dir, name), image)

def resize_label(image_path, label):
    image = io.imread(image_path)
    label = transform.resize(label, image.shape)
    output = cv2.addWeighted(image, 0.6, label, 0.4, 0, dtype = 0)
    return output

In [36]:

class KittiDatasetTrain(Dataset):
    def __init__(self, rootdir, transform=None):
        self.transform = transform
        self.rootdir = rootdir
        self.traindir = rootdir + "/training/image_2"
        self.labeldir = rootdir + "/training/gt_image_2"
        
    def __getitem__(self, index):

        label_paths = get_label_paths(label_path=self.labeldir)
        image_path = list(label_paths)[index]
        img = io.imread(os.path.join(self.traindir, image_path))
        label = io.imread(label_paths[image_path])
        background_color = np.array([255, 0, 0])
        img = cv2.resize(img, (256, 256))
        label = cv2.resize(label, (256, 256))
        gt_bg = np.all(label == background_color, axis=2)
        gt_bg = gt_bg.reshape(*gt_bg.shape, 1)
        gt_image = np.concatenate((gt_bg, np.invert(gt_bg)), axis=2)
        img = normalize(img, mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])

        img = img.transpose((2, 0, 1))
        gt_image = gt_image.transpose((2, 0, 1))
        gt_image = gt_image.astype("float")

        img = torch.from_numpy(img)
        #img = torch.clamp(img, 0, 1)
        sample = {'image': img,
                'label': torch.from_numpy(gt_image)}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def __len__(self):
        path, dirs, files = next(os.walk(self.traindir))
        n = len(files)
        return n # of how many examples(images?)

class KittiDatasetTest(Dataset):
    def __init__(self, rootdir, transform=None):
        self.transform = transform
        self.rootdir = rootdir
        self.testdir = rootdir + "/testing/image_2"

    def __getitem__(self, index):

        test_paths = get_test_paths(test_path=self.testdir)
        image_path = test_paths[index]
        img = io.imread(os.path.join(self.testdir, image_path))

        img = cv2.resize(img, (256, 256))
        img = normalize(img, mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])
        img = img.transpose((2, 0, 1))

        img = torch.from_numpy(img)
        #img = torch.clamp(img, 0, 1)
        sample = {'image': img}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def __len__(self):
        path, dirs, files = next(os.walk(self.testdir))
        n = len(files)
        return n

In [37]:
import torchvision.models as models
#from utils import make_layers

ranges = {
    'vgg11': ((0, 3), (3, 6),  (6, 11),  (11, 16), (16, 21)),
    'vgg13': ((0, 5), (5, 10), (10, 15), (15, 20), (20, 25)),
    'vgg16': ((0, 5), (5, 10), (10, 17), (17, 24), (24, 31)),
    'vgg19': ((0, 5), (5, 10), (10, 19), (19, 28), (28, 37))
}

cfg = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

class VGGNet(models.VGG):
    def __init__(self, pretrained=True, model='vgg16', requires_grad=True, remove_fc=True, show_params=False):
        super().__init__(make_layers(cfg[model]))
        self.ranges = ranges[model]

        if pretrained:
            exec("self.load_state_dict(models.%s(pretrained=True).state_dict())" % model)

        if not requires_grad:
            for param in super().parameters():
                param.requires_grad = False

        if remove_fc:  # delete redundant fully-connected layer params, can save memory
            del self.classifier

        if show_params:
            for name, param in self.named_parameters():
                print(name, param.size())

    def forward(self, x):
        output = {}

        # get the output of each maxpooling layer (5 maxpool in VGG net)
        for idx in range(len(self.ranges)):
            for layer in range(self.ranges[idx][0], self.ranges[idx][1]):
                x = self.features[layer](x)
            output["x%d"%(idx+1)] = x

        return output

In [38]:
class FCN32s(nn.Module):

    def __init__(self, pretrained_net, n_class):
        super().__init__()
        self.n_class = n_class
        self.pretrained_net = pretrained_net
        self.relu    = nn.ReLU(inplace=True)
        self.deconv1 = nn.ConvTranspose2d(512, 512, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn1     = nn.BatchNorm2d(512)
        self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn2     = nn.BatchNorm2d(256)
        self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn3     = nn.BatchNorm2d(128)
        self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn4     = nn.BatchNorm2d(64)
        self.deconv5 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn5     = nn.BatchNorm2d(32)
        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        output = self.pretrained_net(x)
        x5 = output['x5']  # size=(N, 512, x.H/32, x.W/32)

        score = self.bn1(self.relu(self.deconv1(x5)))     # size=(N, 512, x.H/16, x.W/16)
        score = self.bn2(self.relu(self.deconv2(score)))  # size=(N, 256, x.H/8, x.W/8)
        score = self.bn3(self.relu(self.deconv3(score)))  # size=(N, 128, x.H/4, x.W/4)
        score = self.bn4(self.relu(self.deconv4(score)))  # size=(N, 64, x.H/2, x.W/2)
        score = self.bn5(self.relu(self.deconv5(score)))  # size=(N, 32, x.H, x.W)
        score = self.classifier(score)                    # size=(N, n_class, x.H/1, x.W/1)

        return score  # size=(N, n_class, x.H/1, x.W/1)


class FCN16s(nn.Module):

    def __init__(self, pretrained_net, n_class):
        super().__init__()
        self.n_class = n_class
        self.pretrained_net = pretrained_net
        self.relu    = nn.ReLU(inplace=True)
        self.deconv1 = nn.ConvTranspose2d(512, 512, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn1     = nn.BatchNorm2d(512)
        self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn2     = nn.BatchNorm2d(256)
        self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn3     = nn.BatchNorm2d(128)
        self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn4     = nn.BatchNorm2d(64)
        self.deconv5 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn5     = nn.BatchNorm2d(32)
        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        output = self.pretrained_net(x)
        x5 = output['x5']  # size=(N, 512, x.H/32, x.W/32)
        x4 = output['x4']  # size=(N, 512, x.H/16, x.W/16)

        score = self.relu(self.deconv1(x5))               # size=(N, 512, x.H/16, x.W/16)
        score = self.bn1(score + x4)                      # element-wise add, size=(N, 512, x.H/16, x.W/16)
        score = self.bn2(self.relu(self.deconv2(score)))  # size=(N, 256, x.H/8, x.W/8)
        score = self.bn3(self.relu(self.deconv3(score)))  # size=(N, 128, x.H/4, x.W/4)
        score = self.bn4(self.relu(self.deconv4(score)))  # size=(N, 64, x.H/2, x.W/2)
        score = self.bn5(self.relu(self.deconv5(score)))  # size=(N, 32, x.H, x.W)
        score = self.classifier(score)                    # size=(N, n_class, x.H/1, x.W/1)

        return score  # size=(N, n_class, x.H/1, x.W/1)



In [39]:
import torch
import cv2
from glob import glob
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import argparse
import os
#from torch.utils.data import DataLoader
#from dataset import KittiDatasetTrain, KittiDatasetTest
#from vgg import VGGNet
#from fcn import FCN8s
#from utils import save_inference_samples, get_test_paths

#np.random.seed(1234)

##parser = argparse.ArgumentParser()

#parser.add_argument('--output_dir', type=str, required=True,help='output directory for test inference')
output_dir = "/content/drive/My Drive/data_road/inference"
#parser.add_argument('--root_dir', type=str, required=True,help='root directory for the dataset')
root_dir = "/content/drive/My Drive/data_road"
#parser.add_argument('--model', type=str, default='vgg19',help='model architecture to be used for FCN')
model='vgg16'
epochs=100
n_class=2
batch_size=16
lr=1e-3
momentum=0.9
weight_decay=5e-4
#parser.add_argument('--epochs', type=int, default=100,help='num of training epochs')
#parser.add_argument('--n_class', type=int, default=2,help='number of label classes')
#parser.add_argument('--batch_size', type=int, default=16,help='training batch size')
#parser.add_argument('--lr', type=float, default=1e-3,help='learning rate')
#parser.add_argument('--momentum', type=float, default=0.9,help='momentum for SGD')
#parser.add_argument('--weight_decay', type=float, default=5e-4,help='weight decay for L2 penalty')
#args = parser.parse_args()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def train(n_epoch, trainloader):
    vgg_model = VGGNet('vgg16', requires_grad=True)
    model = FCN16s(pretrained_net=vgg_model, n_class=2)
    model = model.to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr,
                                momentum, weight_decay)
    criterion = nn.BCELoss()

    for epoch in range(n_epoch):
        running_loss = 0.0
        for i, data in enumerate(trainloader):
            sample = data
            images = sample['image']
            images = images.float()
            labels = sample['label']
            labels = labels.float()
            images = Variable(images.cuda())
            labels = Variable(labels.cuda(), requires_grad=False)

            optimizer.zero_grad()
            output = model(images)
            output = torch.sigmoid(output)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 10 == 9:    # print every 10 mini-batches
                print('Epoch: %d, Loss: %.4f' %
                      (epoch + 1, running_loss / 10))
                running_loss = 0.0
    return model

def main():
    kitti_train = KittiDatasetTrain(root_dir)
    kitti_test = KittiDatasetTest(root_dir)

    trainloader = DataLoader(kitti_train, batch_size)
    testloader = DataLoader(kitti_test, batch_size=1)

    print("Training model..")
    model = train(epochs, trainloader)
    print("Completed training!")
    print("Starting inference...")
    test_folder = os.path.join(root_dir, "testing/image_2")
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    save_inference_samples(output_dir, testloader,
                            model, test_folder)
    print("Inference completed!")

if __name__ == "__main__":
    main()

Training model..
Epoch: 1, Loss: 0.7403
Epoch: 2, Loss: 0.7291
Epoch: 3, Loss: 0.7152
Epoch: 4, Loss: 0.7020
Epoch: 5, Loss: 0.6895
Epoch: 6, Loss: 0.6779
Epoch: 7, Loss: 0.6670
Epoch: 8, Loss: 0.6567
Epoch: 9, Loss: 0.6469
Epoch: 10, Loss: 0.6377
Epoch: 11, Loss: 0.6288
Epoch: 12, Loss: 0.6203
Epoch: 13, Loss: 0.6122
Epoch: 14, Loss: 0.6043
Epoch: 15, Loss: 0.5968
Epoch: 16, Loss: 0.5895
Epoch: 17, Loss: 0.5824
Epoch: 18, Loss: 0.5755
Epoch: 19, Loss: 0.5688
Epoch: 20, Loss: 0.5622
Epoch: 21, Loss: 0.5558
Epoch: 22, Loss: 0.5495
Epoch: 23, Loss: 0.5432
Epoch: 24, Loss: 0.5370
Epoch: 25, Loss: 0.5309
Epoch: 26, Loss: 0.5246
Epoch: 27, Loss: 0.5183
Epoch: 28, Loss: 0.5118
Epoch: 29, Loss: 0.5051
Epoch: 30, Loss: 0.4980
Epoch: 31, Loss: 0.4906
Epoch: 32, Loss: 0.4826
Epoch: 33, Loss: 0.4739
Epoch: 34, Loss: 0.4646
Epoch: 35, Loss: 0.4544
Epoch: 36, Loss: 0.4433
Epoch: 37, Loss: 0.4316
Epoch: 38, Loss: 0.4193
Epoch: 39, Loss: 0.4070
Epoch: 40, Loss: 0.3947
Epoch: 41, Loss: 0.3827
Epoch: 4