In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import torch.nn as nn
import os
import os.path as osp
import collections
import PIL
import imageio
from distutils.version import LooseVersion
import torch.nn.functional as F
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [2]:
# # Downloading dataset from google drive, 밑의 코드의 주석을 풀면 구글 드라이브로 부터 데이터셋 다운, 압축이 풀리고, Kitti라는 폴더가 생성됩니다. 

!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=19EiycfOQtf6uDKvMgwlHZB50cAxX_U4z' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=19EiycfOQtf6uDKvMgwlHZB50cAxX_U4z" -O Kitti.zip && rm -rf /tmp/cookies.txt
!mkdir Kitti
!unzip Kitti.zip -d Kitti


--2022-06-23 01:09:33--  https://docs.google.com/uc?export=download&confirm=t&id=19EiycfOQtf6uDKvMgwlHZB50cAxX_U4z
Resolving docs.google.com (docs.google.com)... 142.251.42.174, 2404:6800:4004:826::200e
Connecting to docs.google.com (docs.google.com)|142.251.42.174|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-04-44-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/0c4pjnh4k6rle976rt6flfkf2qa46q2j/1655946525000/05110516000762554458/*/19EiycfOQtf6uDKvMgwlHZB50cAxX_U4z?e=download [following]
--2022-06-23 01:09:33--  https://doc-04-44-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/0c4pjnh4k6rle976rt6flfkf2qa46q2j/1655946525000/05110516000762554458/*/19EiycfOQtf6uDKvMgwlHZB50cAxX_U4z?e=download
Resolving doc-04-44-docs.googleusercontent.com (doc-04-44-docs.googleusercontent.com)... 172.217.161.33, 2404:6800:4004:80a::2001
Connecting to doc-04-44-docs.googleusercontent.com (doc-04-44-docs.goo

# Dataloader

In [3]:
imgsets_file = osp.join('Kitti', '{}.txt'.format('train'))
for line in open(imgsets_file):
    line = line.strip()
    print(line)
    line = line.split()

training/image_2/um_000000.png training/gt_image_2/um_road_000000.png
training/image_2/um_000001.png training/gt_image_2/um_road_000001.png
training/image_2/um_000002.png training/gt_image_2/um_road_000002.png
training/image_2/um_000003.png training/gt_image_2/um_road_000003.png
training/image_2/um_000004.png training/gt_image_2/um_road_000004.png
training/image_2/um_000005.png training/gt_image_2/um_road_000005.png
training/image_2/um_000006.png training/gt_image_2/um_road_000006.png
training/image_2/um_000007.png training/gt_image_2/um_road_000007.png
training/image_2/um_000008.png training/gt_image_2/um_road_000008.png
training/image_2/um_000009.png training/gt_image_2/um_road_000009.png
training/image_2/um_000010.png training/gt_image_2/um_road_000010.png
training/image_2/um_000011.png training/gt_image_2/um_road_000011.png
training/image_2/um_000012.png training/gt_image_2/um_road_000012.png
training/image_2/um_000013.png training/gt_image_2/um_road_000013.png
training/image_2/um_

In [7]:
class KITTIdataset(torch.utils.data.Dataset):
    class_names = np.array(['background', 'road'])

    def __init__(self, root, transform, split='train'): # root: "./Kitti"
        ## split에 맞는 txt파일 읽어서, 
        ## input image label image경로 리스트로 각각 저장
        self.root = root
        self.transform = transform
        self.split = split
        
        self.images_path = []
        self.ys = []
        
        imgsets_file = osp.join('Kitti', '{}.txt'.format(split)) # train, val
        for line in open(imgsets_file):
            line = line.strip()
            line = line.split()# line[0]: input image path, # line[1]: label image
            img_file = osp.join(root, 'data_road/{}'.format(line[0]))#load image file
            lbl_file = osp.join(root, 'data_road/{}'.format(line[1]))#load label file
            self.images_path.append(img_file)
            self.ys.append(lbl_file)
            
    def __len__(self):
        ## length return
        return len(self.ys)
    
    def __getitem__(self, index):
        ## index에 맞는 image, label image읽어오기
        # load image
        img_file = self.images_path[index]
        img = PIL.Image.open(img_file)
        img = np.array(img)
        
        lbl_file = self.ys[index]
        lbl = PIL.Image.open(lbl_file)
        lbl = np.array(lbl)
        lbl[lbl==255] = 1
        
        # load label
        
        ## black: 0, white: 255 255->1
        ## 0, 1
        
        return self.transform(img), torch.from_numpy(lbl).long()

## Define dataset and dataloader

In [8]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5), (0.5))
])
train_dataset = KITTIdataset(root = './Kitti', split = 'train', transform = transform)
val_dataset = KITTIdataset(root = './Kitti', split = 'val', transform = transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = 1, shuffle = True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size = 1, shuffle = False)

In [9]:
print(train_dataset[7][0].shape) ## [0]: img tensor, [1]: label tensor

torch.Size([3, 375, 1242])


# Define the Network
-VGG16

- FCN model

In [None]:
import torch.nn as nn
import torch

# Maxpooling with ceil mode


In [None]:
#Transposed convolution

In [24]:
class FCN(nn.Module):
    def __init__(self, num_class = 3):
        super(FCN, self).__init__()
        
        #3->64 2 
        #64->128 2
        #128->256 3 conv->relu->conv->relu->conv
        #256->512 3 conv->relu->conv->relu->conv
        #512->512 3 conv->relu->conv->relu->conv
        
        #375*1242 ->575*1442 574*1440
        ## conv1
        self.features1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding = 100),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding =1),
            nn.ReLU())
        
        ## conv2
        self.features2 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(128, 128, 3, padding = 1),
            nn.ReLU())
        ## self.pool2
        
        ## conv3
        self.features3 = nn.Sequential(
            nn.Conv2d(128, 256, 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding = 1))
        
        ### pool3
        ## conv4
        self.features4 = nn.Sequential(
            nn.Conv2d(256, 512, 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding = 1))
        
        ### pool4
        ## conv5
        self.features5 = nn.Sequential(
            nn.Conv2d(512, 512, 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding = 1))
        
        self.maxpool = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        
        #4096->4096->num_class
        self.classifier = nn.Sequential(
            nn.Conv2d(512, 4096, 7),
            nn.ReLU(),
            nn.Dropout2d(),
            nn.Conv2d(4096, 4096, 1),
            nn.ReLU(),
            nn.Dropout2d(),
            nn.Conv2d(4096, num_class, 1))
        
    
        
        ## pool 5
        ## upsampling transposed convolution
        self.upscore2 = nn.ConvTranspose2d(num_class, num_class, kernel_size=4, stride=2, bias=False)
        self.upscore4 = nn.ConvTranspose2d(num_class, num_class, kernel_size=4, stride=2, bias=False)
        self.upscore8 = nn.ConvTranspose2d(num_class, num_class, kernel_size=16, stride=8, bias=False)
        
        self.score_pool4 = nn.Conv2d(512, num_class, 1)
        self.score_pool3 = nn.Conv2d(256, num_class, 1)
        
        
        self.params = [self.features1, self.features2, self.features3, 
                       self.features4, self.features5]
        

                             
    def forward(self, inputs):
        # input [Batch size, 3, w, h]
        x = self.features1(inputs) #[Batch size, 64, w, h]
        pool1 = self.maxpool(x) #[Batch size, 64, w/2, h/2]
        
        x = self.features2(pool1) #[Batch size, 128, w/2, h/2]
        pool2 = self.maxpool(x) #[Batch size, 128, w/4, h/4]
        
        x = self.features3(pool2) #[Batch size, 256, w/4, h/4]
        pool3 = self.maxpool(x) #[Batch size, 256, w/8, h/8]
        
        x = self.features4(pool3) #[Batch size, 512, w/8, h/8]
        pool4 = self.maxpool(x) #[Batch size, 512, w/16, h/16]
        
        x = self.features5(pool4) #[Batch size, 512, w/16, h/16]
        pool5 = self.maxpool(x) #[Batch size, 512, w/32, h/32]
        
        x = self.classifier(pool5) #[w/32 - 6, h/32 - 6]
        x = self.upscore2(x) ##[w/16 - 10, h/16 - 10]
        
        pool4 = self.score_pool4(pool4)
        pool4 = pool4[:,:, 5:5+x.size()[2], 5:5+x.size()[3]]
        x = torch.add(x, pool4)
        
        x = self.upscore4(x)
        
        pool3 = self.score_pool3(pool3)
        pool3 = pool3[:,:, 9:9+x.size()[2], 9:9+x.size()[3]]
        x = torch.add(x, pool3)
        
        x = self.upscore8(x)
        x = x[:,:, 33:33+inputs.size()[2], 33:33+inputs.size()[3]]
        
        
        ## channel num_class
        return x ## label map [batch, num_class, inputs h, input w]
    
    def copy_params(self, vgg):
        for l1, l2 in zip(vgg.features, self.params):
            if (isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d)):
                assert l1.weight.size() == l2.weight.size()
                assert l1.bias.size() == l2.bias.size()
                l2.weight.data = l1.weight.data
                l2.bias.data = l1.bias.data
        
        

In [25]:
model = FCN(3)
# # # print(model)
temp_input = torch.rand(1, 3, 1024, 1024)
m1 = nn.ConvTranspose2d(3, 3, kernel_size=4, stride=2, bias=False)
m2 = nn.ConvTranspose2d(3, 3, kernel_size=4, stride=2, bias=False)
m3 = nn.ConvTranspose2d(3, 3, kernel_size=16, stride=8, bias=False)
output = model(temp_input)
print(output.size())

torch.Size([1, 3, 1024, 1024])


## Measure accuracy and visualization

In [35]:
def _fast_hist(label_true, label_pred, n_class):
    ## define mask, histogram
    ## hint : np.bincount
    mask = (label_true >= 0) & (label_true < n_class)
    hist = np.bincount(n_class*label_true[mask].astype(int)+label_pred[mask]
                      , minlength = n_class**2).reshape(n_class, n_class)
    
    return hist

def compute_mean_iou(label_trues, label_preds, n_class): 
    # label_true: [h, w] # label_pred: [h, w] # n_class: 2
    # TO DO : 각 Class 별 Intersection of Union을 계산
    hist = np.zeros((n_class, n_class))
    for lt, lp in zip(label_trues, label_preds):
        hist += _fast_hist(lt.flatten(), lp.flatten(), n_class)
    iu = np.diag(hist)/ (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
    mean_iou = np.nanmean(iu)
        # Hint1 : _fast_hist를 통해 confusion matrix를 구할 수 있다. 
        # Hint2 : 해당 class의 True = confusion matrix의 해당 row 값의 합
        #         해당 class의 Positive = confusion matrix의 해당 column 값의 합 
        #         해당 class의 True_Positive = confusion matrix의 (class, class)의 값
        # Hint3 : iou = True_Positive / True + Positive - True_Positive 

    return mean_iou

def visualization(net, input_img, epoch):
    ## TO DO : image를 network에 넣어 label을 추출
    img = transform(input_img).unsqueeze(0)
    img.cuda()
    
    score = net(img)
    
    _, lbl_pred = score.max(1)
    lbl_pred = lbl_pred.cpu().numpy()
    lbl_pred = np.squeeze(lbl_pred)
    
    
    os.makedirs("./pred", exist_ok = True)
    os.makedirs("./input", exist_ok = True)

    imageio.imsave('./pred/mask_'+str(epoch+1)+'.png', lbl_pred)
    plt.imshow(mpimg.imread('./pred/mask_'+str(epoch+1)+'.png'),cmap='gray') ### visualize predicted label map
    plt.show()
    input_img = np.array(input_img)
    imageio.imsave('./input/input_'+str(epoch+1)+'.png', input_img)
    plt.imshow(mpimg.imread('./input/input_'+str(epoch+1)+'.png'))
    plt.show()
    

In [34]:
label_true = np.array([
    [1, 2],
    [2, 3]
])
label_pred = np.array([
    [1, 2],
    [2, 2]
])

compute_mean_iou(label_true, label_pred, 4)

  iu = np.diag(hist)/ (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))


0.5555555555555555

# Train

In [None]:
##load pretrained model from torchvision
##pretrained using coco 2017
#net = torchvision.models.segmentation.fcn_resnet50(pretrained = True)
# print(net)
#net.classifier = torchvision.models.segmentation.fcn.FCNHead( 2048, 2)
#net = net.cuda()
#print(net)

In [36]:
vgg16 = torchvision.models.vgg16(pretrained = True)
net = FCN(num_class = 2)
net.copy_params(vgg16)

net = net.cuda()

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

In [37]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss() 
optimizer = optim.Adam(net.parameters(), lr=1e-5, weight_decay = 0.0001)

In [38]:
training_epochs = 5
best_iou = 0
num_class = len(train_loader.dataset.class_names)
j=0

for epoch in range(training_epochs):
    #train
    net.train()
    print ('current epoch : %d'%(epoch))
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        # load data, forward
        data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
    
        score = net(data)
        loss = criterion(score, target)
        loss.backward()
        optimizer.step()
    
        if batch_idx % 20 ==0:
            print ('batch : {}, loss : {}'.format(batch_idx, loss.item()))
        j += 1
        
    #validation
    net.eval()
    val_loss = 0
    metrics = []
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(val_loader):
            # load data, forward
            data, target = data.cuda(), target.cuda()
            score = net(data)
            
            # calc val loss, accuracy
            loss = criterion(score, target)

            val_loss += loss.item()

            _, lbl_pred = score.max(1) #[batch, class, h, w] -> [batch, h, w]
            lbl_pred = lbl_pred.cpu().numpy()
            lbl_true = target.cpu().numpy()
            
            for lt, lp in zip(lbl_true, lbl_pred):
                tmp = compute_mean_iou(lt, lp, num_class)
                metrics.append(tmp)
           
    val_loss /= len(val_loader)
    metrics = np.mean(metrics)
    
    print ('val loss : {}, mean_iou : {}'.format(val_loss, metrics))

    ##save model
    if best_iou < metrics:
        best_iou = metrics
        print("Best model saved")
        torch.save(net.state_dict(), './model_best.pth')
    
    ## visualization
    img = PIL.Image.open('./road_sample1.png')
    visualization(net, img, epoch)
print('Finished Training')

current epoch : 0
batch : 0, loss : 0.692955493927002
batch : 20, loss : 0.692620575428009
batch : 40, loss : 0.6920413374900818
batch : 60, loss : 0.6894384026527405
batch : 80, loss : 0.6717439889907837
batch : 100, loss : 0.6256788372993469
batch : 120, loss : 0.626075267791748
batch : 140, loss : 0.5987927913665771
batch : 160, loss : 0.5587637424468994
batch : 180, loss : 0.5701109766960144
batch : 200, loss : 0.5202690958976746
batch : 220, loss : 0.5167742967605591
batch : 240, loss : 0.5405365824699402
val loss : 0.5363060788561901, mean_iou : 0.3997727512237277
Best model saved


RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor

In [None]:
img = PIL.Image.open('./road_sample1.png')
visualization(net, img, epoch)