<a href="https://colab.research.google.com/github/031wnstjd/Deep-Learning/blob/master/DL_12_FCN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DL_12 - FCN

In [None]:
# Student identification, prompted interactively; each input() call blocks until answered.
name = input("Name :")
ID = input("student ID :")

Name :추준성
student ID :2016145028


## Semantic segmentation

#### Prepare

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
from tqdm import tqdm

#### Dataset (VOC)

In [None]:
# Create the target directory first: `wget -O dataset/...` cannot write into a missing folder.
!mkdir -p "dataset"
!wget -nc -O dataset/VOC2012_TrainVal.tar "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"

# Extract without -v (the per-file listing would flood the cell output), then drop the tarball.
!tar -xf dataset/VOC2012_TrainVal.tar -C "dataset"
!rm dataset/VOC2012_TrainVal.tar

In [None]:
import os.path as osp

import cv2
import random
from PIL import Image
from torch.utils import data

class VOC(data.Dataset):
    """
    PASCAL VOC Segmentation dataset.

    Image ids are read from ``<root>/VOC2012/ImageSets/Segmentation/<split>.txt``.
    Each sample is a tuple ``(image_id, image, label)`` where ``image`` is a
    mean-subtracted float32 array in CHW layout (channel order as produced by
    ``cv2.imread``, i.e. BGR) and ``label`` is an int64 HW array of class indices.

    Args:
        root: directory that contains the ``VOC2012`` folder.
        split: ``"train"`` or ``"val"``.
        ignore_label: label value written into padded regions.
        mean_bgr: per-channel mean subtracted from every image.
        augment: if True, apply random scale / pad / crop / flip per sample.
        crop_size: side length of the square crop taken during augmentation.
        scales: candidate scale factors; one is drawn at random per sample.
        flip: enable the random horizontal flip (only used when ``augment``).

    Raises:
        ValueError: if ``split`` is neither ``"train"`` nor ``"val"``.
    """

    def __init__(self, root, split, ignore_label, mean_bgr, augment=True,
        crop_size=224, scales=[1.0], flip=True):
        super(VOC, self).__init__()

        self.root = root
        self.split = split
        self.ignore_label = ignore_label
        self.mean_bgr = np.array(mean_bgr)
        self.augment = augment
        self.crop_size = crop_size
        # Copy so the shared default list (or a caller's list) is never aliased.
        self.scales = list(scales)
        self.flip = flip
        self._set_files()

    def __getitem__(self, index):
        """Return ``(image_id, image[C,H,W] float32, label[H,W] int64)`` for ``index``."""
        image_id, image, label = self._load_data(index)
        if self.augment:
            image, label = self._augmentation(image, label)
        # Mean subtraction
        image -= self.mean_bgr
        # HWC -> CHW
        image = image.transpose(2, 0, 1)
        return image_id, image.astype(np.float32), label.astype(np.int64)

    def __len__(self):
        """Number of image ids listed for the selected split."""
        return len(self.files)

    def _set_files(self):
        """Resolve the dataset directories and load the id list of the split."""
        self.root = osp.join(self.root, "VOC2012")
        self.image_dir = osp.join(self.root, "JPEGImages")
        self.label_dir = osp.join(self.root, "SegmentationClass")

        if self.split in ["train", "val"]:
            list_path = osp.join(
                self.root, "ImageSets/Segmentation", self.split + ".txt"
            )
            # Context manager so the handle is closed deterministically.
            with open(list_path, "r") as list_file:
                self.files = [id_.rstrip() for id_ in list_file]
        else:
            raise ValueError("Invalid split name: {}".format(self.split))

    def _load_data(self, index):
        """
        Read the image and its segmentation mask for ``self.files[index]``.

        Returns:
            ``(image_id, image, label)`` with ``image`` as float32 HWC and
            ``label`` as int32 HW.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        # Set paths
        image_id = self.files[index]
        image_path = osp.join(self.image_dir, image_id + ".jpg")
        label_path = osp.join(self.label_dir, image_id + ".png")
        # Load an image; cv2.imread signals failure by returning None, not by raising.
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            raise FileNotFoundError("Could not read image: {}".format(image_path))
        image = image.astype(np.float32)
        with Image.open(label_path) as label_image:
            label = np.asarray(label_image, dtype=np.int32)
        return image_id, image, label

    def _augmentation(self, image, label):
        """
        Randomly scale, pad, crop and (optionally) flip an image/label pair.

        The result is always ``crop_size`` x ``crop_size``. Samples smaller than
        the crop are padded at the bottom/right: the image with ``mean_bgr`` and
        the label with ``ignore_label``.
        """
        # Scaling
        h, w = label.shape
        scale_factor = random.choice(self.scales)
        h, w = (int(h * scale_factor), int(w * scale_factor))
        image = cv2.resize(image, (w, h), interpolation=cv2.INTER_LINEAR)
        # Nearest-neighbour keeps label values discrete (no blended class ids).
        label = Image.fromarray(label).resize((w, h), resample=Image.NEAREST)
        label = np.asarray(label, dtype=np.int64)
        # Padding to fit for crop_size
        h, w = label.shape
        pad_h = max(self.crop_size - h, 0)
        pad_w = max(self.crop_size - w, 0)
        pad_kwargs = {
            "top": 0,
            "bottom": pad_h,
            "left": 0,
            "right": pad_w,
            "borderType": cv2.BORDER_CONSTANT,
        }
        if pad_h > 0 or pad_w > 0:
            image = cv2.copyMakeBorder(image, value=self.mean_bgr, **pad_kwargs)
            label = cv2.copyMakeBorder(label, value=self.ignore_label, **pad_kwargs)
        # Cropping (after padding both sides are >= crop_size, so the ranges are valid)
        h, w = label.shape
        start_h = random.randint(0, h - self.crop_size)
        start_w = random.randint(0, w - self.crop_size)
        end_h = start_h + self.crop_size
        end_w = start_w + self.crop_size
        image = image[start_h:end_h, start_w:end_w]
        label = label[start_h:end_h, start_w:end_w]

        if self.flip:
            # Random flipping
            if random.random() < 0.5:
                image = np.fliplr(image).copy()  # HWC
                label = np.fliplr(label).copy()  # HW
        return image, label

In [None]:
# --- Data configuration ---
data_root = "dataset/VOCdevkit"          # folder that contains VOC2012/
mean_bgr = [104.008, 116.669, 122.675]   # per-channel mean passed to VOC for subtraction
scales = [0.5, 0.75, 1.0, 1.25, 1.5]     # candidate scale factors for training augmentation
batch_size = 8

# Training split: augmented, so every sample is a 224x224 crop and can be batched.
trainset = VOC(
    root=data_root,
    split="train",
    ignore_label=255,
    mean_bgr=mean_bgr,
    augment=True,
    crop_size=224,
    scales=scales,
    flip=True,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# Validation split: no augmentation, hence no cropping; images are served one at a time.
testset = VOC(
    root=data_root,
    split="val",
    ignore_label=255,
    mean_bgr=mean_bgr,
    augment=False,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=1,
    shuffle=False,
    num_workers=2,
)

#### VGG16 FCN 구현

In [None]:
class VGG16_FCN(nn.Module):
    """
    Fully convolutional segmentation network on a VGG16-style backbone.

    Five conv/pool stages downsample the input by 32. ``fc6``/``fc7`` are 1x1
    convolutions with 4096 channels, ``score_fc`` maps them to ``n_class``
    scores, and the scores are upsampled x2, x2, x8 with transposed
    convolutions while 1x1-conv skip scores from ``pool4`` and ``pool3`` are
    added in between.

    Args:
        n_class: number of output classes (channels of the returned score map).

    Input:  ``(B, 3, H, W)``.
    Output: ``(B, n_class, H, W)``. When H and W are multiples of 32 the
    upsampled maps already line up; otherwise they are bilinearly resized to
    the skip feature / input size so that arbitrary image sizes are accepted.
    """

    def __init__(self, n_class):
        super(VGG16_FCN, self).__init__()

        # NOTE: keep the registration order of the 13 backbone convolutions
        # first; the weight-loading cell of this notebook pairs pretrained
        # keys with named_parameters() by position.
        self.conv11 = nn.Conv2d(3, 64, 3, padding=1)
        self.relu11 = nn.ReLU(True)
        self.conv12 = nn.Conv2d(64, 64, 3, padding=1)
        self.relu12 = nn.ReLU(True)
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv21 = nn.Conv2d(64, 128, 3, padding=1)
        self.relu21 = nn.ReLU(True)
        self.conv22 = nn.Conv2d(128, 128, 3, padding=1)
        self.relu22 = nn.ReLU(True)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv31 = nn.Conv2d(128, 256, 3, padding=1)
        self.relu31 = nn.ReLU(True)
        self.conv32 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu32 = nn.ReLU(True)
        self.conv33 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu33 = nn.ReLU(True)
        self.pool3 = nn.MaxPool2d(2, 2)

        self.conv41 = nn.Conv2d(256, 512, 3, padding=1)
        self.relu41 = nn.ReLU(True)
        self.conv42 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu42 = nn.ReLU(True)
        self.conv43 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu43 = nn.ReLU(True)
        self.pool4 = nn.MaxPool2d(2, 2)

        self.conv51 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu51 = nn.ReLU(True)
        self.conv52 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu52 = nn.ReLU(True)
        self.conv53 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu53 = nn.ReLU(True)
        self.pool5 = nn.MaxPool2d(2, 2)

        # "Fully connected" layers expressed as 1x1 convolutions.
        self.fc6 = nn.Conv2d(512, 4096, 1)
        self.relu6 = nn.ReLU(True)
        self.drop6 = nn.Dropout(0.5)

        self.fc7 = nn.Conv2d(4096, 4096, 1)
        self.relu7 = nn.ReLU(True)
        self.drop7 = nn.Dropout(0.5)

        self.score_fc = nn.Conv2d(4096, n_class, 1)

        # 1x1 convolutions turning skip features into per-class scores.
        self.skip_pool3 = nn.Conv2d(256, n_class, 1)
        self.skip_pool4 = nn.Conv2d(512, n_class, 1)

        # Learned upsampling: x2, x2, x8 (kernel size == stride, no overlap).
        self.upscore2 = nn.ConvTranspose2d(n_class, n_class, 2, stride=2, bias=False)
        self.upscore4 = nn.ConvTranspose2d(n_class, n_class, 2, stride=2, bias=False)
        self.upscore8 = nn.ConvTranspose2d(n_class, n_class, 8, stride=8, bias=False)

    @staticmethod
    def _match_size(score, reference):
        """Return ``score`` resized to the spatial size of ``reference`` (no-op if equal)."""
        target_hw = tuple(reference.shape[-2:])
        if tuple(score.shape[-2:]) == target_hw:
            return score
        return F.interpolate(score, size=target_hw, mode="bilinear", align_corners=False)

    def forward(self, x):
        """Compute per-pixel class scores; shape comments assume a 224x224 input, n_class=21."""

        # (batch_size, 3, 224, 224)
        h = self.relu11(self.conv11(x))
        h = self.relu12(self.conv12(h))
        h = self.pool1(h)

        # (batch_size, 64, 112, 112)
        h = self.relu21(self.conv21(h))
        h = self.relu22(self.conv22(h))
        h = self.pool2(h)

        # (batch_size, 128, 56, 56)
        h = self.relu31(self.conv31(h))
        h = self.relu32(self.conv32(h))
        h = self.relu33(self.conv33(h))
        h = self.pool3(h)
        pool3 = h

        # (batch_size, 256, 28, 28)
        h = self.relu41(self.conv41(h))
        h = self.relu42(self.conv42(h))
        h = self.relu43(self.conv43(h))
        h = self.pool4(h)
        pool4 = h

        # (batch_size, 512, 14, 14)
        h = self.relu51(self.conv51(h))
        h = self.relu52(self.conv52(h))
        h = self.relu53(self.conv53(h))
        h = self.pool5(h)

        # (batch_size, 512, 7, 7)
        h = self.relu6(self.fc6(h))
        h = self.drop6(h)

        # (batch_size, 4096, 7, 7)
        h = self.relu7(self.fc7(h))
        h = self.drop7(h)

        # (batch_size, 4096, 7, 7)
        h = self.score_fc(h)

        # (batch_size, 21, 7, 7) -> (batch_size, 21, 14, 14)
        upscore2 = self.upscore2(h)

        # Skip connection for pool4: (batch_size, 21, 14, 14)
        score_pool4c = self.skip_pool4(pool4)

        # Floor division in the pools can leave the upsampled map one pixel
        # short for sizes that are not multiples of 32; align before adding.
        h = self._match_size(upscore2, score_pool4c) + score_pool4c  # (batch_size, 21, 14, 14)
        upscore_pool4 = self.upscore4(h)  # (batch_size, 21, 28, 28)

        # Skip connection for pool3: (batch_size, 21, 28, 28)
        score_pool3c = self.skip_pool3(pool3)

        h = self._match_size(upscore_pool4, score_pool3c) + score_pool3c  # (batch_size, 21, 28, 28)

        h = self.upscore8(h)  # (batch_size, 21, 224, 224)

        # Guarantee the score map has exactly the input resolution.
        return self._match_size(h, x)

#### Trainer

In [None]:
import warnings
warnings.filterwarnings(action='ignore')

class trainer:
  """
  Minimal training loop for a segmentation network.

  Args:
    model: network to train; moved to the GPU when ``use_cuda`` is True.
    train_loader: iterable yielding ``(image_id, inputs, targets)`` batches.
    opt: optimizer name, ``"SGD"`` or ``"Adam"``.
    epoch_size: number of passes over ``train_loader``.
    learning_rate: learning rate handed to the optimizer.
    use_cuda: run model and batches on the GPU.

  The loss is cross-entropy with target value 255 ignored.
  """

  def __init__(self, model, train_loader, opt, epoch_size=10, learning_rate=1e-2, use_cuda=True ):
    self.use_cuda = use_cuda # whether to run on the GPU
    if use_cuda :
      self.net = model.cuda() # move the model to the GPU
    else :
      self.net = model
    self.train_loader = train_loader
    self.opt = opt # name of the optimizer to build in train()
    self.epoch_size = epoch_size # number of epochs
    self.learning_rate = learning_rate
    self.criterion = nn.CrossEntropyLoss(ignore_index=255)

  def _build_optimizer(self):
    """Create the optimizer named by ``self.opt``; raise ValueError if unsupported."""
    optimizer_classes = {"SGD": optim.SGD, "Adam": optim.Adam}
    if self.opt not in optimizer_classes:
      raise ValueError(
        "Unsupported optimizer: {!r} (expected 'SGD' or 'Adam')".format(self.opt)
      )
    return optimizer_classes[self.opt](self.net.parameters(), lr=self.learning_rate)

  def train(self):
    """
    Train ``self.net`` for ``self.epoch_size`` epochs.

    Prints the number of batches once, then every 30 iterations the iteration
    index, the loss accumulated since the start of the epoch and the loss of
    the current batch.

    Raises:
      ValueError: if ``self.opt`` is not a supported optimizer name.
    """
    # Fail early with a clear message instead of an unbound `optimizer` later on.
    optimizer = self._build_optimizer()
    print(len(self.train_loader))
    for epoch in tqdm(range(self.epoch_size)):
      self.net.train() # training mode (e.g. dropout active)
      iter_loss = 0

      for i, (image_id, inputs, targets) in enumerate(self.train_loader):
        if self.use_cuda: # move the batch to the GPU
          inputs = inputs.cuda()
          targets = targets.cuda()

        if i%30 == 0:
          print("iter: ",i)

        # reset gradients accumulated by the previous step
        optimizer.zero_grad()
        outputs = self.net(inputs) # forward
        loss = self.criterion(outputs, targets) # loss
        loss.backward() # backward -> gradients
        optimizer.step() # update weights and biases
        iter_loss += loss.item()

        if i!=0 and i%30 == 0:
          print("loss: ", iter_loss)    # sum of batch losses so far in this epoch
          print("loss: ", loss.item())  # current batch loss as a plain float

#### Training & Validation

In [None]:
from torch.utils.model_zoo import load_url

def remove_layer(state_dict, keyword):
    """Delete, in place, every entry whose key contains ``keyword``; return the same dict."""
    # Collect matches first so the dict is not modified while being iterated.
    matching_keys = [key for key in state_dict if keyword in key]
    for key in matching_keys:
        del state_dict[key]
    return state_dict

def replace_layer(state_dict, keyword1, keyword2):
    """
    Rename, in place, every key that contains ``keyword1`` by substituting
    ``keyword2`` for it; return the same dict.

    A renamed entry is removed and re-inserted, so in an ordered dict it ends
    up after the entries that were left untouched.
    """
    for old_key in list(state_dict):
        if keyword1 not in old_key:
            continue
        renamed_key = old_key.replace(keyword1, keyword2)
        value = state_dict.pop(old_key)
        state_dict[renamed_key] = value
    return state_dict

model_urls = {
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth'
}

net = VGG16_FCN(n_class=21).cuda()

# Download the pretrained VGG16 checkpoint and drop its 'classifier.' entries,
# which have no counterpart in the fully convolutional model.
state_dict = load_url(model_urls['vgg16'], progress=True)
state_dict = remove_layer(state_dict, 'classifier.')

# Rename the remaining checkpoint keys to this model's parameter names by
# position: the i-th checkpoint key is paired with the i-th named parameter.
# zip() stops at the shorter sequence, so the extra FCN layers are left alone.
# (Loop variables are named so they do not overwrite the `name` entered above.)
key_list = list(state_dict.keys())
param_names = [param_name for param_name, _ in net.named_parameters()]
for pretrained_key, param_name in zip(key_list, param_names):
    state_dict = replace_layer(state_dict, pretrained_key, param_name)

# strict=False tolerates the layers that are missing from the checkpoint;
# a checkpoint key the model does not know would mean the renaming went wrong.
load_result = net.load_state_dict(state_dict, strict=False)
assert not load_result.unexpected_keys, (
    "pretrained keys not matched to the model: {}".format(load_result.unexpected_keys)
)

trainer_FCN = trainer(net, trainloader, "SGD", epoch_size=3, learning_rate=1e-3)
trainer_FCN.train()

183


  0%|          | 0/3 [00:00<?, ?it/s]

iter:  0
iter:  30
loss:  142.49564290046692
loss:  tensor(3.5343, device='cuda:0', grad_fn=<NllLoss2DBackward>)
iter:  60
loss:  211.08659529685974
loss:  tensor(1.7048, device='cuda:0', grad_fn=<NllLoss2DBackward>)
iter:  90
loss:  273.1525123119354
loss:  tensor(2.0010, device='cuda:0', grad_fn=<NllLoss2DBackward>)
iter:  120
loss:  335.3587040901184
loss:  tensor(1.7154, device='cuda:0', grad_fn=<NllLoss2DBackward>)
iter:  150
loss:  396.6994217634201
loss:  tensor(1.6492, device='cuda:0', grad_fn=<NllLoss2DBackward>)
iter:  180
loss:  456.0136696100235
loss:  tensor(2.2027, device='cuda:0', grad_fn=<NllLoss2DBackward>)


 33%|███▎      | 1/3 [06:47<13:34, 407.18s/it]

iter:  0
iter:  30
loss:  59.72471272945404
loss:  tensor(1.6166, device='cuda:0', grad_fn=<NllLoss2DBackward>)
iter:  60
loss:  116.65999984741211
loss:  tensor(1.6076, device='cuda:0', grad_fn=<NllLoss2DBackward>)
iter:  90
loss:  171.96543431282043
loss:  tensor(2.0568, device='cuda:0', grad_fn=<NllLoss2DBackward>)
iter:  120
loss:  229.7346646785736
loss:  tensor(1.5906, device='cuda:0', grad_fn=<NllLoss2DBackward>)
iter:  150
loss:  282.92457258701324
loss:  tensor(1.5006, device='cuda:0', grad_fn=<NllLoss2DBackward>)
iter:  180
loss:  337.72025883197784
loss:  tensor(2.0240, device='cuda:0', grad_fn=<NllLoss2DBackward>)


 67%|██████▋   | 2/3 [13:33<06:46, 406.51s/it]

iter:  0
iter:  30
loss:  58.1079021692276
loss:  tensor(1.0321, device='cuda:0', grad_fn=<NllLoss2DBackward>)
iter:  60
loss:  113.13843560218811
loss:  tensor(2.4319, device='cuda:0', grad_fn=<NllLoss2DBackward>)
iter:  90
loss:  168.06399524211884
loss:  tensor(2.0649, device='cuda:0', grad_fn=<NllLoss2DBackward>)
iter:  120
loss:  223.30292344093323
loss:  tensor(2.2498, device='cuda:0', grad_fn=<NllLoss2DBackward>)
iter:  150
loss:  275.9653570652008
loss:  tensor(2.3770, device='cuda:0', grad_fn=<NllLoss2DBackward>)
iter:  180
loss:  329.4944885969162
loss:  tensor(1.4929, device='cuda:0', grad_fn=<NllLoss2DBackward>)


100%|██████████| 3/3 [20:19<00:00, 406.55s/it]
