In [1]:
import random
import math
import time
import pandas as pd
import numpy as np

import torch
import torch.utils.data as data
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
import torch.optim as optim

In [2]:
torch.manual_seed(1234)
np.random.seed(1234)
random.seed(1234)

In [3]:
from utils.dataloader import make_datapath_list, DataTransform, VOCDataset

rootpath = "./data/VOCdevkit/VOC2012/"
train_img_list, train_anno_list, val_img_list, val_anno_list = make_datapath_list(
    rootpath=rootpath)

color_mean = (0.485, 0.456, 0.406)
color_std = (0.229, 0.224, 0.225)

train_dataset = VOCDataset(train_img_list, train_anno_list, phase="train", transform=DataTransform(
    input_size=475, color_mean=color_mean, color_std=color_std))

val_dataset = VOCDataset(val_img_list, val_anno_list, phase="val", transform=DataTransform(
    input_size=475, color_mean=color_mean, color_std=color_std))

batch_size = 16

train_dataloader = data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)

val_dataloader = data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False)

dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}


In [4]:
from utils.pspnet import PSPNet

net = PSPNet(n_classes=150)

state_dict = torch.load("./weights/pspnet50_ADE20K.pth")
net.load_state_dict(state_dict)

# 분류용 합성곱 층을 출력 수 21로 바꾼다.
n_classes = 21
net.decode_feature.classification = nn.Conv2d(
    in_channels=512, out_channels=n_classes, kernel_size=1, stride=1, padding=0)

net.aux.classification = nn.Conv2d(
    in_channels=256, out_channels=n_classes, kernel_size=1, stride=1, padding=0)

# 교체한 합성곱 층 초기화, 활성화 함수는 시그모이드 함수이므로 Xavier 사용
def weights_init(m):
    if isinstance(m, nn.Conv2d):
        nn.init.xavier_normal_(m.weight.data)
        if m.bias is not None:  
            nn.init.constant_(m.bias, 0.0)


net.decode_feature.classification.apply(weights_init)
net.aux.classification.apply(weights_init)

Conv2d(256, 21, kernel_size=(1, 1), stride=(1, 1))

In [5]:
net

PSPNet(
  (feature_conv): FeatureMap_convolution(
    (cbnr_1): conv2DBatchNormRelu(
      (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (cbnr_2): conv2DBatchNormRelu(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (cbnr_3): conv2DBatchNormRelu(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (feature_res_1): ResidualBlockPSP(
    (block1): bottleNec

In [6]:
class PSPLoss(nn.Module):

    def __init__(self, aux_weight=0.4):
        super(PSPLoss, self).__init__()
        self.aux_weight = aux_weight 

    def forward(self, outputs, targets):
        loss = F.cross_entropy(outputs[0], targets, reduction='mean')
        loss_aux = F.cross_entropy(outputs[1], targets, reduction='mean')

        return loss+self.aux_weight*loss_aux


criterion = PSPLoss(aux_weight=0.4)


In [7]:
optimizer = optim.SGD([
    {'params': net.feature_conv.parameters(), 'lr': 1e-3},
    {'params': net.feature_res_1.parameters(), 'lr': 1e-3},
    {'params': net.feature_res_2.parameters(), 'lr': 1e-3},
    {'params': net.feature_dilated_res_1.parameters(), 'lr': 1e-3},
    {'params': net.feature_dilated_res_2.parameters(), 'lr': 1e-3},
    {'params': net.pyramid_pooling.parameters(), 'lr': 1e-3},
    {'params': net.decode_feature.parameters(), 'lr': 1e-2},
    {'params': net.aux.parameters(), 'lr': 1e-2},
], momentum=0.9, weight_decay=0.0001)


def lambda_epoch(epoch):
    max_epoch = 30
    return math.pow((1-epoch/max_epoch), 0.9)


scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_epoch)


In [8]:
def train_model(net, dataloaders_dict, criterion, scheduler, optimizer, num_epochs):

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    net.to(device)

    torch.backends.cudnn.benchmark = True

    num_train_imgs = len(dataloaders_dict["train"].dataset)
    num_val_imgs = len(dataloaders_dict["val"].dataset)
    batch_size = dataloaders_dict["train"].batch_size

    # 반복자의 카운터 설정
    iteration = 1
    logs = []

    # multiple minibatch
    batch_multiplier = 3

    for epoch in range(num_epochs):

        t_epoch_start = time.time()
        t_iter_start = time.time()
        epoch_train_loss = 0.0 
        epoch_val_loss = 0.0 

        print('-------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-------------')

        # 에폭별 훈련 및 검증 루프
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train() # 모델을 훈련 모드로
                scheduler.step() # 최적화 스케줄러 갱신
                optimizer.zero_grad()
                print('（train）')

            else:
                if((epoch+1) % 5 == 0):
                    net.eval() # 모델을 검증 모드로
                    print('-------------')
                    print('（val）')
                else:
                   
                    continue

            count = 0  # multiple minibatch
            for imges, anno_class_imges in dataloaders_dict[phase]:
                # 미니 배치 크기가 1이면 배치 정규화에서 오류가 발생하여 피한다.
                if imges.size()[0] == 1:
                    continue
                    
                imges = imges.to(device)
                anno_class_imges = anno_class_imges.to(device)
    
                # 멀티플 미니 배치로 파라미터 갱신
                if (phase == 'train') and (count == 0):
                    optimizer.step()
                    optimizer.zero_grad()
                    count = batch_multiplier

                # 순전파 계산
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = net(imges)
                    loss = criterion(
                        outputs, anno_class_imges.long()) / batch_multiplier

                    # 훈련 시에는 역전파
                    if phase == 'train':
                        loss.backward() # 경사 계산 
                        count -= 1  # multiple minibatch

                        if (iteration % 10 == 0):  
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('{} || Loss: {:.4f} || 10iter: {:.4f} sec.'.format(
                                iteration, loss.item()/batch_size*batch_multiplier, duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss.item() * batch_multiplier
                        iteration += 1

                    else:
                        epoch_val_loss += loss.item() * batch_multiplier

        t_epoch_finish = time.time()
        print('-------------')
        print('epoch {} || Epoch_TRAIN_Loss:{:.4f} ||Epoch_VAL_Loss:{:.4f}'.format(
            epoch+1, epoch_train_loss/num_train_imgs, epoch_val_loss/num_val_imgs))
        print('timer:  {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))
        t_epoch_start = time.time()

        log_epoch = {'epoch': epoch+1, 'train_loss': epoch_train_loss /
                     num_train_imgs, 'val_loss': epoch_val_loss/num_val_imgs}
        logs.append(log_epoch)
        df = pd.DataFrame(logs)
        df.to_csv("log_output.csv")

    torch.save(net.state_dict(), 'weights/pspnet50_' +
               str(epoch+1) + '.pth')


In [9]:
num_epochs = 30
train_model(net, dataloaders_dict, criterion, scheduler, optimizer, num_epochs=num_epochs)


cuda:0
-------------
Epoch 1/30
-------------
（train）




10 || Loss: 0.2051 || 10iter: 24.2783 sec.
20 || Loss: 0.1111 || 10iter: 13.8646 sec.
30 || Loss: 0.1252 || 10iter: 13.9891 sec.
40 || Loss: 0.1190 || 10iter: 14.0360 sec.
50 || Loss: 0.0668 || 10iter: 14.0472 sec.
60 || Loss: 0.0866 || 10iter: 14.1550 sec.
70 || Loss: 0.0907 || 10iter: 14.0014 sec.
80 || Loss: 0.1052 || 10iter: 14.0917 sec.
90 || Loss: 0.0534 || 10iter: 14.1339 sec.
-------------
epoch 1 || Epoch_TRAIN_Loss:0.1205 ||Epoch_VAL_Loss:0.0000
timer:  158.6779 sec.
-------------
Epoch 2/30
-------------
（train）
100 || Loss: 0.0785 || 10iter: 10.9121 sec.
110 || Loss: 0.0738 || 10iter: 13.9288 sec.
120 || Loss: 0.0459 || 10iter: 13.9713 sec.
130 || Loss: 0.0619 || 10iter: 13.9901 sec.
140 || Loss: 0.0632 || 10iter: 13.9975 sec.
150 || Loss: 0.0478 || 10iter: 14.0216 sec.
160 || Loss: 0.0893 || 10iter: 14.0104 sec.
170 || Loss: 0.0533 || 10iter: 13.9985 sec.
180 || Loss: 0.0452 || 10iter: 14.0042 sec.
-------------
epoch 2 || Epoch_TRAIN_Loss:0.0538 ||Epoch_VAL_Loss:0.0000
ti

1390 || Loss: 0.0257 || 10iter: 16.1629 sec.
1400 || Loss: 0.0179 || 10iter: 16.1350 sec.
1410 || Loss: 0.0209 || 10iter: 16.1234 sec.
1420 || Loss: 0.0213 || 10iter: 16.1583 sec.
1430 || Loss: 0.0226 || 10iter: 16.1158 sec.
1440 || Loss: 0.0303 || 10iter: 16.1088 sec.
1450 || Loss: 0.0247 || 10iter: 16.1344 sec.
1460 || Loss: 0.0191 || 10iter: 16.1330 sec.
1470 || Loss: 0.0213 || 10iter: 16.1334 sec.
-------------
epoch 16 || Epoch_TRAIN_Loss:0.0245 ||Epoch_VAL_Loss:0.0000
timer:  161.9510 sec.
-------------
Epoch 17/30
-------------
（train）
1480 || Loss: 0.0331 || 10iter: 12.5952 sec.
1490 || Loss: 0.0195 || 10iter: 16.1378 sec.
1500 || Loss: 0.0210 || 10iter: 16.1647 sec.
1510 || Loss: 0.0258 || 10iter: 16.1246 sec.
1520 || Loss: 0.0212 || 10iter: 16.1369 sec.
1530 || Loss: 0.0311 || 10iter: 16.1379 sec.
1540 || Loss: 0.0293 || 10iter: 16.1221 sec.
1550 || Loss: 0.0177 || 10iter: 16.1113 sec.
1560 || Loss: 0.0210 || 10iter: 16.1272 sec.
-------------
epoch 17 || Epoch_TRAIN_Loss:0.0

2760 || Loss: 0.0264 || 10iter: 16.0467 sec.
-------------
（val）
-------------
epoch 30 || Epoch_TRAIN_Loss:0.0230 ||Epoch_VAL_Loss:0.0373
timer:  204.0129 sec.
