## 2018년 05월 18일 16시 21분 30초에 추가 ##
- Training FCN_RGBD_renet on NYUDv2

In [1]:
%matplotlib inline
import os
import collections
import torch
import torchvision
import numpy as np
import scipy.misc as m
import scipy.io as io
import matplotlib.pyplot as plt

from NYUDv2Loader import *

# --- Notebook-level run configuration ---------------------------------------
# Hyper-parameters in command-line form; split and parsed by argparse further down.
arg_string = '--arch FCN_RGBD_renet --batch_size 3 --n_epoch 50'
# Root directory of the NYUDv2 dataset, passed to NYUDv2Loader.
data_path = '/home/dongwonshin/Desktop/Datasets/NYUDv2/'
# Expose only GPU 5 to this process; set here, before the first .cuda() call.
os.environ.update({'CUDA_VISIBLE_DEVICES': '5'})

# Argument setting

In [2]:
import sys, os
import torch
import visdom
import argparse
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

from torch.autograd import Variable
from torch.utils import data
from tqdm import tqdm

from ptsemseg.models import get_model
from ptsemseg.loader import get_loader, get_data_path
from ptsemseg.metrics import runningScore
from ptsemseg.loss import *
from ptsemseg.augmentations import *


# Hyper-parameter command line for the training run. Options are registered in
# a fixed order so the generated --help listing keeps the same layout.
parser = argparse.ArgumentParser(description='Hyperparams')


def _value_option(flag, caster, fallback, description):
    """Register an option taking at most one value (nargs='?').

    Omitting the option yields `fallback`; giving the flag with no value
    yields None because no `const` is supplied.
    """
    parser.add_argument(flag, nargs='?', type=caster, default=fallback, help=description)


def _switch_pair(dest, fallback, enable_help, disable_help):
    """Register --<dest> / --no-<dest> flags that store True / False in `dest`."""
    parser.add_argument('--' + dest, dest=dest, action='store_true', help=enable_help)
    parser.add_argument('--no-' + dest, dest=dest, action='store_false', help=disable_help)
    parser.set_defaults(**{dest: fallback})


# Network architecture and input geometry.
_value_option('--arch', str, 'fcn8s', "Architecture to use ['fcn8s, unet, segnet etc']")
_value_option('--img_rows', int, 256, 'Height of the input image')
_value_option('--img_cols', int, 256, 'Width of the input image')

# Input normalisation toggle (on unless --no-img_norm is given).
_switch_pair('img_norm', True,
             'Enable input image scales normalization [0, 1] | True by default',
             'Disable input image scales normalization [0, 1] | True by default')

# Optimisation schedule and checkpoint to resume from.
_value_option('--n_epoch', int, 10, '# of the epochs')
_value_option('--batch_size', int, 1, 'Batch Size')
_value_option('--l_rate', float, 1e-5, 'Learning Rate')
_value_option('--feature_scale', int, 1, 'Divider for # of features to use')
_value_option('--resume', str, None, 'Path to previous saved model to restart from')

# Live loss plotting toggle (off unless --visdom is given).
_switch_pair('visdom', False,
             'Enable visualization(s) on visdom | False by default',
             'Disable visualization(s) on visdom | False by default')

# Model init

In [3]:
# Parse the notebook-level argument string. split() with no separator collapses
# repeated / leading / trailing whitespace; split(' ') would instead hand
# argparse empty-string tokens and make it abort.
args = parser.parse_args(arg_string.split())

# Setup Dataloader
t_loader = NYUDv2Loader(data_path, is_transform=True)
v_loader = NYUDv2Loader(data_path, is_transform=True, split='val')

n_classes = t_loader.n_classes
trainloader = data.DataLoader(t_loader, batch_size=args.batch_size, num_workers=16, shuffle=True)
valloader = data.DataLoader(v_loader, batch_size=args.batch_size, num_workers=16)

# Setup Metrics
running_metrics = runningScore(n_classes)

# Setup visdom for visualization (loss_window is only defined, and only used,
# when --visdom was requested)
if args.visdom:
    vis = visdom.Visdom()

    loss_window = vis.line(X=torch.zeros((1,)).cpu(),
                           Y=torch.zeros((1)).cpu(),
                           opts=dict(xlabel='minibatches',
                                     ylabel='Loss',
                                     title='Training Loss',
                                     legend=['Loss']))

# Setup Model: wrap in DataParallel over every visible GPU, then move to GPU.
# device_ids is materialised as a list rather than passed as a lazy range.
model = get_model(args.arch, n_classes)

model = torch.nn.DataParallel(model, device_ids=list(range(torch.cuda.device_count())))
model.cuda()

# Check if model has custom optimizer / loss; otherwise fall back to SGD and
# cross_entropy2d.
if hasattr(model.module, 'optimizer'):
    optimizer = model.module.optimizer
else:
    optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate, momentum=0.99, weight_decay=5e-4)

if hasattr(model.module, 'loss'):
    print('Using custom loss')
    loss_fn = model.module.loss
else:
    loss_fn = cross_entropy2d

# Optionally restore model and optimizer state from a checkpoint written by the
# training loop (keys: 'epoch', 'model_state', 'optimizer_state').
if args.resume is not None:
    if os.path.isfile(args.resume):
        print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
        # Deserialize onto the CPU. Without map_location every tensor in the
        # checkpoint is restored onto the GPU it was saved from, so the weights
        # and optimizer state would sit in GPU memory a second time next to the
        # live model. load_state_dict copies the values into the existing
        # parameters.
        # NOTE(review): relies on optimizer.load_state_dict moving its state to
        # the parameters' device -- confirm on the installed PyTorch version.
        checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint['model_state'])
        optimizer.load_state_dict(checkpoint['optimizer_state'])
        print("Loaded checkpoint '{}' (epoch {})"
              .format(args.resume, checkpoint['epoch']))
        # Drop the reference so the deserialized copy can be reclaimed.
        del checkpoint
    else:
        print("No checkpoint found at '{}'".format(args.resume))


# Training

In [4]:
# Train for args.n_epoch epochs. After each epoch the model is evaluated on the
# validation loader, the metrics are printed, and the weights are checkpointed
# whenever the mean IoU matches or beats the best value seen so far.
# NOTE: this cell keeps the pre-0.4 PyTorch idioms used by the notebook
# (Variable, volatile=True, loss.data[0]).
best_iou = -100.0
checkpoint_path = "../model_weights/{}_{}_best_model.pkl".format(args.arch, 'NYUDv2')
batches_per_epoch = len(trainloader)

for epoch in range(args.n_epoch):
    model.train()
    for i, (color_imgs, depth_imgs, label_imgs) in enumerate(trainloader):
        color_imgs = Variable(color_imgs.cuda())
        depth_imgs = Variable(depth_imgs.cuda())
        label_imgs = Variable(label_imgs.cuda())

        optimizer.zero_grad()
        outputs = model(color_imgs, depth_imgs)

        loss = loss_fn(input=outputs, target=label_imgs)

        loss.backward()
        optimizer.step()

        loss_value = loss.data[0]

        if args.visdom:
            # Plot against the global minibatch index. Using the per-epoch
            # index `i` alone restarts the x axis at 0 every epoch and draws
            # each epoch's curve on top of the previous one.
            global_step = epoch * batches_per_epoch + i
            vis.line(
                X=torch.ones((1, 1)).cpu() * global_step,
                Y=torch.Tensor([loss_value]).unsqueeze(0).cpu(),
                win=loss_window,
                update='append')

        if (i+1) % 100 == 0:
            print("Epoch [%d/%d] Loss: %.4f" % (epoch+1, args.n_epoch, loss_value))

    model.eval()
    # tqdm wraps the loader itself (not enumerate(...)) so it can read the
    # loader's length and show a total / progress bar.
    for i_val, (color_images_val, depth_images_val, label_images_val) in enumerate(tqdm(valloader)):
        color_images_val = Variable(color_images_val.cuda(), volatile=True)
        depth_images_val = Variable(depth_images_val.cuda(), volatile=True)

        outputs = model(color_images_val, depth_images_val)
        # Per-pixel predicted class: index of the maximum along dim 1.
        pred = outputs.data.max(1)[1].cpu().numpy()
        # Labels are only consumed as a NumPy array, so they stay on the CPU
        # instead of making a round trip through the GPU.
        gt = label_images_val.numpy()
        running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()
    for k, v in score.items():
        print(k, v)
    running_metrics.reset()

    mean_iou = score['Mean IoU : \t']
    if mean_iou >= best_iou:
        best_iou = mean_iou
        state = {'epoch': epoch+1,
                 'model_state': model.state_dict(),
                 'optimizer_state': optimizer.state_dict()}
        # Create the output directory on first use so torch.save cannot fail
        # on a missing folder after a full epoch of training.
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
        torch.save(state, checkpoint_path)



Epoch [1/50] Loss: 2.2531
Epoch [1/50] Loss: 2.0094


218it [00:48,  4.50it/s]


Overall Acc: 	 0.326301132653
Mean Acc : 	 0.123291945813
FreqW Acc : 	 0.156236735006
Mean IoU : 	 0.0636114287806
Epoch [2/50] Loss: 1.7268
Epoch [2/50] Loss: 1.8402


218it [00:48,  4.46it/s]


Overall Acc: 	 0.372960671708
Mean Acc : 	 0.151892112064
FreqW Acc : 	 0.204148830252
Mean IoU : 	 0.085096121965
Epoch [3/50] Loss: 1.7506
Epoch [3/50] Loss: 1.7429


218it [00:48,  4.50it/s]


Overall Acc: 	 0.422661665392
Mean Acc : 	 0.201016299491
FreqW Acc : 	 0.248437624709
Mean IoU : 	 0.124030193885
Epoch [4/50] Loss: 1.7795
Epoch [4/50] Loss: 1.7651


218it [00:48,  4.50it/s]


Overall Acc: 	 0.452760003544
Mean Acc : 	 0.249636828681
FreqW Acc : 	 0.277215394706
Mean IoU : 	 0.154632817362
Epoch [5/50] Loss: 2.1855
Epoch [5/50] Loss: 1.5850


218it [00:48,  4.52it/s]


Overall Acc: 	 0.473674025229
Mean Acc : 	 0.26944594943
FreqW Acc : 	 0.294550211352
Mean IoU : 	 0.178121234106
Epoch [6/50] Loss: 1.5832
Epoch [6/50] Loss: 1.3626


218it [00:48,  4.48it/s]


Overall Acc: 	 0.489682467866
Mean Acc : 	 0.291108615921
FreqW Acc : 	 0.313475969337
Mean IoU : 	 0.192805028642
Epoch [7/50] Loss: 1.6771
Epoch [7/50] Loss: 1.5760


218it [00:48,  4.49it/s]


Overall Acc: 	 0.498772736685
Mean Acc : 	 0.305408846438
FreqW Acc : 	 0.322675241308
Mean IoU : 	 0.203173251256
Epoch [8/50] Loss: 1.7269
Epoch [8/50] Loss: 1.4646


218it [00:48,  4.54it/s]


Overall Acc: 	 0.509740504149
Mean Acc : 	 0.316237748696
FreqW Acc : 	 0.335479353907
Mean IoU : 	 0.213087797285
Epoch [9/50] Loss: 1.2506
Epoch [9/50] Loss: 1.1340


218it [00:48,  4.52it/s]


Overall Acc: 	 0.511815236565
Mean Acc : 	 0.316545242404
FreqW Acc : 	 0.332999872037
Mean IoU : 	 0.215819009213
Epoch [10/50] Loss: 1.5122
Epoch [10/50] Loss: 1.4629


218it [00:48,  4.52it/s]


Overall Acc: 	 0.522860586495
Mean Acc : 	 0.341179057917
FreqW Acc : 	 0.348717729658
Mean IoU : 	 0.233671141728
Epoch [11/50] Loss: 1.1131
Epoch [11/50] Loss: 1.4300


218it [00:47,  4.54it/s]


Overall Acc: 	 0.528945591233
Mean Acc : 	 0.362956402259
FreqW Acc : 	 0.358850648137
Mean IoU : 	 0.249020629406
Epoch [12/50] Loss: 1.4047
Epoch [12/50] Loss: 1.4026


218it [00:48,  4.51it/s]


Overall Acc: 	 0.534543270379
Mean Acc : 	 0.360594212165
FreqW Acc : 	 0.363062308225
Mean IoU : 	 0.253912602502
Epoch [13/50] Loss: 1.4356
Epoch [13/50] Loss: 1.3193


218it [00:47,  4.55it/s]


Overall Acc: 	 0.536464186735
Mean Acc : 	 0.377907121434
FreqW Acc : 	 0.363760143251
Mean IoU : 	 0.26051503783
Epoch [14/50] Loss: 1.3113
Epoch [14/50] Loss: 1.1115


218it [00:47,  4.55it/s]


Overall Acc: 	 0.543476627206
Mean Acc : 	 0.389053953803
FreqW Acc : 	 0.374446305638
Mean IoU : 	 0.272808312391
Epoch [15/50] Loss: 1.2134
Epoch [15/50] Loss: 1.1988


218it [00:47,  4.61it/s]


Overall Acc: 	 0.548022724739
Mean Acc : 	 0.408733208468
FreqW Acc : 	 0.380548107309
Mean IoU : 	 0.283216357656
Epoch [16/50] Loss: 1.1175
Epoch [16/50] Loss: 2.0094


218it [00:47,  4.55it/s]

Overall Acc: 	 0.548328659571
Mean Acc : 	 0.383211481145
FreqW Acc : 	 0.374535052705
Mean IoU : 	 0.27342441612





Epoch [17/50] Loss: 1.3820
Epoch [17/50] Loss: 1.1289


218it [00:48,  4.54it/s]

Overall Acc: 	 0.55217181129
Mean Acc : 	 0.397748298789
FreqW Acc : 	 0.379833572994
Mean IoU : 	 0.280692817918





Epoch [18/50] Loss: 1.2237
Epoch [18/50] Loss: 1.4158


218it [00:48,  4.50it/s]


Overall Acc: 	 0.554501569867
Mean Acc : 	 0.406947289179
FreqW Acc : 	 0.386949920344
Mean IoU : 	 0.287333345045
Epoch [19/50] Loss: 1.1435
Epoch [19/50] Loss: 1.3299


218it [00:47,  4.56it/s]

Overall Acc: 	 0.555423605138
Mean Acc : 	 0.404263988424
FreqW Acc : 	 0.382443235201
Mean IoU : 	 0.28593893979





Epoch [20/50] Loss: 1.2256
Epoch [20/50] Loss: 1.1663


218it [00:47,  4.55it/s]


Overall Acc: 	 0.560715006013
Mean Acc : 	 0.416162472516
FreqW Acc : 	 0.390704508893
Mean IoU : 	 0.293081545878
Epoch [21/50] Loss: 0.9687
Epoch [21/50] Loss: 1.3340


218it [00:47,  4.55it/s]


Overall Acc: 	 0.563720653351
Mean Acc : 	 0.423354009402
FreqW Acc : 	 0.396068234948
Mean IoU : 	 0.297704662494
Epoch [22/50] Loss: 1.1518
Epoch [22/50] Loss: 1.3957


218it [00:47,  4.56it/s]


Overall Acc: 	 0.564755132677
Mean Acc : 	 0.422787550962
FreqW Acc : 	 0.397431583421
Mean IoU : 	 0.300902592887
Epoch [23/50] Loss: 1.1387
Epoch [23/50] Loss: 0.7871


218it [00:47,  4.56it/s]

Overall Acc: 	 0.565454390251
Mean Acc : 	 0.4245358361
FreqW Acc : 	 0.393764451627
Mean IoU : 	 0.297572746684





Epoch [24/50] Loss: 1.2390
Epoch [24/50] Loss: 1.1085


218it [00:48,  4.54it/s]


Overall Acc: 	 0.567769495413
Mean Acc : 	 0.425499416687
FreqW Acc : 	 0.401549990536
Mean IoU : 	 0.304236491867
Epoch [25/50] Loss: 1.1644
Epoch [25/50] Loss: 1.2466


218it [00:48,  4.51it/s]


Overall Acc: 	 0.570698555763
Mean Acc : 	 0.434894165266
FreqW Acc : 	 0.402783630289
Mean IoU : 	 0.30832605442
Epoch [26/50] Loss: 1.1330
Epoch [26/50] Loss: 0.8983


218it [00:48,  4.50it/s]


Overall Acc: 	 0.571072028702
Mean Acc : 	 0.444391803055
FreqW Acc : 	 0.406088063217
Mean IoU : 	 0.310497616682
Epoch [27/50] Loss: 0.9650
Epoch [27/50] Loss: 1.6611


218it [00:48,  4.50it/s]

Overall Acc: 	 0.570542022052
Mean Acc : 	 0.429854466794
FreqW Acc : 	 0.400576712341
Mean IoU : 	 0.307256057231





Epoch [28/50] Loss: 0.8578
Epoch [28/50] Loss: 1.1171


218it [00:47,  4.56it/s]

Overall Acc: 	 0.572807477821
Mean Acc : 	 0.436051725838
FreqW Acc : 	 0.401954208447
Mean IoU : 	 0.308464407419





Epoch [29/50] Loss: 1.0509
Epoch [29/50] Loss: 1.0682


218it [00:48,  4.53it/s]


Overall Acc: 	 0.5755631162
Mean Acc : 	 0.441615836951
FreqW Acc : 	 0.406833717303
Mean IoU : 	 0.314418327307
Epoch [30/50] Loss: 1.0010
Epoch [30/50] Loss: 1.0385


218it [00:48,  4.52it/s]

Overall Acc: 	 0.575300225774
Mean Acc : 	 0.443217662933
FreqW Acc : 	 0.408188254612
Mean IoU : 	 0.313858204





Epoch [31/50] Loss: 0.8934
Epoch [31/50] Loss: 0.9008


218it [00:48,  4.50it/s]

Overall Acc: 	 0.574366931663
Mean Acc : 	 0.439017263155
FreqW Acc : 	 0.401654505333
Mean IoU : 	 0.309377597339





Epoch [32/50] Loss: 1.2762
Epoch [32/50] Loss: 0.9281


218it [00:48,  4.51it/s]


Overall Acc: 	 0.577780694524
Mean Acc : 	 0.442525950959
FreqW Acc : 	 0.413346836535
Mean IoU : 	 0.317585361473
Epoch [33/50] Loss: 1.0615
Epoch [33/50] Loss: 1.1330


218it [00:48,  4.48it/s]


Overall Acc: 	 0.579834556774
Mean Acc : 	 0.448587895929
FreqW Acc : 	 0.412571839787
Mean IoU : 	 0.320752174188
Epoch [34/50] Loss: 1.1577
Epoch [34/50] Loss: 0.8077


218it [00:48,  4.51it/s]

Overall Acc: 	 0.580532047377
Mean Acc : 	 0.448835868155
FreqW Acc : 	 0.411440764499
Mean IoU : 	 0.319714094047





Epoch [35/50] Loss: 1.0215
Epoch [35/50] Loss: 0.9649


218it [00:47,  4.56it/s]

Overall Acc: 	 0.579089611804
Mean Acc : 	 0.447815595254
FreqW Acc : 	 0.408323756085
Mean IoU : 	 0.318169620605





Epoch [36/50] Loss: 0.8982
Epoch [36/50] Loss: 1.0057


218it [00:48,  4.48it/s]

Overall Acc: 	 0.58022895463
Mean Acc : 	 0.442905558103
FreqW Acc : 	 0.408602963186
Mean IoU : 	 0.317188561722





Epoch [37/50] Loss: 1.3056
Epoch [37/50] Loss: 1.2126


218it [00:48,  4.51it/s]


Overall Acc: 	 0.581634438113
Mean Acc : 	 0.45816036473
FreqW Acc : 	 0.413485787488
Mean IoU : 	 0.323300450578
Epoch [38/50] Loss: 1.0532
Epoch [38/50] Loss: 1.1128


218it [00:48,  4.49it/s]

Overall Acc: 	 0.580160998423
Mean Acc : 	 0.459546267538
FreqW Acc : 	 0.410561868703
Mean IoU : 	 0.322492764552





Epoch [39/50] Loss: 0.6665
Epoch [39/50] Loss: 1.1183


218it [00:48,  4.51it/s]


Overall Acc: 	 0.581557711758
Mean Acc : 	 0.454317665654
FreqW Acc : 	 0.415481693354
Mean IoU : 	 0.32523089178
Epoch [40/50] Loss: 1.0939
Epoch [40/50] Loss: 0.9261


218it [00:48,  4.50it/s]

Overall Acc: 	 0.581258461551
Mean Acc : 	 0.453000951971
FreqW Acc : 	 0.4117264447
Mean IoU : 	 0.323373824523





Epoch [41/50] Loss: 1.1227
Epoch [41/50] Loss: 1.6574


218it [00:48,  4.47it/s]


Overall Acc: 	 0.584897814332
Mean Acc : 	 0.457913493084
FreqW Acc : 	 0.417084179815
Mean IoU : 	 0.327117778353
Epoch [42/50] Loss: 1.0160
Epoch [42/50] Loss: 1.0697


218it [00:48,  4.49it/s]

Overall Acc: 	 0.58521218085
Mean Acc : 	 0.447999417562
FreqW Acc : 	 0.415686468682
Mean IoU : 	 0.324185906112





Epoch [43/50] Loss: 0.8688
Epoch [43/50] Loss: 1.1714


218it [00:48,  4.53it/s]

Overall Acc: 	 0.583019136046
Mean Acc : 	 0.447725496938
FreqW Acc : 	 0.410954525603
Mean IoU : 	 0.322704865249





Epoch [44/50] Loss: 0.7669
Epoch [44/50] Loss: 1.2120


218it [00:48,  4.49it/s]


Overall Acc: 	 0.585180828316
Mean Acc : 	 0.474134274456
FreqW Acc : 	 0.418692774804
Mean IoU : 	 0.332561518458
Epoch [45/50] Loss: 0.8481
Epoch [45/50] Loss: 0.9943


218it [00:48,  4.50it/s]


Overall Acc: 	 0.583711639311
Mean Acc : 	 0.476045591334
FreqW Acc : 	 0.420865544301
Mean IoU : 	 0.334047478852
Epoch [46/50] Loss: 0.9349
Epoch [46/50] Loss: 0.9936


218it [00:48,  4.53it/s]

Overall Acc: 	 0.587301840437
Mean Acc : 	 0.460384711144
FreqW Acc : 	 0.418277499474
Mean IoU : 	 0.330041010021





Epoch [47/50] Loss: 0.8662
Epoch [47/50] Loss: 0.8632


218it [00:48,  4.50it/s]

Overall Acc: 	 0.585668074271
Mean Acc : 	 0.466019744453
FreqW Acc : 	 0.416510650496
Mean IoU : 	 0.330922623476





Epoch [48/50] Loss: 1.1642
Epoch [48/50] Loss: 0.8938


218it [00:48,  4.52it/s]

Overall Acc: 	 0.58644401838
Mean Acc : 	 0.462160385069
FreqW Acc : 	 0.416292061028
Mean IoU : 	 0.330562916175





Epoch [49/50] Loss: 0.7468
Epoch [49/50] Loss: 0.8478


218it [00:48,  4.53it/s]


Overall Acc: 	 0.589420085133
Mean Acc : 	 0.472638649861
FreqW Acc : 	 0.421250140231
Mean IoU : 	 0.336415397128
Epoch [50/50] Loss: 0.9531
Epoch [50/50] Loss: 0.7069


218it [00:48,  4.50it/s]


Overall Acc: 	 0.588800376091
Mean Acc : 	 0.4748038501
FreqW Acc : 	 0.421557183685
Mean IoU : 	 0.338027093385


# Training FCN_RGBD_renet on NYUDv2

In [1]:
from utils import *

# Command-line style configuration for the run: GPU 5, FCN_RGBD_renet on
# NYUDv2 with RGBD input, batch size 3, 50 epochs, resuming from the
# best-model checkpoint and plotting to visdom.
arg_str = (
    '--gpu_idx 5 '
    '--arch FCN_RGBD_renet '
    '--input_type RGBD '
    '--dataset NYUDv2 '
    '--batch_size 3 '
    '--n_epoch 50 '
    '--resume ../model_weights/FCN_RGBD_renet_NYUDv2_best_model.pkl '
    '--visdom'
)

# NOTE(review): `trainer` is presumably the class star-imported from utils; this
# line rebinds the same name to the instance built from arg_str.
trainer = trainer(arg_str)
# Build the model (the captured output shows the checkpoint being loaded), then train.
trainer.model_init()
trainer.training()

Loading model and optimizer from checkpoint '../model_weights/FCN_RGBD_renet_NYUDv2_best_model.pkl'
Loaded checkpoint '../model_weights/FCN_RGBD_renet_NYUDv2_best_model.pkl' (epoch 50)


RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1522182087074/work/torch/lib/THC/generic/THCStorage.cu:58