In [1]:
%matplotlib inline
import os
import collections
import torch
import torchvision
import numpy as np
import scipy.misc as m
import scipy.io as io
import matplotlib.pyplot as plt

from SUNRGBDLoader import *

# Pin this kernel to the GPU with index 1 via the CUDA_VISIBLE_DEVICES variable.
# NOTE(review): presumably this has to run before anything initialises CUDA in
# this process for the restriction to apply -- confirm if cells get reordered.
os.environ.update({'CUDA_VISIBLE_DEVICES': '1'})

In [2]:
import sys, os
import torch
import visdom
import argparse
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

from torch.autograd import Variable
from torch.utils import data
from tqdm import tqdm

from ptsemseg.models import get_model
from ptsemseg.loader import get_loader, get_data_path
from ptsemseg.metrics import runningScore
from ptsemseg.loss import *
from ptsemseg.augmentations import *


# Hyper-parameter parser for the segmentation run.  Options are registered in
# the same order as they appear in --help: model/input geometry, the img_norm
# on/off pair, optimisation settings, and the visdom on/off pair.
parser = argparse.ArgumentParser(description='Hyperparams')

# Valued options all share nargs='?', so a flag given without a value parses
# to None rather than raising.
for _flag, _type, _default, _help in (
        ('--arch', str, 'fcn8s', "Architecture to use ['fcn8s, unet, segnet etc']"),
        ('--img_rows', int, 256, 'Height of the input image'),
        ('--img_cols', int, 256, 'Width of the input image'),
):
    parser.add_argument(_flag, nargs='?', type=_type, default=_default, help=_help)

# --img_norm / --no-img_norm write to the same destination; on unless disabled.
for _flag, _action, _help in (
        ('--img_norm', 'store_true',
         'Enable input image scales normalization [0, 1] | True by default'),
        ('--no-img_norm', 'store_false',
         'Disable input image scales normalization [0, 1] | True by default'),
):
    parser.add_argument(_flag, dest='img_norm', action=_action, help=_help)
parser.set_defaults(img_norm=True)

# Optimisation settings and checkpoint resume path.
for _flag, _type, _default, _help in (
        ('--n_epoch', int, 100, '# of the epochs'),
        ('--batch_size', int, 1, 'Batch Size'),
        ('--l_rate', float, 1e-5, 'Learning Rate'),
        ('--feature_scale', int, 1, 'Divider for # of features to use'),
        ('--resume', str, None, 'Path to previous saved model to restart from'),
):
    parser.add_argument(_flag, nargs='?', type=_type, default=_default, help=_help)

# --visdom / --no-visdom write to the same destination; off unless enabled.
for _flag, _action, _help in (
        ('--visdom', 'store_true',
         'Enable visualization(s) on visdom | False by default'),
        ('--no-visdom', 'store_false',
         'Disable visualization(s) on visdom | False by default'),
):
    parser.add_argument(_flag, dest='visdom', action=_action, help=_help)
parser.set_defaults(visdom=False)

# Drop the loop temporaries so the cell leaves only `parser` behind.
del _flag, _type, _default, _help, _action


In [3]:
# Build everything the training cell needs: parsed hyper-parameters, train/val
# data loaders, the metrics accumulator, the (optional) visdom loss plot, the
# model, its optimizer and loss function, and an optional checkpoint restore.
# Statement order matters: the model is wrapped and moved to the GPU before the
# optimizer is created and before any checkpoint state is loaded into it.

# Parse a fixed token list instead of sys.argv: only the architecture and the
# batch size are overridden, every other option keeps its parser default.
args = parser.parse_args('--arch fcn8s_with_rgbd --batch_size 3'.split(' '))

# Setup Augmentations
# NOTE(review): data_aug is constructed here but is not passed to either
# SUNRGBDLoader below and is not referenced in the training cell -- confirm
# whether augmentation was meant to be applied to the training split.
data_aug= Compose([RandomRotate(10),
                   RandomHorizontallyFlip()])

# Setup Dataloader
# NOTE(review): absolute, machine-specific dataset location; it has to be
# edited to run this notebook anywhere else.
data_path = '/home/dongwonshin/Desktop/Datasets/SUNRGBD/SUNRGBD(meta)/'
t_loader = SUNRGBDLoader(data_path, is_transform=True)
v_loader = SUNRGBDLoader(data_path, is_transform=True, split='val')

# The class count is taken from the training dataset object and reused for the
# metrics accumulator and the model.
n_classes = t_loader.n_classes
# Only the training loader shuffles; both use 16 worker processes.
trainloader = data.DataLoader(t_loader, batch_size=args.batch_size, num_workers=16, shuffle=True)
valloader = data.DataLoader(v_loader, batch_size=args.batch_size, num_workers=16)

# Setup Metrics
running_metrics = runningScore(n_classes)

# Setup visdom for visualization
# `vis` and `loss_window` exist only when --visdom was given; the training cell
# guards its plotting on the same flag.
if args.visdom:
    vis = visdom.Visdom()

    # Seed the line plot with a single (0, 0) point; later points are appended.
    loss_window = vis.line(X=torch.zeros((1,)).cpu(),
                       Y=torch.zeros((1)).cpu(),
                       opts=dict(xlabel='minibatches',
                                 ylabel='Loss',
                                 title='Training Loss',
                                 legend=['Loss']))

# Setup Model
model = get_model(args.arch, n_classes)

# Wrap for multi-GPU execution over every visible device; the underlying
# network stays reachable as model.module, which the checks below rely on.
model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
model.cuda()

# Check if model has custom optimizer / loss
# A model-provided optimizer wins; otherwise fall back to SGD with the parsed
# learning rate, momentum 0.99 and weight decay 5e-4.
if hasattr(model.module, 'optimizer'):
    optimizer = model.module.optimizer
else:
    optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate, momentum=0.99, weight_decay=5e-4)

# Same precedence for the loss: model-provided first, cross_entropy2d otherwise.
if hasattr(model.module, 'loss'):
    print('Using custom loss')
    loss_fn = model.module.loss
else:
    loss_fn = cross_entropy2d

# Optional resume: restore model and optimizer state from a checkpoint dict
# with keys 'model_state', 'optimizer_state' and 'epoch'.  A missing file is
# only reported; execution continues with the freshly initialised model.
# The stored epoch is printed but not used to offset the training loop.
# NOTE(review): torch.load unpickles its input -- only resume from trusted files.
if args.resume is not None:
    if os.path.isfile(args.resume):
        print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['model_state'])
        optimizer.load_state_dict(checkpoint['optimizer_state'])
        print("Loaded checkpoint '{}' (epoch {})"
              .format(args.resume, checkpoint['epoch']))
    else:
        print("No checkpoint found at '{}'".format(args.resume))


NameError: name 'collections' is not defined

# Training

In [5]:
# Train for args.n_epoch epochs.  After every epoch the model is evaluated on
# the validation loader, the aggregate scores are printed, and a checkpoint is
# written whenever the mean IoU matches or beats the best value seen so far.
#
# NOTE(review): this cell uses the pre-0.4 PyTorch idioms already present in
# the notebook (Variable, volatile=True, loss.data[0]); they are kept so the
# cell keeps running in the environment that produced the recorded output.
best_iou = -100.0  # starts below any IoU score, so the first epoch always checkpoints
batches_per_epoch = len(trainloader)
for epoch in range(args.n_epoch):
    model.train()
    for i, (color_imgs, depth_imgs, label_imgs) in enumerate(trainloader):
        images = Variable(color_imgs.cuda())
        depth_images = Variable(depth_imgs.cuda())
        labels = Variable(label_imgs.cuda())

        optimizer.zero_grad()
        outputs = model(images, depth_images)

        loss = loss_fn(input=outputs, target=labels)

        loss.backward()
        optimizer.step()

        if args.visdom:
            # Plot against a run-wide minibatch counter.  Using the per-epoch
            # index `i` would restart the x-axis at 0 every epoch and draw
            # each epoch's curve on top of the previous one.
            global_step = epoch * batches_per_epoch + i
            vis.line(
                X=torch.ones((1, 1)).cpu() * global_step,
                Y=torch.Tensor([loss.data[0]]).unsqueeze(0).cpu(),
                win=loss_window,
                update='append')

        # Console progress: one line every 100 minibatches.
        if (i+1) % 100 == 0:
            print("Epoch [%d/%d] Loss: %.4f" % (epoch+1, args.n_epoch, loss.data[0]))

    model.eval()
    # Wrap the loader itself (not the enumerate generator) so tqdm can read
    # its length and show a real progress bar instead of a bare counter.
    for i_val, (color_images_val, depth_images_val, label_images_val) in enumerate(tqdm(valloader)):
        color_images_val = Variable(color_images_val.cuda(), volatile=True)
        depth_images_val = Variable(depth_images_val.cuda(), volatile=True)
        label_images_val = Variable(label_images_val.cuda(), volatile=True)

        outputs = model(color_images_val, depth_images_val)
        # Arg-max over dimension 1 of the output gives the predicted label map.
        pred = outputs.data.max(1)[1].cpu().numpy()
        gt = label_images_val.data.cpu().numpy()
        running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()
    for k, v in score.items():
        print(k, v)
    # Clear the accumulator so the next epoch's scores start from scratch.
    running_metrics.reset()

    # ">=" means a tie also overwrites the saved checkpoint with the newer epoch.
    if score['Mean IoU : \t'] >= best_iou:
        best_iou = score['Mean IoU : \t']
        state = {'epoch': epoch+1,
                 'model_state': model.state_dict(),
                 'optimizer_state' : optimizer.state_dict(),}
        torch.save(state, "{}_{}_best_model.pkl".format(args.arch, 'SUNRGBD'))



Epoch [1/100] Loss: 1.5002
Epoch [1/100] Loss: 1.4631
Epoch [1/100] Loss: 1.7555


150it [00:42,  3.53it/s]


Overall Acc: 	 0.634489561632
Mean Acc : 	 0.0267625883428
FreqW Acc : 	 0.423181158501
Mean IoU : 	 0.0180665631138
Epoch [2/100] Loss: 1.1076
Epoch [2/100] Loss: 1.3288
Epoch [2/100] Loss: 0.6167


150it [00:42,  3.50it/s]

Overall Acc: 	 0.646311407697
Mean Acc : 	 0.0266911445538
FreqW Acc : 	 0.427234681505
Mean IoU : 	 0.0178184187016





Epoch [3/100] Loss: 1.3809
Epoch [3/100] Loss: 1.2002
Epoch [3/100] Loss: 0.8907


150it [00:43,  3.46it/s]

Overall Acc: 	 0.650030562789
Mean Acc : 	 0.0267188524357
FreqW Acc : 	 0.428123764236
Mean IoU : 	 0.017743586981





Epoch [4/100] Loss: 1.1764
Epoch [4/100] Loss: 1.0957
Epoch [4/100] Loss: 0.9036


150it [00:43,  3.48it/s]

Overall Acc: 	 0.650666637731
Mean Acc : 	 0.0266537406607
FreqW Acc : 	 0.427993209216
Mean IoU : 	 0.0176610229865





Epoch [5/100] Loss: 0.9939
Epoch [5/100] Loss: 1.3695
Epoch [5/100] Loss: 1.2573


150it [00:43,  3.47it/s]

Overall Acc: 	 0.652119610822
Mean Acc : 	 0.0264730753002
FreqW Acc : 	 0.427170206704
Mean IoU : 	 0.0174033936217





Epoch [6/100] Loss: 0.8614
Epoch [6/100] Loss: 2.1592
Epoch [6/100] Loss: 0.9619


150it [00:43,  3.47it/s]

Overall Acc: 	 0.652632617187
Mean Acc : 	 0.0264909885239
FreqW Acc : 	 0.427354635185
Mean IoU : 	 0.0174079080208





Epoch [7/100] Loss: 1.3646
Epoch [7/100] Loss: 1.4735
Epoch [7/100] Loss: 0.9370


150it [00:43,  3.44it/s]

Overall Acc: 	 0.652330721933
Mean Acc : 	 0.0266009043244
FreqW Acc : 	 0.427972482119
Mean IoU : 	 0.0175495897224





Epoch [8/100] Loss: 1.2989
Epoch [8/100] Loss: 0.9758
Epoch [8/100] Loss: 1.4815


150it [00:43,  3.44it/s]

Overall Acc: 	 0.650347171586
Mean Acc : 	 0.0269790213373
FreqW Acc : 	 0.429761583744
Mean IoU : 	 0.0180317667877





Epoch [9/100] Loss: 1.2893
Epoch [9/100] Loss: 1.2738
Epoch [9/100] Loss: 1.0679


150it [00:43,  3.43it/s]

Overall Acc: 	 0.651969444444
Mean Acc : 	 0.0267016458696
FreqW Acc : 	 0.4285068931
Mean IoU : 	 0.0176786606244





Epoch [10/100] Loss: 1.3245
Epoch [10/100] Loss: 1.2029
Epoch [10/100] Loss: 1.1630


150it [00:43,  3.44it/s]

Overall Acc: 	 0.652290234375
Mean Acc : 	 0.0266844168458
FreqW Acc : 	 0.42846609543
Mean IoU : 	 0.0176497996695





Epoch [11/100] Loss: 0.9099
Epoch [11/100] Loss: 1.3593
Epoch [11/100] Loss: 1.2911


150it [00:43,  3.45it/s]

Overall Acc: 	 0.652438722512
Mean Acc : 	 0.0265855408385
FreqW Acc : 	 0.427916269203
Mean IoU : 	 0.017530116057





Epoch [12/100] Loss: 1.4681
Epoch [12/100] Loss: 1.3126
Epoch [12/100] Loss: 0.8133


150it [00:43,  3.47it/s]

Overall Acc: 	 0.652946368634
Mean Acc : 	 0.026377543672
FreqW Acc : 	 0.426675242641
Mean IoU : 	 0.0172571679097





Epoch [13/100] Loss: 1.2798
Epoch [13/100] Loss: 1.1743
Epoch [13/100] Loss: 0.9532


150it [00:43,  3.46it/s]

Overall Acc: 	 0.652087934028
Mean Acc : 	 0.0267473520873
FreqW Acc : 	 0.428795328332
Mean IoU : 	 0.0177288524724





Epoch [14/100] Loss: 0.9321
Epoch [14/100] Loss: 0.9026
Epoch [14/100] Loss: 1.1556


150it [00:43,  3.45it/s]

Overall Acc: 	 0.651714641204
Mean Acc : 	 0.0269467684284
FreqW Acc : 	 0.429879559129
Mean IoU : 	 0.0179665111633





Epoch [15/100] Loss: 1.4485
Epoch [15/100] Loss: 1.3852
Epoch [15/100] Loss: 1.4248


150it [00:43,  3.45it/s]

Overall Acc: 	 0.652217332176
Mean Acc : 	 0.0268935096959
FreqW Acc : 	 0.429718767782
Mean IoU : 	 0.01789481783





Epoch [16/100] Loss: 0.9942
Epoch [16/100] Loss: 1.3232
Epoch [16/100] Loss: 0.8508


150it [00:43,  3.46it/s]

Overall Acc: 	 0.652617527488
Mean Acc : 	 0.0267242708672
FreqW Acc : 	 0.428743254409
Mean IoU : 	 0.0176873924419





Epoch [17/100] Loss: 0.8940
Epoch [17/100] Loss: 1.3425
Epoch [17/100] Loss: 1.7431


150it [00:43,  3.44it/s]

Overall Acc: 	 0.652711812789
Mean Acc : 	 0.0266668710941
FreqW Acc : 	 0.428458233214
Mean IoU : 	 0.0176186766206





Epoch [18/100] Loss: 1.4263
Epoch [18/100] Loss: 1.1566
Epoch [18/100] Loss: 1.5078


150it [00:43,  3.45it/s]

Overall Acc: 	 0.651918453414
Mean Acc : 	 0.0270047188967
FreqW Acc : 	 0.43025149175
Mean IoU : 	 0.018025360658





Epoch [19/100] Loss: 1.1654
Epoch [19/100] Loss: 0.8214
Epoch [19/100] Loss: 1.1385


150it [00:43,  3.46it/s]

Overall Acc: 	 0.653055230035
Mean Acc : 	 0.0265645585116
FreqW Acc : 	 0.427889191327
Mean IoU : 	 0.0174851275585





Epoch [20/100] Loss: 0.9785
Epoch [20/100] Loss: 0.9447
Epoch [20/100] Loss: 0.9707


150it [00:43,  3.46it/s]

Overall Acc: 	 0.652981582755
Mean Acc : 	 0.0267688541748
FreqW Acc : 	 0.429132320999
Mean IoU : 	 0.0177318134466





Epoch [21/100] Loss: 0.7833
Epoch [21/100] Loss: 1.0205
Epoch [21/100] Loss: 1.2483


150it [00:43,  3.45it/s]

Overall Acc: 	 0.652788780382
Mean Acc : 	 0.0269672890771
FreqW Acc : 	 0.430270807413
Mean IoU : 	 0.017967013333





Epoch [22/100] Loss: 0.8504
Epoch [22/100] Loss: 1.1717
Epoch [22/100] Loss: 1.0639


150it [00:43,  3.46it/s]

Overall Acc: 	 0.652917375579
Mean Acc : 	 0.0268132378482
FreqW Acc : 	 0.429360702658
Mean IoU : 	 0.0177848444701





Epoch [23/100] Loss: 1.1385
Epoch [23/100] Loss: 1.0231
Epoch [23/100] Loss: 1.1173


150it [00:43,  3.45it/s]

Overall Acc: 	 0.653144813368
Mean Acc : 	 0.0267666200074
FreqW Acc : 	 0.429143193952
Mean IoU : 	 0.0177253849313





Epoch [24/100] Loss: 0.9545
Epoch [24/100] Loss: 1.2520
Epoch [24/100] Loss: 1.1267


150it [00:43,  3.44it/s]

Overall Acc: 	 0.652784968171
Mean Acc : 	 0.0267657969205
FreqW Acc : 	 0.429068463662
Mean IoU : 	 0.0177383883228





Epoch [25/100] Loss: 1.0499
Epoch [25/100] Loss: 0.9140
Epoch [25/100] Loss: 0.8572


150it [00:43,  3.43it/s]

Overall Acc: 	 0.652571752025
Mean Acc : 	 0.0267609429747
FreqW Acc : 	 0.428901848134
Mean IoU : 	 0.0177355891861





Epoch [26/100] Loss: 1.1285
Epoch [26/100] Loss: 0.8285
Epoch [26/100] Loss: 1.3664


150it [00:43,  3.45it/s]

Overall Acc: 	 0.653124804688
Mean Acc : 	 0.0266778219077
FreqW Acc : 	 0.428591852211
Mean IoU : 	 0.0176194111514





Epoch [27/100] Loss: 1.0264
Epoch [27/100] Loss: 1.1641
Epoch [27/100] Loss: 1.0483


150it [00:43,  3.45it/s]

Overall Acc: 	 0.653172880498
Mean Acc : 	 0.0268148602881
FreqW Acc : 	 0.429433829273
Mean IoU : 	 0.0177808874804





Epoch [28/100] Loss: 1.5669
Epoch [28/100] Loss: 0.8667
Epoch [28/100] Loss: 1.1187


150it [00:43,  3.45it/s]

Overall Acc: 	 0.653273690683
Mean Acc : 	 0.0268134932689
FreqW Acc : 	 0.429431271632
Mean IoU : 	 0.0177762635583





Epoch [29/100] Loss: 1.1397
Epoch [29/100] Loss: 1.1614
Epoch [29/100] Loss: 1.1059


150it [00:43,  3.44it/s]

Overall Acc: 	 0.652761820023
Mean Acc : 	 0.0270482183523
FreqW Acc : 	 0.430699507458
Mean IoU : 	 0.018061055003





Epoch [30/100] Loss: 1.5653
Epoch [30/100] Loss: 0.9458
Epoch [30/100] Loss: 1.0779


150it [00:43,  3.46it/s]

Overall Acc: 	 0.652869357639
Mean Acc : 	 0.0270273756424
FreqW Acc : 	 0.430558922136
Mean IoU : 	 0.018049355971





Epoch [31/100] Loss: 1.2554
Epoch [31/100] Loss: 1.0958
Epoch [31/100] Loss: 0.8069


150it [00:43,  3.45it/s]


Overall Acc: 	 0.652376417824
Mean Acc : 	 0.0275335930891
FreqW Acc : 	 0.433317529644
Mean IoU : 	 0.0185907310551
Epoch [32/100] Loss: 1.2398
Epoch [32/100] Loss: 1.2843
Epoch [32/100] Loss: 0.9498


150it [00:43,  3.46it/s]


Overall Acc: 	 0.652769755498
Mean Acc : 	 0.0275189034577
FreqW Acc : 	 0.433403044663
Mean IoU : 	 0.0185941299994
Epoch [33/100] Loss: 1.4525
Epoch [33/100] Loss: 0.9354
Epoch [33/100] Loss: 0.9963


150it [00:43,  3.46it/s]

Overall Acc: 	 0.653205324074
Mean Acc : 	 0.0270141981504
FreqW Acc : 	 0.430642133451
Mean IoU : 	 0.0180115369768





Epoch [34/100] Loss: 0.9861
Epoch [34/100] Loss: 0.8281
Epoch [34/100] Loss: 0.8856


150it [00:43,  3.46it/s]

Overall Acc: 	 0.653019618056
Mean Acc : 	 0.0269051031739
FreqW Acc : 	 0.429920542767
Mean IoU : 	 0.0178904525364





Epoch [35/100] Loss: 0.9780
Epoch [35/100] Loss: 1.2094
Epoch [35/100] Loss: 0.9093


150it [00:43,  3.45it/s]

Overall Acc: 	 0.653144560185
Mean Acc : 	 0.0269215919011
FreqW Acc : 	 0.430050926308
Mean IoU : 	 0.0179098582504





Epoch [36/100] Loss: 1.5369
Epoch [36/100] Loss: 0.9935
Epoch [36/100] Loss: 1.3609


150it [00:43,  3.44it/s]


Overall Acc: 	 0.651484157986
Mean Acc : 	 0.0276660581753
FreqW Acc : 	 0.433967830262
Mean IoU : 	 0.0188013938657
Epoch [37/100] Loss: 1.3174
Epoch [37/100] Loss: 1.0423
Epoch [37/100] Loss: 1.2249


150it [00:43,  3.47it/s]

Overall Acc: 	 0.652720840567
Mean Acc : 	 0.0270856709967
FreqW Acc : 	 0.430868838404
Mean IoU : 	 0.0181106859566





Epoch [38/100] Loss: 1.4132
Epoch [38/100] Loss: 0.8572
Epoch [38/100] Loss: 1.4861


150it [00:43,  3.44it/s]

Overall Acc: 	 0.651441869213
Mean Acc : 	 0.0276524217824
FreqW Acc : 	 0.433677214494
Mean IoU : 	 0.0187499031453





Epoch [39/100] Loss: 0.9790
Epoch [39/100] Loss: 0.9165
Epoch [39/100] Loss: 0.9606


150it [00:43,  3.46it/s]

Overall Acc: 	 0.652887601273
Mean Acc : 	 0.026839713807
FreqW Acc : 	 0.42946709503
Mean IoU : 	 0.0178204032028





Epoch [40/100] Loss: 1.1901
Epoch [40/100] Loss: 1.2260
Epoch [40/100] Loss: 1.4818


150it [00:43,  3.44it/s]

Overall Acc: 	 0.653249204282
Mean Acc : 	 0.0269652935159
FreqW Acc : 	 0.430349672021
Mean IoU : 	 0.01796010656





Epoch [41/100] Loss: 1.1412
Epoch [41/100] Loss: 0.7418
Epoch [41/100] Loss: 0.8645


150it [00:43,  3.44it/s]


Overall Acc: 	 0.652253797743
Mean Acc : 	 0.0278530533123
FreqW Acc : 	 0.435071545544
Mean IoU : 	 0.0189450089178
Epoch [42/100] Loss: 0.9717
Epoch [42/100] Loss: 1.0652
Epoch [42/100] Loss: 1.2520


150it [00:43,  3.45it/s]

Overall Acc: 	 0.653306423611
Mean Acc : 	 0.0270176513002
FreqW Acc : 	 0.430659821448
Mean IoU : 	 0.0180192415613





Epoch [43/100] Loss: 1.6764
Epoch [43/100] Loss: 0.9270
Epoch [43/100] Loss: 0.9728


150it [00:43,  3.46it/s]


Overall Acc: 	 0.649362254051
Mean Acc : 	 0.0277700851815
FreqW Acc : 	 0.433324978486
Mean IoU : 	 0.0189569722062
Epoch [44/100] Loss: 0.8650


Process Process-1389:
Process Process-1384:
Process Process-1390:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/dongwonshin/.conda/envs/pytorch/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/dongwonshin/.conda/envs/pytorch/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/dongwonshin/.conda/envs/pytorch/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/home/dongwonshin/.conda/envs/pytorch/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/dongwonshin/.conda/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 50, in _worker_loop
    r = index_queue.get()
  File "/home/dongwonshin/.conda/envs/pytorch/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap


RuntimeError: DataLoader worker (pid 26882) exited unexpectedly with exit code 1.