In [2]:
%matplotlib inline
import os
import collections
import torch
import torchvision
import numpy as np
import scipy.misc as m
import scipy.io as io
import matplotlib.pyplot as plt

from SUNRGBDLoader import *

os.environ['CUDA_VISIBLE_DEVICES'] = '2'

In [3]:
import sys, os
import torch
import visdom
import argparse
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

from torch.autograd import Variable
from torch.utils import data
from tqdm import tqdm

from ptsemseg.models import get_model
from ptsemseg.loader import get_loader, get_data_path
from ptsemseg.metrics import runningScore
from ptsemseg.loss import *
from ptsemseg.augmentations import *


parser = argparse.ArgumentParser(description='Hyperparams')
parser.add_argument('--arch', nargs='?', type=str, default='fcn8s', 
                    help='Architecture to use [\'fcn8s, unet, segnet etc\']')
parser.add_argument('--img_rows', nargs='?', type=int, default=256, 
                    help='Height of the input image')
parser.add_argument('--img_cols', nargs='?', type=int, default=256, 
                    help='Width of the input image')

parser.add_argument('--img_norm', dest='img_norm', action='store_true', 
                    help='Enable input image scales normalization [0, 1] | True by default')
parser.add_argument('--no-img_norm', dest='img_norm', action='store_false', 
                    help='Disable input image scales normalization [0, 1] | True by default')
parser.set_defaults(img_norm=True)

parser.add_argument('--n_epoch', nargs='?', type=int, default=100, 
                    help='# of the epochs')
parser.add_argument('--batch_size', nargs='?', type=int, default=1, 
                    help='Batch Size')
parser.add_argument('--l_rate', nargs='?', type=float, default=1e-5, 
                    help='Learning Rate')
parser.add_argument('--feature_scale', nargs='?', type=int, default=1, 
                    help='Divider for # of features to use')
parser.add_argument('--resume', nargs='?', type=str, default=None,    
                    help='Path to previous saved model to restart from')

parser.add_argument('--visdom', dest='visdom', action='store_true', 
                    help='Enable visualization(s) on visdom | False by default')
parser.add_argument('--no-visdom', dest='visdom', action='store_false', 
                    help='Disable visualization(s) on visdom | False by default')
parser.set_defaults(visdom=False)


In [4]:
args = parser.parse_args('--arch unet --batch_size 3 --img_rows 572 --img_cols 572'.split(' '))

# Setup Augmentations
data_aug= Compose([RandomRotate(10),                                        
                   RandomHorizontallyFlip()])

# Setup Dataloader
data_path = '/home/dongwonshin/Desktop/Datasets/SUNRGBD/SUNRGBD(meta)/'
t_loader = SUNRGBDLoader(data_path, is_transform=True)
v_loader = SUNRGBDLoader(data_path, is_transform=True, split='val')

n_classes = t_loader.n_classes
trainloader = data.DataLoader(t_loader, batch_size=args.batch_size, num_workers=16, shuffle=True)
valloader = data.DataLoader(v_loader, batch_size=args.batch_size, num_workers=16)

# Setup Metrics
running_metrics = runningScore(n_classes)

# Setup visdom for visualization
if args.visdom:
    vis = visdom.Visdom()

    loss_window = vis.line(X=torch.zeros((1,)).cpu(),
                       Y=torch.zeros((1)).cpu(),
                       opts=dict(xlabel='minibatches',
                                 ylabel='Loss',
                                 title='Training Loss',
                                 legend=['Loss']))

# Setup Model
model = get_model(args.arch, n_classes)

model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
model.cuda()

# Check if model has custom optimizer / loss
if hasattr(model.module, 'optimizer'):
    optimizer = model.module.optimizer
else:
    optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate, momentum=0.99, weight_decay=5e-4)

if hasattr(model.module, 'loss'):
    print('Using custom loss')
    loss_fn = model.module.loss
else:
    loss_fn = cross_entropy2d

if args.resume is not None:                                         
    if os.path.isfile(args.resume):
        print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['model_state'])
        optimizer.load_state_dict(checkpoint['optimizer_state'])
        print("Loaded checkpoint '{}' (epoch {})"                    
              .format(args.resume, checkpoint['epoch']))
    else:
        print("No checkpoint found at '{}'".format(args.resume)) 


# Training

In [5]:
best_iou = -100.0 
for epoch in range(args.n_epoch):
    model.train()
    for i, (color_imgs, depth_imgs, label_imgs) in enumerate(trainloader):
        images = Variable(color_imgs.cuda())
        labels = Variable(label_imgs.cuda())

        optimizer.zero_grad()
        outputs = model(images)

        loss = loss_fn(input=outputs, target=labels)

        loss.backward()
        optimizer.step()

        if args.visdom:
            vis.line(
                X=torch.ones((1, 1)).cpu() * i,
                Y=torch.Tensor([loss.data[0]]).unsqueeze(0).cpu(),
                win=loss_window,
                update='append')

        if (i+1) % 100 == 0:
            print("Epoch [%d/%d] Loss: %.4f" % (epoch+1, args.n_epoch, loss.data[0]))

    model.eval()
    for i_val, (color_images_val, depth_images_val, label_images_val) in tqdm(enumerate(valloader)):
        color_images_val = Variable(color_images_val.cuda(), volatile=True)
        label_images_val = Variable(label_images_val.cuda(), volatile=True)

        outputs = model(color_images_val)
        pred = outputs.data.max(1)[1].cpu().numpy()
        gt = label_images_val.data.cpu().numpy()
        running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()
    for k, v in score.items():
        print(k, v)
    running_metrics.reset()

    if score['Mean IoU : \t'] >= best_iou:
        best_iou = score['Mean IoU : \t']
        state = {'epoch': epoch+1,
                 'model_state': model.state_dict(),
                 'optimizer_state' : optimizer.state_dict(),}
        torch.save(state, "{}_{}_best_model.pkl".format(args.arch, 'SUNRGBD'))

RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 1. Got 80 and 79 in dimension 2 at /opt/conda/conda-bld/pytorch_1522182087074/work/torch/lib/THC/generic/THCTensorMath.cu:111

In [24]:
class unetConv2(nn.Module):
    def __init__(self, in_size, out_size, is_batchnorm):
        super(unetConv2, self).__init__()

        if is_batchnorm:
            self.conv1 = nn.Sequential(nn.Conv2d(in_size, out_size, 3, 1, 0),
                                       nn.BatchNorm2d(out_size),
                                       nn.ReLU(),)
            self.conv2 = nn.Sequential(nn.Conv2d(out_size, out_size, 3, 1, 0),
                                       nn.BatchNorm2d(out_size),
                                       nn.ReLU(),)
        else:
            self.conv1 = nn.Sequential(nn.Conv2d(in_size, out_size, 3, 1, 0),
                                       nn.ReLU(),)
            self.conv2 = nn.Sequential(nn.Conv2d(out_size, out_size, 3, 1, 0),
                                       nn.ReLU(),)
    def forward(self, inputs):
        outputs = self.conv1(inputs)
        outputs = self.conv2(outputs)
        return outputs

In [25]:
class unetUp(nn.Module):
    def __init__(self, in_size, out_size, is_deconv):
        super(unetUp, self).__init__()
        self.conv = unetConv2(in_size, out_size, False)
        if is_deconv:
            self.up = nn.ConvTranspose2d(in_size, out_size, kernel_size=2, stride=2)
        else:
            self.up = nn.UpsamplingBilinear2d(scale_factor=2)

    def forward(self, inputs1, inputs2):
        outputs2 = self.up(inputs2)
        offset = outputs2.size()[2] - inputs1.size()[2]
        padding = 2 * [offset // 2, offset // 2]
        outputs1 = F.pad(inputs1, padding)
        print(outputs1, outputs2)
        return self.conv(torch.cat([outputs1, outputs2], 1))

In [40]:
feature_scale=4
in_channels = 3
is_batchnorm=True
is_deconv=True
filters = [64, 128, 256, 512, 1024]
filters = [int(x / feature_scale) for x in filters]

In [41]:
conv1 = unetConv2(in_channels, filters[0], is_batchnorm)
maxpool1 = nn.MaxPool2d(kernel_size=2)

conv2 = unetConv2(filters[0], filters[1], is_batchnorm)
maxpool2 = nn.MaxPool2d(kernel_size=2)

conv3 = unetConv2(filters[1], filters[2], is_batchnorm)
maxpool3 = nn.MaxPool2d(kernel_size=2)

conv4 = unetConv2(filters[2], filters[3], is_batchnorm)
maxpool4 = nn.MaxPool2d(kernel_size=2)

center = unetConv2(filters[3], filters[4], is_batchnorm)

# upsampling
up_concat4 = unetUp(filters[4], filters[3], is_deconv)
up_concat3 = unetUp(filters[3], filters[2], is_deconv)
up_concat2 = unetUp(filters[2], filters[1], is_deconv)
up_concat1 = unetUp(filters[1], filters[0], is_deconv)

# final conv (without any concat)
final = nn.Conv2d(filters[0], n_classes, 1)

In [47]:
color_imgs, depth_imgs, label_imgs = iter(trainloader).next()
color_imgs = Variable(color_imgs)

conv1 = conv1(color_imgs)
maxpool1 = maxpool1(conv1)

conv2 = conv2(maxpool1)
maxpool2 = maxpool2(conv2)

conv3 = conv3(maxpool2)
maxpool3 = maxpool3(conv3)

conv4 = conv4(maxpool3)
maxpool4 = maxpool4(conv4)

center = center(maxpool4)
up4 = up_concat4(conv4, center)

TypeError: 'Variable' object is not callable

In [46]:
up3 = up_concat3(conv3, up4)
up2 = up_concat2(conv2, up3)
up1 = up_concat1(conv1, up2)

final = final(up1)

RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 1. Got 79 and 80 in dimension 2 at /opt/conda/conda-bld/pytorch_1522182087074/work/torch/lib/TH/generic/THTensorMath.c:2897