In [2]:
from __future__ import print_function, absolute_import
import gc
import sys
from torch.autograd import Variable
import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler
torch.backends.cudnn.benchmark=True

import time
import torchvision.transforms as transforms
import torchvision.models as models
import numpy as np
import random

In [3]:
# test CUDA available
print('CUDA enable: ', torch.cuda.is_available())

CUDA enable:  True


In [4]:
# import dataset from ./lib/dataset.py
import lib.dataset as dataset
from models.__init__ import load_model
from lib.config import cfg


In [7]:
# Hyperparameter
BASE_LR = 0.01
EPOCH_DECAY = 10 # number of epochs after which the Learning rate is decayed exponentially.
DECAY_WEIGHT = 0.001
cfg.CONST.IMG_W = 137
cfg.CONST.IMG_H = 137
cfg.CONST.N_VOX = 32
cfg.CONST.N_VIEWS = 4
from lib.solver import Solver

In [5]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [6]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename + '_latest.pth.tar')
    if is_best:
        shutil.copyfile(filename + '_latest.pth.tar', filename + '_best.pth.tar')


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

In [8]:
# training hyperparameters
batch_size = 16
train_val_ratio = 0.8

# pre setting device and data set length
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

ren_len = dataset.ren_dataset.__len__()
vox_len = dataset.vox_dataset.__len__()

In [9]:
# This function changes the learning rate over the training model.
def exp_lr_scheduler(optimizer, epoch, init_lr=BASE_LR, lr_decay_epoch=EPOCH_DECAY):
    """Decay learning rate by a factor of DECAY_WEIGHT every lr_decay_epoch epochs."""
    lr = init_lr * (DECAY_WEIGHT**(epoch // lr_decay_epoch))

    if epoch % lr_decay_epoch == 0:
        print('LR is set to {}'.format(lr))

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    return optimizer

In [None]:
NetClass = load_model(cfg.CONST.NETWORK_CLASS)
# print('Network definition: \n')
net = NetClass()
# print(net)

# start an epoch
# slice training and validation index
rand_idx = np.random.permutation(np.arange(min(ren_len,vox_len)))
thr = int(train_val_ratio*len(rand_idx))
train_idx = rand_idx[:thr]
val_idx = rand_idx[thr:]

for i in range(ren_len//batch_size):
    idx = train_idx[i*batch_size: (i+1)*batch_size]
    print(idx)
    render_loader, voxel_loader = dataset.get_train_data_loaders(idx)
    for it, (images, voxels) in enumerate(zip(render_loader, voxel_loader)):
        inputs=Variable(torch.cat(images[0]))
        labels=Variable(voxels[0])
        print("matching: image = ", images[1], 'voxel = ',voxels[1])
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        print(inputs.shape)
    
    # test mode
    if i >3:
        break
    # test mode end
            

In [10]:
def run():
    # Parameters
    num_epochs = 10
    output_period = 100
    batch_size = 4
    
    # setup the device for running
    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
    
    NetClass = load_model(cfg.CONST.NETWORK_CLASS)
    model = NetClass().to(device)
    
    embed = nn.Embedding(3, 96).to(device)
    
    criterion = nn.CrossEntropyLoss().to(device)
    
    optimizer = optim.SGD(model.parameters(), lr=BASE_LR,momentum=0.9,weight_decay=DECAY_WEIGHT)
    top1trset,top5trset = [],[]
    top1set,top5set = [],[]
    epoch = 1
    while epoch <= num_epochs:
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        
        running_loss = 0.0
        optimizer = exp_lr_scheduler(optimizer, epoch)
        for param_group in optimizer.param_groups:
            print('Current learning rate: ' + str(param_group['lr']))
            
        model.train()

        # start an epoch
        # slice training and validation index
        rand_idx = np.random.permutation(np.arange(min(ren_len,vox_len)))
        thr = int(train_val_ratio*len(rand_idx))
        train_idx = rand_idx[:thr]
        val_idx = rand_idx[thr:]

        for i in range(thr//batch_size):
            idx = train_idx[i*batch_size: (i+1)*batch_size]
            print(idx)
            render_loader, voxel_loader = dataset.get_train_data_loaders(idx)
            
#             print('render_loader',render_loader.shape)
            for it, (images, voxels) in enumerate(zip(render_loader, voxel_loader)):
                inputs=Variable(images[0])
                labels=Variable(voxels[0])
#                 print("matching: image = ", images[1], 'voxel = ',voxels[1])
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                print('inputs',inputs.shape)
                print('labels',labels.shape)
                
                outputs = model(torch.stack((inputs,inputs)))
                loss = criterion(outputs, labels)
            
    
            # measure accuracy and record loss
            prec1 = accuracy(outputs.data, labels, topk=(1,))
            losses.update(loss.data.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
            if batch_num % output_period == 0:
                print('[%d:%.2f] loss: %.3f' % (
                    epoch, batch_num*1.0/num_train_batches,
                    running_loss/output_period
                    ))
                running_loss = 0.0
                top1trset.append(100-top1.avg)
                print('top1 training err (%)= '+str(top1trset[-1]))

                gc.collect()
                
            

        gc.collect()
        # save after every epoch
        torch.save(model.state_dict(), "models/model.%d" % epoch)
        
        
        model.eval()
        
        top1test = AverageMeter()
        top5test = AverageMeter()
        
        for i in range((len(rand_idx)-thr)//batch_size):
            idx = val_idx[i*batch_size: (i+1)*batch_size]
            print(idx)
            render_loader, voxel_loader = dataset.get_train_data_loaders(idx)
            for it, (images, voxels) in enumerate(zip(render_loader, voxel_loader)):
                inputs=Variable(images[0])
                labels=Variable(voxels[0])
#                 print("matching: image = ", images[1], 'voxel = ',voxels[1])
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
#                 loss = criterion(outputs, labels)
            
    
            # measure accuracy and record loss
            prec1 = accuracy(outputs.data, labels, topk=(1,))
            losses.update(loss.data.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            
        
        top1set.append(100-top1test.avg)
        print('top1 val err (%)= '+str(top1set[-1]))
        
        gc.collect()
        epoch += 1
    return top1trset,top1set

In [13]:
36 * 128 * 4 * 4 * 4 / 32768

9.0

In [11]:
print('Starting training')

BASE_LR = 0.001
top1trset,top1set = run()
print('Training terminated')

Starting training

Your Model is "ResidualGRUNet" Initializing

Initializing "Encoder"

Initializing "Decoder"
Current learning rate: 0.01
[27866 27261 30875 22757]
inputs torch.Size([4, 3, 137, 137])
labels torch.Size([4, 32, 32, 32])


RuntimeError: invalid argument 2: size '[36 x 128 x 4 x 4 x 4]' is invalid for input with 32768 elements at /opt/conda/conda-bld/pytorch_1535491974311/work/aten/src/TH/THStorage.cpp:84

In [None]:
# ADAM, w.o. WD=0.001, 2 fc = 3, dp = 0.2, BASE_LR = 0.001
top1trset,top5trset,top1set,top5set #~43 epoch 8

In [None]:
# ADAM, w.o. WD=0.001, 2 fc, dp = 0.2, BASE_LR = 0.01
top1trset,top5trset,top1set,top5set #~43 epoch 8

In [None]:
print('Starting training')
BASE_LR = 0.01
top1trset1,top5trset1,top1set1,top5set1 = run(0.2)
top1trset2,top5trset2,top1set2,top5set2 = run(0.5)
top1trset3,top5trset3,top1set3,top5set3 = run(0.8)
print('Training terminated')

In [None]:
print('Starting training')
BASE_LR = 0.001
top1trset0001,top5trset0001,top1set0001,top5set0001 = run(0)
BASE_LR = 0.0001
top1trset00001,top5trset00001,top1set00001,top5set00001 = run(0)
print('Training terminated')

In [None]:
# import the pre-trained model
model = resnet_18()
model_path = 'models/trained/model.25'
checkpoint = torch.load(model_path, map_location='cpu')
model.load_state_dict(checkpoint)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)#.half()


# write top 5 into result.txt
def test_accu(batch_size):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    val_loader,test_loader = dataset.get_val_test_loaders(batch_size)
    top1_count,top5_count = 0.0,0.0
    print('number of test set: ' + str(len(test_loader)))
    
    result = open("result.txt","w")
    
#     model.eval()
        
#     top1test = AverageMeter()
#     top5test = AverageMeter()
#     for i, data in enumerate(val_loader):
#         img, labels = data
#         x = Variable(img)
#         y = Variable(labels)

#         x = x.to(device)#.half()
#         y = y.to(device)
#         outs = model(x)
#         #get top 5 output
#         prec1test, prec5test = accuracy(outs.data, y, topk=(1, 5))
# #             losses.update(loss.data.item(), inputs.size(0))
#         top1test.update(prec1test.item(), x.size(0))
#         top5test.update(prec5test.item(), x.size(0))

#     top1set.append(100-top1test.avg)
#     top5set.append(100-top5test.avg)
#     print('top1 val err (%)= '+str(top1set[-1]))
#     print('top5 val err (%)= '+str(top5set[-1]))

#     gc.collect()
#     epoch += 1

    model.eval()
    for i, data in enumerate(test_loader, 0):
        img, labels = data
        x = Variable(img)
        y = Variable(labels)
        if torch.cuda.is_available():
            x = x.cuda()#.half()
            y = y.cuda()
        outs = model(x)
        _, pred1 = torch.max(outs, -1)
        _, pred5 = torch.topk(outs, 5)
        
        for b in range(batch_size):
            n = int(i*batch_size+b+1)
            result.write('test/' + str(n).zfill(8)+'.jpg '+' '.join(str(int(e)) for e in pred5[b].tolist())+'\n')
    
    # overwrite the result.txt every run
    result.truncate()
    result.close()
    
# Execute
test_accu(25)
print('completed!')

In [None]:
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = args.lr * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [None]:
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        target = target.cuda(async=True)
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)
        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data[0], input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses, top1=top1, top5=top5))
        return losses,top1,top5

In [None]:
import numpy as np
import matplotlib.pyplot as plt
shist1 = [np.array(h) for h in top1trset]
shist2 = [np.array(h) for h in top1set]
# shist3 = [np.array(h) for h in top5set0001]
# shist4 = [np.array(h) for h in top5set00001]
num_epochs=10

plt.title("Validation vs. Training Error")
plt.xlabel("Training Epochs")
plt.ylabel("Error")
plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist1)),shist1,label="Training")
plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist2)),shist2,label="Validation")
# plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist3)),shist3,label="lr=0.001")
# plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist4)),shist4,label="lr=0.0001")
# plt.ylim((0,1.))
# plt.xticks(np.arange(1, num_epochs+1, 1.0))
plt.legend()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
shist1 = [np.array(h) for h in top5set001]
shist2 = [np.array(h) for h in top5set1]
shist3 = [np.array(h) for h in top5set2]
shist4 = [np.array(h) for h in top5set3]
num_epochs=10

plt.title("Validation Accuracy vs. Dropout rate")
plt.xlabel("Training Epochs")
plt.ylabel("Validation Error")
plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist1)),shist1,label="dropout=0")
plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist2)),shist2,label="dropout=0.2")
plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist3)),shist3,label="dropout=0.5")
plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist4)),shist4,label="dropout=0.8")
# plt.ylim((0,1.))
# plt.xticks(np.arange(1, num_epochs+1, 1.0))
plt.legend()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
shist1 = [np.array(h) for h in top5trset001]
shist2 = [np.array(h) for h in top5trset1]
shist3 = [np.array(h) for h in top5trset2]
shist4 = [np.array(h) for h in top5trset3]
num_epochs=10

plt.title("Validation Accuracy vs. Dropout rate")
plt.xlabel("Training Epochs")
plt.ylabel("Training Error")
plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist1)),shist1,label="dropout=0")
plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist2)),shist2,label="dropout=0.2")
plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist3)),shist3,label="dropout=0.5")
plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist4)),shist4,label="dropout=0.8")
# plt.ylim((0,1.))
# plt.xticks(np.arange(1, num_epochs+1, 1.0))
plt.legend()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
shist1 = [np.array(h) for h in top5trset0]
shist2 = [np.array(h) for h in top5trset001]
shist3 = [np.array(h) for h in top5trset0001]
shist4 = [np.array(h) for h in top5trset00001]
num_epochs=10

plt.title("Validation Accuracy vs. Learning rate")
plt.xlabel("Training Epochs")
plt.ylabel("Validation Accuracy")
plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist1)),shist1,label="lr=0.1")
plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist2)),shist2,label="lr=0.01")
plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist3)),shist3,label="lr=0.001")
plt.plot(np.arange(1,num_epochs+1,num_epochs/len(shist4)),shist4,label="lr=0.0001")
# plt.ylim((0,1.))
# plt.xticks(np.arange(1, num_epochs+1, 1.0))
plt.legend()
plt.show()

In [None]:
range(1,10,2)

In [None]:
top1trset,top5trset,top1set,top5set

In [None]:
def validate(val_loader, model, criterion):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        target = target.cuda(async=True)
        input_var = torch.autograd.Variable(input, volatile=True)
        target_var = torch.autograd.Variable(target, volatile=True)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data[0], input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                   i, len(val_loader), batch_time=batch_time, loss=losses,
                   top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    return losses,top1,top5


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename + '_latest.pth.tar')
    if is_best:
        shutil.copyfile(filename + '_latest.pth.tar', filename + '_best.pth.tar')