In [1]:
from __future__ import print_function, absolute_import
import gc
import sys
from torch.autograd import Variable
import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler
torch.backends.cudnn.benchmark=True

import time
import torchvision.transforms as transforms
import torchvision.models as models
import numpy as np
import random

In [2]:
# Report CUDA availability and choose the device used by the following cells.
print('CUDA enable: ', torch.cuda.is_available())
if torch.cuda.is_available():
    # Prefer GPU index 1 as before, but fall back to GPU 0 when only one GPU is
    # visible, because "cuda:1" is not a valid device on such a machine.
    device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")
print('current device: ', device)

CUDA enable:  True


NameError: name 'device' is not defined

In [2]:
# import dataset from ./lib/dataset.py
import lib.dataset as dataset

In [None]:
class AverageMeter(object):
    """Tracks the latest value together with the weighted running mean."""

    def __init__(self):
        # reset() is the single place where every attribute is defined.
        self.reset()

    def reset(self):
        """Set the last value, mean, weighted sum and sample count back to 0."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as observed `n` times and recompute the running mean."""
        self.val = val
        self.sum += val * n
        self.count += n
        # Mean over everything recorded since the last reset().
        self.avg = self.sum / self.count

In [None]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialize `state` with torch.save and optionally keep a "best" copy.

    Args:
        state: Object handed to ``torch.save``.
        is_best: When truthy, the file just written is also copied to
            ``filename + '_best.pth.tar'``.
        filename: Path prefix; ``'_latest.pth.tar'`` is appended to it.
    """
    # Local import: the notebook's import cell does not bring in shutil, so the
    # is_best branch previously raised NameError.
    import shutil

    latest_path = filename + '_latest.pth.tar'
    torch.save(state, latest_path)
    if is_best:
        shutil.copyfile(latest_path, filename + '_best.pth.tar')


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Args:
        output: 2-D tensor of scores; the top-k is taken along dimension 1.
        target: Tensor whose first dimension is the batch size; it is flattened
            and compared against the predicted indices.
        topk: Iterable of k values to evaluate.

    Returns:
        A list with one 0-dim float tensor per k: the percentage of rows whose
        target is among the k highest-scoring indices.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # After the transpose pred is (maxk, batch) with the best guess in row 0.
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # reshape, not view: `correct` keeps the transposed (non-contiguous)
        # layout of `pred`, and view(-1) raises on such a slice once k > 1.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

In [None]:
# Hyperparameter
# Initial learning rate: the init_lr default of exp_lr_scheduler and the lr
# handed to optim.SGD in run().
BASE_LR = 0.01
EPOCH_DECAY = 10 # number of epochs after which the Learning rate is decayed exponentially.
# Used in two roles: the multiplicative learning-rate decay factor in
# exp_lr_scheduler and the weight_decay argument of optim.SGD in run().
DECAY_WEIGHT = 0.001

In [None]:
# training hyperparameters
batch_size = 16
train_val_ratio = 0.8  # fraction of the shuffled indices assigned to training

# pre setting device and data set length
# Prefer GPU index 1 as before, but fall back to GPU 0 when only one GPU is
# visible (where "cuda:1" is not a valid device), and to the CPU without CUDA.
if torch.cuda.is_available():
    device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")

# len() is the idiomatic spelling of calling __len__() directly.
ren_len = len(dataset.ren_dataset)
vox_len = len(dataset.vox_dataset)

In [None]:
# This function changes the learning rate over the training model.
def exp_lr_scheduler(optimizer, epoch, init_lr=None, lr_decay_epoch=None):
    """Decay learning rate by a factor of DECAY_WEIGHT every lr_decay_epoch epochs.

    Args:
        optimizer: Object exposing ``param_groups`` (dicts with an 'lr' key).
        epoch: Current epoch; the decay exponent is ``epoch // lr_decay_epoch``.
        init_lr: Undecayed learning rate. ``None`` (default) means the current
            value of the global BASE_LR.
        lr_decay_epoch: Number of epochs between decay steps. ``None`` (default)
            means the current value of the global EPOCH_DECAY.

    Returns:
        The same optimizer, with every param group's 'lr' overwritten.
    """
    # Resolve the defaults at call time. Binding them in the signature froze the
    # values at definition time, so cells that reassign BASE_LR before training
    # had their learning rate overwritten with the stale one.
    if init_lr is None:
        init_lr = BASE_LR
    if lr_decay_epoch is None:
        lr_decay_epoch = EPOCH_DECAY

    lr = init_lr * (DECAY_WEIGHT**(epoch // lr_decay_epoch))

    if epoch % lr_decay_epoch == 0:
        print('LR is set to {}'.format(lr))

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    return optimizer

In [4]:
# start an epoch
# slice training and validation index
rand_idx = np.random.permutation(np.arange(min(ren_len,vox_len)))
thr = int(train_val_ratio*len(rand_idx))
train_idx = rand_idx[:thr]
val_idx = rand_idx[thr:]

# Bound the loop by the training split itself. train_idx holds only
# train_val_ratio of min(ren_len, vox_len) indices, so counting batches from
# ren_len produced empty index slices once the split was exhausted. A trailing
# partial batch (fewer than batch_size indices) is skipped.
for i in range(len(train_idx)//batch_size):
    idx = train_idx[i*batch_size: (i+1)*batch_size]
    print(idx)
    render_loader, voxel_loader = dataset.get_train_data_loaders(idx)
    for it, (images, voxels) in enumerate(zip(render_loader, voxel_loader)):
        # Element 0 of each batch is moved to the device; element 1 is printed
        # so the image/voxel pairing can be checked by eye. The deprecated
        # Variable() wrappers are dropped: tensors are used directly.
        inputs = images[0]
        labels = voxels[0]
        print("matching: image = ", images[1], 'voxel = ',voxels[1])
        inputs = inputs.to(device)
        labels = labels.to(device)

    # test mode
    if i >3:
        break
    # test mode end
            

[37403 25742 27527 27895 27860  9526 42788 37721 25389  7588 12693 36383
 25394 24941  5439 34943]
matching: image =  tensor([37403, 25742, 27527, 27895, 27860,  9526, 42788, 37721, 25389,  7588,
        12693, 36383, 25394, 24941,  5439, 34943]) voxel =  tensor([37403, 25742, 27527, 27895, 27860,  9526, 42788, 37721, 25389,  7588,
        12693, 36383, 25394, 24941,  5439, 34943])
[11714 10591 34298 20655  3099 25104 31500 42511 30269 26848 35442 12160
 26979 39204 35912 40335]
matching: image =  tensor([11714, 10591, 34298, 20655,  3099, 25104, 31500, 42511, 30269, 26848,
        35442, 12160, 26979, 39204, 35912, 40335]) voxel =  tensor([11714, 10591, 34298, 20655,  3099, 25104, 31500, 42511, 30269, 26848,
        35442, 12160, 26979, 39204, 35912, 40335])
[16258 23812 20030 11554 30200 16067 28794 22299 33370  6314 30383  1675
   182  6343 14102 26371]
matching: image =  tensor([16258, 23812, 20030, 11554, 30200, 16067, 28794, 22299, 33370,  6314,
        30383,  1675,   182,  6343

In [None]:
def run(p=0):
    """Train a fresh resnet_18 with SGD for 10 epochs and collect error curves.

    Args:
        p: Dropout probability of the ``nn.Dropout`` module stored on the model
            as ``model.drop``.
            NOTE(review): whether resnet_18's forward pass uses ``model.drop``
            cannot be seen from this notebook — confirm.

    Returns:
        Tuple ``(top1trset, top5trset, top1set, top5set)`` of lists:
        the first two hold the running training top-1 / top-5 error (%) sampled
        every ``output_period`` batches, the last two hold the validation
        top-1 / top-5 error (%) with one entry per epoch.
    """
    import os  # local import: os is not among the notebook's top-level imports

    # Parameters
    num_epochs = 10 #origin = 10
    output_period = 100
    batch_size = 64 #origin = 100

    # setup the device for running
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = resnet_18()
    model = model.to(device)

    model.drop = nn.Dropout(p)
    print(model.drop)

    # Only the training loader is used from this call; the validation loader is
    # fetched from get_val_test_loaders inside the epoch loop.
    train_loader, _ = dataset.get_data_loaders(batch_size)
    num_train_batches = len(train_loader)

    criterion = nn.CrossEntropyLoss().to(device)

    # TODO: optimizer is currently unoptimized
    # there's a lot of room for improvement/different optimizers
    optimizer = optim.SGD(model.parameters(), lr=BASE_LR, momentum=0.9, weight_decay=DECAY_WEIGHT)

    # Checkpoints are written to models/; create it up front so the first
    # torch.save does not fail after a full epoch of training.
    os.makedirs("models", exist_ok=True)

    top1trset, top5trset = [], []
    top1set, top5set = [], []
    for epoch in range(1, num_epochs + 1):
        top1 = AverageMeter()
        top5 = AverageMeter()

        running_loss = 0.0
        batches_since_report = 0
        # Pass the current BASE_LR explicitly: the scheduler's signature default
        # is bound when it is defined and would ignore a BASE_LR reassigned by
        # the cell that launches this run.
        optimizer = exp_lr_scheduler(optimizer, epoch, init_lr=BASE_LR)
        for param_group in optimizer.param_groups:
            print('Current learning rate: ' + str(param_group['lr']))

        model.train()

        for batch_num, (inputs, labels) in enumerate(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs).float()
            loss = criterion(outputs, labels)

            # measure accuracy on the detached outputs
            prec1, prec5 = accuracy(outputs.detach(), labels, topk=(1, 5))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            batches_since_report += 1

            if batch_num % output_period == 0:
                # Average over the batches actually accumulated; the report at
                # batch 0 covers a single batch, not output_period of them.
                print('[%d:%.2f] loss: %.3f' % (
                    epoch, batch_num*1.0/num_train_batches,
                    running_loss/batches_since_report
                    ))
                running_loss = 0.0
                batches_since_report = 0
                top1trset.append(100-top1.avg)
                top5trset.append(100-top5.avg)
                print('top1 training err (%)= '+str(top1trset[-1]))
                print('top5 training err (%)= '+str(top5trset[-1]))

                gc.collect()

        gc.collect()
        # save after every epoch
        torch.save(model.state_dict(), "models/model.%d" % epoch)

        val_loader, test_loader = dataset.get_val_test_loaders(32)

        model.eval()

        top1test = AverageMeter()
        top5test = AverageMeter()
        # No gradients are needed for evaluation; skipping graph construction
        # saves memory without changing the outputs.
        with torch.no_grad():
            for img, labels in val_loader:
                x = img.to(device)
                y = labels.to(device)
                outs = model(x)
                # get top 1 / top 5 precision
                prec1test, prec5test = accuracy(outs, y, topk=(1, 5))
                top1test.update(prec1test.item(), x.size(0))
                top5test.update(prec5test.item(), x.size(0))

        top1set.append(100-top1test.avg)
        top5set.append(100-top5test.avg)
        print('top1 val err (%)= '+str(top1set[-1]))
        print('top5 val err (%)= '+str(top5set[-1]))

        gc.collect()
    return top1trset,top5trset,top1set,top5set

In [None]:
# Launch one training run with dropout p=0.2 after setting the global BASE_LR.
# The commented lines below are the author's notes on earlier configurations.
print('Starting training')
# SGD, w.o. WD, 3 fc = 35, dp = 0.5. 33
# SGD, w.o. WD=0.001, 3 fc = 3,random center in training, dp = 0.2
# SGD, w.o. WD=0.001, 3 fc = 3,random center in training, dp = 0.2, 35
# SGD, w.o. WD=0.001, 3 fc = 3,random center in training, dp = 0.2, BASE_LR = 0.1
# SGD, w.o. WD=0.001, 2 fc = 3,random center in training, dp = 0.2, BASE_LR = 0.1
BASE_LR = 0.001
top1trset,top5trset,top1set,top5set = run(0.2)
print('Training terminated')

In [None]:
# ADAM, w.o. WD=0.001, 2 fc = 3, dp = 0.2, BASE_LR = 0.001
# Display the four error-history lists (names assigned from run() in a training cell).
top1trset,top5trset,top1set,top5set #~43 epoch 8

In [None]:
# ADAM, w.o. WD=0.001, 2 fc, dp = 0.2, BASE_LR = 0.01
# Display the four error-history lists (names assigned from run() in a training cell).
top1trset,top5trset,top1set,top5set #~43 epoch 8

In [None]:
# Dropout sweep: three runs with BASE_LR = 0.01 and p = 0.2, 0.5, 0.8. Each
# run's four error histories are stored under a numbered suffix (1, 2, 3).
print('Starting training')
BASE_LR = 0.01
top1trset1,top5trset1,top1set1,top5set1 = run(0.2)
top1trset2,top5trset2,top1set2,top5set2 = run(0.5)
top1trset3,top5trset3,top1set3,top5set3 = run(0.8)
print('Training terminated')

In [None]:
# Learning-rate sweep without dropout (p=0): BASE_LR is reassigned before each
# run() and the results are stored under the suffixes 0001 and 00001.
print('Starting training')
BASE_LR = 0.001
top1trset0001,top5trset0001,top1set0001,top5set0001 = run(0)
BASE_LR = 0.0001
top1trset00001,top5trset00001,top1set00001,top5set00001 = run(0)
print('Training terminated')

In [None]:
# import the pre-trained model
# NOTE(review): resnet_18 is not defined in the visible cells — confirm where it comes from.
model = resnet_18()
model_path = 'models/trained/model.25'
# Load onto the CPU first; the model is moved to `device` after the weights are restored.
checkpoint = torch.load(model_path, map_location='cpu')
# The loaded object is passed directly to load_state_dict.
model.load_state_dict(checkpoint)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)#.half()


# write top 5 into result.txt
def test_accu(batch_size):
    """Write the top-5 predictions for every test batch to result.txt.

    Uses the module-level ``model`` and the test loader returned by
    ``dataset.get_val_test_loaders(batch_size)``. Each output line has the form
    ``test/<8-digit index>.jpg c1 c2 c3 c4 c5`` where the 1-based index is
    ``i * batch_size + b + 1`` for row ``b`` of batch ``i``.

    Args:
        batch_size: Batch size passed to the loader factory; also used to
            number the output lines.
    """
    _, test_loader = dataset.get_val_test_loaders(batch_size)
    print('number of test set: ' + str(len(test_loader)))

    model.eval()
    # Mode "w" truncates on open, so every run overwrites result.txt; the
    # with-block also closes the file if inference raises. Gradients are not
    # needed for prediction.
    with open("result.txt", "w") as result, torch.no_grad():
        for i, (img, _) in enumerate(test_loader):
            x = img.cuda() if torch.cuda.is_available() else img
            outs = model(x)
            _, pred5 = torch.topk(outs, 5)

            # Iterate over the rows actually present: a final batch smaller
            # than batch_size used to raise IndexError with range(batch_size).
            for b, top5 in enumerate(pred5.tolist()):
                n = i*batch_size + b + 1
                result.write('test/' + str(n).zfill(8) + '.jpg '
                             + ' '.join(str(int(e)) for e in top5) + '\n')
    
# Execute: generate result.txt using a batch size of 25
test_accu(25)
print('completed!')

In [None]:
def adjust_learning_rate(optimizer, epoch):
    """Step schedule: scale ``args.lr`` by 0.1 for each completed block of 30 epochs."""
    # NOTE(review): `args` is not defined in the visible cells — confirm it
    # exists in the kernel before calling this helper.
    completed_steps = epoch // 30
    lr = args.lr * (0.1 ** completed_steps)
    # Write the same rate into every parameter group.
    for group in optimizer.param_groups:
        group['lr'] = lr

In [None]:
def train(train_loader, model, criterion, optimizer, epoch, print_freq=None):
    """Run one training pass over `train_loader` and return the running meters.

    Args:
        train_loader: Iterable yielding ``(input, target)`` batches.
        model: Module to optimize; switched to train mode here.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only in the progress line.
        print_freq: Print progress every this many batches. ``None`` (default)
            falls back to ``args.print_freq``.
            NOTE(review): `args` is not defined in the visible cells — pass
            print_freq explicitly unless it exists in the kernel.

    Returns:
        Tuple ``(losses, top1, top5)`` of AverageMeter objects covering the
        whole pass.
    """
    if print_freq is None:
        print_freq = args.print_freq

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # `async` became a reserved word in Python 3.7; the keyword argument is
        # spelled non_blocking in current PyTorch.
        target = target.cuda(non_blocking=True)
        # NOTE(review): the input batch is not moved to the GPU here, as in the
        # original; presumably the model handles placement — confirm.
        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss; .item() replaces [0] indexing,
        # which fails on the 0-dim tensors produced by the loss and accuracy()
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(prec1.item(), images.size(0))
        top5.update(prec5.item(), images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses, top1=top1, top5=top5))

    # Return after the loop: the original return sat inside the loop body and
    # ended the pass after the first batch.
    return losses,top1,top5

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Top-1 error histories from the most recent single run() assignment.
shist1 = [np.array(h) for h in top1trset]
shist2 = [np.array(h) for h in top1set]
num_epochs=10

plt.title("Validation vs. Training Error")
plt.xlabel("Training Epochs")
plt.ylabel("Error")
for history, label in ((shist1, "Training"), (shist2, "Validation")):
    # linspace returns exactly len(history) x positions spread over
    # [1, num_epochs + 1). np.arange with a fractional step can return one
    # extra point through float rounding, which makes plt.plot fail on
    # mismatched lengths, and it divided by zero for an empty history.
    epochs_axis = np.linspace(1, num_epochs+1, len(history), endpoint=False)
    plt.plot(epochs_axis, history, label=label)
plt.legend()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Top-5 validation error histories, one per dropout rate.
# NOTE(review): top5set001 is not assigned in any visible cell — confirm which
# run produced it before relying on this figure.
shist1 = [np.array(h) for h in top5set001]
shist2 = [np.array(h) for h in top5set1]
shist3 = [np.array(h) for h in top5set2]
shist4 = [np.array(h) for h in top5set3]
num_epochs=10

# The curves are errors (100 - precision), so the title says "Error" to agree
# with the y label instead of the former "Accuracy".
plt.title("Validation Error vs. Dropout rate")
plt.xlabel("Training Epochs")
plt.ylabel("Validation Error")
curves = (
    (shist1, "dropout=0"),
    (shist2, "dropout=0.2"),
    (shist3, "dropout=0.5"),
    (shist4, "dropout=0.8"),
)
for history, label in curves:
    # linspace returns exactly len(history) x positions over [1, num_epochs + 1);
    # np.arange with a fractional step can return one extra point through
    # float rounding and divided by zero for an empty history.
    epochs_axis = np.linspace(1, num_epochs+1, len(history), endpoint=False)
    plt.plot(epochs_axis, history, label=label)
plt.legend()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Top-5 training error histories, one per dropout rate.
# NOTE(review): top5trset001 is not assigned in any visible cell — confirm
# which run produced it before relying on this figure.
shist1 = [np.array(h) for h in top5trset001]
shist2 = [np.array(h) for h in top5trset1]
shist3 = [np.array(h) for h in top5trset2]
shist4 = [np.array(h) for h in top5trset3]
num_epochs=10

# The curves are training errors, so the title now matches the y label; it
# previously read "Validation Accuracy".
plt.title("Training Error vs. Dropout rate")
plt.xlabel("Training Epochs")
plt.ylabel("Training Error")
curves = (
    (shist1, "dropout=0"),
    (shist2, "dropout=0.2"),
    (shist3, "dropout=0.5"),
    (shist4, "dropout=0.8"),
)
for history, label in curves:
    # linspace returns exactly len(history) x positions over [1, num_epochs + 1);
    # np.arange with a fractional step can return one extra point through
    # float rounding and divided by zero for an empty history.
    epochs_axis = np.linspace(1, num_epochs+1, len(history), endpoint=False)
    plt.plot(epochs_axis, history, label=label)
plt.legend()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Top-5 training error histories, one per learning rate.
# NOTE(review): top5trset0 and top5trset001 are not assigned in any visible
# cell — confirm which runs produced them. If validation curves were intended,
# the top5set* lists are the ones to plot instead.
shist1 = [np.array(h) for h in top5trset0]
shist2 = [np.array(h) for h in top5trset001]
shist3 = [np.array(h) for h in top5trset0001]
shist4 = [np.array(h) for h in top5trset00001]
num_epochs=10

# The plotted lists are training errors (100 - precision), so the title and
# y label describe that instead of the former "Validation Accuracy".
plt.title("Training Error vs. Learning rate")
plt.xlabel("Training Epochs")
plt.ylabel("Training Error")
curves = (
    (shist1, "lr=0.1"),
    (shist2, "lr=0.01"),
    (shist3, "lr=0.001"),
    (shist4, "lr=0.0001"),
)
for history, label in curves:
    # linspace returns exactly len(history) x positions over [1, num_epochs + 1);
    # np.arange with a fractional step can return one extra point through
    # float rounding and divided by zero for an empty history.
    epochs_axis = np.linspace(1, num_epochs+1, len(history), endpoint=False)
    plt.plot(epochs_axis, history, label=label)
plt.legend()
plt.show()

In [None]:
# Scratch cell: evaluates a range object with start 1, stop 10, step 2; nothing is assigned.
range(1,10,2)

In [None]:
# Display the four error-history lists (names assigned from run() in a training cell).
top1trset,top5trset,top1set,top5set

In [None]:
def validate(val_loader, model, criterion, print_freq=None):
    """Evaluate `model` on `val_loader` and return the running meters.

    Args:
        val_loader: Iterable yielding ``(input, target)`` batches.
        model: Module to evaluate; switched to eval mode here.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        print_freq: Print progress every this many batches. ``None`` (default)
            falls back to ``args.print_freq``.
            NOTE(review): `args` is not defined in the visible cells — pass
            print_freq explicitly unless it exists in the kernel.

    Returns:
        Tuple ``(losses, top1, top5)`` of AverageMeter objects.
    """
    if print_freq is None:
        print_freq = args.print_freq

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # torch.no_grad() replaces the removed Variable(..., volatile=True) flag:
    # no autograd graph is built during evaluation.
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            # `async` became a reserved word in Python 3.7; the keyword
            # argument is spelled non_blocking in current PyTorch.
            target = target.cuda(non_blocking=True)
            # NOTE(review): the input batch is not moved to the GPU here, as in
            # the original; presumably the model handles placement — confirm.

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss; .item() replaces [0] indexing,
            # which fails on the 0-dim tensors produced by the loss and accuracy()
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(prec1.item(), images.size(0))
            top5.update(prec5.item(), images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       i, len(val_loader), batch_time=batch_time, loss=losses,
                       top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    return losses,top1,top5


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialize `state` with torch.save and optionally keep a "best" copy.

    This repeats the definition given in an earlier cell of the notebook;
    whichever cell runs last is the one in effect.

    Args:
        state: Object handed to ``torch.save``.
        is_best: When truthy, the file just written is also copied to
            ``filename + '_best.pth.tar'``.
        filename: Path prefix; ``'_latest.pth.tar'`` is appended to it.
    """
    # Local import: the notebook's import cell does not bring in shutil, so the
    # is_best branch previously raised NameError.
    import shutil

    latest_path = filename + '_latest.pth.tar'
    torch.save(state, latest_path)
    if is_best:
        shutil.copyfile(latest_path, filename + '_best.pth.tar')