In [1]:
from __future__ import print_function, absolute_import
import gc
import sys
from torch.autograd import Variable
import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler
torch.backends.cudnn.benchmark=True

import time
import torchvision.transforms as transforms
import torchvision.models as models
import numpy as np
import random
import cv2
from PIL import Image


In [2]:
# test CUDA available
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
print('CUDA enable: ', torch.cuda.is_available())


CUDA enable:  True


In [3]:
# import dataset from ./lib/dataset.py
import lib.dataset as dataset
from models.__init__ import load_model
from lib.config import cfg
from lib.solver import Solver


In [4]:
# Hyperparameter
BASE_LR = cfg.TRAIN.DEFAULT_LEARNING_RATE
EPOCH_DECAY = 10 # number of epochs after which the Learning rate is decayed exponentially.
DECAY_WEIGHT = cfg.TRAIN.WEIGHT_DECAY
cfg.CONST.BATCH_SIZE = 16

In [5]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [6]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename + '_latest.pth.tar')
    if is_best:
        shutil.copyfile(filename + '_latest.pth.tar', filename + '_best.pth.tar')


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res


In [7]:
# training hyperparameters
batch_size = cfg.CONST.BATCH_SIZE
train_val_ratio = cfg.TRAIN.DATASET_PORTION[1]

ren_len = dataset.ren_dataset.__len__()
vox_len = dataset.vox_dataset.__len__()
print(ren_len,vox_len)

dict_ren1 = dataset.ren_dataset.class_to_idx
list_ren = [[]]*(vox_len+1)

for (path, idx) in dataset.ren_dataset.samples:
    list_ren[idx] = list(set(list_ren[idx]))
    list_ren[idx].append(path)


dict_vox = {idx:path for (path, idx) in dataset.vox_dataset.samples}
# print(list_ren[202][2])
# print(dataset.center_crop(Image.open(list_ren[202][2])))
# print(dict_vox[202])


1050816 43783


In [8]:
# This function changes the learning rate over the training model.
def exp_lr_scheduler(optimizer, epoch, init_lr=BASE_LR, lr_decay_epoch=EPOCH_DECAY):
    """Decay learning rate by a factor of DECAY_WEIGHT every lr_decay_epoch epochs."""
    lr = init_lr * (DECAY_WEIGHT**(epoch // lr_decay_epoch))

    if epoch % lr_decay_epoch == 0:
        print('LR is set to {}'.format(lr))

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    return optimizer


In [9]:
def testDataInput():
    NetClass = load_model(cfg.CONST.NETWORK_CLASS)
    # print('Network definition: \n')
    net = NetClass()
    # print(net)

    # start an epoch
    # slice training and validation index
    rand_idx = np.random.permutation(np.arange(vox_len))
    thr = int(train_val_ratio*len(rand_idx))
    train_idx = rand_idx[:thr]
    val_idx = rand_idx[thr:]

    batch_size = 4
    max_num_views = 5

    dict_vox = {idx:path for (path, idx) in dataset.vox_dataset.samples}


    for i in range(thr//batch_size):

        # for each batch
        num_views = random.randint(2,max_num_views)

        idx = train_idx[i*batch_size: (i+1)*batch_size]
        voxel_loader = dataset.get_vox_data_loaders(idx)

        label_list = []
        for it, (labels, model_ids) in enumerate(voxel_loader):

            batch_image = []
            for model_id in model_ids:
                image_list = []
                image_ids = np.random.choice(cfg.TRAIN.NUM_RENDERING, num_views)
    #             print(image_ids)
                for n_view in range(num_views):
                    image_list.append(dataset.center_crop(Image.open(list_ren[(model_id).item()][image_ids[n_view]]))[:3])

                image_1 = torch.stack(image_list , dim=0)
                batch_image.append(image_1)
    #             print(image_1.shape)
            batch_image = torch.stack(batch_image,dim=0)
            batch_image = batch_image.transpose(1,0)
    #         batch_image = batch_image.transpose(4,2)
    #         batch_image = batch_image.transpose(4,3)

            labels0 = (labels < 1)        
            batch_voxel = torch.stack((labels0.float(),labels.float()),dim=0)
            batch_voxel = batch_voxel.transpose(1,0)        

            inputs=Variable(batch_image)
            labels=Variable(batch_voxel)
            inputs = inputs.to(device)
            labels = labels.to(device)

        print(inputs.shape)
        print(inputs[0].shape)
        print(labels.shape)
        # test mode
        if i ==3:
            break
        # test mode end            

In [10]:
def run():
    # Parameters
    num_epochs = 10
    output_period = 100
    batch_size = cfg.CONST.BATCH_SIZE
    
    # setup the device for running
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    
    NetClass = load_model(cfg.CONST.NETWORK_CLASS)
    model = NetClass().to(device)
    
    criterion = nn.CrossEntropyLoss().to(device)
    
    optimizer = optim.Adam(model.parameters(), lr=cfg.TRAIN.DEFAULT_LEARNING_RATE,weight_decay=cfg.TRAIN.WEIGHT_DECAY)
    top1trset,top5trset = [],[]
    top1set,top5set = [],[]
    epoch = 1
    while epoch <= num_epochs:
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        
        running_loss = 0.0
        optimizer = exp_lr_scheduler(optimizer, epoch)
        for param_group in optimizer.param_groups:
            print('Current learning rate: ' + str(param_group['lr']))
            
        model.train()

        # start an epoch
        # slice training and validation index
        rand_idx = np.random.permutation(np.arange(vox_len))
        thr = int(train_val_ratio*len(rand_idx))
        train_idx = rand_idx[:thr]
        val_idx = rand_idx[thr:]

        max_num_views = 5

        dict_vox = {idx:path for (path, idx) in dataset.vox_dataset.samples}

        num_train_batches = thr//batch_size
        for i in range(num_train_batches):

            # for each batch
            num_views = random.randint(2,max_num_views)

            idx = train_idx[i*batch_size: (i+1)*batch_size]
#             print(idx)
            voxel_loader = dataset.get_vox_data_loaders(idx)

            label_list = []
            for it, (labels, model_ids) in enumerate(voxel_loader):

                batch_image = []
                for model_id in model_ids:
                    image_list = []
                    image_ids = np.random.choice(cfg.TRAIN.NUM_RENDERING, num_views)
        #             print(image_ids)
                    for n_view in range(num_views):
                        image_list.append(dataset.center_crop(Image.open(list_ren[(model_id).item()][image_ids[n_view]]))[:3])

                    image_1 = torch.stack(image_list , dim=0)
                    batch_image.append(image_1)
        #             print(image_1.shape)
                batch_image = torch.stack(batch_image,dim=0)
                batch_image = batch_image.transpose(1,0)

                labels0 = (labels < 1)        
                batch_voxel = torch.stack((labels0.float(),labels.float()),dim=0)
                batch_voxel = batch_voxel.transpose(1,0)        

                inputs=Variable(batch_image)
                labels=Variable(labels)
                inputs = inputs.to(device)
                labels = labels.to(device).long()
                
                outputs = model(inputs,test=True)
                
#                 print('outputs[0].shape: ',outputs[0].shape)
#                 print('labels.shape: ',labels.shape)
                loss = criterion(outputs[0], labels)
            
    
                # measure accuracy and record loss
#                 prec1 = accuracy(outputs[0].data, labels, topk=(1,))
                losses.update(loss.data.item(), inputs.size(0))
#                 top1.update(prec1.item(), inputs.size(0))

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            
                running_loss += loss.item()
        
                if i % output_period == 0:
                    print('[%d:%.2f] loss: %.3f' % (
                        epoch, i*1.0/num_train_batches,
                        running_loss/output_period
                        ))
                    running_loss = 0.0

                    gc.collect()
                
            

        gc.collect()
        # save after every epoch
        torch.save(model.state_dict(), "models/model.%d" % epoch)
        
        
        model.eval()
        
        batch_size_val = batch_size
        for i in range((len(rand_idx)-thr)//batch_size_val):
            # for each batch
            num_views = 1

            idx = val_idx[i*batch_size_val: (i+1)*batch_size_val]
            voxel_loader = dataset.get_vox_data_loaders(idx)

            label_list = []
            for it, (labels, model_ids) in enumerate(voxel_loader):

                batch_image = []
                for model_id in model_ids:
                    image_list = []
                    image_ids = np.random.choice(cfg.TRAIN.NUM_RENDERING, num_views)
                    for n_view in range(num_views):
                        image_list.append(dataset.center_crop(Image.open(list_ren[(model_id).item()][image_ids[n_view]]))[:3])

                    image_1 = torch.stack(image_list , dim=0)
                    batch_image.append(image_1)

                batch_image = torch.stack(batch_image,dim=0)
                batch_image = batch_image.transpose(1,0)

                labels0 = (labels < 1)        
                batch_voxel = torch.stack((labels0.float(),labels.float()),dim=0)
                batch_voxel = batch_voxel.transpose(1,0)        

                inputs=Variable(batch_image)
                labels=Variable(labels)
                inputs = inputs.to(device)
                labels = labels.to(device).long()

                outputs = model(inputs)
                loss = criterion(outputs[0], labels)
    
                # measure accuracy and record loss
#                 prec1 = accuracy(outputs[0].data, labels, topk=(1,))
                losses.update(loss.data.item(), inputs.size(0))
#                 top1.update(prec1.item(), inputs.size(0))
            
        

        print('test loss = '+ losses.avg)
        
        gc.collect()
        epoch += 1

In [11]:
print('Starting training')

BASE_LR = cfg.TRAIN.DEFAULT_LEARNING_RATE
run()
print('Training terminated')


Starting training
cuda:0

Your Model is "ResidualGRUNet" Initializing

Initializing "Encoder"

Initializing "Decoder"
Current learning rate: 0.0001
[1:0.00] loss: 0.007
[1:0.05] loss: 0.420
[1:0.09] loss: 0.393
[1:0.14] loss: 0.394
[1:0.18] loss: 0.387
[1:0.23] loss: 0.388
[1:0.27] loss: 0.391
[1:0.32] loss: 0.390
[1:0.37] loss: 0.390
[1:0.41] loss: 0.392
[1:0.46] loss: 0.388
[1:0.50] loss: 0.387
[1:0.55] loss: 0.387
[1:0.59] loss: 0.387
[1:0.64] loss: 0.390
[1:0.69] loss: 0.385
[1:0.73] loss: 0.385
[1:0.78] loss: 0.388
[1:0.82] loss: 0.388
[1:0.87] loss: 0.383
[1:0.91] loss: 0.385
[1:0.96] loss: 0.384


TypeError: must be str, not float