# Torch
## Check GPU¶

In [1]:
import torch
import numpy as np
import sys
sys.path.append('..')

from torchlib.utils import list_device,set_device

# S1: check GPU
#list_device()

# S2: default parameters
set_device(1)
np.set_printoptions(precision = 2)
torch.set_default_dtype(torch.float32)
torch.set_printoptions(precision=4)
torch.backends.cudnn.benchmark = True
torch.set_printoptions(sci_mode=False)

Using Device 1 : TITAN Xp


the rosdep view is empty: call 'sudo rosdep init' and 'rosdep update'


# Set Arguments

In [2]:
import argparse
import sys
import os
import time
import pickle

parser = argparse.ArgumentParser()

'''Training Parameters'''
parser.add_argument('--batch_size', type=int, default=5000, help='minibatch size')
parser.add_argument('--num_epochs', type=int, default=80, help='number of epochs')
parser.add_argument('--grad_clip', type=float, default=5., help='clip gradients at this value')
parser.add_argument('--learning_rate', type=float, default=0.01, help='learning rate')
parser.add_argument('--learning_rate_clip', type=float, default=0.0000001, help='learning rate clip')
parser.add_argument('--decay_rate', type=float, default=.9, help='decay rate for rmsprop')
parser.add_argument('--weight_decay', type=float, default=.0001, help='decay rate for rmsprop')
parser.add_argument('--batch_norm_decay', type=float, default=.999, help='decay rate for rmsprop')
parser.add_argument('--keep_prob', type=float, default=1.0, help='dropout keep probability')
parser.add_argument('--lamda_weights', type=float, default=.01, help='lamda weight')
parser.add_argument('--data_argumentation', type=bool, default=True, help='whether do data argument')
parser.add_argument('--is_normalization', type=bool, default=True, help='whether do data nomalization')
parser.add_argument('--target_image_size', default=[300, 300], nargs=2, type=int, help='Input images will be resized to this for data argumentation.')
parser.add_argument('--output_dim', default=3, type=int, help='output dimention.')
parser.add_argument('--feat_dim', default=128, type=int, help='feature dimention.')

'''Configure'''
parser.add_argument('--network', type=str, default='vggnet_localization')
parser.add_argument('--model_dir', type=str, default='/notebooks/global_localization/gps_net_torch', help='rnn, gru, or lstm')

parser.add_argument('--train_dataset', type=str, default = ['/notebooks/michigan_nn_data/train_dense_old'])

parser.add_argument('--norm_tensor', type=str, default = ['/notebooks/global_localization/norm_mean_std.pt'])

parser.add_argument('--seed', default=1337, type=int)
parser.add_argument('--save_every', type=int, default=1000, help='save frequency')
parser.add_argument('--display', type=int, default=10, help='display frequency')
parser.add_argument('--tensorboard', type=bool, default=True, help='open tensorboard')
parser.add_argument('--cuda_device', type=int, default=1, help='cuda device')

sys.argv = ['']
args = parser.parse_args()

# Load Dataset

In [3]:
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torchvision.transforms as transforms
import tf.transformations as tf_tran
from tqdm import tqdm
from PIL import Image
import numpy as np
import random

import gpytorch

import torch.nn as nn
import torch.optim as optim
from torchlib import resnet, vggnet
from torchlib.cnn_auxiliary import normalize, denormalize_navie, denormalize, get_relative_pose, translational_rotational_loss
from torchlib.utils import LocalizationDataset, display_loss, data2tensorboard
import time

path = args.train_dataset[0]
x_dense = torch.load(os.path.join(path,'x.pt'))#[::10,:]
y_dense = torch.load(os.path.join(path,'y.pt'))#[::10,:]
dataset = TensorDataset(x_dense, y_dense)

[args.norm_mean, args.norm_std] = torch.load(*args.norm_tensor)
print('Load norm and std:',*args.norm_tensor)

dataloader = DataLoader(dataset, batch_size=args.batch_size, \
                        shuffle=True, num_workers=0, \
                        drop_last=True, pin_memory=True)

Load norm and std: /notebooks/global_localization/norm_mean_std.pt


# Define Model

In [4]:
from torch.cuda.amp import autocast, GradScaler
from torchlib.GPs import Backbone, NN, GPNode, BaseModule

class NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.global_context = vggnet.vggnet(input_channel=2048,opt="context")
        self.global_regressor = vggnet.vggnet(opt="regressor")
        
    def forward(self,input_data):
        #context_feat = self.global_context(input_data)
        context_feat = input_data
        output,feature_t, feature_r = self.global_regressor(context_feat)
        return output, feature_t, feature_r

class GP(gpytorch.models.ApproximateGP):
    def __init__(self, inducing_points, output_dim=3):
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(-2), batch_shape=torch.Size([output_dim])
        )
        variational_strategy = gpytorch.variational.MultitaskVariationalStrategy(
            gpytorch.variational.VariationalStrategy(
                self, inducing_points, variational_distribution, learn_inducing_locations=True
            ), num_tasks=output_dim
        )
        super().__init__(variational_strategy)
        self.mean_module = gpytorch.means.ZeroMean()
        #self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([output_dim]))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([output_dim])),
            batch_shape=torch.Size([output_dim]))

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

class GPNode(nn.Module):
    def __init__(self, inducing_points):
        super().__init__()
        output_dim = inducing_points.shape[0]
        
        self.fc = nn.Linear(128,128)

        self.gp = GP(inducing_points,output_dim)
        self.likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=output_dim) 
        
    def forward(self,input_data):
        input_data = self.fc(input_data)
        output = self.gp(input_data)
        return output

class Model(nn.Module):
    def __init__(self, num_gp = 20):
        super().__init__()
        self.backbone = Backbone()
        self.nn = NN()
        self.gps = nn.ModuleList()
        
        self.num_gp = num_gp
        
        for i in range(self.num_gp):
            inducing_points = torch.zeros(3, 1000, 128)
            gp = GPNode(inducing_points)
            self.gps.append(gp)
        
    def forward_nn(self, input_data):
        #dense_feat = self.backbone(input_data)
        dense_feat = input_data
        output, feature_t, feature_r = self.nn(dense_feat)
        rot_pred = torch.split(output, [3, 4], dim=1)[1] # 4-dimention            
        return feature_t, rot_pred
    
    def forward_gp(self,gp,trans_feat):
        trans_pred = gp(trans_feat)
        return trans_pred
    
class PosePredictor(BaseModule):
    def __init__(self, norm_mean, norm_std, args, num_gp = 20,
                 regressor_context_rate = [0.0,0.0],
                 is_training=True, train_rot = True):
        
        super().__init__(norm_mean, norm_std, args)
        self.num_gp = num_gp
        self.model = Model(num_gp).to(self.device)
        self.train_rot = train_rot
        self.num_sub_data = self.args.batch_size//num_gp
        
        # disable learning backbone
        self.disable_requires_grad(self.model.backbone)
        
        if is_training:
            # training tool
            self.optimizer = optim.Adam(self._optimize(regressor_context_rate))
            self.scheduler = optim.lr_scheduler.LambdaLR(optimizer=self.optimizer,
                                                             lr_lambda=lambda epoch: args.decay_rate**epoch)
        else:
            self.disable_requires_grad(self.model)
            
    def _optimize(self, regressor_context_rate = [0.0,0.0]):
        optimizer = [
                {'params': self.model.gps.parameters(), \
                 'lr': self.args.learning_rate,'weight_decay':self.args.weight_decay}]
            
        # NN
        if regressor_context_rate[0]!=0:
            optimizer += [{'params': self.model.nn.global_regressor.parameters(), \
                 'lr': args.learning_rate * regressor_context_rate[0],'weight_decay':args.weight_decay}]
            print('Regressor learn rate:',regressor_context_rate[0])
        else:
            self.disable_requires_grad(self.model.nn.global_regressor)
                
        if regressor_context_rate[1]!=0:
            optimizer += [{'params': self.model.nn.global_context.parameters(), \
                 'lr': args.learning_rate * regressor_context_rate[1],'weight_decay':args.weight_decay}]
            print('Context learn rate:',regressor_context_rate[1])
            self.train_rot = True
        else:
            self.disable_requires_grad(self.model.nn.global_context)
            
        
        if not self.train_rot and regressor_context_rate[1]==0.0:
            self.disable_requires_grad(self.model.nn.global_regressor.regressor.fc1_rot)
            self.disable_requires_grad(self.model.nn.global_regressor.regressor.fc2_rot)
            self.disable_requires_grad(self.model.nn.global_regressor.regressor.fc3_rot)
            self.disable_requires_grad(self.model.nn.global_regressor.regressor.logits_r)
                
        return optimizer
    
    def train(self, x, y):
        # Step 0: zero grad
        self.optimizer.zero_grad()
        
        start = time.time()
        # Step 1: get data
        x,y = x.to(self.device),y.to(self.device)
        if self.args.is_normalization:
            y = normalize(y,self.norm_mean, self.norm_std)
            
        # Step 2: training
        assert self.model.training == True
        
        trans_loss = torch.tensor(0.).to(self.device)
        
        trans_target, rot_target = torch.split(y, [3, 4], dim=1)
        trans_feat, rot_pred = self.model.forward_nn(x)
        rot_loss = self._nn_loss(rot_pred,rot_target)
        
        mask = torch.randperm(self.args.batch_size)
        for i,gp in enumerate(self.model.gps):
            sampled_mask = mask[self.num_sub_data*i:self.num_sub_data*(i+1)]
            sub_x = trans_feat[sampled_mask]
            sub_y = trans_target[sampled_mask]
            gp_loss = self._gp_loss(gp,sub_x,sub_y)
            trans_loss += gp_loss
        trans_loss = trans_loss/self.model.num_gp
        
        total_loss = trans_loss + self.args.lamda_weights * rot_loss
        
        batch_time = time.time() - start
        
        #Step 3: update
        total_loss.backward()
        self.optimizer.step()
        
        return float(total_loss), batch_time    
    
    def _nn_loss(self, rot_pred, rot_target):
        rot_loss = 1. - torch.mean(torch.square(torch.sum(torch.mul(rot_pred,rot_target),dim=1)))
        return rot_loss
        
    def _gp_loss(self, gp, trans_feat, trans_target):
        # predict
        trans_pred = self.model.forward_gp(gp,trans_feat)
        
        #num_data = int(min(len(dataloader)*args.batch_size,len(dataset))*self.model.sub_batch_rate)
        num_data = min(len(dataloader)*self.args.batch_size,len(dataset))//self.num_gp
        mll = gpytorch.mlls.PredictiveLogLikelihood(gp.likelihood, gp.gp, num_data = num_data)
        
        # trans loss
        trans_loss = -1.*mll(trans_pred, trans_target)
        
        return trans_loss
    
    def _eval_gp(self, gp, trans_pred):
        c_mean, c_var = trans_pred.mean, trans_pred.variance
        y_mean, y_var = gp.likelihood(trans_pred).mean, gp.likelihood(trans_pred).variance
        
        return y_mean, c_mean, c_var
    
    def _sample(self, mean, var, num_sample = 100):
        dist = torch.distributions.Normal(mean, var)
        samples = dist.sample([num_sample])
        return samples

    def eval_forward(self, x, y, num_sample = 100, output_denormalize = True):
        # Step 1: get data
        x = x.to(self.device)
        
        # Step 2: forward
        assert self.model.training == False
        trans_feat, rot_pred = self.model.forward_nn(x)
        
        trans_preds = 0
        trans_means = 0
        trans_vars = 0
        for gp in self.model.gps:
            trans_pred = self.model.forward_gp(gp,trans_feat)
            trans_pred, trans_mean, trans_var = self._eval_gp(gp, trans_pred)
            trans_preds += trans_pred * 1/trans_var
            trans_means += trans_mean * 1/trans_var
            trans_vars += 1/trans_var
         
        trans_vars = 1/trans_vars
        trans_preds *= trans_vars
        trans_means *= trans_vars
        
        if self.args.is_normalization and output_denormalize:
            trans_preds = denormalize_navie(trans_preds, self.norm_mean, self.norm_std)
            trans_means = denormalize_navie(trans_means, self.norm_mean, self.norm_std)
            trans_vars = trans_vars.mul(self.norm_std)
        
        samples = self._sample(trans_means, trans_vars, num_sample)
        
        # Step 3: split output
        trans_target, rot_target = torch.split(y, [3, 4], dim=1)
        
        return trans_preds, rot_pred, trans_target, rot_target, samples

In [5]:
trainer = PosePredictor(args.norm_mean,args.norm_std,args,
                        regressor_context_rate = [0.,0.], train_rot = False)
#trainer.load_model('pretrained_gp20_fix.pth')
trainer.load_model('model-fast-zero-mean-fc.pth',strict = False)
#trainer.show_require_grad()

Successfully loaded model to TITAN Xp.


In [6]:
'''
state_dict = torch.load(os.path.join(args.model_dir, 'pretrained.pth'))
state_dict = trainer._from_old_model(state_dict)
for key in list(state_dict):
    if 'gp' in key or 'likelihood' in key:
        state_dict.pop(key)
trainer.model.load_state_dict(state_dict, strict=False)
'''

"\nstate_dict = torch.load(os.path.join(args.model_dir, 'pretrained.pth'))\nstate_dict = trainer._from_old_model(state_dict)\nfor key in list(state_dict):\n    if 'gp' in key or 'likelihood' in key:\n        state_dict.pop(key)\ntrainer.model.load_state_dict(state_dict, strict=False)\n"

In [11]:
list(trainer.model.gps.named_parameters())

[('0.fc.weight',
  Parameter containing:
  tensor([[     0.4046,      0.4207,     -0.0154,  ...,      0.0019,
                0.4005,      0.1801],
          [     0.5204,      0.5434,     -0.4008,  ...,      0.0002,
                0.4568,      0.2893],
          [    -0.4439,     -0.5083,      0.5050,  ...,      0.0839,
               -0.5123,      0.0222],
          ...,
          [    -0.4321,     -0.5253,     -0.5349,  ...,      0.0047,
               -0.5353,      0.0185],
          [    -0.4439,     -0.5115,     -0.4452,  ...,     -0.1687,
               -0.5088,      0.0005],
          [     0.4401,      0.4433,      0.4658,  ...,      0.0014,
                0.3794,     -0.0211]], device='cuda:1', requires_grad=True)),
 ('0.fc.bias',
  Parameter containing:
  tensor([ 0.5023,  0.5398,  0.5448,  0.3759, -0.5002, -0.5285,  0.4919,  0.4678,
          -0.4694,  0.5164,  0.0569,  0.4849, -0.5386,  0.5403,  0.2692,  0.4419,
          -0.4841, -0.4884,  0.4128,  0.5510, -0.4631, -0.5

# Training

In [7]:
if args.tensorboard:
    import os
    os.system('rm -rf runs/gps')
    from torch.utils.tensorboard import SummaryWriter
    writer = SummaryWriter('runs/gps')
import time
time.sleep(3)

In [8]:
trainer.model.train()
for e in range(args.num_epochs):
    torch.manual_seed(args.seed)
    minibatch_iter = tqdm(dataloader)
    train_loss = 0.
    for b, data in enumerate(minibatch_iter):
        x,y = data
        
        loss, batch_time = trainer.train(x,y)
        
        train_loss += loss
        ave_loss = train_loss/(b+1)
        step = e*len(dataloader)+(b+1)
        # display data
        minibatch_iter.set_postfix(ave = ave_loss, loss=loss,lr=trainer.scheduler.get_last_lr()[0])
        # tensorboard
        trainer.data2tensorboard(writer,'training loss',{'item loss':loss,'batch loss':ave_loss},step)
        # save model
        #trainer.save_model_step(e,step)
        # step scheduler
        trainer.schedule_step(step,51)

 24%|██▎       | 12/51 [00:49<02:41,  4.15s/it, ave=4.61, loss=4.55, lr=0.01]


KeyboardInterrupt: 

In [9]:
trainer.save_model('model-fast-zero-mean-fc.pth')

Saving model to model-fast-zero-mean-fc.pth


In [7]:
x,y = next(iter(dataloader))
trainer.model.eval()
trans_preds, rot_pred, trans_target, rot_target, samples = trainer.eval_forward(x,y)

In [8]:
trans_preds

tensor([[  75.0240,  238.4550,   -2.2583],
        [-246.9995,  445.7235,  -12.2136],
        [-177.5730,  588.5127,  -12.3354],
        ...,
        [ -46.7197,  467.6529,  -10.7595],
        [ -33.5037,  505.6027,   -1.5013],
        [-278.8232,  437.5095,  -12.2012]], device='cuda:1',
       grad_fn=<AddBackward0>)

In [9]:
trans_target

tensor([[  76.4018,  239.5712,   -2.2569],
        [-247.7184,  445.7048,  -12.2009],
        [-177.7762,  588.2364,  -12.3849],
        ...,
        [ -48.2569,  465.5806,  -10.6875],
        [ -34.3161,  505.4826,   -1.4701],
        [-277.7613,  438.0477,  -12.2082]])