# Torch
## Check GPU

In [1]:
import torch
import numpy as np
import sys
sys.path.append('..')

from torchlib.utils import list_device,set_device

# S1: check GPU
#list_device()

# S2: default parameters
# Device selection goes through the project helper; the recorded cell output
# reports "Using Device 1".
set_device(1)

# Display: 2 decimals for numpy arrays; 4 decimals and no scientific notation
# for tensors (both tensor options set in a single call).
np.set_printoptions(precision=2)
torch.set_printoptions(precision=4, sci_mode=False)

# Numeric / backend defaults.
torch.set_default_dtype(torch.float32)
torch.backends.cudnn.benchmark = True

the rosdep view is empty: call 'sudo rosdep init' and 'rosdep update'


Using Device 1 : TITAN Xp


# Set Arguments

In [2]:
import argparse
import sys
import os
import time
import pickle

def _str2bool(value):
    """Parse a command-line boolean such as 'true', 'False', '1' or 'no'.

    argparse's ``type=bool`` is a trap: ``bool('False')`` is ``True`` because every
    non-empty string is truthy, so such a flag can never be switched off.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised boolean spelling.
    """
    if isinstance(value, bool):
        return value
    lowered = str(value).strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got {!r}'.format(value))


parser = argparse.ArgumentParser()

# ---- Training parameters ----
parser.add_argument('--batch_size', type=int, default=60, help='minibatch size')
parser.add_argument('--num_epochs', type=int, default=200, help='number of epochs')
parser.add_argument('--grad_clip', type=float, default=5., help='clip gradients at this value')
parser.add_argument('--learning_rate', type=float, default=0.000005, help='learning rate')
parser.add_argument('--learning_rate_clip', type=float, default=0.0000001, help='learning rate clip')
# decay_rate feeds the LambdaLR schedule (decay_rate ** step); weight_decay goes to the optimizer.
parser.add_argument('--decay_rate', type=float, default=.9,
                    help='multiplicative learning-rate decay factor used by the LR scheduler')
parser.add_argument('--weight_decay', type=float, default=.0001,
                    help='weight decay passed to the optimizer')
parser.add_argument('--batch_norm_decay', type=float, default=.999,
                    help='batch normalization decay rate')
parser.add_argument('--keep_prob', type=float, default=1.0, help='dropout keep probability')
parser.add_argument('--lamda_weights', type=float, default=0.01, help='weight of rotation error')
parser.add_argument('--data_argumentation', type=_str2bool, default=True, help='whether do data argument')
parser.add_argument('--is_normalization', type=_str2bool, default=True, help='whether do data normalization')
parser.add_argument('--target_image_size', default=[300, 300], nargs=2, type=int,
                    help='Input images will be resized to this for data argumentation.')

# ---- Configuration ----
parser.add_argument('--network', type=str, default='vggnet_localization')
parser.add_argument('--model_dir', type=str, default='/notebooks/global_localization/dual_resnet_torch',
                    help='model directory')

# Downstream code takes len() of this value and passes it as a list of directories,
# so it must stay a list when supplied on the command line (nargs='+').
parser.add_argument('--train_dataset', type=str, nargs='+',
                    default=['/notebooks/michigan_nn_data/2012_01_08',
                             '/notebooks/michigan_nn_data/2012_01_15',
                             '/notebooks/michigan_nn_data/2012_01_22',
                             '/notebooks/michigan_nn_data/2012_02_02',
                             '/notebooks/michigan_nn_data/2012_02_04',
                             '/notebooks/michigan_nn_data/2012_02_05',
                             '/notebooks/michigan_nn_data/2012_03_31',
                             '/notebooks/michigan_nn_data/2012_09_28'],
                    help='one or more dataset directories')
# Smaller alternatives kept for quick experiments:
# parser.add_argument('--train_dataset', type=str, default=['/notebooks/michigan_nn_data/test'])
# parser.add_argument('--train_dataset', type=str, default=['/notebooks/michigan_nn_data/2012_01_08'])

# Consumed as ``*args.norm_tensor`` downstream, i.e. a one-element list (nargs=1).
parser.add_argument('--norm_tensor', type=str, nargs=1,
                    default=['/notebooks/global_localization/norm_mean_std.pt'],
                    help='file used to save/load the normalization mean and std')

parser.add_argument('--seed', default=1337, type=int)
parser.add_argument('--save_every', type=int, default=3000, help='save frequency')
parser.add_argument('--display', type=int, default=20, help='display frequency')
parser.add_argument('--tensorboard', type=_str2bool, default=True, help='open tensorboard')
parser.add_argument('--cuda_device', type=int, default=1, help='cuda device')

# Inside a notebook the process argv belongs to the kernel launcher, so parse an
# explicit empty list (defaults only) instead of overwriting sys.argv.
args = parser.parse_args(args=[])

# Load Dataset

In [3]:
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import tf.transformations as tf_tran
from tqdm import tqdm
from PIL import Image
import numpy as np
import random

import torch.nn as nn
import torch.optim as optim
from torchlib import resnet, vggnet, cnn_auxiliary
from torchlib.cnn_auxiliary import normalize, denormalize, get_relative_pose, translational_rotational_loss
from torchlib.utils import LocalizationDataset, display_loss, data2tensorboard
import time

# Seed every RNG in use so that shuffling is repeatable between runs.
# --seed was parsed but never applied before.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

transform = transforms.Compose([transforms.ToTensor()])
dataset = LocalizationDataset(dataset_dirs=args.train_dataset,
                              image_size=args.target_image_size,
                              transform=transform, get_pair=True)

# args.norm_tensor is a one-element list holding the statistics file path.
norm_path = args.norm_tensor[0]
if len(args.train_dataset) > 7:
    # More than 7 directories configured (the default list has 8):
    # recompute the target statistics from the dataset and cache them on disk.
    [args.norm_mean, args.norm_std] = [torch.tensor(x) for x in dataset.get_norm()]
    torch.save([args.norm_mean, args.norm_std], norm_path)
    print('Save norm and std:', norm_path)
else:
    # Fewer directories: reuse the statistics cached by an earlier run.
    [args.norm_mean, args.norm_std] = torch.load(norm_path)
    print('Load norm and std:', norm_path)

dataloader = DataLoader(dataset, batch_size=args.batch_size,
                        shuffle=True, num_workers=0,
                        drop_last=True, pin_memory=True)

100%|██████████| 16446/16446 [00:22<00:00, 722.95it/s]
100%|██████████| 22584/22584 [00:31<00:00, 715.98it/s]
100%|██████████| 18655/18655 [00:25<00:00, 719.23it/s]
100%|██████████| 17310/17310 [00:24<00:00, 721.11it/s]
100%|██████████| 10766/10766 [00:14<00:00, 723.18it/s]
100%|██████████| 14878/14878 [00:22<00:00, 659.56it/s]
100%|██████████| 13452/13452 [00:20<00:00, 665.05it/s]
100%|██████████| 14037/14037 [00:25<00:00, 548.13it/s]


Save norm and std: /notebooks/global_localization/norm_mean_std.pt


# Define Model

In [4]:
from torch.cuda.amp import autocast, GradScaler
from torchlib.GPs import Backbone, NN, BaseModule
        
class Model(nn.Module):
    """Chain of ``Backbone`` followed by the ``NN`` head.

    ``forward`` accepts any number of inputs and runs each one through the
    backbone and then the head, which yields three values per input.
    """

    def __init__(self):
        super().__init__()
        self.backbone = Backbone()
        self.nn = NN()

    def forward(self, *args):
        """Run every positional input through backbone and head.

        Returns:
            With more than one input: a list holding the first head value of
            each input, in call order.
            Otherwise: the full ``(output, feature_t, feature_r)`` triple of the
            single input.
        """
        poses = []
        for batch in args:
            pose, feature_t, feature_r = self.nn(self.backbone(batch))
            poses.append(pose)
        if len(args) <= 1:
            return pose, feature_t, feature_r
        return poses
        
class PosePredictor(BaseModule):
    """Training / evaluation wrapper around ``Model``.

    Owns the model, optionally a ``GradScaler`` for mixed precision, and (in
    training mode) an Adam optimizer with a ``LambdaLR`` schedule. All
    configuration is read from the ``args`` given to the constructor, and all
    state checks go through ``self``, so the class does not depend on
    notebook-level globals.
    """

    def __init__(self, norm_mean, norm_std, args, is_training=True, mixed_precision=True):
        """Build the model and, when training, its optimizer and scheduler.

        Args:
            norm_mean: normalization mean, forwarded to ``BaseModule``.
            norm_std: normalization std, forwarded to ``BaseModule``.
            args: parsed argument namespace; ``learning_rate``, ``weight_decay``,
                ``decay_rate``, ``is_normalization`` and ``lamda_weights`` are read.
            is_training: when True create optimizer and scheduler, otherwise
                call ``disable_requires_grad`` on the model.
            mixed_precision: when True run forward passes under ``autocast`` and
                scale the loss with a ``GradScaler``.
        """
        super().__init__(norm_mean, norm_std, args)
        # Private handle on the configuration used by train/_loss/eval_forward.
        self._cfg = args
        self.model = Model().to(self.device)
        self.mixed_precision = mixed_precision
        if self.mixed_precision:
            self.scaler = GradScaler()
        if is_training:
            self.optimizer = optim.Adam(self.model.parameters(),
                                        lr=args.learning_rate,
                                        weight_decay=args.weight_decay)
            # The lambda's argument counts scheduler steps, so the learning rate is
            # learning_rate * decay_rate ** (number of scheduler steps so far).
            self.scheduler = optim.lr_scheduler.LambdaLR(optimizer=self.optimizer,
                                                         lr_lambda=lambda epoch: args.decay_rate**epoch)
        else:
            self.disable_requires_grad(self.model)

    def train(self, x0, x1, y0, y1):
        """Run one optimization step on a pair of samples.

        Args:
            x0, x1: the two inputs of the pair.
            y0, y1: the corresponding targets.

        Returns:
            ``(total_loss, global_loss, consistent_loss, batch_time)`` as Python
            floats; ``batch_time`` is measured from just after ``zero_grad``
            until the optimizer step has been issued.
        """
        # Step 0: zero grad
        self.optimizer.zero_grad()

        start = time.time()
        # Step 1: move data to the device and optionally normalize the targets
        x0, x1, y0, y1 = (t.to(self.device) for t in (x0, x1, y0, y1))
        if self._cfg.is_normalization:
            y0, y1 = [normalize(y, self.norm_mean, self.norm_std) for y in [y0, y1]]

        # Step 2: training (check this instance's model, not a global trainer)
        assert self.model.training, 'model must be in training mode (call model.train())'

        if self.mixed_precision:
            with autocast():
                global_loss, consistent_loss = self._loss(x0, x1, y0, y1)
                single_loss = global_loss + consistent_loss

            # Scale the loss for backward; the scaler performs the optimizer step.
            self.scaler.scale(single_loss).backward()
            self.scaler.step(self.optimizer)
            self.scaler.update()
        else:
            global_loss, consistent_loss = self._loss(x0, x1, y0, y1)
            single_loss = global_loss + consistent_loss

            single_loss.backward()
            self.optimizer.step()
        batch_time = time.time() - start

        return float(single_loss), float(global_loss), float(consistent_loss), batch_time

    def _loss(self, x0, x1, y0, y1):
        """Compute the global loss and the relative (consistency) loss.

        The global loss compares the model output for ``x1`` with ``y1``. The
        consistency loss compares the relative pose between the two outputs
        with the relative pose between the two targets. Both use
        ``lamda_weights`` from the configuration.

        Returns:
            ``(global_loss, geometry_consistent_loss)``.
        """
        lamda = self._cfg.lamda_weights
        # target relative
        relative_target_normed = get_relative_pose(y0, y1)
        # forward output
        global_output0, global_output1 = self.model(x0, x1)
        # output relative
        relative_consistence = get_relative_pose(global_output0, global_output1)

        # target loss
        global_loss = translational_rotational_loss(pred=global_output1, gt=y1,
                                                    lamda=lamda)
        # relative loss
        geometry_consistent_loss = translational_rotational_loss(pred=relative_consistence,
                                                                 gt=relative_target_normed,
                                                                 lamda=lamda)

        return global_loss, geometry_consistent_loss

    def eval_forward(self, x, y, output_denormalize=True):
        """Forward ``x`` in evaluation mode and split prediction and target.

        Args:
            x: model input.
            y: target; split along dim 1 into chunks of size 3 and 4.
            output_denormalize: when True (and normalization is enabled in the
                configuration) the prediction is denormalized before splitting.

        Returns:
            ``(trans_prediction, rot_prediction, trans_target, rot_target)``.
        """
        # Step 1: get data
        x, y = x.to(self.device), y.to(self.device)

        # Step 2: forward (check this instance's model, not a global trainer)
        assert not self.model.training, 'model must be in eval mode (call model.eval())'

        if self.mixed_precision:
            # No GradScaler call here: the previous scaler.scale(output) result was
            # discarded, and loss scaling only matters for backward passes.
            with autocast():
                output, _, _ = self.model(x)
        else:
            output, _, _ = self.model(x)

        if self._cfg.is_normalization and output_denormalize:
            output = denormalize(output, self.norm_mean, self.norm_std)

        # Step 3: split output into the first 3 and the remaining 4 columns
        trans_target, rot_target = torch.split(y, [3, 4], dim=1)
        trans_prediction, rot_prediction = torch.split(output, [3, 4], dim=1)
        return trans_prediction, rot_prediction, trans_target, rot_target

In [5]:
# Build the trainer with mixed precision enabled, then load the
# 'pretrained.pth' checkpoint through the trainer's load_model helper.
trainer = PosePredictor(
    args.norm_mean,
    args.norm_std,
    args,
    mixed_precision=True,
)
trainer.load_model('pretrained.pth')
#trainer.load_model('model-12-27000.pth')

Successfully loaded model to TITAN Xp.


# Training

In [6]:
if args.tensorboard:
    import shutil
    from torch.utils.tensorboard import SummaryWriter

    # Start from an empty log directory so curves from earlier runs do not mix
    # with this one. shutil.rmtree does the delete without spawning a shell, and
    # ignore_errors=True covers the first run, when the directory does not exist.
    shutil.rmtree('runs/nn', ignore_errors=True)
    writer = SummaryWriter('runs/nn')

In [7]:
# Put the network in training mode; PosePredictor.train asserts on this.
trainer.model.train()
steps_per_epoch = len(dataloader)
for e in range(args.num_epochs):
    minibatch_iter = tqdm(dataloader)
    train_loss = 0.
    for b, data in enumerate(minibatch_iter):
        x0, x1 = data['image']
        y0, y1 = data['target']

        loss, global_loss, consistent_loss, batch_time = trainer.train(x0, x1, y0, y1)

        train_loss += loss
        ave_loss = train_loss / (b + 1)          # running mean over this epoch
        step = e * steps_per_epoch + (b + 1)     # global 1-based step counter
        # display data
        minibatch_iter.set_postfix(ave=ave_loss, loss=loss, lr=trainer.scheduler.get_last_lr()[-1])
        # tensorboard: `writer` only exists when tensorboard logging is enabled,
        # so skip logging otherwise instead of failing with a NameError.
        if args.tensorboard:
            trainer.data2tensorboard(writer, 'training loss', {'item loss': loss, 'batch loss': ave_loss}, step)
            trainer.data2tensorboard(writer, 'loss', {'global_loss': global_loss, 'con_loss': consistent_loss}, step)
        # save model
        trainer.save_model_step(e, step)
        # step scheduler
        trainer.schedule_step(step, 150)

  1%|          | 24/2135 [00:30<44:56,  1.28s/it, ave=0.0015, loss=0.00187, lr=5e-6]  


KeyboardInterrupt: 