# Torch
## Check GPU

In [1]:
#from apex import amp,optimizers

In [2]:
import torch
# Print the name of the GPU to be used. Index 1 is preferred, but a machine
# exposing a single GPU only has index 0, and asking for a missing index
# raises instead of printing anything -- so fall back to index 0 there.
if torch.cuda.is_available():
    gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    print(torch.cuda.get_device_name(gpu_index))

TITAN Xp


## Set torch default parameters

In [3]:
# Process-wide torch settings; everything below runs with these in effect.
# Default dtype for newly created floating-point tensors: 32-bit float.
torch.set_default_dtype(torch.float32)
# Show 8 digits after the decimal point when tensors are printed.
torch.set_printoptions(precision=8)
# Enable the cuDNN benchmark flag (per the PyTorch docs this lets cuDNN
# auto-tune its convolution algorithms; only relevant when running on GPU).
torch.backends.cudnn.benchmark = True

# Set Arguments

In [4]:
import argparse
import sys
import os
import time
import pickle

def _str2bool(value):
    """Convert a command-line token to a real boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is ``True``
    because any non-empty string is truthy, so the flag could never be
    switched off from the command line.

    Args:
        value: a ``bool`` (returned unchanged) or a string such as
            ``true/false``, ``yes/no``, ``t/f``, ``y/n``, ``1/0``
            (case-insensitive, surrounding whitespace ignored).

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got {!r}'.format(value))


parser = argparse.ArgumentParser()

# ---- Training parameters ----
parser.add_argument('--batch_size', type=int, default=35, help='minibatch size')
parser.add_argument('--num_epochs', type=int, default=200, help='number of epochs')
parser.add_argument('--grad_clip', type=float, default=5., help='clip gradients at this value')
parser.add_argument('--learning_rate', type=float, default=0.001, help='learning rate')
parser.add_argument('--learning_rate_clip', type=float, default=0.0000001, help='learning rate clip')
# The next three help texts used to be copy-pasted ('decay rate for rmsprop').
parser.add_argument('--decay_rate', type=float, default=.95, help='per-epoch learning rate decay factor')
parser.add_argument('--weight_decay', type=float, default=.0001, help='weight decay passed to the optimizer')
parser.add_argument('--batch_norm_decay', type=float, default=.999, help='batch norm decay rate')
parser.add_argument('--keep_prob', type=float, default=1.0, help='dropout keep probability')
parser.add_argument('--lamda_weights', type=float, default=10, help='lamda weight')
# Booleans go through _str2bool so that "--data_argumentation False" really yields False.
parser.add_argument('--data_argumentation', type=_str2bool, default=True, help='whether do data argument')
parser.add_argument('--is_normalization', type=_str2bool, default=True, help='whether do data nomalization')
parser.add_argument('--target_image_size', default=[300, 300], nargs=2, type=int, help='Input images will be resized to this for data argumentation.')
parser.add_argument('--output_dim', default=3, type=int, help='output dimention.')
parser.add_argument('--feat_dim', default=128, type=int, help='feature dimention.')

# ---- Configuration ----
parser.add_argument('--network', type=str, default='vggnet_localization')
parser.add_argument('--model_dir', type=str, default='/notebooks/global_localization/dual_resnet_torch', help='model directory')
# Dataset directory list kept for reference (currently disabled in favour of
# the single 'test' directory below):
#   '/notebooks/michigan_nn_data/2012_01_08',
#   '/notebooks/michigan_nn_data/2012_01_15',
#   '/notebooks/michigan_nn_data/2012_01_22',
#   '/notebooks/michigan_nn_data/2012_02_02',
#   '/notebooks/michigan_nn_data/2012_02_04',
#   '/notebooks/michigan_nn_data/2012_02_05',
#   '/notebooks/michigan_nn_data/2012_03_31',
#   '/notebooks/michigan_nn_data/2012_09_28'
# nargs='+' keeps the value a list of directories whether it comes from the
# default or from the command line (without it a CLI value is a bare string).
parser.add_argument('--train_dataset', type=str, nargs='+', default=['/notebooks/michigan_nn_data/test'])

parser.add_argument('--seed', default=1337, type=int)
parser.add_argument('--save_every', type=int, default=2000, help='save frequency')
parser.add_argument('--display', type=int, default=10, help='display frequency')

# Replace the process argument list with a single empty program name so that
# parse_args() sees no options and every argument takes its default.
sys.argv = ['']
args = parser.parse_args()

# Load Dataset

In [5]:
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import tf.transformations as tf_tran
from tqdm import tqdm
from PIL import Image
import numpy as np
import random

#import gpflow.multioutput.kernels as mk
import gpytorch

import torch.nn as nn
import torch.optim as optim
from torchlib import resnet, vggnet
from torchlib.utils import LocalizationDataset
import time

# Images are only converted to tensors; no further transform is applied here.
transform = transforms.Compose([transforms.ToTensor()])

# Single-sample mode (get_pair=False) over the configured directories.
dataset = LocalizationDataset(
    dataset_dirs=args.train_dataset,
    image_size=args.target_image_size,
    transform=transform,
    get_pair=False,
)

# get_norm() must yield exactly two items; each is wrapped in a tensor and
# stored on `args` as the normalisation mean / std.
args.norm_mean, args.norm_std = map(torch.tensor, dataset.get_norm())

# Shuffled mini-batches loaded in the main process (num_workers=0); the
# trailing incomplete batch is dropped.
dataloader = DataLoader(
    dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=0,
    drop_last=True,
    pin_memory=True,
)

the rosdep view is empty: call 'sudo rosdep init' and 'rosdep update'
100%|██████████| 5593/5593 [00:06<00:00, 842.64it/s]


# Define Model

In [6]:
def normalize(target, norm_mean, norm_std):
    """Standardise the first three columns of ``target``.

    Columns ``[:, :3]`` are shifted by ``norm_mean`` and divided by
    ``norm_std``; columns ``[:, 3:]`` are passed through untouched.

    Args:
        target: 2-D tensor whose first three columns are to be normalised.
        norm_mean: value(s) subtracted from the first three columns.
        norm_std: value(s) the shifted columns are divided by.

    Returns:
        A new tensor with the same column layout as ``target``.
    """
    head, tail = target[:, :3], target[:, 3:]
    head = (head - norm_mean) / norm_std
    return torch.cat((head, tail), dim=1)

class MultitaskGPModel(gpytorch.models.ApproximateGP):
    """Variational GP with one batched latent GP per output task.

    The variational distribution, mean and kernel all carry a batch dimension
    of size ``num_tasks`` so that each task learns its own variational
    parameters and hyperparameters.

    Args:
        inducing_points: initial inducing locations; ``size(-2)`` is taken as
            the number of inducing points. The locations are learned
            (``learn_inducing_locations=True``).
            NOTE(review): presumably shaped (num_tasks, num_inducing, feat_dim)
            -- confirm against the caller.
        num_tasks: number of output tasks. Defaults to 3, the value that used
            to be hard-coded throughout this class.
    """

    def __init__(self, inducing_points, num_tasks=3):
        # Single source of truth for the per-task batch shape (it used to be
        # repeated as torch.Size([3]) in five places).
        task_batch = torch.Size([num_tasks])

        # We have to mark the CholeskyVariationalDistribution as batch
        # so that we learn a variational distribution for each task
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(-2), batch_shape=task_batch
        )

        # We have to wrap the VariationalStrategy in a MultitaskVariationalStrategy
        # so that the output will be a MultitaskMultivariateNormal rather than a batch output
        variational_strategy = gpytorch.variational.MultitaskVariationalStrategy(
            gpytorch.variational.VariationalStrategy(
                self, inducing_points, variational_distribution, learn_inducing_locations=True
            ), num_tasks=num_tasks
        )

        super().__init__(variational_strategy)

        # The mean and covariance modules should be marked as batch
        # so we learn a different set of hyperparameters
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=task_batch)
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=task_batch),
            batch_shape=task_batch
        )

    def forward(self, x):
        """Return the batched GP prior at ``x`` as a ``MultivariateNormal``.

        The forward function is written as if we were dealing with each
        output dimension in batch.
        """
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

class Model(nn.Module):
    """Feature network: ResNet-50 backbone -> context block -> regressor head."""

    def __init__(self):
        """Build the three sub-networks (the ResNet-50 is requested with ``pretrained=True``)."""
        super(Model, self).__init__()
        self.resnet = resnet.resnet50(pretrained=True)
        # The context block is built for 2048 input channels.
        self.global_context = vggnet.vggnet(input_channel=2048, opt="context")
        self.global_regressor = vggnet.vggnet(opt="regressor")

    def forward(self, input_data):
        """Run the full pipeline on ``input_data``.

        Returns:
            A 3-tuple ``(global_output, trans_feat, rot_feat)`` as produced by
            the regressor head.
        """
        context = self.global_context(self.resnet(input_data))
        prediction, feat_trans, feat_rot = self.global_regressor(context)
        return prediction, feat_trans, feat_rot

In [7]:
# Pick the compute device. GPU index 1 is preferred, but hard-coding "cuda:1"
# fails on hosts that expose a single GPU, so fall back to index 0 there and
# to the CPU when CUDA is unavailable.
# (To force CPU execution, set: device = torch.device("cpu"))
if torch.cuda.is_available():
    gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device("cuda:{}".format(gpu_index))
    torch.cuda.set_device(device)
else:
    device = torch.device("cpu")

# Feature network and the GP model placed on the selected device.
net = Model().to(device)
# Random initial inducing points of shape (3, 16, 128); the GP model reads
# size(-2) == 16 as the number of inducing points.
model = MultitaskGPModel(torch.rand(3, 16, 128)).to(device)

In [9]:
# List the name and shape of every trainable parameter of the feature network.
for name, param in net.named_parameters():
    if not param.requires_grad:
        continue  # frozen parameters are not listed
    print(name, param.shape)

resnet.conv1.weight torch.Size([64, 1, 7, 7])
resnet.bn1.weight torch.Size([64])
resnet.bn1.bias torch.Size([64])
resnet.layer1.0.conv1.weight torch.Size([64, 64, 1, 1])
resnet.layer1.0.bn1.weight torch.Size([64])
resnet.layer1.0.bn1.bias torch.Size([64])
resnet.layer1.0.conv2.weight torch.Size([64, 64, 3, 3])
resnet.layer1.0.bn2.weight torch.Size([64])
resnet.layer1.0.bn2.bias torch.Size([64])
resnet.layer1.0.conv3.weight torch.Size([256, 64, 1, 1])
resnet.layer1.0.bn3.weight torch.Size([256])
resnet.layer1.0.bn3.bias torch.Size([256])
resnet.layer1.0.downsample.0.weight torch.Size([256, 64, 1, 1])
resnet.layer1.0.downsample.1.weight torch.Size([256])
resnet.layer1.0.downsample.1.bias torch.Size([256])
resnet.layer1.1.conv1.weight torch.Size([64, 256, 1, 1])
resnet.layer1.1.bn1.weight torch.Size([64])
resnet.layer1.1.bn1.bias torch.Size([64])
resnet.layer1.1.conv2.weight torch.Size([64, 64, 3, 3])
resnet.layer1.1.bn2.weight torch.Size([64])
resnet.layer1.1.bn2.bias torch.Size([64])
re

In [10]:
# Same listing for the GP model: name and shape of each trainable parameter.
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue  # skip anything excluded from training
    print(name, param.shape)

variational_strategy.base_variational_strategy.inducing_points torch.Size([3, 16, 128])
variational_strategy.base_variational_strategy._variational_distribution.variational_mean torch.Size([3, 16])
variational_strategy.base_variational_strategy._variational_distribution.chol_variational_covar torch.Size([3, 16, 16])
mean_module.constant torch.Size([3, 1])
covar_module.raw_outputscale torch.Size([3])
covar_module.base_kernel.raw_lengthscale torch.Size([3, 1, 1])


# Training
## Parameters

In [11]:
# Move the target normalisation statistics to the training device.
args.norm_mean = args.norm_mean.to(device)
args.norm_std = args.norm_std.to(device)

# Multitask Gaussian likelihood for the 3 output tasks, on the same device.
likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=3).to(device)

# Adam over the GP model and likelihood parameters only -- the parameters of
# `net` are not handed to the optimizer. (Earlier alternatives, kept for
# reference: optim.Adam / apex FusedAdam over net.parameters(), optionally
# wrapped with amp.initialize(net, optimizer, opt_level="O1").)
optimizer = torch.optim.Adam(
    [
        {'params': model.parameters()},
        {'params': likelihood.parameters()},
    ],
    lr=args.learning_rate,
    weight_decay=args.weight_decay,
)

# Exponential schedule: the learning rate is scaled by decay_rate ** epoch.
scheduler = optim.lr_scheduler.LambdaLR(
    optimizer,
    lr_lambda=lambda epoch: args.decay_rate ** epoch,
)

# Variational ELBO; num_data is the number of entries in dataset.Targets.
mll = gpytorch.mlls.VariationalELBO(
    likelihood,
    model,
    num_data=len(dataset.Targets),
)

## Training Epoch

In [13]:
# Smoke test of the GP training step: the `break` statements stop the run
# after three mini-batches of the first epoch, so only three loss values are
# printed.
model.train()
likelihood.train()

for e in range(2):
    # The scheduler is advanced at the start of every epoch except the first.
    if e != 0:
        scheduler.step()
    train_loss = 0.
    for b, data in enumerate(dataloader, 0):

        start = time.time()
        optimizer.zero_grad()

        # NOTE(review): relies on the batch dict yielding (image, target) in
        # that order -- confirm against LocalizationDataset.
        x,y = data.values()
        x,y = x.to(device),y.to(device)
        # normalize targets
        y = normalize(y,args.norm_mean, args.norm_std)

        # `net` only supplies input features here: its parameters are not in
        # the optimizer, so building an autograd graph through it and
        # back-propagating into it was wasted compute/memory, and the resulting
        # gradients were never used or zeroed. Disabling grad for this call
        # leaves the loss and the GP/likelihood updates unchanged.
        # Remove the no_grad() context if `net` is to be trained jointly.
        with torch.no_grad():
            _, trans_feat, _ = net(x)

        # The first 3 target columns are the regression target for the GP;
        # the remaining 4 are unused in this loop.
        trans_target, _ = torch.split(y, [3, 4], dim=1)

        output = model(trans_feat)
        # Minimise the negative ELBO.
        loss = -mll(output, trans_target)
        loss.backward()
        print(loss.item())
        optimizer.step()
        if b == 2:
            break
    break

5.267107963562012
5.134354591369629
5.235293388366699


In [14]:
# Shape of the mean obtained by passing the last training batch's GP `output`
# through the likelihood.
likelihood(output).mean.shape

torch.Size([35, 3])

In [11]:
# Shape of the feature tensor that was fed to the GP model in the last batch.
trans_feat.shape

torch.Size([35, 128])

In [12]:
# Shape of the target tensor the ELBO was evaluated against in the last batch.
trans_target.shape

torch.Size([35, 3])

In [None]:
import gpytorch
# Scratch cell: a scaled RBF kernel with ard_num_dims=128 and a batch shape
# of 2. The comma after `ard_num_dims=128` was missing, which made the whole
# cell a SyntaxError.
kernel = gpytorch.kernels.ScaleKernel(
    gpytorch.kernels.RBFKernel(
        ard_num_dims=128,
        batch_shape=torch.Size([2]),
    ),
    batch_shape=torch.Size([2]),
)