In [2]:
args = {}
args['model_path'] = './results/'
args['no_env'] = 1
args['no_motion_paths'] = 10000
args['total_input_size'] = 8
args['AE_input_size'] = 0
args['mlp_input_size'] = 8
args['output_size'] = 4
args['learning_rate'] = 0.01
args['device'] = 1
args['num_epochs'] = 5
args['batch_size'] = 100
args['path_folder'] = './data/cartpole/'
args['path_file'] = 'train.pkl'
args['obs_file'] = './data/cartpole/obs.pkl'
args['obc_file'] = './data/cartpole/obc.pkl'
args['start_epoch'] = 0
args['env_type'] = 'cartpole'
args['world_size'] = [30., 40., 3.141592653589793, 2.]

In [3]:
"""
This implements the Kinodynamic Planning using MPNet, by using MPNet
to generate random samples, that will guide the SST algorithm.
"""
import torch
import model.AE.identity as cae_identity
from model.mlp_simple import MLP
from model.mpnet import KMPNet
from tools import data_loader
from tools.utility import *
from plan_utility import cart_pole, cart_pole_obs
import argparse
import numpy as np
import random
import os


if torch.cuda.is_available():
    torch.cuda.set_device(args['device'])
# environment setting
    if args['env_type'] == 'cartpole':
        normalize = cart_pole.normalize
        unnormalize = cart_pole.unnormalize
        obs_file = None
        obc_file = None
    elif args['env_type'] == 'cartpole_obs':
        normalize = cart_pole_obs.normalize
        unnormalize = cart_pole_obs.unnormalize
        obs_file = args['obs_file']
        obc_file = args['obc_file']

cae = cae_identity
mlp = MLP
mpnet = KMPNet(args['total_input_size'], args['AE_input_size'], args['mlp_input_size'], args['output_size'],
               cae, mlp)
# load net
# load previously trained model if start epoch > 0
model_path='kmpnet_epoch_%d.pkl' %(args['start_epoch'])
torch_seed, np_seed, py_seed = 0, 0, 0
if args['start_epoch'] > 0:
    load_net_state(mpnet, os.path.join(args['model_path'], model_path))
    torch_seed, np_seed, py_seed = load_seed(os.path.join(args['model_path'], model_path))
    # set seed after loading
    torch.manual_seed(torch_seed)
    np.random.seed(np_seed)
    random.seed(py_seed)

if torch.cuda.is_available():
    mpnet.cuda()
    mpnet.mlp.cuda()
    mpnet.encoder.cuda()
    # here we use Adagrad because previous MPNet performs well under it
    mpnet.set_opt(torch.optim.Adagrad, lr=args['learning_rate'])
if args['start_epoch'] > 0:
    load_opt_state(mpnet, os.path.join(args['model_path'], model_path))

# load train and test data
print('loading...')
obs, dataset, targets, env_indices = data_loader.load_train_dataset(N=args['no_env'], NP=args['no_motion_paths'],
                                                                    p_folder=args['path_folder'], p_fname=args['path_file'],
                                                                    obs_f=obs_file, obc_f=obc_file)
data=list(zip(dataset,targets,env_indices))
random.shuffle(data)
dataset,targets,env_indices=list(zip(*data))
dataset = list(dataset)
targets = list(targets)
env_indices = list(env_indices)
dataset = np.array(dataset)
targets = np.array(targets)
env_indices = np.array(env_indices)


# Train the Models
print('training...')
losses = []
for epoch in range(args['start_epoch']+1,args['num_epochs']+1):
    print('epoch' + str(epoch))
    for i in range(0,len(dataset),args['batch_size']):
        #print('epoch: %d, training... path: %d' % (epoch, i+1))
        dataset_i = dataset[i:i+args['batch_size']]
        targets_i = targets[i:i+args['batch_size']]
        env_indices_i = env_indices[i:i+args['batch_size']]
        # record
        bi = np.concatenate( (obs[env_indices_i, :args['AE_input_size']], dataset_i), axis=1).astype(np.float32)
        bt = targets_i
        bi = torch.FloatTensor(bi)
        bt = torch.FloatTensor(bt)
        bi, bt = normalize(bi, args['world_size']), normalize(bt, args['world_size'])
        mpnet.zero_grad()
        bi=to_var(bi)
        bt=to_var(bt)
        #print('before training losses:')
        #print(mpnet.loss(mpnet(bi), bt))
        losses.append(mpnet.loss(mpnet(bi), bt).cpu().data)
        mpnet.step(bi, bt)
        #print('after training losses:')
        #print(mpnet.loss(mpnet(bi), bt))
    # Save the models
    if epoch > 0:
        model_path='kmpnet_epoch_%d.pkl' %(epoch)
        save_state(mpnet, torch_seed, np_seed, py_seed, os.path.join(args['model_path'],model_path))
        # test



loading...
training...
epoch1
epoch2
epoch3
epoch4
epoch5


In [4]:
import matplotlib.pyplot as plt
plt.plot(list(range(len(losses))), losses)
plt.show()

<Figure size 640x480 with 1 Axes>

In [5]:
# load training paths
import pickle
from sparse_rrt import _sst_module
from sparse_rrt.systems import standard_cpp_systems
import time
def informed_sample(env, planner, start, goal):
    # repeat until finding a path
    max_iter = 10000
    min_time_steps = 20
    max_time_steps = 200
    integration_step = 0.002
    x_new = np.array(start)
    sample_time = []
    plan_time_single = []
    for i in range(max_iter):
        # use MPNet to find the informed sample
        time1 = time.time()
        if i % 100 == 0:
            # use goal from time to time
            x_rand = goal
        else:
            x_in = np.concatenate([x_new, goal]).reshape(1, -1)
            x_in = torch.from_numpy(x_in).type(torch.FloatTensor)
            x_in = normalize(x_in, args['world_size']).cuda()
            x_rand = mpnet(x_in).cpu()[0]
            x_rand = unnormalize(x_rand, args['world_size'])
            x_rand = x_rand.cpu().data.numpy()
        sample_time.append(time.time() - time1)
        #print('x_random:')
        #print(x_rand)
        x_new = planner.step_with_sample(env, x_rand, min_time_steps, max_time_steps, integration_step)
        #print('x_new: ')
        #print(x_new)
        plan_time_single.append(time.time() - time1)
        # if x_new near goal it will terminate automatically
        #TODO: better iterative sampling method
        solution = planner.get_solution()
        if solution is not None:
            break
    solution = planner.get_solution()
    return solution, i, sample_time, plan_time_single

file = open('data/cartpole/train.pkl', 'rb')
paths = pickle.load(file)
#N = 100
max_iter = 10000
# for each path, compare the number of samples needed for finding a "good" path
env = standard_cpp_systems.CartPole()
# for each BVP
min_time_steps = 20
max_time_steps = 200
integration_step = 0.002
low = []
high = []
state_bounds = env.get_state_bounds()
for i in range(len(state_bounds)):
    low.append(state_bounds[i][0])
    high.append(state_bounds[i][1])

    
uniform_suc = []
inform_suc = []
uniform_iter = []
inform_iter = []
uniform_cost = []
inform_cost = []
uniform_sample_time = []
uniform_plan_time_single = []
uniform_plan_time = []
inform_sample_time = []
inform_plan_time_single = []
inform_plan_time = []
num_paths = 100
for i in range(num_paths):
    print("path %d" % (i))
    # randomly sample collision-free start and goal
    #print('random sampling trial:')
    start = np.random.uniform(low=low, high=high)
    end = np.random.uniform(low=low, high=high)
    planner = _sst_module.SSTWrapper(
        state_bounds=env.get_state_bounds(),
        control_bounds=env.get_control_bounds(),
        distance=env.distance_computer(),
        start_state=start,
        goal_state=end,
        goal_radius=0.5,
        random_seed=0,
        sst_delta_near=0.4,
        sst_delta_drain=0.2
    )
    # generate a path by using SST to plan for some maximal iterations
    time0 = time.time()
    for iter in range(max_iter):
        time1 = time.time()
        if iter % 100 == 0:
            # from time to time use the goal
            sample = end
        else:
            sample = np.random.uniform(low=low, high=high)
        uniform_sample_time.append(time.time() - time1)
        planner.step_with_sample(env, sample, min_time_steps, max_time_steps, integration_step)
        uniform_plan_time_single.append(time.time() - time1)
        # check if we find the solution, if so, stop planning
        solution = planner.get_solution()
        if solution is not None:
            break    
    solution = planner.get_solution()
    uniform_plan_time.append(time.time() - time0)

    if solution is None:
        print('path %d: Uniform sampling failed.' % (i))
        uniform_suc.append(0)
        uniform_iter.append(np.inf)
        uniform_cost.append(np.inf)
        
    else:
        print('path %d: Uniform succeeded with %d iterations.' % (i, iter))
        path, controls, costs = solution
        #print(path)
        #print('uniform sampling costs:')
        #print(np.sum(costs))
        uniform_suc.append(1)
        uniform_iter.append(iter)
        uniform_cost.append(np.sum(costs))

        
    # informed sampling
    # create a new planner
    planner = _sst_module.SSTWrapper(
        state_bounds=env.get_state_bounds(),
        control_bounds=env.get_control_bounds(),
        distance=env.distance_computer(),
        start_state=start,
        goal_state=end,
        goal_radius=0.5,
        random_seed=0,
        sst_delta_near=0.4,
        sst_delta_drain=0.2
    )    
    time0 = time.time()
    solution, iter, sample_time, plan_time_single = informed_sample(env, planner, start, end)
    inform_plan_time.append(time.time() - time0)
    inform_sample_time += sample_time
    inform_plan_time_single += plan_time_single
    
    if solution is not None:
        print('path %d: Informed sampling succeeded with %d iterations.' % (i, iter))
        path, controls, costs = solution
        #print(path)
        #print('Informed sampling costs:')
        #print(np.sum(costs))      
        inform_suc.append(1)
        inform_iter.append(iter)
        inform_cost.append(np.sum(costs))
    else:
        print('path %d: Informed sampling failed.' % (i))
        inform_suc.append(0)
        inform_iter.append(np.inf)
        inform_cost.append(np.inf)
        

path 0
path 0: Uniform sampling failed.
path 0: Informed sampling failed.
path 1
path 1: Uniform sampling failed.
path 1: Informed sampling failed.
path 2
path 2: Uniform sampling failed.
path 2: Informed sampling failed.
path 3
path 3: Uniform sampling failed.
path 3: Informed sampling succeeded with 2401 iterations.
path 4
path 4: Uniform sampling failed.
path 4: Informed sampling failed.
path 5
path 5: Uniform sampling failed.
path 5: Informed sampling failed.
path 6
path 6: Uniform sampling failed.
path 6: Informed sampling failed.
path 7
path 7: Uniform sampling failed.
path 7: Informed sampling failed.
path 8
path 8: Uniform sampling failed.
path 8: Informed sampling failed.
path 9
path 9: Uniform sampling failed.
path 9: Informed sampling failed.
path 10
path 10: Uniform sampling failed.
path 10: Informed sampling failed.
path 11
path 11: Uniform sampling failed.
path 11: Informed sampling failed.
path 12
path 12: Uniform sampling failed.
path 12: Informed sampling failed.
path 

In [6]:
# printout success rate
print('uniform sampling success rate:')
print(np.sum(uniform_suc))
print('informed sampling success rate:')
print(np.sum(inform_suc))


uniform sampling success rate:
5
informed sampling success rate:
6


In [7]:
# plot iteration comparsion
iter_smaller = []
iter_larger = []
iter_equal = []
for i in range(len(inform_iter)):
    if inform_iter[i] < uniform_iter[i]:
        iter_smaller.append(1)
    elif inform_iter[i] == uniform_iter[i]:
        iter_equal.append(1)
    else:
        iter_larger.append(1)
print('smaller iterations:')
print(np.sum(iter_smaller))
print('equal iterations:')
print(np.sum(iter_equal))
print('larger iterations:')
print(np.sum(iter_larger))



smaller iterations:
3
equal iterations:
93
larger iterations:
4


In [8]:
# plot cost comparsion
cost_smaller = []
cost_equal = []
cost_larger = []
for i in range(len(inform_iter)):
    if inform_cost[i] < uniform_cost[i]:
        cost_smaller.append(1)
    elif inform_cost[i] == uniform_cost[i]:
        cost_equal.append(1)
    else:
        cost_larger.append(1)
print('smaller iterations:')
print(np.sum(cost_smaller))
print('equal iterations:')
print(np.sum(cost_equal))
print('larger iterations:')
print(np.sum(cost_larger))

print(np.mean(uniform_cost[uniform_cost!=np.inf]))
print(np.mean(inform_cost[uniform_cost!=np.inf]))

smaller iterations:
3
equal iterations:
93
larger iterations:
4
inf
inf


In [9]:
# compare the time
print('uniform average time for sampling:')
print(np.mean(uniform_sample_time))
print('informed average time for sampling:')
print(np.mean(inform_sample_time))

print('uniform average time for planning one iteration:')
print(np.mean(uniform_plan_time_single))
print('informed average time for planning one iteration:')
print(np.mean(inform_plan_time_single))



uniform average time for sampling:
0.00010455474178497793
informed average time for sampling:
0.0014037334035650617
uniform average time for planning one iteration:
0.0013717364524444425
informed average time for planning one iteration:
0.0027249390834536927


In [10]:
# plot cost comparsion
time_smaller = []
time_equal = []
time_larger = []
for i in range(len(inform_iter)):
    if inform_plan_time[i] < uniform_plan_time[i]:
        time_smaller.append(1)
    elif inform_plan_time[i] == uniform_plan_time[i]:
        time_equal.append(1)
    else:
        time_larger.append(1)
print('smaller iterations:')
print(np.sum(time_smaller))
print('equal iterations:')
print(np.sum(time_equal))
print('larger iterations:')
print(np.sum(time_larger))

print(np.mean(uniform_plan_time))
print(np.mean(inform_plan_time))

smaller iterations:
3
equal iterations:
0.0
larger iterations:
97
13.224549949169159
25.96289939403534
