In [None]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import sys
sys.path.append("..")
sys.path.append(".")
import copy
import torch
import numpy as np
import pickle
from datetime import datetime
import plotly
import plotly.graph_objs as go
plotly.offline.init_notebook_mode(connected=True)
from scipy import interpolate

import robust_value_approx.utils as utils
import plotting_utils
import slip_utils
import robust_value_approx.train_value as train_value
import robust_value_approx.relu_mpc as relu_mpc

In [None]:
# Alternative (non-gait) formulation kept for reference:
# vf, slip = slip_utils.get_value_function(torch.Tensor([0., 0., 0.]), N=4)

# Gait formulation with N=3; the helper returns the value-function object
# together with a second object bound to `slip` (used later for plotting).
vf, slip = slip_utils.get_value_function_gait(
    torch.Tensor([0., 0., 0.]),
    N=3,
)
# Callable that solves the problem for a given initial state.
V = vf.get_value_function()

In [None]:
# Feasibility sweep: for a fixed target `xf`, try a 10x10 grid of initial
# states and count how many of them produce a trajectory.
xf = torch.Tensor([2.5, 1.1, 0.])
# Tile the target into a (state_dim, N-1) reference trajectory.
xtraj = xf.type(vf.dtype).unsqueeze(1).repeat(1, vf.N-1)
vf.set_traj(xtraj=xtraj)
# Rebuild the solver closure after changing the reference trajectory, as the
# other cells in this notebook do (set_traj followed by get_value_function).
# NOTE(review): whether an older closure would ignore the new trajectory is
# not visible from this notebook; rebuilding is the safe, consistent choice.
V = vf.get_value_function()
count = 0
for xdot in np.linspace(3, 9, 10):
    for z in np.linspace(.9, 1.25, 10):
        x0 = torch.Tensor([0., z, xdot]).type(vf.dtype)
        # V(x0) returns a 3-tuple; everything after the first element is
        # forwarded to sol_to_traj to recover the trajectories.
        (x_traj, u_traj, alpha_traj) = vf.sol_to_traj(x0, *V(x0)[1:])
        # sol_to_traj yields None for x_traj when no trajectory is available.
        if x_traj is not None:
            print([z, xdot, u_traj[0,0], u_traj[0,1]])
            count += 1
print(count)

In [None]:
# Solve once from a random initial state for the target `xf`.
xf = torch.Tensor([9., 1.25, 0.])
xtraj = xf.type(vf.dtype).unsqueeze(1).repeat(1, vf.N-1)
vf.set_traj(xtraj=xtraj)

# The sampling box must be defined BEFORE x0 is drawn from it; previously the
# bounds were assigned after their first use, which raises NameError on a
# fresh kernel (or silently reuses stale bounds from an earlier run).
x0_lo = torch.Tensor([0, .95, 4.]).type(vf.dtype)
x0_up = torch.Tensor([0, 1.25, 9.]).type(vf.dtype)

# x0 = torch.Tensor([0., 1.12, 7.]).type(vf.dtype)
# Uniform sample inside [x0_lo, x0_up], component-wise.
x0 = torch.rand(vf.sys.x_dim) * (x0_up - x0_lo) + x0_lo

# Rebuild the solver closure after set_traj, then recover the trajectories.
V = vf.get_value_function()
(x_traj, u_traj, alpha_traj) = vf.sol_to_traj(x0, *V(x0)[1:])
print(x0)
print(u_traj)

In [None]:
# Plot the most recently computed state/input trajectory against target xf.
fig = plotting_utils.slip_traj(
    slip,
    x_traj,
    u_traj,
    xf,
)
fig.show()

In [None]:
# Start from a fresh gait value-function object (N=3) for the experiments
# below. `slip_utils` is already imported in the notebook's import cell and
# autoreload is active, so no re-import is needed here.
vf, slip = slip_utils.get_value_function_gait(torch.Tensor([0., 0., 0.]), N=3)

In [None]:
# Target state, cast to the value function's dtype.
xf = torch.Tensor([0., 1.2, 5]).type(vf.dtype)
# Repeat the target as columns to form a (state_dim, N-1) reference.
# xf already has vf.dtype, so no further cast is required.
xtraj = xf.unsqueeze(1).repeat(1, vf.N - 1)
vf.set_traj(xtraj=xtraj)

In [None]:
# Start exactly at the target state and solve.
x0 = xf.clone()

V = vf.get_value_function()
# Elements after the first one returned by V are forwarded to sol_to_traj.
x_traj, u_traj, alpha_traj = vf.sol_to_traj(x0, *V(x0)[1:])
print(x0, u_traj, sep="\n")

In [None]:
# Plot the trajectory obtained when starting at the target state.
fig = plotting_utils.slip_traj(
    slip,
    x_traj,
    u_traj,
    xf,
)
fig.show()

In [None]:
# --- sampling box for initial states (lower / upper bound per component) ---
x0_lo = torch.Tensor([0, .95, 4.]).type(vf.dtype)
x0_up = torch.Tensor([0, 1.25, 9.]).type(vf.dtype)

# --- validation options ---
num_breaks_validation = [1, 70, 70]

# --- data file options (path prefixes; suffixes are appended when used) ---
sys_name = 'slip'
x_samples_file = f'../data/learn_value_function_{sys_name}_x'
v_samples_file = f'../data/learn_value_function_{sys_name}_v'
model_file = f'../data/{sys_name}'

# --- neural network options ---
nn_width = 64
nn_depth = 1

# --- adversarial training options ---
train_opt = train_value.AdversarialWithBaselineTrainingOptions()
# iteration counts and buffer sizes
train_opt.num_iter_desired = 10000
train_opt.num_steps_between_sampling = 500
train_opt.init_buffer_size = 1000
train_opt.init_num_train_steps = 2000
train_opt.max_buffer_size = 100000
train_opt.batch_size = 150
train_opt.num_rand_extra = 0
# adversarial-sample search settings
train_opt.num_x_adv_opt = 20
train_opt.x_adv_max_iter = 2
train_opt.x_adv_conv_tol = 1e-5
train_opt.x_adv_lr = .2

# Number of adv.train(...) calls per freshly constructed trainer.
num_training_run = 1

In [None]:
# Sanity check: visualise the spread of trajectories over the sampling box.
fig = plotting_utils.rollout_range(
    vf,
    x0_lo,
    x0_up,
    [1, 2],
    ["x", "y", "xdot"],
    n=30,
)
fig.show()

In [None]:
# Temporary trainer used only to draw the initial (state, value) samples.
adv = train_value.AdversarialWithBaseline(vf, x0_lo, x0_up)
x_samples_init, v_samples_init = adv.get_random_samples(
    train_opt.init_buffer_size
)

In [None]:
# Persist the initial samples so later sessions can skip regeneration.
torch.save(x_samples_init, f'{x_samples_file}gait_init.pt')
torch.save(v_samples_init, f'{v_samples_file}gait_init.pt')

In [None]:
# Reload the initial samples written by the save cell.
# NOTE: torch.load unpickles the file; only load files produced by this
# notebook or another trusted source.
x_samples_init = torch.load(f'{x_samples_file}gait_init.pt')
v_samples_init = torch.load(f'{v_samples_file}gait_init.pt')

In [None]:
# Quick look at the sampled values, plotted against sample index.
fig = go.Figure()
fig.add_trace(go.Scatter(y=v_samples_init.squeeze()))
fig.show()

In [None]:
# Containers filled by the training cell: per-run state logs and model copies.
training_runs, baseline_models, robust_models = [], [], []

In [None]:
# Train `num_fresh_runs` independent trainers. Each trainer is validated on
# the same samples it is initialised with (x_samples_init / v_samples_init).
num_fresh_runs = 1
for run_i in range(num_fresh_runs):
    adv = train_value.AdversarialWithBaseline(
        vf,
        x0_lo,
        x0_up,
        nn_width=nn_width,
        nn_depth=nn_depth,
        x_samples_validation=x_samples_init,
        v_samples_validation=v_samples_init,
        # only the first init_buffer_size rows seed the training buffer
        x_samples_init=x_samples_init[:train_opt.init_buffer_size, :],
        v_samples_init=v_samples_init[:train_opt.init_buffer_size, :],
    )

    # Run the configured number of training passes, logging state after each.
    # (adv.train_no_benchmark(train_opt) is the alternative entry point.)
    state_log = []
    for _ in range(num_training_run):
        adv.train(train_opt)
        state_log.append(adv.get_state())

    # Keep deep copies so later runs cannot alias these models.
    training_runs.append(state_log)
    baseline_models.append(copy.deepcopy(adv.baseline_model))
    robust_models.append(copy.deepcopy(adv.robust_model))
    utils.update_progress((run_i + 1) / num_fresh_runs)

In [None]:
# Evaluation setup: `vf` is rebuilt with one more step (N = 3 + 1) than
# `vf_next` (N = 3), which is used to complete a rollout after one step.
# NOTE: this rebinds the notebook-level `vf`, `slip` and `V`.
vf, slip = slip_utils.get_value_function_gait(
    torch.Tensor([0., 1.2, 5]),
    N=3 + 1,
)
V = vf.get_value_function()

vf_next, slip_next = slip_utils.get_value_function_gait(
    torch.Tensor([0., 1.2, 5]),
    N=3,
)
V_next = vf_next.get_value_function()

In [None]:
# Controllers built from the most recently trained `adv`.
# To use the stored copies instead:
# robust_ctrl = relu_mpc.ReLUMPC(vf, robust_models[0])
# baseline_ctrl = relu_mpc.ReLUMPC(vf, baseline_models[0])
robust_ctrl, baseline_ctrl = (
    relu_mpc.ReLUMPC(vf, model)
    for model in (adv.robust_model, adv.baseline_model)
)
def eval_one_step_ctrl(ctrl, x0_samp):
    """Take one step with `ctrl`, finish the rollout with `vf_next`, and score it.

    Relies on the notebook globals `vf`, `vf_next` and `V_next`.

    Args:
        ctrl: object exposing `get_ctrl(x)` that returns `(u0, x1)`.
        x0_samp: 1-D tensor holding the initial state.

    Returns:
        `(value, x_traj, u_traj)`, where `x_traj` / `u_traj` are the initial
        state / first input prepended (as a column) to the trajectories
        produced by `vf_next.sol_to_traj`, and `value` is
        `vf.traj_cost(x_traj[:, 1:], u_traj)`.
        `(None, None, None)` if the controller returns no input or the
        remaining problem yields no trajectory.
    """
    failure = (None, None, None)

    u0, x1 = ctrl.get_ctrl(x0_samp)
    if u0 is None:
        return failure

    # Solve the remaining problem from the successor state x1.
    tail_solution = V_next(x1)
    x_tail, u_tail, _alpha_tail = vf_next.sol_to_traj(x1, *tail_solution[1:])
    if x_tail is None:
        return failure

    # Prepend the first step as a leading column.
    x_traj = torch.cat((x0_samp.unsqueeze(1), x_tail), dim=1)
    u_traj = torch.cat((u0.unsqueeze(1), u_tail), dim=1)

    # assumes no cost on alpha! (true on all benchmarks)
    value = vf.traj_cost(x_traj[:, 1:], u_traj)
    return (value, x_traj, u_traj)

In [None]:
# Compare the robust one-step controller with the optimal solution on random
# initial states drawn from the sampling box. Samples for which either solve
# fails are reported and skipped.
# (To sweep the validation set instead, index x_samples_validation with a
# stride, as done in the later validation cells.)
num_samples = 10
for i in range(num_samples):
    x0_samp = torch.rand(vf.sys.x_dim) * (x0_up - x0_lo) + x0_lo

    # Reference: optimal value and trajectory.
    optimal_value, opt_s, opt_alpha = V(x0_samp)
    if optimal_value is None:
        print("opt bad")
        continue
    x_traj_opt, u_traj_opt, alpha_traj_opt = vf.sol_to_traj(x0_samp, opt_s, opt_alpha)

    # Robust controller (the baseline comparison is disabled in this cell).
    robust_value, robust_x_traj, robust_u_traj = eval_one_step_ctrl(robust_ctrl, x0_samp)
    if robust_value is None:
        print("robust bad")
        continue

    print(x0_samp)
    print(f"Robust: {robust_value!s}")
    print(robust_u_traj, robust_x_traj, sep="\n")
    print(f"Optimal:{optimal_value!s}")
    print(u_traj_opt, x_traj_opt, sep="\n")
    print("---")

    # Index of the sample just reported.
    print(i)

In [None]:
# Hand-picked initial state for a closer look (alternative kept below).
# x0_samp = torch.Tensor([0.0000, 1.2363, 6.2168]).type(vf.dtype)
x0_samp = torch.Tensor([0.0000, 1.0912, 8.2804]).type(vf.dtype)

optimal_value, opt_s, opt_alpha = V(x0_samp)
if optimal_value is None:
    print("opt bad")
# NOTE(review): sol_to_traj is still called after a failed solve; the
# trajectory variables may then be None.
x_traj_opt, u_traj_opt, alpha_traj_opt = vf.sol_to_traj(x0_samp, opt_s, opt_alpha)
print(u_traj_opt)

In [None]:
# Plot the optimal trajectory with its final column dropped.
fig = plotting_utils.slip_traj(
    slip,
    x_traj_opt[:, :-1],
    u_traj_opt[:, :-1],
    xf,
)
fig.show()

In [None]:
# Evaluate the robust controller on the hand-picked state.
robust_value, robust_x_traj, robust_u_traj = eval_one_step_ctrl(
    robust_ctrl, x0_samp
)
if robust_value is None:
    print("robust bad")
print(robust_u_traj)

In [None]:
# Plot the robust controller's trajectory with its final column dropped.
fig = plotting_utils.slip_traj(
    slip,
    robust_x_traj[:, :-1],
    robust_u_traj[:, :-1],
    xf,
)
fig.show()

In [None]:
# Evaluate the baseline controller on the hand-picked state.
baseline_value, baseline_x_traj, baseline_u_traj = eval_one_step_ctrl(
    baseline_ctrl, x0_samp
)
if baseline_value is None:
    print("baseline bad")
print(baseline_u_traj)

In [None]:
# Plot the baseline controller's trajectory with its final column dropped.
fig = plotting_utils.slip_traj(
    slip,
    baseline_x_traj[:, :-1],
    baseline_u_traj[:, :-1],
    xf,
)
fig.show()

In [None]:
# Empty (0, 1) column tensors that the validation loop appends costs to.
# Re-run this cell before re-running that loop to avoid duplicate rows.
cost_opt, cost_baseline, cost_robust = (
    torch.Tensor(0, 1).type(vf.dtype) for _ in range(3)
)

In [None]:
# Collect optimal / baseline / robust costs on every `sample_down`-th
# validation state. A sample is recorded only if all three solves succeed.
x_samples_validation = x_samples_init
sample_down = 10
num_samples = x_samples_validation.shape[0] // sample_down
# (Alternative: set num_samples = 100 and draw x0_samp uniformly from
# [x0_lo, x0_up] with torch.rand instead of indexing the validation set.)
for i in range(num_samples):
    x0_samp = x_samples_validation[i * sample_down, :]

    optimal_value, opt_s, opt_alpha = V(x0_samp)
    if optimal_value is None:
        print("opt bad")
        continue
    x_traj_opt, u_traj_opt, alpha_traj_opt = vf.sol_to_traj(x0_samp, opt_s, opt_alpha)

    baseline_value, baseline_x_traj, baseline_u_traj = eval_one_step_ctrl(baseline_ctrl, x0_samp)
    if baseline_value is None:
        print("baseline bad")
        continue

    robust_value, robust_x_traj, robust_u_traj = eval_one_step_ctrl(robust_ctrl, x0_samp)
    if robust_value is None:
        print("robust bad")
        continue

    # Append one (1, 1) row per accumulator.
    cost_opt = torch.cat(
        (cost_opt, torch.Tensor([[optimal_value]]).type(vf.dtype)), dim=0)
    cost_baseline = torch.cat(
        (cost_baseline, torch.Tensor([[baseline_value.item()]]).type(vf.dtype)), dim=0)
    cost_robust = torch.cat(
        (cost_robust, torch.Tensor([[robust_value.item()]]).type(vf.dtype)), dim=0)
    utils.update_progress((i + 1) / num_samples)

In [None]:
# Plot the collected costs of the three policies.
fig = plotting_utils.control_perf(
    cost_opt,
    cost_baseline,
    cost_robust,
    nbin=40,
    bartop=160,
    clamp_val=100000,
)
fig.show()

In [None]:
# Mean cost gap of each controller relative to the optimal cost.
print(f"Robust mean: {(cost_robust - cost_opt).mean().item()!s}")
print(f"Baseline mean: {(cost_baseline - cost_opt).mean().item()!s}")

In [None]:
# Walk the strided validation set again and print full details for every
# state where the robust controller's cost is lower than the baseline's.
x_samples_validation = x_samples_init
sample_down = 10
num_samples = x_samples_validation.shape[0] // sample_down
# (Alternative: set num_samples = 100 and draw x0_samp uniformly from
# [x0_lo, x0_up] with torch.rand instead of indexing the validation set.)
for i in range(num_samples):
    x0_samp = x_samples_validation[i * sample_down, :]

    optimal_value, opt_s, opt_alpha = V(x0_samp)
    if optimal_value is None:
        print("opt bad")
        continue
    x_traj_opt, u_traj_opt, alpha_traj_opt = vf.sol_to_traj(x0_samp, opt_s, opt_alpha)

    baseline_value, baseline_x_traj, baseline_u_traj = eval_one_step_ctrl(baseline_ctrl, x0_samp)
    if baseline_value is None:
        print("baseline bad")
        continue

    robust_value, robust_x_traj, robust_u_traj = eval_one_step_ctrl(robust_ctrl, x0_samp)
    if robust_value is None:
        print("robust bad")
        continue

    if robust_value < baseline_value:
        print(x0_samp)
        print(f"Robust: {robust_value!s}")
        print(robust_u_traj, robust_x_traj, sep="\n")
        print(f"Baseline: {baseline_value!s}")
        print(baseline_u_traj, baseline_x_traj, sep="\n")
        print(f"Optimal:{optimal_value!s}")
        print(u_traj_opt, x_traj_opt, sep="\n")
        print("---")

    # Index of the sample just processed (printed only when all solves succeed).
    print(i)