In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("..")
sys.path.append(".")
import torch
import numpy as np
import copy
import plotly
import plotly.graph_objs as go
import pickle
from datetime import datetime
plotly.offline.init_notebook_mode(connected=True)

import robust_value_approx.samples_generator as samples_generator
import robust_value_approx.samples_buffer as samples_buffer
import robust_value_approx.value_approximation as value_approximation
import robust_value_approx.training_log as training_log
import robust_value_approx.controllers as controllers

import pendulum_utils
import acrobot_utils



You may have already (directly or indirectly) imported `torch` which uses
`RTLD_GLOBAL`. Using `RTLD_GLOBAL` may cause symbol collisions which manifest
themselves in bugs like "free(): invalid pointer". Please consider importing
`pydrake` (and related C++-wrapped libraries like `cv2`, `open3d`, etc.)
*before* importing `torch`. For more details, see:
https://github.com/pytorch/pytorch/issues/3059#issuecomment-534676459




# Pendulum

In [30]:
# Build the pendulum value function and system object; the argument 10 is
# forwarded to pendulum_utils.get_value_function (presumably the horizon
# length -- TODO confirm).
# NOTE(review): `sys` rebinds the name of the standard-library `sys` module
# imported in the first cell, so that module is no longer reachable as `sys`.
vf, sys = pendulum_utils.get_value_function(10)
# Callable that is evaluated on an initial state further down (V(x0)).
V = vf.get_value_function()

In [31]:
# Evaluate the value function at one initial state and plot the result object
# it returns alongside the value.
x0 = torch.Tensor([np.pi+1.5, 1.]).double()
v, res = V(x0)
sys.plot_result(res).show()

In [19]:
# Nominal state / input about which the LQR problem is set up.
x_nom = torch.Tensor([np.pi, 0.]).type(vf.dtype)
u_nom = torch.zeros(vf.u_dim[0]).type(vf.dtype)
# Only the second return value (S) is needed in this cell: the LQR controller
# itself would be overwritten by the sampling controller below, so it is not
# bound to a name.
S = controllers.get_lqr_controller(sys.dx, x_nom, u_nom, torch.diag(torch.Tensor([1., 10.])), torch.diag(torch.Tensor([1.])), vf.u_lo[0], vf.u_up[0])[1]

# Quadratic model (x - x_nom)^T Q (x - x_nom), expanded into Q / q / c terms.
# State dimension and dtype are read from vf instead of being hard-coded.
Q = torch.Tensor(S).type(vf.dtype)
lqr_model = value_approximation.QuadraticModel(vf.x_dim[0], vf.dtype, Q=Q, q=-2.*Q@x_nom, c=x_nom@Q@x_nom)
ctrl = controllers.get_sampling_infinite_horizon_controller(sys.dx, vf.step_cost, lqr_model, vf.u_lo[0], vf.u_up[0], vf.dt_lo, 100)

# Simulate the sampling controller from one initial state.
x0 = torch.Tensor([np.pi+1.5, 0.]).double()
x_traj_sim, t_traj_sim = controllers.sim_ctrl(x0, 1, sys.dx, ctrl, .1, 50)

# One trace per state component, named so the legend is readable.
fig = go.Figure()
for i in range(vf.x_dim[0]):
    fig.add_trace(go.Scatter(
        x=t_traj_sim,
        y=x_traj_sim[i,:],
        name='x[{}]'.format(i),
    ))
fig.show()

In [20]:
# Lower / upper corners of the initial-state box handed to the sample generators.
x0_lo = torch.Tensor([np.pi-1.5, -15.]).type(vf.dtype)
x0_up = torch.Tensor([np.pi+1.5, 15.]).type(vf.dtype)

# file options
# Every artifact path lives under ../data and is keyed by the system name.
sys_name = 'pendulum'
x_samples_file = '../data/validation_{}_x.pt'.format(sys_name)
v_samples_file = '../data/validation_{}_v.pt'.format(sys_name)
model_file = '../data/{}.pt'.format(sys_name)

In [22]:
# Options for the pendulum experiment, read by the cells below via opt[...].
opt = {
    # sample buffer size limit, mini-batch size and network shape
    'max_buffer_size': None,
    'batch_size': 10,
    'nn_width': None,
    'nn_depth': None,

    # warm-up phase before the generation loop
    # (key spelling is kept as-is: the warm-up training cell reads it verbatim)
    'init_num_samples': 0,
    'init_num_trainig_step': 0,

    # generation loop
    'num_generations': 100,
    'num_samples_per_generation': 10,
    'num_train_step_per_gen': 100,

    # settings forwarded to the adversarial sample generator
    'adv_max_iter': 3,
    'adv_conv_tol': 1e-5,
    'adv_learning_rate': .25,

    # size of the validation set
    'num_samples_validation': 500,
}

In [23]:
# Baseline pipeline: random sample generator over the box [x0_lo, x0_up].
samples_gen = samples_generator.RandomSampleGenerator(vf, x0_lo, x0_up)

# A single sample is generated only to read off the data / label widths that
# size the buffer.
data, label = samples_gen.generate_samples(1)
samples_buff = samples_buffer.SamplesBuffer(data.shape[1], label.shape[1], vf.dtype, max_size=opt['max_buffer_size'])

vf_approx = value_approximation.InfiniteHorizonValueFunctionApproximation(vf.dtype, vf.x_dim[0], opt['nn_width'], opt['nn_depth'])
# NOTE(review): the meaning of the first argument (1) is not visible here --
# confirm against TrainingLog.
train_log = training_log.TrainingLog(1, prefix="baseline")

In [24]:
# Draw the validation set from the random sample generator.
x_samples_validation, v_labels_validation = samples_gen.generate_samples(opt['num_samples_validation'])

In [25]:
# Persist the validation set so it can be reloaded instead of regenerated.
torch.save(x_samples_validation, x_samples_file)
torch.save(v_labels_validation, v_samples_file)

In [None]:
# Reload the validation set from the files written by the save cell.
x_samples_validation = torch.load(x_samples_file)
v_labels_validation = torch.load(v_samples_file)

In [None]:
# Seed the baseline buffer with the initial batch of random samples.
(x_samples, v_labels) = samples_gen.generate_samples(opt['init_num_samples'])
samples_buff.add_samples(x_samples, v_labels)

In [None]:
# Warm-up: train the baseline approximation on random mini-batches from the
# buffer and log each step's loss.
n_warmup_steps = opt['init_num_trainig_step']
for _step in range(n_warmup_steps):
    x, v = samples_buff.get_random_samples(opt['batch_size'])
    train_log.add_train_loss(vf_approx.train_step(x, v))

In [27]:
# Fork the adversarial pipeline from the baseline: the buffer and the
# approximation are deep-copied and the log is copied via get_copy, so both
# pipelines start from the same state.
samples_buff_adv = copy.deepcopy(samples_buff)
# Adversarial generator over the same box, configured from the adv_* options.
samples_gen_adv = samples_generator.AdversarialSampleGenerator(vf, x0_lo, x0_up, 
                                                               max_iter=opt['adv_max_iter'],
                                                               conv_tol=opt['adv_conv_tol'],
                                                               learning_rate=opt['adv_learning_rate'])
vf_approx_adv = copy.deepcopy(vf_approx)
# keep_writer=True presumably shares the baseline log's writer -- TODO confirm.
train_log_adv = training_log.TrainingLog.get_copy(
    train_log, prefix="adversarial", keep_writer=True)

In [28]:
# (buffer, approximation, log) for each pipeline. The adversarial pipeline is
# listed first so every step below runs adversarial-then-baseline.
pipelines = (
    (samples_buff_adv, vf_approx_adv, train_log_adv),
    (samples_buff, vf_approx, train_log),
)

# Validation loss before any generation has been trained.
for _buff, approx, log in pipelines:
    validation_losses = approx.validation_loss(x_samples_validation, v_labels_validation)
    log.add_validation_loss(validation_losses)

for gen_i in range(opt['num_generations']):
    # adversarial samples (generated against the current adversarial approximation)
    (x_samples, v_labels) = samples_gen_adv.generate_samples(opt['num_samples_per_generation'], vf_approx_adv)
    samples_buff_adv.add_samples(x_samples, v_labels)
    # random samples
    (x_samples, v_labels) = samples_gen.generate_samples(opt['num_samples_per_generation'])
    samples_buff.add_samples(x_samples, v_labels)

    for train_step_i in range(opt['num_train_step_per_gen']):
        # Indices are drawn once from the adversarial buffer and reused to
        # index the baseline buffer as well.
        samples_indices = samples_buff_adv.get_random_sample_indices(opt['batch_size'])
        for buff, approx, log in pipelines:
            x, v = buff.get_samples_from_indices(samples_indices)
            losses = approx.train_step(x, v)
            log.add_train_loss(losses)

    # Validation loss after this generation.
    for _buff, approx, log in pipelines:
        validation_losses = approx.validation_loss(x_samples_validation, v_labels_validation)
        log.add_validation_loss(validation_losses)

In [41]:
# controller params
# (forwarded to get_sampling_infinite_horizon_controller in the benchmark cells)
dt_ctrl = .2
num_samples = 100

# benchmark params
# x0, x0_eps and num_breaks are passed to benchmark_controller; presumably they
# describe a grid of initial states around x0 -- TODO confirm.
x0 = torch.Tensor([np.pi, 0.]).type(vf.dtype)
x0_eps = torch.Tensor([1.5, 15.]).type(vf.dtype)

x_goal = torch.Tensor([np.pi, 0.]).type(vf.dtype)
num_breaks = [10, 10]
dt_sim = .1
N = 20

In [42]:
# Sampling-based controller driven by the baseline value-function approximation.
ctrl_baseline = controllers.get_sampling_infinite_horizon_controller(sys.dx, vf.step_cost, vf_approx.model, vf.u_lo[0], vf.u_up[0], dt_ctrl, num_samples)
# The input dimension is read from vf.u_dim, the same source used to size
# u_nom, so this cell does not depend on a name left over in the kernel.
bench_baseline = controllers.benchmark_controller(vf.u_dim[0], sys.dx, ctrl_baseline, x0, x0_eps, num_breaks, x_goal, dt_sim, N)

In [43]:
# Sampling-based controller driven by the adversarially trained approximation.
ctrl_adv = controllers.get_sampling_infinite_horizon_controller(sys.dx, vf.step_cost, vf_approx_adv.model, vf.u_lo[0], vf.u_up[0], dt_ctrl, num_samples)
# The input dimension is read from vf.u_dim, the same source used to size
# u_nom, so this cell does not depend on a name left over in the kernel.
bench_adv = controllers.benchmark_controller(vf.u_dim[0], sys.dx, ctrl_adv, x0, x0_eps, num_breaks, x_goal, dt_sim, N)

In [44]:
# LQR reference controller: cost matrices and the nominal point it is built about.
Q = torch.diag(torch.Tensor([1., 10.]))
R = torch.diag(torch.Tensor([1.]))
x_nom = torch.Tensor([np.pi, 0.]).type(vf.dtype)
# The input dimension is read from vf.u_dim so the cell runs on a fresh kernel
# (no reliance on a leftover variable).
u_nom = torch.zeros(vf.u_dim[0]).type(vf.dtype)
ctrl_lqr, S = controllers.get_lqr_controller(sys.dx, x_nom, u_nom, Q, R, vf.u_lo[0], vf.u_up[0])
bench_lqr = controllers.benchmark_controller(vf.u_dim[0], sys.dx, ctrl_lqr, x0, x0_eps, num_breaks, x_goal, dt_sim, N)

In [47]:
# Threshold for the binary maps and the shared colour range for the raw maps.
conv_thresh = 1.5
zmin = 0.
zmax = 3.

# Per-controller switches.
baseline_plot = True
adversarial_plot = True
lqr_plot = True

# (enabled, printed label, lazy accessor) per controller. The accessors are
# lambdas so the benchmark of a disabled controller is never looked up.
heatmap_specs = (
    (baseline_plot, "Baseline", lambda: bench_baseline),
    (adversarial_plot, "Adversarial", lambda: bench_adv),
    (lqr_plot, "LQR", lambda: bench_lqr),
)

# First pass: raw benchmark values, all on the [zmin, zmax] colour scale.
for enabled, label, get_bench in heatmap_specs:
    if not enabled:
        continue
    print(label)
    fig = go.Figure()
    fig.add_trace(go.Heatmap(z=get_bench().detach().numpy(), zmin=zmin, zmax=zmax))
    fig.show()

# Second pass: binary maps marking where the benchmark value exceeds conv_thresh.
for enabled, label, get_bench in heatmap_specs:
    if not enabled:
        continue
    print(label)
    fig = go.Figure()
    above_thresh = torch.Tensor(get_bench().detach().numpy() > conv_thresh).type(vf.dtype)
    fig.add_trace(go.Heatmap(z=above_thresh))
    fig.show()

Baseline


Adversarial


LQR


Baseline


Adversarial


LQR


In [48]:
# Heatmap of the element-wise difference between the baseline and adversarial
# benchmarks.
# NOTE(review): the printed label says "Baseline" and the guard only checks
# baseline_plot, yet the plot also needs bench_adv -- consider a label such as
# "Baseline - Adversarial" and guarding on both flags.
if baseline_plot:
    print("Baseline")
    fig = go.Figure()
    fig.add_trace(go.Heatmap(
        z = (bench_baseline-bench_adv).detach().numpy(),
        ))
    fig.show()

Baseline


# Acrobot

In [121]:
# Build the acrobot value function and system object; this replaces the
# pendulum `vf` / `sys` / `V` from the previous section. The argument 20 is
# forwarded to acrobot_utils.get_value_function (presumably the horizon
# length -- TODO confirm).
vf, sys = acrobot_utils.get_value_function(20)
V = vf.get_value_function()

In [122]:
# Evaluate the value function at a state offset by eps in the first two
# components from [pi, 0, 0, 0], then plot the returned result.
eps = 0.1
x0 = torch.Tensor([np.pi+eps, -eps, 0., 0.]).double()
v, res = V(x0)
sys.plot_result(res).show()

In [123]:
# Display the 'u_traj' entry of the result returned by V(x0).
res['u_traj']

[tensor([26.8483], dtype=torch.float64),
 tensor([26.5273], dtype=torch.float64),
 tensor([-17.1352], dtype=torch.float64),
 tensor([-8.9664], dtype=torch.float64),
 tensor([-9.5458], dtype=torch.float64),
 tensor([-8.9031], dtype=torch.float64),
 tensor([-7.9167], dtype=torch.float64),
 tensor([-6.7804], dtype=torch.float64),
 tensor([-5.6710], dtype=torch.float64),
 tensor([-4.6848], dtype=torch.float64),
 tensor([-3.8501], dtype=torch.float64),
 tensor([-3.1557], dtype=torch.float64),
 tensor([-2.5766], dtype=torch.float64),
 tensor([-2.0890], dtype=torch.float64),
 tensor([-1.6741], dtype=torch.float64),
 tensor([-1.3143], dtype=torch.float64),
 tensor([-1.0223], dtype=torch.float64),
 tensor([-0.4953], dtype=torch.float64),
 tensor([-2.7877], dtype=torch.float64),
 tensor([-2.6945], dtype=torch.float64)]

In [124]:
# Disabled alternative: a controller that always returns zeros.
# ctrl = lambda x: (torch.zeros(1), torch.zeros(1), None)

# Active choice: controller obtained directly from the value function.
ctrl = controllers.get_optimal_controller(vf)

# Disabled alternative: LQR controller about x_nom / u_nom.
# Q = torch.diag(torch.Tensor([1., 1., .1, .1]))
# R = torch.diag(torch.Tensor([1.]))
# x_nom = torch.Tensor([np.pi, 0., 0., 0.]).double()
# u_nom = torch.zeros(vf.u_dim[0]).type(vf.dtype)
# ctrl, S = controllers.get_lqr_controller(sys.dx, x_nom, u_nom, Q, R, vf.u_lo[0], vf.u_up[0])

# Disabled alternative: sampling controller on a quadratic model built from S.
# Q = torch.Tensor(S).type(vf.dtype)
# lqr_model = value_approximation.QuadraticModel(vf.x_dim[0], vf.dtype, Q=Q, q=-2.*Q@x_nom, c=x_nom@Q@x_nom)
# ctrl = controllers.get_sampling_infinite_horizon_controller(sys.dx, vf.step_cost, lqr_model, vf.u_lo[0], vf.u_up[0], vf.dt_lo, 500)

# Simulate the active controller from a slightly perturbed initial state.
eps = .05
x0 = torch.Tensor([np.pi+eps, -eps, 0., 0.]).double()
x_traj_sim, t_traj_sim = controllers.sim_ctrl(x0, 1, sys.dx, ctrl, .1, 10)

# One trace per state component.
fig = go.Figure()
for i in range(4):
    fig.add_trace(go.Scatter(
        x=t_traj_sim,
        y=x_traj_sim[i,:]
    ))
fig.show()

# NOTE(review): S is only assigned in the commented-out LQR block above, so
# this prints whatever value an earlier cell left in S (hidden kernel state).
print(S)

[[18315.53563872  5083.10441454  7807.11020051  2805.51200954]
 [ 5083.10441454  1425.67946699  2169.23039866   782.86083588]
 [ 7807.11020051  2169.23039866  3328.31577397  1196.6087852 ]
 [ 2805.51200954   782.86083588  1196.6087852    430.97320533]]


In [96]:
# Lower / upper corners of the initial-state box handed to the sample
# generators; the first two components are within eps of [pi, 0].
eps = .05
x0_lo = torch.Tensor([np.pi-eps, -eps, -2., -3.]).type(vf.dtype)
x0_up = torch.Tensor([np.pi+eps, eps, 2., 3.]).type(vf.dtype)

# file options
# Every artifact path lives under ../data and is keyed by the system name.
sys_name = 'acrobot'
x_samples_file = '../data/validation_{}_x.pt'.format(sys_name)
v_samples_file = '../data/validation_{}_v.pt'.format(sys_name)
model_file = '../data/{}.pt'.format(sys_name)

In [104]:
# Options for the acrobot experiment, read by the cells below via opt[...].
opt = {
    # sample buffer size limit, mini-batch size and network shape
    'max_buffer_size': None,
    'batch_size': 60,
    'nn_width': 200,
    'nn_depth': 2,

    # warm-up phase before the generation loop
    # (key spelling is kept as-is: the warm-up training cell reads it verbatim)
    'init_num_samples': 100,
    'init_num_trainig_step': 1000,

    # generation loop
    'num_generations': 5,
    'num_samples_per_generation': 10,
    'num_train_step_per_gen': 100,

    # settings forwarded to the adversarial sample generator
    'adv_max_iter': 3,
    'adv_conv_tol': 1e-5,
    'adv_learning_rate': 1.,

    # size of the validation set
    'num_samples_validation': 100,
}

In [98]:
# Baseline pipeline: random sample generator over the box [x0_lo, x0_up].
samples_gen = samples_generator.RandomSampleGenerator(vf, x0_lo, x0_up)

# A single sample is generated only to read off the data / label widths that
# size the buffer.
data, label = samples_gen.generate_samples(1)
samples_buff = samples_buffer.SamplesBuffer(data.shape[1], label.shape[1], vf.dtype, max_size=opt['max_buffer_size'])

vf_approx = value_approximation.InfiniteHorizonValueFunctionApproximation(vf.dtype, vf.x_dim[0], opt['nn_width'], opt['nn_depth'])
# NOTE(review): the meaning of the first argument (1) is not visible here --
# confirm against TrainingLog.
train_log = training_log.TrainingLog(1, prefix="baseline")

In [29]:
# Draw the validation set from the random sample generator.
x_samples_validation, v_labels_validation = samples_gen.generate_samples(opt['num_samples_validation'])

In [30]:
# Persist the validation set so it can be reloaded instead of regenerated.
torch.save(x_samples_validation, x_samples_file)
torch.save(v_labels_validation, v_samples_file)

In [99]:
# Reload the validation set from the files written by the save cell.
x_samples_validation = torch.load(x_samples_file)
v_labels_validation = torch.load(v_samples_file)

In [100]:
# Seed the baseline buffer with the initial batch of random samples.
(x_samples, v_labels) = samples_gen.generate_samples(opt['init_num_samples'])
samples_buff.add_samples(x_samples, v_labels)

In [101]:
# Warm-up: train the baseline approximation on random mini-batches from the
# buffer and log each step's loss.
n_warmup_steps = opt['init_num_trainig_step']
for _step in range(n_warmup_steps):
    x, v = samples_buff.get_random_samples(opt['batch_size'])
    train_log.add_train_loss(vf_approx.train_step(x, v))

In [102]:
# Fork the adversarial pipeline from the (warmed-up) baseline: the buffer and
# the approximation are deep-copied and the log is copied via get_copy, so
# both pipelines start from the same state.
samples_buff_adv = copy.deepcopy(samples_buff)
# Adversarial generator over the same box, configured from the adv_* options.
samples_gen_adv = samples_generator.AdversarialSampleGenerator(vf, x0_lo, x0_up, 
                                                               max_iter=opt['adv_max_iter'],
                                                               conv_tol=opt['adv_conv_tol'],
                                                               learning_rate=opt['adv_learning_rate'])
vf_approx_adv = copy.deepcopy(vf_approx)
# keep_writer=True presumably shares the baseline log's writer -- TODO confirm.
train_log_adv = training_log.TrainingLog.get_copy(
    train_log, prefix="adversarial", keep_writer=True)

In [105]:
# (buffer, approximation, log) for each pipeline. The adversarial pipeline is
# listed first so every step below runs adversarial-then-baseline.
pipelines = (
    (samples_buff_adv, vf_approx_adv, train_log_adv),
    (samples_buff, vf_approx, train_log),
)

# Validation loss before the generation loop starts.
for _buff, approx, log in pipelines:
    validation_losses = approx.validation_loss(x_samples_validation, v_labels_validation)
    log.add_validation_loss(validation_losses)

for gen_i in range(opt['num_generations']):
    # adversarial samples (generated against the current adversarial approximation)
    (x_samples, v_labels) = samples_gen_adv.generate_samples(opt['num_samples_per_generation'], vf_approx_adv)
    samples_buff_adv.add_samples(x_samples, v_labels)
    # random samples
    (x_samples, v_labels) = samples_gen.generate_samples(opt['num_samples_per_generation'])
    samples_buff.add_samples(x_samples, v_labels)

    for train_step_i in range(opt['num_train_step_per_gen']):
        # Indices are drawn once from the adversarial buffer and reused to
        # index the baseline buffer as well.
        samples_indices = samples_buff_adv.get_random_sample_indices(opt['batch_size'])
        for buff, approx, log in pipelines:
            x, v = buff.get_samples_from_indices(samples_indices)
            losses = approx.train_step(x, v)
            log.add_train_loss(losses)

    # Validation loss after this generation.
    for _buff, approx, log in pipelines:
        validation_losses = approx.validation_loss(x_samples_validation, v_labels_validation)
        log.add_validation_loss(validation_losses)

In [110]:
# Simulate a sampling controller driven by one of the learned models.
dt_ctrl = .2
num_samples = 1000
# Switch between the baseline and adversarial model by (un)commenting.
# model = vf_approx.model
model = vf_approx_adv.model
ctrl = controllers.get_sampling_infinite_horizon_controller(sys.dx, vf.step_cost, model, vf.u_lo[0], vf.u_up[0], dt_ctrl, num_samples)
eps = 0.1
x0 = torch.Tensor([np.pi+eps, -eps, 0., 0.]).double()
x_traj_sim, t_traj_sim = controllers.sim_ctrl(x0, 1, sys.dx, ctrl, .1, 20)

# One trace per state component.
fig = go.Figure()
for i in range(4):
    fig.add_trace(go.Scatter(
        x=t_traj_sim,
        y=x_traj_sim[i,:]
    ))
fig.show()

In [58]:
# Rebuild the quadratic form from the model's sqrtQ parameter (sqrtQ^T sqrtQ)
# and print it next to S and the raw parameter.
# NOTE(review): S is not assigned in this cell; it relies on an LQR cell
# having been run earlier in the kernel session.
Q = vf_approx.model.sqrtQ.t()@vf_approx.model.sqrtQ
print(Q)
print(S)
print(vf_approx.model.sqrtQ)

tensor([[1086.3273,  320.3016,  491.1612,  174.4732],
        [ 320.3016,   95.9192,  145.0247,   51.9021],
        [ 491.1612,  145.0247,  222.1646,   78.9756],
        [ 174.4732,   51.9021,   78.9756,   28.2056]], dtype=torch.float64,
       grad_fn=<MmBackward>)
tensor([[1086.3273,  320.3016,  491.1612,  174.4732],
        [ 320.3016,   95.9192,  145.0247,   51.9021],
        [ 491.1612,  145.0247,  222.1646,   78.9756],
        [ 174.4732,   51.9021,   78.9756,   28.2056]], dtype=torch.float64)
Parameter containing:
tensor([[28.8661,  8.1513, 12.9025,  4.4895],
        [ 8.1513,  3.5311,  3.7998,  1.6024],
        [12.9025,  3.7998,  6.0626,  2.1205],
        [ 4.4895,  1.6024,  2.1205,  0.9929]], dtype=torch.float64,
       requires_grad=True)


In [155]:
# controller params
# (forwarded to get_sampling_infinite_horizon_controller in the benchmark cells)
dt_ctrl = .1
num_samples = 500

# benchmark params
# x0, x0_eps and num_breaks are passed to benchmark_controller; presumably they
# describe a grid of initial states around x0 -- TODO confirm. Only the first
# two components of x0_eps are non-zero.
x0 = torch.Tensor([np.pi, 0., 0., 0.]).type(vf.dtype)
x0_eps = torch.Tensor([.1, .1, 0., 0.]).type(vf.dtype)
x_goal = torch.Tensor([np.pi, 0., 0., 0.]).type(vf.dtype)
num_breaks = [5, 5]
dt_sim = .1
N = 20

In [157]:
# Benchmark the sampling controller driven by the baseline approximation.
ctrl_baseline = controllers.get_sampling_infinite_horizon_controller(sys.dx, vf.step_cost, vf_approx.model, vf.u_lo[0], vf.u_up[0], dt_ctrl, num_samples)
bench_baseline = controllers.benchmark_controller(vf.u_dim[0], sys.dx, ctrl_baseline, x0, x0_eps, num_breaks, x_goal, dt_sim, N)

In [156]:
# Benchmark the sampling controller driven by the adversarially trained approximation.
ctrl_adv = controllers.get_sampling_infinite_horizon_controller(sys.dx, vf.step_cost, vf_approx_adv.model, vf.u_lo[0], vf.u_up[0], dt_ctrl, num_samples)
bench_adv = controllers.benchmark_controller(vf.u_dim[0], sys.dx, ctrl_adv, x0, x0_eps, num_breaks, x_goal, dt_sim, N)

In [158]:
# LQR reference controller: cost matrices and the nominal point it is built about.
Q = torch.diag(torch.Tensor([1., 1., .1, .1]))
R = torch.diag(torch.Tensor([.1]))
x_nom = torch.Tensor([np.pi, 0., 0., 0.]).type(vf.dtype)
# The input dimension is read from vf.u_dim, matching the benchmark call
# below, so the cell runs on a fresh kernel (no reliance on a leftover variable).
u_nom = torch.zeros(vf.u_dim[0]).type(vf.dtype)
ctrl_lqr, S = controllers.get_lqr_controller(sys.dx, x_nom, u_nom, Q, R, vf.u_lo[0], vf.u_up[0])
bench_lqr = controllers.benchmark_controller(vf.u_dim[0], sys.dx, ctrl_lqr, x0, x0_eps, num_breaks, x_goal, dt_sim, N)

In [163]:
# Threshold for the binary maps and the shared colour range for the raw maps.
conv_thresh = 5.
zmin = 0.
zmax = 50.

# (printed label, lazy accessor) per controller. The accessors are lambdas so
# each benchmark is looked up only when its plot is drawn.
heatmap_specs = (
    ("Baseline", lambda: bench_baseline),
    ("Adversarial", lambda: bench_adv),
    ("LQR", lambda: bench_lqr),
)

# First pass: raw benchmark values, all on the [zmin, zmax] colour scale.
for label, get_bench in heatmap_specs:
    print(label)
    fig = go.Figure()
    fig.add_trace(go.Heatmap(z=get_bench().detach().numpy(), zmin=zmin, zmax=zmax))
    fig.show()

# Second pass: binary maps marking where the benchmark value exceeds conv_thresh.
for label, get_bench in heatmap_specs:
    print(label)
    fig = go.Figure()
    above_thresh = torch.Tensor(get_bench().detach().numpy() > conv_thresh).type(vf.dtype)
    fig.add_trace(go.Heatmap(z=above_thresh))
    fig.show()

Baseline


Adversarial


LQR


Baseline


Adversarial


LQR
