In [92]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [93]:
import sys
sys.path.append("..")
sys.path.append(".")
import torch
import numpy as np
import copy
import plotly
import plotly.graph_objs as go
import pickle
from datetime import datetime
plotly.offline.init_notebook_mode(connected=True)

import robust_value_approx.samples_generator as samples_generator
import robust_value_approx.samples_buffer as samples_buffer
import robust_value_approx.value_approximation as value_approximation
import robust_value_approx.training_log as training_log
import robust_value_approx.controllers as controllers

import pendulum_utils
import acrobot_utils

# Acrobot

In [94]:
# Build the acrobot value-function object and the matching system object.
# NOTE(review): binding the second return value to `sys` shadows the standard
# `sys` module imported in the import cell. Later cells rely on this name
# (e.g. `sys.dx`, `sys.plot_result`), so a rename has to be done notebook-wide.
# The meaning of the argument 25 is not visible here (presumably a horizon
# length) - confirm in acrobot_utils.
vf, sys = acrobot_utils.get_value_function(25)
# Callable used below as `V(x0)`.
V = vf.get_value_function()

In [95]:
# Evaluate the value function at a single state and plot what it returns.
x0 = torch.tensor([np.pi+.2, -.1, 0.5, 1.], dtype=vf.dtype)
# V returns a pair; the second element is what sys.plot_result consumes.
v, res = V(x0)
sys.plot_result(res).show()

In [410]:
# Alternative (disabled): zero-input controller.
# ctrl = lambda x: (torch.zeros(1), torch.zeros(1), None)

# LQR design about the nominal point (x_nom, u_nom); vf.u_lo[0] / vf.u_up[0]
# are forwarded to the LQR helper.
Q = torch.diag(torch.tensor([1., 1., 1., 1.]))
R = torch.diag(torch.tensor([1.]))
x_nom = torch.tensor([np.pi, 0., 0., 0.], dtype=vf.dtype)
u_nom = torch.zeros(vf.u_dim[0], dtype=vf.dtype)
# The controller returned here is overwritten by the lookahead controller
# below; only the matrix S is used in the rest of this cell.
ctrl, S = controllers.get_lqr_controller(sys.dx, x_nom, u_nom, Q, R, vf.u_lo[0], vf.u_up[0])
S = torch.tensor(S, dtype=vf.dtype)
# Coefficients equal the expansion of (x - x_nom)^T S (x - x_nom) for symmetric S,
# assuming QuadraticModel evaluates x^T Q x + q^T x + c - TODO confirm.
lqr_model = value_approximation.QuadraticModel(vf.x_dim[0], vf.dtype, Q=S, q=-2.*S@x_nom, c=x_nom@S@x_nom)

# Alternative (disabled): sampling-based infinite-horizon controller.
# ctrl = controllers.get_sampling_infinite_horizon_controller(sys.dx,
#                                                             vf.step_cost,
#                                                             lqr_model,
#                                                             vf.x_lo[0],
#                                                             vf.x_up[0],
#                                                             vf.u_lo[0],
#                                                             vf.u_up[0],
#                                                             vf.dt_lo,
#                                                             500)

# Lookahead controller built from a second value function (argument 5) with the
# quadratic LQR model passed as the second argument.
vf_ctrl, sys_ctrl = acrobot_utils.get_value_function(5)
ctrl = controllers.get_limited_lookahead_controller(vf_ctrl, lqr_model)
# ctrl = controllers.get_limited_lookahead_controller(vf_ctrl)

# Simulate the controller from a state close to x_nom.
x0 = torch.tensor([np.pi+.1, .1, 0., 0.], dtype=vf.dtype)
x_traj_sim, t_traj_sim = controllers.sim_ctrl(x0, 1, sys.dx, ctrl, .1, 50)

# One labelled trace per state component so the curves can be told apart.
fig = go.Figure()
for i in range(4):
    fig.add_trace(go.Scatter(
        x=t_traj_sim,
        y=x_traj_sim[i,:],
        name="x[%d]" % i,
    ))
fig.update_layout(xaxis=dict(title="t"), yaxis=dict(title="state"))
fig.show()

In [542]:
# Lower / upper corners of the initial-state box handed to the sample generators.
x0_lo = torch.tensor([np.pi-.1, -1., -2., -2.], dtype=vf.dtype)
x0_up = torch.tensor([np.pi+.1, 1., 2., 2.], dtype=vf.dtype)

# File locations: validation samples / labels and the model, all keyed by system name.
sys_name = 'acrobot'
validation_prefix = '../data/validation_' + sys_name
x_samples_file = validation_prefix + '_x.pt'
v_samples_file = validation_prefix + '_v.pt'
model_file = '../data/' + sys_name + '.pt'

In [553]:
# Experiment options, read by the cells below through opt[...].
# The key 'init_num_trainig_step' is spelled exactly as the training cell reads it.
opt = {
    # buffer / network / optimiser
    'max_buffer_size': None,
    'batch_size': 1000,
    'nn_width': None,
    'nn_depth': None,
    'learning_rate': .001,
    # initial training phase
    'init_num_samples': 10,
    'init_num_trainig_step': 30000,
    # per-generation training
    'num_generations': 10,
    'num_samples_per_generation': 10,
    'num_train_step_per_gen': 10000,
    # adversarial sample generator
    'adv_max_iter': 2,
    'adv_conv_tol': 1e-5,
    'adv_learning_rate': .2,
    # validation set size
    'num_samples_validation': 1000,
}

In [554]:
# Random sample generator over the box [x0_lo, x0_up].
samples_gen = samples_generator.RandomSampleGenerator(vf, x0_lo, x0_up)
# Sample buffer; its capacity comes from opt['max_buffer_size'].
samples_buff = samples_buffer.SamplesBuffer(
    vf.x_dim[0],
    1,
    vf.dtype,
    max_size=opt['max_buffer_size'],
)

In [555]:
# Keyword options for the approximator, taken straight from `opt`.
approx_options = {
    'learning_rate': opt['learning_rate'],
    'nn_width': opt['nn_width'],
    'nn_depth': opt['nn_depth'],
}
vf_approx = value_approximation.InfiniteHorizonValueFunctionApproximation(
    vf.dtype,
    vf.x_dim[0],
    **approx_options
)
# Training log for the baseline (non-adversarial) run.
train_log = training_log.TrainingLog(1, prefix="baseline")

In [None]:
# Generate the validation set with the random sample generator.
warm_start_radius = None
validation_samples = samples_gen.generate_samples(
    opt['num_samples_validation'],
    show_progress=True,
    warm_start_radius=warm_start_radius,
)
x_samples_validation, v_labels_validation = validation_samples

In [None]:
# Persist validation states and labels to their respective files.
for tensor_to_save, target_file in (
    (x_samples_validation, x_samples_file),
    (v_labels_validation, v_samples_file),
):
    torch.save(tensor_to_save, target_file)

In [471]:
# Reload the validation states and labels written by the save cell.
x_samples_validation, v_labels_validation = (
    torch.load(source_file) for source_file in (x_samples_file, v_samples_file)
)

In [291]:
# Seed the buffer with 1000 states drawn by torch.rand (uniform on [0, 1) per
# component), labelled by the quadratic LQR model. The state dimension is taken
# from vf rather than hard-coded, matching how the buffer itself is constructed.
# NOTE(review): these states do not use the [x0_lo, x0_up] box - confirm intended.
x = torch.rand((1000, vf.x_dim[0]), dtype=vf.dtype)
v = lqr_model(x)
# Labels are detached so the buffer does not hold on to the autograd graph.
samples_buff.add_samples(x, v.detach())

In [556]:
# Keep only the validation samples whose label (column 0) is below 50
# and add them to the training buffer.
low_cost_mask = v_labels_validation[:, 0] < 50
x = x_samples_validation[low_cost_mask, :]
v = v_labels_validation[low_cost_mask, :]
samples_buff.add_samples(x, v)

In [546]:
# Add the validation states to the buffer, labelled by the LQR quadratic model
# instead of the stored labels.
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'QuadraticModel' object has no attribute 'x_desired'".
# Presumably lqr_model was created before QuadraticModel gained that attribute
# (autoreload is active) - re-create lqr_model and confirm.
x = x_samples_validation
v = lqr_model(x_samples_validation).detach()
samples_buff.add_samples(x, v)

AttributeError: 'QuadraticModel' object has no attribute 'x_desired'

In [440]:
# Intended seeding of the training buffer (currently disabled):
# (x_samples, v_labels) = samples_gen.generate_samples(opt['init_num_samples'],
#                                                      show_progress=True)
# samples_buff.add_samples(x_samples, v_labels)

# TEMPORARY
# NOTE(review): this puts the validation set into the training buffer, so any
# validation loss computed against x_samples_validation / v_labels_validation
# afterwards is measured on training data. Restore the generation code above
# before drawing conclusions from validation losses.
samples_buff.add_samples(x_samples_validation, v_labels_validation)

In [441]:
# Replace the approximator's model with an independent deep copy of the LQR
# quadratic model, so later updates to vf_approx.model leave lqr_model untouched.
vf_approx.model = copy.deepcopy(lqr_model)

In [559]:
# Initial training phase: repeated train steps on random batches from the buffer,
# logging the losses returned by every step.
num_init_steps = opt['init_num_trainig_step']
batch_size = opt['batch_size']
for train_step_i in range(num_init_steps):
    x, v = samples_buff.get_random_samples(batch_size)
    train_log.add_train_loss(vf_approx.train_step(x, v))

In [565]:
# Print each parameter of the LQR model followed by the same parameter of the
# trained model, then the trained model's x_desired.
for param_name in ("sqrtQ", "q", "c"):
    print(getattr(lqr_model, param_name))
    print(getattr(vf_approx.model, param_name))
print(vf_approx.model.x_desired)

Parameter containing:
tensor([[81.8209, 22.6451, 36.6020, 12.6230],
        [22.6451,  9.5495, 10.6264,  4.5628],
        [36.6020, 10.6264, 17.0011,  5.9124],
        [12.6230,  4.5628,  5.9124,  2.7783]], dtype=torch.float64,
       requires_grad=True)
Parameter containing:
tensor([[ 4.8330e+00, -2.4108e-01, -1.9685e+00, -8.1444e-01],
        [ 1.0530e+01, -5.1563e-01,  8.0975e-01, -3.7840e-01],
        [-1.2380e+01,  4.5731e-01, -1.1316e-02, -8.0703e-01],
        [ 8.4437e+00,  1.3753e+00,  1.5636e+00,  4.3315e-01]],
       dtype=torch.float64, requires_grad=True)
Parameter containing:
tensor([-54704.5861, -15806.2200, -24707.6967,  -8718.6892],
       dtype=torch.float64, requires_grad=True)
Parameter containing:
tensor([ 0.4949, -0.0286,  0.1477,  0.1260], dtype=torch.float64,
       requires_grad=True)
Parameter containing:
tensor(85929.7629, dtype=torch.float64, requires_grad=True)
Parameter containing:
tensor([0.5794], dtype=torch.float64, requires_grad=True)
tensor([3.1416, 0.

In [541]:
# Print each parameter of the LQR model followed by the same parameter of the
# trained model.
for param_name in ("sqrtQ", "q", "c"):
    print(getattr(lqr_model, param_name))
    print(getattr(vf_approx.model, param_name))

Parameter containing:
tensor([[81.8209, 22.6451, 36.6020, 12.6230],
        [22.6451,  9.5495, 10.6264,  4.5628],
        [36.6020, 10.6264, 17.0011,  5.9124],
        [12.6230,  4.5628,  5.9124,  2.7783]], dtype=torch.float64,
       requires_grad=True)
Parameter containing:
tensor([[ 0.1803,  0.1575,  0.5409,  0.4535],
        [ 0.3171,  0.5267,  1.4986,  0.3109],
        [-0.3726, -0.6223, -1.7684, -0.3588],
        [-0.2819, -0.2350, -0.8212, -0.7312]], dtype=torch.float64,
       requires_grad=True)
Parameter containing:
tensor([-54704.5861, -15806.2200, -24707.6967,  -8718.6892],
       dtype=torch.float64, requires_grad=True)
Parameter containing:
tensor([ 0.2115, -2.4843, -8.1372, -2.4630], dtype=torch.float64,
       requires_grad=True)
Parameter containing:
tensor(85929.7629, dtype=torch.float64, requires_grad=True)
Parameter containing:
tensor([0.5740], dtype=torch.float64, requires_grad=True)


In [449]:
# Evaluate the model once on the validation states. No gradients are needed for
# inspection, so autograd tracking is disabled.
with torch.no_grad():
    v_pred_validation = vf_approx.model(x_samples_validation)
# Side by side: column 0 = prediction, column 1 = label.
print(torch.cat((v_pred_validation, v_labels_validation), dim=1))
# Column-wise maxima via torch.max instead of the Python builtin, which iterates
# over the tensor row by row; the printed tensors keep shape (1,).
print(torch.max(v_labels_validation, dim=0)[0])
print(torch.max(v_pred_validation, dim=0)[0])

tensor([[2.0354e+02, 1.3101e+00],
        [3.1014e+03, 1.8534e+01],
        [2.6130e+01, 4.8519e-01],
        ...,
        [6.1773e+03, 4.7435e+01],
        [3.7383e+00, 5.9940e+01],
        [1.6926e+03, 1.5182e+01]], dtype=torch.float64, grad_fn=<CatBackward>)
tensor([572.7670], dtype=torch.float64)
tensor([19459.5020], dtype=torch.float64, grad_fn=<SelectBackward>)


In [476]:
# Scatter of the first column of `v` against its sample index.
# (A disabled 3-D variant used x[:,0], x[:,1] as x/y and v[:,0] as z.)
value_trace = go.Scatter(
    y = v[:,0],
)
fig = go.Figure()
fig.add_trace(value_trace)
fig.show()

In [499]:
# Mean of the first column of `v`, displayed as the cell output.
v[:, 0].mean()

tensor(26.0348, dtype=torch.float64)

In [None]:
# The adversarial run starts from deep copies of the baseline buffer and model,
# so both runs begin from the same state.
samples_buff_adv = copy.deepcopy(samples_buff)
# Options of the adversarial sample generator, taken from `opt`.
adv_options = {
    'max_iter': opt['adv_max_iter'],
    'conv_tol': opt['adv_conv_tol'],
    'learning_rate': opt['adv_learning_rate'],
}
samples_gen_adv = samples_generator.AdversarialSampleGenerator(
    vf, x0_lo, x0_up, **adv_options
)
vf_approx_adv = copy.deepcopy(vf_approx)
# Copy of the baseline log under the "adversarial" prefix.
train_log_adv = training_log.TrainingLog.get_copy(
    train_log,
    prefix="adversarial",
    keep_writer=True,
)

In [None]:
def _log_validation_losses():
    """Evaluate both approximations on the validation set and log the losses.

    The adversarial model is evaluated and logged first, then the baseline.
    """
    for approx, log in ((vf_approx_adv, train_log_adv), (vf_approx, train_log)):
        log.add_validation_loss(
            approx.validation_loss(x_samples_validation, v_labels_validation))


samples_per_generation = opt['num_samples_per_generation']
batch_size = opt['batch_size']

# Validation losses before any generation has been trained.
_log_validation_losses()
for gen_i in range(opt['num_generations']):
    # adversarial samples (generated against the current adversarial model)
    x_samples, v_labels = samples_gen_adv.generate_samples(samples_per_generation, vf_approx_adv)
    samples_buff_adv.add_samples(x_samples, v_labels)
    # random samples
    x_samples, v_labels = samples_gen.generate_samples(samples_per_generation)
    samples_buff.add_samples(x_samples, v_labels)
    for train_step_i in range(opt['num_train_step_per_gen']):
        # The same indices are used to read from both buffers.
        samples_indices = samples_buff_adv.get_random_sample_indices(batch_size)
        x, v = samples_buff_adv.get_samples_from_indices(samples_indices)
        train_log_adv.add_train_loss(vf_approx_adv.train_step(x, v))
        x, v = samples_buff.get_samples_from_indices(samples_indices)
        train_log.add_train_loss(vf_approx.train_step(x, v))
    # Validation losses after this generation.
    _log_validation_losses()

In [90]:
# Alternative (disabled): sampling-based infinite-horizon controller.
# dt_ctrl = .2
# num_samples = 1000
# u_lo = vf.u_lo[0]
# u_up = vf.u_up[0]
# x_lo = torch.tensor([np.pi-.5, -5., -10., -10.], dtype=vf.dtype)
# x_up = torch.tensor([np.pi+.5, 5., 10., 10.], dtype=vf.dtype)
# model = vf_approx.model
# model = vf_approx_adv.model
# ctrl = controllers.get_sampling_infinite_horizon_controller(sys.dx, vf.step_cost, model,
#                                                             x_lo, x_up, u_lo, u_up,
#                                                             dt_ctrl, num_samples)

# Lookahead controller built from a second value function (argument 10) with the
# trained baseline model passed as the second argument.
# ctrl = controllers.get_limited_lookahead_controller(vf)
vf_ctrl, sys_ctrl = acrobot_utils.get_value_function(10)
# ctrl = controllers.get_limited_lookahead_controller(vf_ctrl)
ctrl = controllers.get_limited_lookahead_controller(vf_ctrl, vf_approx.model)

# Build x0 directly in vf.dtype. torch.Tensor([...]).double() would first round
# np.pi+.2 to float32 and only then widen it, losing precision.
x0 = torch.tensor([np.pi+.2, -.1, 0., 0.], dtype=vf.dtype)
x_traj_sim, t_traj_sim = controllers.sim_ctrl(x0, 1, sys.dx, ctrl, .1, 50)

# One labelled trace per state component so the curves can be told apart.
fig = go.Figure()
for i in range(4):
    fig.add_trace(go.Scatter(
        x=t_traj_sim,
        y=x_traj_sim[i,:],
        name="x[%d]" % i,
    ))
fig.update_layout(xaxis=dict(title="t"), yaxis=dict(title="state"))
fig.show()

In [50]:
# Query the current controller at x0; the returned tuple is shown as the cell output.
ctrl(x0)

(tensor([18.6873], dtype=torch.float64),
 tensor([11.0765], dtype=torch.float64),
 tensor([ 2.9468,  0.6728, -2.4478,  6.7279], dtype=torch.float64))

In [89]:
# Quadratic-term matrix of the trained model, formed as sqrtQ^T sqrtQ.
sqrt_q = vf_approx.model.sqrtQ
Q = sqrt_q.t() @ sqrt_q
print(Q)

tensor([[ 6362.2426, -1063.7265, -1383.8910, -2113.2294],
        [-1063.7265,   644.7358,   232.9151,   263.1638],
        [-1383.8910,   232.9151,   514.0397,   289.0659],
        [-2113.2294,   263.1638,   289.0659,   855.4697]], dtype=torch.float64,
       grad_fn=<MmBackward>)


In [82]:
# Quadratic-term matrix of the trained model, formed as sqrtQ^T sqrtQ.
sqrt_q = vf_approx.model.sqrtQ
Q = sqrt_q.t() @ sqrt_q
print(Q)

tensor([[ 4.5871, -1.9057, -4.8377, -0.6502],
        [-1.9057,  1.3514,  3.5320,  1.1220],
        [-4.8377,  3.5320,  9.2420,  3.0026],
        [-0.6502,  1.1220,  3.0026,  1.3888]], dtype=torch.float64,
       grad_fn=<MmBackward>)


In [None]:
# controller params (passed to the sampling controller factory below)
dt_ctrl = .1
num_samples = 500

# benchmark params (passed to controllers.benchmark_controller below)
# Tensors are created directly in vf.dtype. torch.Tensor([...]).type(vf.dtype)
# would build a float32 tensor first, so np.pi and .1 would be rounded to
# float32 before being widened.
x0 = torch.tensor([np.pi, 0., 0., 0.], dtype=vf.dtype)
x0_eps = torch.tensor([.1, .1, 0., 0.], dtype=vf.dtype)
x_goal = torch.tensor([np.pi, 0., 0., 0.], dtype=vf.dtype)
# NOTE(review): presumably the number of grid points per perturbed state
# dimension - confirm in controllers.benchmark_controller.
num_breaks = [5, 5]
dt_sim = .1
N = 30

In [None]:
# Sampling controller driven by the baseline model, then its benchmark.
# NOTE(review): the commented-out calls to this factory elsewhere in the
# notebook pass state bounds (x_lo, x_up) before the input bounds; confirm that
# this 7-argument form matches the current signature.
ctrl_baseline = controllers.get_sampling_infinite_horizon_controller(
    sys.dx,
    vf.step_cost,
    vf_approx.model,
    vf.u_lo[0],
    vf.u_up[0],
    dt_ctrl,
    num_samples,
)
bench_baseline = controllers.benchmark_controller(
    vf.u_dim[0], sys.dx, ctrl_baseline,
    x0, x0_eps, num_breaks, x_goal,
    dt_sim, N,
)

In [None]:
# Sampling controller driven by the adversarially trained model, then its benchmark.
# NOTE(review): the commented-out calls to this factory elsewhere in the
# notebook pass state bounds (x_lo, x_up) before the input bounds; confirm that
# this 7-argument form matches the current signature.
ctrl_adv = controllers.get_sampling_infinite_horizon_controller(
    sys.dx,
    vf.step_cost,
    vf_approx_adv.model,
    vf.u_lo[0],
    vf.u_up[0],
    dt_ctrl,
    num_samples,
)
bench_adv = controllers.benchmark_controller(
    vf.u_dim[0], sys.dx, ctrl_adv,
    x0, x0_eps, num_breaks, x_goal,
    dt_sim, N,
)

In [None]:
# LQR controller about x_nom, benchmarked with the same settings as the
# learned controllers.
Q = torch.diag(torch.tensor([1., 1., .1, .1]))
R = torch.diag(torch.tensor([.1]))
# Created directly in vf.dtype so np.pi is not rounded through float32 first.
x_nom = torch.tensor([np.pi, 0., 0., 0.], dtype=vf.dtype)
u_nom = torch.zeros(vf.u_lo[0].shape[0], dtype=vf.dtype)
ctrl_lqr, S = controllers.get_lqr_controller(sys.dx, x_nom, u_nom, Q, R, vf.u_lo[0], vf.u_up[0])
# bench_lqr is read by the heatmap cell, so it has to be computed here;
# leaving this call commented out makes that cell fail with a NameError.
bench_lqr = controllers.benchmark_controller(vf.u_dim[0], sys.dx, ctrl_lqr, x0, x0_eps, num_breaks, x_goal, dt_sim, N)
print(S)

In [None]:
# Threshold for the binary maps and colour-scale limits for the raw maps.
conv_thresh = 5.
zmin = 0.
zmax = 50.

# (label, benchmark result) pairs in display order. bench_baseline, bench_adv
# and bench_lqr must already have been computed by the benchmark cells.
benchmarks = [
    ("Baseline", bench_baseline),
    ("Adversarial", bench_adv),
    ("LQR", bench_lqr),
]

# Raw benchmark values, colour scale limited to [zmin, zmax].
for label, bench in benchmarks:
    print(label)
    fig = go.Figure()
    fig.add_trace(go.Heatmap(
        z = bench.detach().numpy(),
        zmin = zmin,
        zmax = zmax,
        ))
    fig.show()

# Binary maps: 1 where the benchmark value exceeds conv_thresh, else 0.
# The comparison result is handed to plotly as a float numpy array, without a
# round trip through torch.
for label, bench in benchmarks:
    print(label)
    fig = go.Figure()
    fig.add_trace(go.Heatmap(
        z = (bench.detach().numpy() > conv_thresh).astype(float),
        ))
    fig.show()