In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import sys
sys.path.append("..")
sys.path.append(".")
import torch
import numpy as np
import copy
import plotly
import plotly.graph_objs as go
import pickle
from datetime import datetime
plotly.offline.init_notebook_mode(connected=True)

import robust_value_approx.samples_generator as samples_generator
import robust_value_approx.samples_buffer as samples_buffer
import robust_value_approx.value_approximation as value_approximation
import robust_value_approx.training_log as training_log
import robust_value_approx.controllers as controllers

import pendulum_utils
import acrobot_utils

# Acrobot

In [4]:
# Build the acrobot value-function object (the argument 40 is presumably the
# horizon length -- TODO confirm in acrobot_utils) and keep its evaluation
# callable as V.
# NOTE(review): assigning to `sys` rebinds the name of the standard-library
# module imported in the import cell. Later cells rely on this `sys`
# (e.g. `sys.dx`), so a rename would have to be applied notebook-wide.
vf, sys = acrobot_utils.get_value_function(40)
V = vf.get_value_function()

In [None]:
# Evaluate the value function at one initial state and plot what it returns.
x0 = torch.tensor([np.pi + 0.2, 0.1, 0.5, 3.0], dtype=vf.dtype)
value_at_x0, result = V(x0)
result_fig = sys.plot_result(result)
result_fig.show()

In [None]:
# --- LQR design around the nominal point --------------------------------
# Nominal state [pi, 0, 0, 0] with zero input.
Q = torch.diag(torch.tensor([.1, .1, .1, .1]))
R = torch.diag(torch.tensor([.01]))
x_nom = torch.tensor([np.pi, 0., 0., 0.], dtype=vf.dtype)
u_nom = torch.zeros(vf.u_dim[0], dtype=vf.dtype)
# Keep the LQR controller under its own name instead of overwriting `ctrl`.
ctrl_lqr, S = controllers.get_lqr_controller(sys.dx, x_nom, u_nom, Q, R, vf.u_lo[0], vf.u_up[0])
S = torch.tensor(S, dtype=vf.dtype)

# --- Quadratic value model built from S ----------------------------------
# (x - x_nom)' S (x - x_nom) = x' S x - 2 (S x_nom)' x + x_nom' S x_nom,
# which gives the Q / q / c terms passed below.
lqr_model = value_approximation.QuadraticModel(vf.dtype, vf.x_dim[0], Q=S, q=-2.*S@x_nom, c=x_nom@S@x_nom)
lqr_vf_approx = value_approximation.InfiniteHorizonValueFunctionApproximation(vf.dtype, vf.x_dim[0], lqr_model)

# --- Limited-lookahead controllers ---------------------------------------
# acrobot_utils.get_value_function returns a (value function, system) pair,
# so it is unpacked before being handed to the controller factory, as in the
# other cells. A fresh value function is built per controller, mirroring
# those cells.
vf_ctrl_N = 5
vf_ctrl_lqr, sys_ctrl_lqr = acrobot_utils.get_value_function(vf_ctrl_N)
# Lookahead controller that receives the LQR value approximation; kept under
# a separate name so it is available for comparison.
ctrl_lookahead_lqr = controllers.get_limited_lookahead_controller(vf_ctrl_lqr, lqr_vf_approx)
vf_ctrl, sys_ctrl = acrobot_utils.get_value_function(vf_ctrl_N)
# Lookahead controller without a value approximation; this is the one that
# is simulated below (same as the last assignment in the original cell).
ctrl = controllers.get_limited_lookahead_controller(vf_ctrl)

# --- Closed-loop simulation -----------------------------------------------
x0 = torch.tensor([np.pi+.1, .1, 0., 0.], dtype=vf.dtype)
x_traj_sim, t_traj_sim = controllers.sim_ctrl(x0, 1, sys.dx, ctrl, .1, 50)

# One trace per state coordinate (4 states), labelled by its index.
fig = go.Figure()
for i in range(4):
    fig.add_trace(go.Scatter(
        x=t_traj_sim,
        y=x_traj_sim[i,:],
        name="x[{}]".format(i),
    ))
fig.show()

print(S)

In [5]:
# Box of initial states: +/- 1 in every coordinate around [pi, 0, 0, 0].
x0_center = torch.tensor([np.pi, 0., 0., 0.], dtype=vf.dtype)
x0_lo = x0_center - 1.
x0_up = x0_center + 1.

# file options
sys_name = 'acrobot'
x_samples_file = f'../data/validation_{sys_name}_x.pt'
v_samples_file = f'../data/validation_{sys_name}_v.pt'
model_file = f'../data/{sys_name}.pt'

In [6]:
# Experiment options, read by the cells below via opt[...].
opt = {
    # sample buffer / optimizer
    'max_buffer_size': None,
    'batch_size': 30,
    'learning_rate': 1e-3,

    # initial training phase
    'init_num_samples': 100,
    'init_num_trainig_step': 10000,  # key spelling is relied on by the training cell

    # per-generation training
    'num_generations': 10,
    'num_samples_per_generation': 10,
    'num_train_step_per_gen': 5000,

    # adversarial sample generator
    'adv_max_iter': 3,
    'adv_conv_tol': 1e-5,
    'adv_learning_rate': .25,

    # validation set
    'num_samples_validation': 100,
}

In [7]:
# Random sample generator for the value function `vf`, constructed with the
# bounds x0_lo / x0_up.
# NOTE(review): presumably draws initial states from the box [x0_lo, x0_up]
# -- confirm in samples_generator.
samples_gen = samples_generator.RandomSampleGenerator(vf, x0_lo, x0_up)

In [8]:
# Generate the validation set that later scores both the baseline and the
# adversarial approximator; show_progress=True prints a progress bar.
x_validation, v_validation = samples_gen.generate_samples(opt['num_samples_validation'], show_progress=True)

Progress: [########################################] 100.0%


In [None]:
# Persist the validation set so it can be reloaded instead of regenerated.
for validation_tensor, target_file in (
    (x_validation, x_samples_file),
    (v_validation, v_samples_file),
):
    torch.save(validation_tensor, target_file)

In [None]:
# Reload the validation set from disk (x first, then v).
# torch.load unpickles its input, so only point it at files this notebook wrote.
x_validation, v_validation = (
    torch.load(source_file) for source_file in (x_samples_file, v_samples_file)
)

In [None]:
# The mean validation value is handed to the model as its `scaling` argument.
value_scale = torch.mean(v_validation)
state_dim = vf.x_dim[0]

model = value_approximation.QuadraticModel(vf.dtype, state_dim, scaling=value_scale)
# Alternative model:
# model = value_approximation.NeuralNetworkModel(vf.dtype, state_dim, 50, 1, scaling=value_scale)

# Baseline approximator and the log that records its losses.
vf_approx = value_approximation.InfiniteHorizonValueFunctionApproximation(
    vf.dtype,
    state_dim,
    model,
    learning_rate=opt['learning_rate'],
)
train_log = training_log.TrainingLog(1, prefix="baseline")

In [None]:
# Sample buffer for the baseline run; its max_size comes from
# opt['max_buffer_size'] (None in the options cell).
# NOTE(review): the positional arguments look like (state dim, value dim = 1,
# dtype) -- confirm against SamplesBuffer.
samples_buff = samples_buffer.SamplesBuffer(vf.x_dim[0], 1, vf.dtype, max_size=opt['max_buffer_size'])

In [None]:
# Generate the initial training samples with the random generator
# (a progress bar is printed while they are computed).
x_init, v_init = samples_gen.generate_samples(opt['init_num_samples'], show_progress=True)

In [None]:
# Put the initial samples into the baseline buffer.
samples_buff.add_samples(x_init, v_init)
# Disabled: would additionally add the validation samples to the training buffer.
# samples_buff.add_samples(x_validation, v_validation)

In [None]:
# Initial training phase: one random mini-batch per step, each step's losses
# appended to the baseline log.
n_init_steps = opt['init_num_trainig_step']
for _step in range(n_init_steps):
    x_batch, v_batch = samples_buff.get_random_samples(opt['batch_size'])
    train_log.add_train_loss(vf_approx.train_step(x_batch, v_batch))

In [None]:
# Fork the adversarial run from the current baseline state: the buffer and
# the approximator are deep-copied, and the log is copied under a new prefix.
samples_buff_adv = copy.deepcopy(samples_buff)

adv_generator_options = dict(
    max_iter=opt['adv_max_iter'],
    conv_tol=opt['adv_conv_tol'],
    learning_rate=opt['adv_learning_rate'],
)
samples_gen_adv = samples_generator.AdversarialSampleGenerator(
    vf, x0_lo, x0_up, **adv_generator_options)

vf_approx_adv = copy.deepcopy(vf_approx)
train_log_adv = training_log.TrainingLog.get_copy(
    train_log, prefix="adversarial", keep_writer=True)

In [1]:
# Each approximator is paired with the log that records its losses. The
# adversarial pair comes first, so it is always processed before the baseline.
tracked_runs = ((vf_approx_adv, train_log_adv), (vf_approx, train_log))
run_buffers = (samples_buff_adv, samples_buff)

# Validation loss before any generation is trained.
for approx, log in tracked_runs:
    log.add_validation_loss(approx.validation_loss(x_validation, v_validation))

n_new_samples = opt['num_samples_per_generation']
for gen_i in range(opt['num_generations']):
    # adversarial samples, generated against the current adversarial approximator
    x_adv, v_adv = samples_gen_adv.generate_samples(n_new_samples, vf_approx_adv)
    samples_buff_adv.add_samples(x_adv, v_adv)
    # random samples for the baseline
    x_rand, v_rand = samples_gen.generate_samples(n_new_samples)
    samples_buff.add_samples(x_rand, v_rand)

    for train_step_i in range(opt['num_train_step_per_gen']):
        # One set of indices is drawn from the adversarial buffer and reused
        # for the baseline buffer.
        # NOTE(review): this assumes both buffers hold the same number of
        # samples -- confirm that the adversarial generator always returns
        # the requested count.
        batch_indices = samples_buff_adv.get_random_sample_indices(opt['batch_size'])
        for buff, (approx, log) in zip(run_buffers, tracked_runs):
            x_batch, v_batch = buff.get_samples_from_indices(batch_indices)
            log.add_train_loss(approx.train_step(x_batch, v_batch))

    # Validation loss after this generation.
    for approx, log in tracked_runs:
        log.add_validation_loss(approx.validation_loss(x_validation, v_validation))

NameError: name 'vf_approx_adv' is not defined

In [None]:
# Initial state for the closed-loop simulations in the next cells: a small
# offset from [pi, 0, 0, 0].
x0 = torch.tensor([np.pi+.05, .05, 0., 0.], dtype=vf.dtype)
# Argument passed to acrobot_utils.get_value_function when building the
# controllers (presumably the lookahead horizon -- TODO confirm).
vf_ctrl_N = 10

In [None]:
# Closed-loop simulation of the lookahead controller that is given the
# baseline approximator `vf_approx`.
# NOTE(review): the sim_ctrl literals are presumably (input dim = 1, dt = .1,
# 50 steps) -- confirm against controllers.sim_ctrl.
vf_ctrl, sys_ctrl = acrobot_utils.get_value_function(vf_ctrl_N)
ctrl = controllers.get_limited_lookahead_controller(vf_ctrl, vf_approx)
x_traj_sim, t_traj_sim = controllers.sim_ctrl(x0, 1, sys.dx, ctrl, .1, 50)

# One trace per state coordinate.
state_traces = [go.Scatter(x=t_traj_sim, y=x_traj_sim[i, :]) for i in range(4)]
fig = go.Figure(data=state_traces)
fig.show()

In [None]:
# Closed-loop simulation of the lookahead controller that is given the
# adversarially trained approximator `vf_approx_adv`.
# NOTE(review): the sim_ctrl literals are presumably (input dim = 1, dt = .1,
# 50 steps) -- confirm against controllers.sim_ctrl.
vf_ctrl, sys_ctrl = acrobot_utils.get_value_function(vf_ctrl_N)
ctrl = controllers.get_limited_lookahead_controller(vf_ctrl, vf_approx_adv)
x_traj_sim, t_traj_sim = controllers.sim_ctrl(x0, 1, sys.dx, ctrl, .1, 50)

# One trace per state coordinate.
state_traces = [go.Scatter(x=t_traj_sim, y=x_traj_sim[i, :]) for i in range(4)]
fig = go.Figure(data=state_traces)
fig.show()

In [None]:
# Closed-loop simulation of the lookahead controller built without any
# value-function approximator.
# NOTE(review): the sim_ctrl literals are presumably (input dim = 1, dt = .1,
# 50 steps) -- confirm against controllers.sim_ctrl.
vf_ctrl, sys_ctrl = acrobot_utils.get_value_function(vf_ctrl_N)
ctrl = controllers.get_limited_lookahead_controller(vf_ctrl)
x_traj_sim, t_traj_sim = controllers.sim_ctrl(x0, 1, sys.dx, ctrl, .1, 50)

# One trace per state coordinate.
state_traces = [go.Scatter(x=t_traj_sim, y=x_traj_sim[i, :]) for i in range(4)]
fig = go.Figure(data=state_traces)
fig.show()

In [None]:
# benchmark params
# All tensors are created directly in vf.dtype. Going through the legacy
# torch.Tensor(...) constructor would build float32 values first, rounding
# np.pi and .15 to single precision before the cast, so x_goal would not
# match x0 exactly when vf.dtype is wider than float32.
x0 = torch.tensor([np.pi, 0., 0., 0.], dtype=vf.dtype)       # centre of the benchmark grid
x0_eps = torch.tensor([.15, .15, 0., 0.], dtype=vf.dtype)    # per-coordinate offset around x0
x_goal = torch.tensor([np.pi, 0., 0., 0.], dtype=vf.dtype)   # goal state passed to the benchmark
num_breaks = [5, 5]
dt_sim = .1
N = 50

# Argument for acrobot_utils.get_value_function in the benchmark cells.
vf_ctrl_N = 10

In [None]:
# Benchmark the lookahead controller that is given the baseline approximator.
vf_ctrl, sys_ctrl = acrobot_utils.get_value_function(vf_ctrl_N)
ctrl_baseline = controllers.get_limited_lookahead_controller(vf_ctrl, vf_approx)
benchmark_setup = (x0, x0_eps, num_breaks, x_goal, dt_sim, N)
bench_baseline = controllers.benchmark_controller(
    vf.u_dim[0], sys.dx, ctrl_baseline, *benchmark_setup)

In [None]:
# Benchmark the lookahead controller that is given the adversarially trained approximator.
vf_ctrl, sys_ctrl = acrobot_utils.get_value_function(vf_ctrl_N)
ctrl_adv = controllers.get_limited_lookahead_controller(vf_ctrl, vf_approx_adv)
benchmark_setup = (x0, x0_eps, num_breaks, x_goal, dt_sim, N)
bench_adv = controllers.benchmark_controller(
    vf.u_dim[0], sys.dx, ctrl_adv, *benchmark_setup)

In [None]:
# LQR controller as a reference for the benchmark.
# Q and R keep torch's default dtype, as in the earlier LQR cell.
Q = torch.diag(torch.tensor([.1, .1, .1, .1]))
R = torch.diag(torch.tensor([.01]))
# Nominal state and input are created directly in vf.dtype. The legacy
# torch.Tensor(...).type(...) form would round np.pi to float32 first.
x_nom = torch.tensor([np.pi, 0., 0., 0.], dtype=vf.dtype)
u_nom = torch.zeros(vf.u_lo[0].shape[0], dtype=vf.dtype)
ctrl_lqr, S = controllers.get_lqr_controller(sys.dx, x_nom, u_nom, Q, R, vf.u_lo[0], vf.u_up[0])
# Same benchmark setup as for the other controllers.
bench_lqr = controllers.benchmark_controller(vf.u_dim[0], sys.dx, ctrl_lqr, x0, x0_eps, num_breaks, x_goal, dt_sim, N)

In [None]:
# Plot settings (also read by the "No Model" plotting cell further down).
conv_thresh = 1.   # benchmark values above this are flagged in the thresholded maps
zmin = 0.          # lower end of the colour scale for the raw maps
zmax = None        # None leaves the upper end to plotly

# Controllers to compare, in display order.
benchmarks = (
    ("Baseline", bench_baseline),
    ("Adversarial", bench_adv),
    ("LQR", bench_lqr),
)

# Raw benchmark values, on a shared colour range.
for label, bench in benchmarks:
    print(label)
    fig = go.Figure(data=[go.Heatmap(
        z=bench.detach().numpy(),
        zmin=zmin,
        zmax=zmax,
    )])
    fig.update_layout(title="{} benchmark".format(label))
    fig.show()

# Thresholded maps: 1 where the benchmark value exceeds conv_thresh, else 0.
# The mask stays a NumPy array rather than being round-tripped through a
# torch tensor before it reaches plotly.
for label, bench in benchmarks:
    print(label)
    above_thresh = (bench.detach().numpy() > conv_thresh).astype(float)
    fig = go.Figure(data=[go.Heatmap(z=above_thresh)])
    fig.update_layout(title="{} benchmark > {}".format(label, conv_thresh))
    fig.show()

In [None]:
# Benchmark the lookahead controller built without a value-function approximator.
vf_ctrl, sys_ctrl = acrobot_utils.get_value_function(vf_ctrl_N)
ctrl_no_model = controllers.get_limited_lookahead_controller(vf_ctrl)
benchmark_setup = (x0, x0_eps, num_breaks, x_goal, dt_sim, N)
bench_no_model = controllers.benchmark_controller(
    vf.u_dim[0], sys.dx, ctrl_no_model, *benchmark_setup)

In [None]:
# Heatmaps for the controller without a value-function approximator.
# zmin, zmax and conv_thresh come from the earlier heatmap cell.
bench_no_model_values = bench_no_model.detach().numpy()

# Raw benchmark values.
print("No Model")
fig = go.Figure(data=[go.Heatmap(
    z=bench_no_model_values,
    zmin=zmin,
    zmax=zmax,
)])
fig.update_layout(title="No Model benchmark")
fig.show()

# Thresholded map: 1 where the benchmark value exceeds conv_thresh, else 0.
# The mask stays a NumPy array rather than being round-tripped through a
# torch tensor before it reaches plotly.
print("No Model")
fig = go.Figure(data=[go.Heatmap(
    z=(bench_no_model_values > conv_thresh).astype(float),
)])
fig.update_layout(title="No Model benchmark > {}".format(conv_thresh))
fig.show()