In [1]:
%load_ext autoreload
%autoreload 2

In [4]:
import sys
sys.path.append("..")
sys.path.append(".")
import torch
import numpy as np
import copy
import plotly
import plotly.graph_objs as go
import pickle
from datetime import datetime
plotly.offline.init_notebook_mode(connected=True)
import plotting_utils

import robust_value_approx.samples_generator as samples_generator
import robust_value_approx.samples_buffer as samples_buffer
import robust_value_approx.value_approximation as value_approximation
import robust_value_approx.training_log as training_log

In [3]:
import acrobot_utils

# Value-function object for the acrobot, built by the project helper with N=15.
vf = acrobot_utils.get_value_function(N=15)

# Box [x0_lo, x0_up] of initial states, centred on [pi, 0, 0, 0].
# torch.tensor(..., dtype=vf.dtype) builds the bounds directly in the target
# precision. The legacy torch.Tensor(...) constructor creates a tensor of the
# default float type (float32 unless changed) first, which rounds np.pi +/- eps
# before the cast to vf.dtype.
eps = .5
x0_lo = torch.tensor([np.pi-eps, -eps, -1., -1.], dtype=vf.dtype)
x0_up = torch.tensor([np.pi+eps, eps, 1., 1.], dtype=vf.dtype)

# file options
sys_name = 'acrobot'
# Files used to save and reload the validation inputs and labels.
x_samples_file = '../data/validation_' + sys_name + '_x'
v_samples_file = '../data/validation_' + sys_name + '_v'
# NOTE(review): model_file is not referenced by any cell visible here.
model_file = '../data/' + sys_name

In [21]:
# Hyper-parameters for the acrobot experiment; later cells read them by key.
opt = {
    # Sample buffer: forwarded as SamplesBuffer's max_size (1000 was an
    # alternative tried previously) and the mini-batch size per training step.
    'max_buffer_size': None,
    'batch_size': 60,
    # Network size arguments forwarded to the value-function approximator.
    'nn_width': 128,
    'nn_depth': 1,

    # Initial phase: samples generated and training steps run before the
    # generation loop starts.
    'init_num_samples': 500,
    'init_num_trainig_step': 10000,

    # Generation loop: number of generations, new samples per generation and
    # training steps per generation.
    'num_generations': 10,
    'num_samples_per_generation': 100,
    'num_train_step_per_gen': 10000,

    # Keyword arguments forwarded to the adversarial sample generator.
    'adv_max_iter': 3,
    'adv_conv_tol': 1e-5,
    'adv_learning_rate': .25,

    # Size of the validation set.
    'num_samples_validation': 1000,
}

In [6]:
# Baseline pipeline: sample buffer, random sample generator, value-function
# approximator and training log.
samples_buff = samples_buffer.SamplesBuffer(
    x0_lo.shape[0]*(vf.N-1),
    vf.N-1,
    vf.dtype,
    max_size=opt['max_buffer_size'],
)
samples_gen = samples_generator.RandomSampleGenerator(vf, x0_lo, x0_up)
vf_approx = value_approximation.InfiniteHorizonValueFunctionApproximation(
    x0_lo,
    x0_up,
    opt['nn_width'],
    opt['nn_depth'],
)
train_log = training_log.TrainingLog(1, prefix="baseline")

In [300]:
# Draw the validation set (inputs and their labels) from the random generator.
(x_samples_validation, v_labels_validation) = samples_gen.generate_samples(
    opt['num_samples_validation'])

In [301]:
# Persist the validation set so it can be reloaded without regenerating it.
torch.save(obj=x_samples_validation, f=x_samples_file)
torch.save(obj=v_labels_validation, f=v_samples_file)

In [7]:
# Reload the validation set from the files written by the save cell.
x_samples_validation = torch.load(f=x_samples_file)
v_labels_validation = torch.load(f=v_samples_file)

In [8]:
# Display the shape of the validation inputs as the cell output.
x_samples_validation.shape

torch.Size([1000, 56])

In [9]:
# Add the validation samples to the baseline training buffer.
# NOTE(review): the same tensors are later passed to validation_loss(), so the
# reported validation loss is measured on data the networks were trained on
# (and samples_buff_adv, deep-copied from this buffer, inherits them too).
# Confirm this is intentional; the pendulum section has no equivalent cell.
samples_buff.add_samples(x_samples_validation, v_labels_validation)

In [302]:
# Seed the baseline buffer with an initial batch of randomly generated samples.
init_batch = samples_gen.generate_samples(opt['init_num_samples'])
x_samples, v_labels = init_batch
samples_buff.add_samples(x_samples, v_labels)

In [20]:
# Initial training phase: one gradient step per random mini-batch drawn from
# the baseline buffer, logging the loss of every step.
num_init_steps = opt['init_num_trainig_step']
train_step_i = 0
while train_step_i < num_init_steps:
    x, v = samples_buff.get_random_samples(opt['batch_size'])
    train_log.add_train_loss(vf_approx.train_step(x, v))
    train_step_i += 1

In [22]:
# Fork the baseline state so the adversarial run starts from the same buffer
# contents and the same network as the baseline.
samples_buff_adv = copy.deepcopy(samples_buff)
vf_approx_adv = copy.deepcopy(vf_approx)

# Options of the adversarial sample generator, taken from `opt`.
adv_kwargs = dict(
    max_iter=opt['adv_max_iter'],
    conv_tol=opt['adv_conv_tol'],
    learning_rate=opt['adv_learning_rate'],
)
samples_gen_adv = samples_generator.AdversarialSampleGenerator(
    vf, x0_lo, x0_up, **adv_kwargs)

# Copy of the baseline log under the "adversarial" prefix.
train_log_adv = training_log.TrainingLog.get_copy(
    train_log,
    prefix="adversarial",
    keep_writer=True,
)

In [None]:
# Each approximator is paired with its log; the adversarial pair is evaluated
# first, then the baseline.
approx_and_log = ((vf_approx_adv, train_log_adv), (vf_approx, train_log))

# Validation loss before any generation has been trained.
for approx, log in approx_and_log:
    validation_losses = approx.validation_loss(x_samples_validation, v_labels_validation)
    log.add_validation_loss(validation_losses)

for gen_i in range(opt['num_generations']):
    # adversarial samples (generated against the current adversarial network)
    x_samples, v_labels = samples_gen_adv.generate_samples(
        opt['num_samples_per_generation'], vf_approx_adv)
    samples_buff_adv.add_samples(x_samples, v_labels)
    # random samples
    x_samples, v_labels = samples_gen.generate_samples(
        opt['num_samples_per_generation'])
    samples_buff.add_samples(x_samples, v_labels)

    for train_step_i in range(opt['num_train_step_per_gen']):
        # The same indices are used for both buffers, so both networks see the
        # same buffer positions at every step.
        samples_indices = samples_buff_adv.get_random_sample_indices(opt['batch_size'])
        x, v = samples_buff_adv.get_samples_from_indices(samples_indices)
        train_log_adv.add_train_loss(vf_approx_adv.train_step(x, v))
        x, v = samples_buff.get_samples_from_indices(samples_indices)
        train_log.add_train_loss(vf_approx.train_step(x, v))

    # Validation loss after this generation, adversarial first.
    for approx, log in approx_and_log:
        validation_losses = approx.validation_loss(x_samples_validation, v_labels_validation)
        log.add_validation_loss(validation_losses)

    # Checkpoint both networks after every generation.
    # NOTE(review): the files go to the working directory under fixed names
    # (the pendulum loop writes the same names) and `model_file` is unused.
    torch.save(vf_approx.model, 'vf_approx_model')
    torch.save(vf_approx_adv.model, 'vf_approx_adv_model')


No GPU/TPU found, falling back to CPU.


Couldn't retrieve source code for container of type Sequential. It won't be checked for correctness upon loading.


Couldn't retrieve source code for container of type Linear. It won't be checked for correctness upon loading.


Couldn't retrieve source code for container of type ReLU. It won't be checked for correctness upon loading.



In [292]:
import acrobot_utils
acro = acrobot_utils.AcrobotNLP()
vf = acrobot_utils.get_value_function(15)

# Alternative controllers tried previously; exactly one `ctrl` must be active.
# ctrl = lambda x: (torch.zeros(1), torch.zeros(1), None)

# ctrl = vf.get_optimal_controller()

# ctrl = value_approximation.get_inifinite_horizon_ctrl(acro.dyn, vf, vf_approx.model)
# ctrl = value_approximation.get_inifinite_horizon_ctrl(acro.dyn, vf, vf_approx_adv.model)

# ctrl = value_approximation.get_sampling_infinite_horizon_controller(acro.dx, vf, vf_approx.model)
ctrl = value_approximation.get_sampling_infinite_horizon_controller(acro.dx, vf, vf_approx_adv.model)

# Initial state [pi + eps, -eps, 0, 0]. It is built directly as float64:
# torch.Tensor([...]).double() would first round np.pi to float32, so with
# eps = 0 the state would not be exactly [pi, 0, 0, 0] in double precision.
eps = 0.
x0 = torch.tensor([np.pi+eps, -eps, 0., 0.], dtype=torch.float64)
# Simulate the closed loop from x0 under `ctrl`.
# NOTE(review): .1 and 20 look like a time step and a horizon; confirm against
# AcrobotNLP.sim_ctrl.
x_traj_sim, t_traj_sim = acro.sim_ctrl(x0, ctrl, .1, 20)

# Plot the first two rows of the simulated trajectory against time.
fig = go.Figure()
for i in range(2):
    fig.add_trace(go.Scatter(
        x=t_traj_sim,
        y=x_traj_sim[i,:]
    ))
fig.show()

In [None]:
import acrobot_utils

vf = acrobot_utils.get_value_function(15)
# Callable returned by the value-function object; it is evaluated at x0 below.
V = vf.get_value_function()
eps = 0.5
# Built directly as float64: torch.Tensor([...]).double() would round
# np.pi + eps to float32 before the cast.
x0 = torch.tensor([np.pi+eps, -eps, 0., 0.], dtype=torch.float64)
# Everything V(x0) returns after its first element is forwarded to sol_to_traj
# to recover the state, input and alpha trajectories.
x_traj, u_traj, alpha_traj = vf.sol_to_traj(x0, *(V(x0)[1:]))
# Plot all four rows of the state trajectory.
fig = go.Figure()
for i in range(4):
    fig.add_trace(go.Scatter(
        y=x_traj[i,:]
    ))
fig.show()

In [None]:
# Plot the first row of the input trajectory computed in the previous cell.
control_trace = go.Scatter(y=u_traj[0,:])
fig = go.Figure()
fig.add_trace(control_trace)
fig.show()

In [None]:
# Restart from column 1 of the previous trajectory and solve again from there.
x0 = x_traj[:,1]
sol = V(x0)
# Everything after the first element of the solution goes to sol_to_traj.
x_traj, u_traj, alpha_traj = vf.sol_to_traj(x0, *sol[1:])

# Plot the first two rows of the new state trajectory.
fig = go.Figure()
for i in (0, 1):
    state_trace = go.Scatter(y=x_traj[i,:])
    fig.add_trace(state_trace)
fig.show()

# Pendulum

In [57]:
import pendulum_utils

vf = pendulum_utils.get_value_function(10)
# Callable returned by the value-function object; it is evaluated at x0 below.
V = vf.get_value_function()
eps = 1.5
# Built directly as float64: torch.Tensor([...]).double() would round
# np.pi + eps to float32 before the cast.
x0 = torch.tensor([np.pi+eps, 0.], dtype=torch.float64)
# Everything V(x0) returns after its first element is forwarded to sol_to_traj.
x_traj, u_traj, alpha_traj = vf.sol_to_traj(x0, *(V(x0)[1:]))
# Plot both rows of the state trajectory.
fig = go.Figure()
for i in range(2):
    fig.add_trace(go.Scatter(
        y=x_traj[i,:]
    ))
fig.show()

In [264]:
import pendulum_utils
pendulum = pendulum_utils.PendulumNLP()
vf = pendulum_utils.get_value_function(10)

# Alternative controllers tried previously.
# ctrl = lambda x: (torch.zeros(1), torch.zeros(1), None)

# ctrl = vf.get_optimal_controller()

# ctrl = value_approximation.get_inifinite_horizon_ctrl(pendulum.dyn, vf, vf_approx.model)
# ctrl = value_approximation.get_inifinite_horizon_ctrl(pendulum.dyn, vf, vf_approx_adv.model)

# NOTE(review): vf_approx / vf_approx_adv must be the approximators trained on
# the pendulum; in file order those are created in later cells, so this cell
# depends on having run the pendulum training first.

fig = go.Figure()

# Built directly as float64: torch.Tensor([...]).double() would round
# np.pi - 1 to float32 before the cast.
x0 = torch.tensor([np.pi-1., -.1], dtype=torch.float64)

# Simulate the closed loop once per network and overlay both state rows.
# The simulation must run on the pendulum object: `acro` is the acrobot
# object created in an earlier cell and does not belong to this system.
# NOTE(review): assumes PendulumNLP exposes sim_ctrl with the same signature
# as AcrobotNLP.sim_ctrl; confirm.
# The adversarial network comes last, so `ctrl`, `x_traj_sim` and `t_traj_sim`
# end up holding the adversarial results.
for label, model in (('baseline', vf_approx.model),
                     ('adversarial', vf_approx_adv.model)):
    ctrl = value_approximation.get_sampling_infinite_horizon_controller(pendulum.dx, vf, model)
    x_traj_sim, t_traj_sim = pendulum.sim_ctrl(x0, ctrl, .1, 200)
    for i in range(2):
        fig.add_trace(go.Scatter(
            x=t_traj_sim,
            y=x_traj_sim[i,:],
            name=label,
        ))

fig.show()

In [208]:
# Evaluate the current controller at a single state [pi + eps, 0].
# The state is built directly as float64; torch.Tensor([...]).double() would
# round np.pi + eps to float32 before the cast.
eps=1.
ctrl(torch.tensor([np.pi+eps,0.], dtype=torch.float64))

(array([-18.11723651]), array([-18.11723651]), None)

In [242]:
import pendulum_utils

# Value-function object for the pendulum, built by the project helper with N=10.
vf = pendulum_utils.get_value_function(N=10)

# Box [x0_lo, x0_up] of initial states: first coordinate within eps of pi,
# second coordinate in [-10, 10].
# torch.tensor(..., dtype=vf.dtype) builds the bounds directly in the target
# precision. The legacy torch.Tensor(...) constructor creates a tensor of the
# default float type (float32 unless changed) first, which rounds np.pi +/- eps
# before the cast to vf.dtype.
eps = .5
x0_lo = torch.tensor([np.pi-eps, -10.], dtype=vf.dtype)
x0_up = torch.tensor([np.pi+eps, 10.], dtype=vf.dtype)

# file options
sys_name = 'pendulum'
# Files used to save and reload the validation inputs and labels.
x_samples_file = '../data/validation_' + sys_name + '_x'
v_samples_file = '../data/validation_' + sys_name + '_v'
# NOTE(review): model_file is not referenced by any cell visible here.
model_file = '../data/' + sys_name

In [243]:
# Hyper-parameters for the pendulum experiment; later cells read them by key.
opt = {
    # Sample buffer limit (forwarded as SamplesBuffer's max_size) and the
    # mini-batch size per training step.
    'max_buffer_size': None,
    'batch_size': 30,
    # Network size arguments forwarded to the value-function approximator.
    'nn_width': 32,
    'nn_depth': 1,

    # Initial phase: samples generated and training steps run before the
    # generation loop starts.
    'init_num_samples': 10,
    'init_num_trainig_step': 1000,

    # Generation loop: number of generations, new samples per generation and
    # training steps per generation.
    'num_generations': 10,
    'num_samples_per_generation': 10,
    'num_train_step_per_gen': 1000,

    # Keyword arguments forwarded to the adversarial sample generator.
    'adv_max_iter': 3,
    'adv_conv_tol': 1e-5,
    'adv_learning_rate': .25,

    # Size of the validation set.
    'num_samples_validation': 500,
}

In [244]:
# Baseline pipeline for the pendulum: sample buffer, random sample generator,
# value-function approximator and training log.
samples_buff = samples_buffer.SamplesBuffer(
    x0_lo.shape[0]*(vf.N-1),
    vf.N-1,
    vf.dtype,
    max_size=opt['max_buffer_size'],
)
samples_gen = samples_generator.RandomSampleGenerator(vf, x0_lo, x0_up)
vf_approx = value_approximation.InfiniteHorizonValueFunctionApproximation(
    x0_lo,
    x0_up,
    opt['nn_width'],
    opt['nn_depth'],
)
train_log = training_log.TrainingLog(1, prefix="baseline")

In [245]:
# Draw the validation set (inputs and their labels) from the random generator.
(x_samples_validation, v_labels_validation) = samples_gen.generate_samples(
    opt['num_samples_validation'])

In [246]:
# Persist the validation set so it can be reloaded without regenerating it.
torch.save(obj=x_samples_validation, f=x_samples_file)
torch.save(obj=v_labels_validation, f=v_samples_file)

In [219]:
# Reload the validation set from the files written by the save cell.
x_samples_validation = torch.load(f=x_samples_file)
v_labels_validation = torch.load(f=v_samples_file)

In [247]:
# Seed the baseline buffer with an initial batch of randomly generated samples.
init_batch = samples_gen.generate_samples(opt['init_num_samples'])
x_samples, v_labels = init_batch
samples_buff.add_samples(x_samples, v_labels)

In [248]:
# Initial training phase: one gradient step per random mini-batch drawn from
# the baseline buffer, logging the loss of every step.
num_init_steps = opt['init_num_trainig_step']
train_step_i = 0
while train_step_i < num_init_steps:
    x, v = samples_buff.get_random_samples(opt['batch_size'])
    train_log.add_train_loss(vf_approx.train_step(x, v))
    train_step_i += 1

In [249]:
# Fork the baseline state so the adversarial run starts from the same buffer
# contents and the same network as the baseline.
samples_buff_adv = copy.deepcopy(samples_buff)
vf_approx_adv = copy.deepcopy(vf_approx)

# Options of the adversarial sample generator, taken from `opt`.
adv_kwargs = dict(
    max_iter=opt['adv_max_iter'],
    conv_tol=opt['adv_conv_tol'],
    learning_rate=opt['adv_learning_rate'],
)
samples_gen_adv = samples_generator.AdversarialSampleGenerator(
    vf, x0_lo, x0_up, **adv_kwargs)

# Copy of the baseline log under the "adversarial" prefix.
train_log_adv = training_log.TrainingLog.get_copy(
    train_log,
    prefix="adversarial",
    keep_writer=True,
)

In [256]:
# Each approximator is paired with its log; the adversarial pair is evaluated
# first, then the baseline.
approx_and_log = ((vf_approx_adv, train_log_adv), (vf_approx, train_log))

# Validation loss before any generation has been trained.
for approx, log in approx_and_log:
    validation_losses = approx.validation_loss(x_samples_validation, v_labels_validation)
    log.add_validation_loss(validation_losses)

for gen_i in range(opt['num_generations']):
    # adversarial samples (generated against the current adversarial network)
    x_samples, v_labels = samples_gen_adv.generate_samples(
        opt['num_samples_per_generation'], vf_approx_adv)
    samples_buff_adv.add_samples(x_samples, v_labels)
    # random samples
    x_samples, v_labels = samples_gen.generate_samples(
        opt['num_samples_per_generation'])
    samples_buff.add_samples(x_samples, v_labels)

    for train_step_i in range(opt['num_train_step_per_gen']):
        # The same indices are used for both buffers, so both networks see the
        # same buffer positions at every step.
        samples_indices = samples_buff_adv.get_random_sample_indices(opt['batch_size'])
        x, v = samples_buff_adv.get_samples_from_indices(samples_indices)
        train_log_adv.add_train_loss(vf_approx_adv.train_step(x, v))
        x, v = samples_buff.get_samples_from_indices(samples_indices)
        train_log.add_train_loss(vf_approx.train_step(x, v))

    # Validation loss after this generation, adversarial first.
    for approx, log in approx_and_log:
        validation_losses = approx.validation_loss(x_samples_validation, v_labels_validation)
        log.add_validation_loss(validation_losses)

    # Checkpoint both networks after every generation.
    # NOTE(review): the files go to the working directory under fixed names
    # (the acrobot loop writes the same names) and `model_file` is unused.
    torch.save(vf_approx.model, 'vf_approx_model')
    torch.save(vf_approx_adv.model, 'vf_approx_adv_model')

# Double Integrator

In [None]:
import double_integrator_utils

# Value-function object for the double integrator, built with N=5.
vf = double_integrator_utils.get_value_function(N=5)

# Initial-state box: every coordinate ranges over [-1, 1].
x0_up = torch.ones(vf.sys.x_dim, dtype=vf.dtype)
x0_lo = -x0_up

# validation options
# NOTE(review): num_breaks_validation is not used by any cell visible here.
num_breaks_validation = [100] * vf.sys.x_dim

# file options
sys_name = 'double_int'
x_samples_file = '../data/learn_value_function_' + sys_name + '_x'
v_samples_file = '../data/learn_value_function_' + sys_name + '_v'
model_file = '../data/' + sys_name

# Hyper-parameters for the double-integrator experiment.
opt = {
    'max_buffer_size': None,
    'init_num_samples': 100,
    'init_num_trainig_step': 100,
    'num_generations': 50,
    'num_samples_per_generation': 10,
    'num_train_step_per_gen': 100,
    'batch_size': 30,
    'nn_width': 32,
    'nn_depth': 1,
    'num_samples_validation': 5000,
    'adv_max_iter': 3,
    'adv_conv_tol': 1e-5,
    'adv_learning_rate': .25,
}

In [None]:
# Baseline pipeline for the double integrator. Unlike the sections above, it
# uses the finite-horizon approximator, and the log is created with vf.N-1
# entries and first_value_only=False.
samples_buff = samples_buffer.SamplesBuffer(
    vf.sys.x_dim*(vf.N-1),
    vf.N-1,
    vf.dtype,
    max_size=opt['max_buffer_size'],
)
samples_gen = samples_generator.RandomSampleGenerator(vf, x0_lo, x0_up)
vf_approx = value_approximation.FiniteHorizonValueFunctionApproximation(
    vf,
    x0_lo,
    x0_up,
    opt['nn_width'],
    opt['nn_depth'],
)
train_log = training_log.TrainingLog(
    vf.N-1,
    prefix="baseline",
    first_value_only=False,
)

In [None]:
# Draw the validation set (inputs and their labels) from the random generator.
(x_samples_validation, v_labels_validation) = samples_gen.generate_samples(
    opt['num_samples_validation'])

In [None]:
# Seed the baseline buffer with an initial batch of randomly generated samples.
init_batch = samples_gen.generate_samples(opt['init_num_samples'])
x_samples, v_labels = init_batch
samples_buff.add_samples(x_samples, v_labels)

# Initial training phase: one gradient step per random mini-batch, logging the
# loss of every step.
num_init_steps = opt['init_num_trainig_step']
train_step_i = 0
while train_step_i < num_init_steps:
    x, v = samples_buff.get_random_samples(opt['batch_size'])
    train_log.add_train_loss(vf_approx.train_step(x, v))
    train_step_i += 1

In [None]:
# Fork the baseline state so the adversarial run starts from the same buffer
# contents and the same network as the baseline.
samples_buff_adv = copy.deepcopy(samples_buff)
vf_approx_adv = copy.deepcopy(vf_approx)

# Options of the MIP adversarial sample generator, taken from `opt`.
adv_kwargs = dict(
    max_iter=opt['adv_max_iter'],
    conv_tol=opt['adv_conv_tol'],
    learning_rate=opt['adv_learning_rate'],
)
samples_gen_adv = samples_generator.MIPAdversarialSampleGenerator(
    vf, x0_lo, x0_up, **adv_kwargs)

# Copy of the baseline log under the "adversarial" prefix.
train_log_adv = training_log.TrainingLog.get_copy(
    train_log,
    prefix="adversarial",
    keep_writer=True,
)

In [None]:
# Each approximator is paired with its log; the adversarial pair is evaluated
# first, then the baseline.
approx_and_log = ((vf_approx_adv, train_log_adv), (vf_approx, train_log))

# Validation loss before any generation has been trained.
for approx, log in approx_and_log:
    validation_losses = approx.validation_loss(x_samples_validation, v_labels_validation)
    log.add_validation_loss(validation_losses)

for gen_i in range(opt['num_generations']):
    # adversarial samples (generated against the current adversarial network)
    x_samples, v_labels = samples_gen_adv.generate_samples(
        opt['num_samples_per_generation'], vf_approx_adv)
    samples_buff_adv.add_samples(x_samples, v_labels)
    # random samples
    x_samples, v_labels = samples_gen.generate_samples(
        opt['num_samples_per_generation'])
    samples_buff.add_samples(x_samples, v_labels)

    for train_step_i in range(opt['num_train_step_per_gen']):
        # The same indices are used for both buffers, so both networks see the
        # same buffer positions at every step.
        samples_indices = samples_buff_adv.get_random_sample_indices(opt['batch_size'])
        x, v = samples_buff_adv.get_samples_from_indices(samples_indices)
        train_log_adv.add_train_loss(vf_approx_adv.train_step(x, v))
        x, v = samples_buff.get_samples_from_indices(samples_indices)
        train_log.add_train_loss(vf_approx.train_step(x, v))

    # Validation loss after this generation, adversarial first.
    for approx, log in approx_and_log:
        validation_losses = approx.validation_loss(x_samples_validation, v_labels_validation)
        log.add_validation_loss(validation_losses)