In [3]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to the project's .py files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
import sys
sys.path.append("..")
sys.path.append(".")
import torch
import numpy as np
import copy
import plotly
import plotly.graph_objs as go
import pickle
from datetime import datetime
plotly.offline.init_notebook_mode(connected=True)
import plotting_utils

import robust_value_approx.samples_generator as samples_generator
import robust_value_approx.samples_buffer as samples_buffer
import robust_value_approx.value_approximation as value_approximation
import robust_value_approx.training_log as training_log

In [None]:
import double_integrator_utils

# Fix the torch and numpy seeds so a fresh Restart & Run All repeats the same
# random draws. NOTE(review): randomness drawn elsewhere (e.g. Python's
# `random` module, or inside a solver) is not covered by these two calls.
random_seed = 0
torch.manual_seed(random_seed)
np.random.seed(random_seed)

# Double-integrator value function, plus the lower/upper bounds (all -1 / all
# +1, one entry per state dimension) that are handed to the sample generators
# and to the value-function approximator in the cells below.
vf = double_integrator_utils.get_value_function(N=5)
x0_lo = -1 * torch.ones(vf.sys.x_dim, dtype=vf.dtype)
x0_up = 1 * torch.ones(vf.sys.x_dim, dtype=vf.dtype)

# validation options
num_breaks_validation = [100] * vf.sys.x_dim

# file options
sys_name = 'double_int'
x_samples_file = '../data/learn_value_function_' + sys_name + '_x'
v_samples_file = '../data/learn_value_function_' + sys_name + '_v'
model_file = '../data/' + sys_name

# Experiment settings read by the cells below.
opt = dict(
    max_buffer_size=None,            # forwarded as SamplesBuffer(max_size=...)
    init_num_samples=100,            # samples generated before any training
    # NOTE: the key is misspelled ("trainig"); it is kept as-is because the
    # warm-start training cell looks it up under exactly this name.
    init_num_trainig_step=100,       # warm-start training steps
    num_generations=50,              # outer iterations of the comparison loop
    num_samples_per_generation=10,   # new samples added to each buffer per generation
    num_train_step_per_gen=100,      # training steps per generation
    batch_size=30,                   # samples drawn from a buffer per training step
    nn_width=32,                     # passed to FiniteHorizonValueFunctionApproximation
    nn_depth=1,                      # passed to FiniteHorizonValueFunctionApproximation
    num_samples_validation=5000,     # size of the held-out validation set
    adv_max_iter=3,                  # MIPAdversarialSampleGenerator(max_iter=...)
    adv_conv_tol=1e-5,               # MIPAdversarialSampleGenerator(conv_tol=...)
    adv_learning_rate=.25,           # MIPAdversarialSampleGenerator(learning_rate=...)
)

In [None]:
# Baseline pipeline: sample buffer, random sample generator, value-function
# approximator and its training log.
# NOTE(review): the first two SamplesBuffer arguments are presumably the
# sample and label dimensions -- confirm against SamplesBuffer.
samples_buff = samples_buffer.SamplesBuffer(
    vf.sys.x_dim * (vf.N - 1),
    vf.N - 1,
    vf.dtype,
    max_size=opt['max_buffer_size'],
)
samples_gen = samples_generator.RandomSampleGenerator(vf, x0_lo, x0_up)
vf_approx = value_approximation.FiniteHorizonValueFunctionApproximation(
    vf,
    x0_lo,
    x0_up,
    opt['nn_width'],
    opt['nn_depth'],
)
train_log = training_log.TrainingLog(
    vf.N - 1,
    prefix="baseline",
    first_value_only=False,
)

In [None]:
# Held-out samples generated once; the training cells below score both the
# baseline and the adversarial model on this same set.
validation_set = samples_gen.generate_samples(opt['num_samples_validation'])
x_samples_validation, v_labels_validation = validation_set

In [None]:
# Warm start: put an initial batch of labelled samples in the buffer, then
# run a fixed number of mini-batch training steps on the baseline model,
# logging the loss of every step.
x_samples, v_labels = samples_gen.generate_samples(opt['init_num_samples'])
samples_buff.add_samples(x_samples, v_labels)
for train_step_i in range(opt['init_num_trainig_step']):
    x, v = samples_buff.get_random_samples(opt['batch_size'])
    train_log.add_train_loss(vf_approx.train_step(x, v))

In [None]:
# Fork the warmed-up baseline so the adversarial run starts from the same
# buffer contents, the same model and the same log history.
samples_buff_adv = copy.deepcopy(samples_buff)
adv_options = dict(
    max_iter=opt['adv_max_iter'],
    conv_tol=opt['adv_conv_tol'],
    learning_rate=opt['adv_learning_rate'],
)
samples_gen_adv = samples_generator.MIPAdversarialSampleGenerator(
    vf, x0_lo, x0_up, **adv_options)
vf_approx_adv = copy.deepcopy(vf_approx)
train_log_adv = training_log.TrainingLog.get_copy(
    train_log,
    prefix="adversarial",
    keep_writer=True,
)

In [None]:
def _record_validation_losses():
    """Score both models on the shared validation set and log the result.

    The adversarial model is evaluated and logged first, then the baseline.
    """
    for approx, log in ((vf_approx_adv, train_log_adv),
                        (vf_approx, train_log)):
        log.add_validation_loss(
            approx.validation_loss(x_samples_validation, v_labels_validation))


def _train_on_indices(buff, approx, log, indices):
    """Run one training step of `approx` on `buff[indices]` and log its loss."""
    x_batch, v_batch = buff.get_samples_from_indices(indices)
    log.add_train_loss(approx.train_step(x_batch, v_batch))


# Validation losses before any per-generation training.
_record_validation_losses()
for gen_i in range(opt['num_generations']):
    # adversarial samples (generated against the current adversarial model)
    x_samples, v_labels = samples_gen_adv.generate_samples(
        opt['num_samples_per_generation'], vf_approx_adv)
    samples_buff_adv.add_samples(x_samples, v_labels)
    # random samples
    x_samples, v_labels = samples_gen.generate_samples(
        opt['num_samples_per_generation'])
    samples_buff.add_samples(x_samples, v_labels)
    for train_step_i in range(opt['num_train_step_per_gen']):
        # A single index draw, taken from the adversarial buffer, is reused
        # for the baseline buffer as well.
        # NOTE(review): this relies on both buffers holding the same number
        # of samples -- confirm both generators return the requested count.
        samples_indices = samples_buff_adv.get_random_sample_indices(
            opt['batch_size'])
        _train_on_indices(
            samples_buff_adv, vf_approx_adv, train_log_adv, samples_indices)
        _train_on_indices(
            samples_buff, vf_approx, train_log, samples_indices)
    _record_validation_losses()

In [8]:
import acrobot_utils

# Switch to the acrobot system. This rebinds `vf`, so every cell below works
# with the acrobot value function instead of the double-integrator one.
acrobot_N = 10
vf = acrobot_utils.get_value_function(N=acrobot_N)

In [11]:
# Evaluate the value function at the all-zero state and convert its result
# into state / input / alpha trajectories.
V = vf.get_value_function()
x0 = torch.Tensor([0., 0., 0., 0.])
sol_at_x0 = V(x0)
# The first entry of the result is dropped; the remaining entries are
# forwarded positionally to sol_to_traj.
x_traj, u_traj, alpha_traj = vf.sol_to_traj(x0, *sol_at_x0[1:])

In [17]:
# Bounds for the acrobot samples: the first two coordinates range over
# [-0.1, 0.1], the last two are pinned to 0. This rebinds x0_lo, x0_up and
# samples_gen from the double-integrator section.
x0_lo, x0_up = (
    torch.Tensor(bound)
    for bound in ([-.1, -.1, 0., 0.], [.1, .1, 0., 0.])
)
samples_gen = samples_generator.RandomSampleGenerator(vf, x0_lo, x0_up)


In [25]:
# Draw 50 labelled acrobot samples; this overwrites the double-integrator
# validation set stored under the same names.
acrobot_validation_set = samples_gen.generate_samples(50)
x_samples_validation, v_labels_validation = acrobot_validation_set

In [20]:
# Preview only the first rows instead of dumping the whole label tensor.
v_labels_validation[:5]

tensor([[1055.4553,  944.3719,  838.2792,  734.9865,  634.1760,  535.9825,
          440.5579,  348.0281,  258.4139,  171.3907],
        [1033.3021,  931.3121,  833.7959,  738.3041,  644.1469,  551.0381,
          458.7567,  367.1791,  276.2048,  185.4814],
        [1042.7026,  937.3734,  837.4589,  739.7246,  643.8421,  549.7899,
          457.2322,  365.7521,  275.0143,  184.5814],
        [1049.4254,  940.5876,  836.9847,  735.9902,  637.2275,  540.7406,
          446.4974,  354.4348,  264.4545,  176.2029],
        [1035.6074,  932.7258,  834.7137,  738.7892,  644.3557,  551.1854,
          458.9863,  367.5130,  276.5753,  185.7936]])

In [29]:
# Column 0 of the validation labels, one point per validation sample, in the
# order the samples were generated.
fig = go.Figure()
fig.add_trace(go.Scatter(
    y=v_labels_validation[:, 0],
    name="v_labels_validation[:, 0]",
))
# Title and axis labels so the figure can be read on its own.
fig.update_layout(
    title="Validation value labels (column 0)",
    xaxis_title="validation sample index",
    yaxis_title="value label, column 0",
)
fig.show()

In [13]:
# One trace per row of x_traj (the trajectory computed from the all-zero x0).
# The row count 4 matches the length of that x0.
fig = go.Figure()
for i in range(4):
    fig.add_trace(go.Scatter(
        y=x_traj[i, :],
        name="x_traj[{}, :]".format(i),  # legend entry identifying the row
    ))
# Title and axis labels so the figure can be read on its own.
fig.update_layout(
    title="x_traj returned by vf.sol_to_traj for x0 = 0",
    xaxis_title="index along x_traj axis 1",
    yaxis_title="x_traj[i, :]",
)
fig.show()