# Pendulum

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to the project's .py files
# take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("..")
sys.path.append(".")
import torch
import numpy as np
import copy
import plotly
import plotly.graph_objs as go
import pickle
from datetime import datetime
plotly.offline.init_notebook_mode(connected=True)

import robust_value_approx.samples_generator as samples_generator
import robust_value_approx.samples_buffer as samples_buffer
import robust_value_approx.value_approximation as value_approximation
import robust_value_approx.training_log as training_log
import robust_value_approx.controllers as controllers

from pendulum_utils import get_value_function



You may have already (directly or indirectly) imported `torch` which uses
`RTLD_GLOBAL`. Using `RTLD_GLOBAL` may cause symbol collisions which manifest
themselves in bugs like "free(): invalid pointer". Please consider importing
`pydrake` (and related C++-wrapped libraries like `cv2`, `open3d`, etc.)
*before* importing `torch`. For more details, see:
https://github.com/pytorch/pytorch/issues/3059#issuecomment-534676459




In [3]:
# Horizon argument handed to get_value_function for the "offline" value
# function (presumably a number of time steps — confirm in pendulum_utils).
offline_horizon = 50
# NOTE(review): this rebinds the name `sys`, shadowing the standard-library
# `sys` module imported in the import cell. From here on `sys` is the second
# object returned by get_value_function (used below via `sys.dx` and
# `sys.plot_result`).
vf, sys = get_value_function(offline_horizon)
# V is called below as `V(x0)` and its return is unpacked into two values.
V = vf.get_value_function()

In [None]:
# Evaluate the value function at a single initial state and plot the result
# object returned alongside the value.
# x0 is built directly in double precision: torch.Tensor([...]) creates a
# float32 tensor, so a subsequent .double() would keep the float32 rounding
# error of np.pi + .5 instead of the full-precision value.
x0 = torch.tensor([np.pi+.5, 1.], dtype=torch.float64)
v, res = V(x0)
sys.plot_result(res).show()

In [None]:
# Disabled alternative: an LQR controller designed about x_nom = [pi, 0].
# Kept for reference; uncomment to use it in place of the controller below.
# x_nom = torch.tensor([np.pi, 0.], dtype=vf.dtype)
# u_nom = torch.zeros(vf.u_dim[0], dtype=vf.dtype)
# Q = torch.diag(torch.Tensor([1., 1.]))
# R = torch.diag(torch.Tensor([.1]))
# ctrl, S = controllers.get_lqr_controller(sys.dx, x_nom, u_nom, Q, R, vf.u_lo[0], vf.u_up[0])

# Limited-lookahead controller built from a freshly constructed value function
# over the full offline horizon.
ctrl = controllers.get_limited_lookahead_controller(
    get_value_function(offline_horizon)[0]
)

x0 = torch.tensor([np.pi + .5, 1.], dtype=vf.dtype)
# The trailing positional arguments (.1, 40, "backward") are presumably the
# simulation time step, number of steps and integration mode, matching the
# keyword usage in later cells — confirm against controllers.sim_ctrl.
x_traj_sim, t_traj_sim = controllers.sim_ctrl(
    x0, vf.u_dim[0], sys.dx, ctrl, .1, 40, "backward"
)
controllers.plot_sim(t_traj_sim, x_traj_sim, "Full Horizon").show()

In [40]:
# Experiment options read by the cells below.
opt = {
    # Mini-batch size drawn from the samples buffer and optimiser step size
    # handed to ValueFunctionApproximation.
    'batch_size': 20,
    'learning_rate': 1e-3,

    # Passed to NeuralNetworkModel after the input dimension.
    'nn_width': 30,
    'nn_depth': 2,

    # Argument to the grid generator for the validation set
    # (presumably points per state dimension — confirm).
    'num_samples_validation': [100, 100],

    # Argument to the grid generator for the initial training set, and the
    # number of training steps run on that set alone.
    # NOTE(review): the key 'init_num_trainig_step' is misspelt ("trainig");
    # it is kept as-is because the training cell looks it up by this name.
    'init_num_samples': [10, 10],
    'init_num_trainig_step': 2500,

    # Forwarded to SamplesBuffer as max_size.
    'max_buffer_size': None,

    # Outer loop: generations, new samples per generation, and training
    # steps per generation.
    'num_generations': 20,
    'num_samples_per_generation': 20,
    'num_train_step_per_gen': 500,

    # Forwarded to AdversarialSampleGenerator as max_iter, conv_tol and
    # learning_rate respectively.
    'adv_max_iter': 5,
    'adv_conv_tol': 1e-5,
    'adv_learning_rate': .1,
}

In [5]:
# Path prefixes for the cached sample tensors; the save/load cells append
# '_x.pt' and '_v.pt' to these.
sys_name = 'pendulum'
validation_file = f'../data/validation_{sys_name}'
init_file = f'../data/init_{sys_name}'

In [6]:
# Lower/upper bounds on the initial state handed to every sample generator.
x0_lo = torch.tensor([np.pi - .5, -1.], dtype=vf.dtype)
x0_up = torch.tensor([np.pi + .5, 1.], dtype=vf.dtype)

# Three generators over the same value function and bounds: random, grid,
# and adversarial (the latter configured from the adv_* options).
samples_gen_rand = samples_generator.RandomSampleGenerator(vf, x0_lo, x0_up)
samples_gen_grid = samples_generator.GridSampleGenerator(vf, x0_lo, x0_up)
samples_gen_adv = samples_generator.AdversarialSampleGenerator(
    vf, x0_lo, x0_up,
    max_iter=opt['adv_max_iter'],
    conv_tol=opt['adv_conv_tol'],
    learning_rate=opt['adv_learning_rate'],
)

In [7]:
# Build the validation set from the grid generator.
x_validation, v_validation = samples_gen_grid.generate_samples(
    opt['num_samples_validation'],
    include_time=True,
    show_progress=True,
)

Progress: [########################################] 100.0%


In [8]:
# Cache the validation tensors on disk so they can be reloaded instead of
# regenerated.
torch.save(x_validation, validation_file + '_x.pt')
torch.save(v_validation, validation_file + '_v.pt')

In [None]:
# Alternative to regenerating: reload the validation tensors from the same
# paths the save cell writes to.
x_validation = torch.load(validation_file + '_x.pt')
v_validation = torch.load(validation_file + '_v.pt')

In [9]:
# Build the initial training set from the grid generator.
x_init, v_init = samples_gen_grid.generate_samples(
    opt['init_num_samples'],
    include_time=True,
    show_progress=True,
)

Progress: [########################################] 100.0%


In [10]:
# Cache the initial training tensors on disk so they can be reloaded instead
# of regenerated.
torch.save(x_init, init_file + '_x.pt')
torch.save(v_init, init_file + '_v.pt')

In [None]:
# Alternative to regenerating: reload the initial training tensors from the
# same paths the save cell writes to.
x_init = torch.load(init_file + '_x.pt')
v_init = torch.load(init_file + '_v.pt')

In [43]:
# Network whose input size is 1 + the state dimension (the samples are
# generated with include_time=True), with width/depth taken from the options.
model = value_approximation.NeuralNetworkModel(
    vf.dtype,
    1 + vf.x_dim[0],
    opt['nn_width'],
    opt['nn_depth'],
)

# Trainable approximation wrapping the network.
vf_approx = value_approximation.ValueFunctionApproximation(
    model,
    learning_rate=opt['learning_rate'],
)

# Log for the baseline run.
train_log = training_log.TrainingLog(prefix="baseline")

In [44]:
# Buffer for the baseline run; the first two arguments are 1 + state dimension
# and 1 (presumably the input and target widths — confirm in samples_buffer).
samples_buff = samples_buffer.SamplesBuffer(
    1 + vf.x_dim[0],
    1,
    vf.dtype,
    max_size=opt['max_buffer_size'],
)

In [45]:
# Seed the buffer with the initial grid samples.
# NOTE(review): presumably appends, so re-running this cell without
# re-creating the buffer would add the same samples again — confirm.
samples_buff.add_samples(x_init, v_init)

In [46]:
# Warm-start: train on random batches drawn from the initial samples only,
# recording the loss of every step.
for train_step_i in range(opt['init_num_trainig_step']):
    x, v = samples_buff.get_random_samples(opt['batch_size'])
    train_log.add_train_loss(vf_approx.train_step(x, v))

In [47]:
# Fork the warm-started state so the adversarial run starts from independent
# copies of the baseline's approximation, buffer and log.
vf_approx_adv = copy.deepcopy(vf_approx)
samples_buff_adv = copy.deepcopy(samples_buff)
train_log_adv = training_log.TrainingLog.get_copy(
    train_log,
    prefix="adversarial",
    keep_writer=True,
)

In [54]:
# Baseline run: each generation adds samples from the random generator to the
# buffer, trains on random batches, then records the validation loss.
validation_losses = vf_approx.validation_loss(x_validation, v_validation)
train_log.add_validation_loss(validation_losses)
for gen_i in range(opt['num_generations']):
    x, v = samples_gen_rand.generate_samples(
        opt['num_samples_per_generation'],
        include_time=True,
    )
    samples_buff.add_samples(x, v)
    for train_step_i in range(opt['num_train_step_per_gen']):
        x, v = samples_buff.get_samples_from_indices(
            samples_buff.get_random_sample_indices(opt['batch_size'])
        )
        train_log.add_train_loss(vf_approx.train_step(x, v))
    validation_losses = vf_approx.validation_loss(x_validation, v_validation)
    train_log.add_validation_loss(validation_losses)

In [55]:
# Adversarial run: same schedule as the baseline, but each generation's new
# samples come from the adversarial generator, which is given the current
# approximation.
validation_losses = vf_approx_adv.validation_loss(x_validation, v_validation)
train_log_adv.add_validation_loss(validation_losses)
for gen_i in range(opt['num_generations']):
    x, v = samples_gen_adv.generate_samples(
        opt['num_samples_per_generation'],
        vf_approx_adv,
        include_time=True,
    )
    samples_buff_adv.add_samples(x, v)
    for train_step_i in range(opt['num_train_step_per_gen']):
        x, v = samples_buff_adv.get_samples_from_indices(
            samples_buff_adv.get_random_sample_indices(opt['batch_size'])
        )
        train_log_adv.add_train_loss(vf_approx_adv.train_step(x, v))
    validation_losses = vf_approx_adv.validation_loss(x_validation, v_validation)
    train_log_adv.add_validation_loss(validation_losses)

In [70]:
# Settings shared by the three single-trajectory simulations that follow.
# Initial state handed to controllers.sim_ctrl.
x0 = torch.tensor([np.pi+.5, 1.], dtype=vf.dtype)
# dt_sim and N are passed positionally to controllers.sim_ctrl (presumably the
# time step and number of steps — confirm); dt_sim*N is also passed to
# get_limited_lookahead_controller when a value approximation is supplied.
dt_sim = .1
N = 50
# Horizon argument to get_value_function for the online controllers.
online_horizon = 7
# Forwarded to controllers.sim_ctrl as integration_mode.
integration_mode = "foh"

In [71]:
# Short-horizon limited-lookahead controller with no value approximation
# supplied.
ctrl = controllers.get_limited_lookahead_controller(
    get_value_function(online_horizon)[0]
)
x_traj_sim, t_traj_sim = controllers.sim_ctrl(
    x0, vf.u_dim[0], sys.dx, ctrl, dt_sim, N,
    integration_mode=integration_mode,
)
controllers.plot_sim(t_traj_sim, x_traj_sim, "No lookahead").show()

In [72]:
# Short-horizon limited-lookahead controller given the baseline (random
# samples) value approximation.
ctrl = controllers.get_limited_lookahead_controller(
    get_value_function(online_horizon)[0],
    vf_approx,
    dt_sim * N,
)
x_traj_sim, t_traj_sim = controllers.sim_ctrl(
    x0, vf.u_dim[0], sys.dx, ctrl, dt_sim, N,
    integration_mode=integration_mode,
)
controllers.plot_sim(t_traj_sim, x_traj_sim, "Baseline").show()

In [73]:
# Short-horizon limited-lookahead controller given the adversarially trained
# value approximation.
ctrl = controllers.get_limited_lookahead_controller(
    get_value_function(online_horizon)[0],
    vf_approx_adv,
    dt_sim * N,
)
x_traj_sim, t_traj_sim = controllers.sim_ctrl(
    x0, vf.u_dim[0], sys.dx, ctrl, dt_sim, N,
    integration_mode=integration_mode,
)
controllers.plot_sim(t_traj_sim, x_traj_sim, "Adversarial").show()

In [74]:
# Benchmark parameters shared by the four controller benchmarks below.
# x0 and x_goal are both [pi, 0]; they are passed to
# controllers.benchmark_controller together with x0_eps and num_breaks.
x0 = torch.tensor([np.pi, 0.], dtype=vf.dtype)
x_goal = torch.tensor([np.pi, 0.], dtype=vf.dtype)

# Presumably the half-width of the initial-state box around x0 and the number
# of grid points per state dimension — TODO confirm in benchmark_controller.
x0_eps = torch.tensor([1., 2.], dtype=vf.dtype)
num_breaks = [10, 10]

# Same simulation settings as the single-trajectory cells above; re-assigned
# here with identical values.
dt_sim = .1
N = 50
online_horizon = 7
integration_mode = "foh"

In [75]:
# LQR reference controller, designed about (x_nom, u_nom) and benchmarked on
# the same grid of initial states as the other controllers.
# NOTE(review): Q and R are created with torch.Tensor and therefore use the
# default float dtype rather than vf.dtype; left unchanged because it is not
# visible here how get_lqr_controller consumes them.
Q = torch.diag(torch.Tensor([1., 1.]))
R = torch.diag(torch.Tensor([.1]))
# Build the nominal point directly in vf.dtype. Going through torch.Tensor
# (float32) and casting afterwards bakes float32 rounding of pi into x_nom,
# so it would not match x_goal / x0, which are built directly in vf.dtype.
x_nom = torch.tensor([np.pi, 0.], dtype=vf.dtype)
u_nom = torch.zeros(vf.u_lo[0].shape[0], dtype=vf.dtype)
ctrl_lqr, S = controllers.get_lqr_controller(sys.dx, x_nom, u_nom, Q, R, vf.u_lo[0], vf.u_up[0])
bench_lqr = controllers.benchmark_controller(
    vf.u_dim[0], sys.dx,
    ctrl_lqr, x0, x0_eps, num_breaks,
    x_goal, dt_sim, N,
    integration_mode=integration_mode,
)

In [76]:
# Benchmark the short-horizon controller with no value approximation.
ctrl_no_model = controllers.get_limited_lookahead_controller(
    get_value_function(online_horizon)[0]
)
bench_no_model = controllers.benchmark_controller(
    vf.u_dim[0], sys.dx, ctrl_no_model,
    x0, x0_eps, num_breaks,
    x_goal, dt_sim, N,
    integration_mode=integration_mode,
)

In [77]:
# Benchmark the short-horizon controller given the baseline approximation.
ctrl_baseline = controllers.get_limited_lookahead_controller(
    get_value_function(online_horizon)[0],
    vf_approx,
    dt_sim * N,
)
bench_baseline = controllers.benchmark_controller(
    vf.u_dim[0], sys.dx, ctrl_baseline,
    x0, x0_eps, num_breaks,
    x_goal, dt_sim, N,
    integration_mode=integration_mode,
)

In [78]:
# Benchmark the short-horizon controller given the adversarially trained
# approximation.
ctrl_adv = controllers.get_limited_lookahead_controller(
    get_value_function(online_horizon)[0],
    vf_approx_adv,
    dt_sim * N,
)
bench_adv = controllers.benchmark_controller(
    vf.u_dim[0], sys.dx, ctrl_adv,
    x0, x0_eps, num_breaks,
    x_goal, dt_sim, N,
    integration_mode=integration_mode,
)

In [81]:
# Benchmark entries below conv_thresh are marked 1 in the second set of
# heatmaps (presumably "converged to the goal" — confirm what
# benchmark_controller returns).
conv_thresh = .2
# Shared colour range for the raw benchmark heatmaps.
zmin = 0.
zmax = 3


def _show_heatmap(z, title, zmin, zmax):
    """Display `z` as a plotly heatmap and return the figure.

    Args:
        z: 2-D data accepted by `go.Heatmap`.
        title: Figure title.
        zmin: Lower bound of the colour scale.
        zmax: Upper bound of the colour scale.

    Returns:
        The `go.Figure` that was shown.
    """
    fig = go.Figure()
    fig.add_trace(go.Heatmap(z=z, zmin=zmin, zmax=zmax))
    fig.update_layout(title=title)
    fig.show()
    return fig


# (title, benchmark result) pairs, in display order.
benchmarks = [
    ("LQR", bench_lqr),
    ("No Model", bench_no_model),
    ("Baseline", bench_baseline),
    ("Adversarial", bench_adv),
]

# Raw benchmark values on a common colour scale.
for title, bench in benchmarks:
    fig = _show_heatmap(bench.detach().numpy(), title, zmin, zmax)

# Thresholded maps (1 where the value is below conv_thresh, else 0). All four
# use a fixed [0, 1] colour range so they stay comparable; previously the
# "No Model" map omitted it and fell back to automatic scaling.
for title, bench in benchmarks:
    below_thresh = torch.Tensor(bench.detach().numpy() < conv_thresh).type(vf.dtype)
    fig = _show_heatmap(below_thresh, title, 0, 1)