In [None]:
%load_ext autoreload
%autoreload 2

In [243]:
import sys
sys.path.append("..")
sys.path.append(".")
import torch
import numpy as np
import copy
import plotly
import plotly.graph_objs as go
import pickle
from datetime import datetime
plotly.offline.init_notebook_mode(connected=True)

import robust_value_approx.samples_generator as samples_generator
import robust_value_approx.samples_buffer as samples_buffer
import robust_value_approx.value_approximation as value_approximation
import robust_value_approx.training_log as training_log
import robust_value_approx.controllers as controllers

import pendulum_utils
import double_pendulum_utils

In [244]:
# vf, sys = pendulum_utils.get_value_function(10)
vf, sys = double_pendulum_utils.get_value_function(20)

V = vf.get_value_function()

In [142]:
x0 = torch.tensor([np.pi/2, 0.], dtype=vf.dtype)
v, res = V(x0)

In [68]:
x0_lo = torch.tensor([-1., -1.], dtype=vf.dtype)
x0_up = torch.tensor([1., 1.], dtype=vf.dtype)
gen = samples_generator.RandomSampleGenerator(vf, x0_lo, x0_up)
gen.generate_samples(30, show_progress=True, include_time=True)

Progress: [########################################] 100.0%


(tensor([[ 2.0000,  0.1002,  0.1032],
         [ 1.8000,  0.3957,  2.8513],
         [ 1.6000,  1.1008,  4.1998],
         [ 1.4000,  1.8575,  3.3675],
         [ 1.2000,  2.4082,  2.1398],
         [ 1.0000,  2.7469,  1.2472],
         [ 0.8000,  2.9434,  0.7173],
         [ 0.6000,  3.0567,  0.4158],
         [ 0.4000,  3.1174,  0.1913],
         [ 0.2000,  3.1375,  0.0101],
         [ 2.0000,  0.8323, -0.1823],
         [ 1.8000,  1.0025,  1.8848],
         [ 1.6000,  1.5057,  3.1463],
         [ 1.4000,  2.0832,  2.6295],
         [ 1.2000,  2.5204,  1.7419],
         [ 1.0000,  2.7998,  1.0525],
         [ 0.8000,  2.9672,  0.6219],
         [ 0.6000,  3.0662,  0.3673],
         [ 0.4000,  3.1200,  0.1707],
         [ 0.2000,  3.1379,  0.0091],
         [ 2.0000,  0.1231,  0.5855],
         [ 1.8000,  0.4845,  3.0288],
         [ 1.6000,  1.1977,  4.1027],
         [ 1.4000,  1.9284,  3.2046],
         [ 1.2000,  2.4507,  2.0183],
         [ 1.0000,  2.7699,  1.1741],
         [ 0

In [143]:
x0_lo = torch.tensor([-1., -1.], dtype=vf.dtype)
x0_up = torch.tensor([1., 1.], dtype=vf.dtype)
gen = samples_generator.GridSampleGenerator(vf, x0_lo, x0_up)
x, v = gen.generate_samples([10, 10], show_progress=True, include_time=True)

Progress: [########################################] 100.0%


In [292]:
x0_lo = torch.tensor([np.pi, 0., 0., 0.], dtype=vf.dtype) - 1.
x0_up = torch.tensor([np.pi, 0., 0., 0.], dtype=vf.dtype) + 1.
gen = samples_generator.RandomSampleGenerator(vf, x0_lo, x0_up)
x, v = gen.generate_samples(1000, show_progress=True, include_time=True)

Progress: [########################################] 100.0%


In [None]:
x0_lo = torch.tensor([-1., -1.], dtype=vf.dtype)
x0_up = torch.tensor([1., 1.], dtype=vf.dtype)
gen = samples_generator.GridSampleGenerator(vf, x0_lo, x0_up)
x, v = gen.generate_samples([10, 10], show_progress=True, include_time=True)

In [249]:
fig = go.Figure()
fig.add_trace(go.Scatter3d(
    x=x[:,1],
    y=x[:,3],
    z=v[:,0],
    mode='markers',
))
fig.show()

In [250]:
buff = samples_buffer.SamplesBuffer(x.shape[1], v.shape[1], vf.dtype)

In [293]:
buff.add_samples(x, v)

In [294]:
scale = torch.mean(v)
# model = value_approximation.QuadraticModel(vf.dtype, 1+vf.x_dim[0], scaling=scale)
model = value_approximation.NeuralNetworkModel(vf.dtype, 1+vf.x_dim[0], 100, 1, scaling=scale)

vf_approx = value_approximation.ValueFunctionApproximation(model)

train_log = training_log.TrainingLog(prefix="baseline")

In [296]:
for train_step_i in range(5000):
    x_, v_ = buff.get_random_samples(30)
    loss = vf_approx.train_step(x_, v_)
    train_log.add_train_loss(loss)

In [266]:
vf_approx.model.Q

Parameter containing:
tensor([[ 0.3216,  0.0727, -0.2235,  0.4409, -0.0361],
        [-0.5020,  0.0106,  0.2511, -0.2144,  0.3774],
        [-0.2932,  0.1322,  0.5016,  0.1256,  0.3469],
        [ 0.3066,  0.1198,  0.1186,  0.4389, -0.0420],
        [-0.0310,  0.1006, -0.1365,  0.3888,  0.3531]], dtype=torch.float64,
       requires_grad=True)

In [311]:
# x0 = torch.tensor([0., 10.], dtype=vf.dtype)
x0 = torch.tensor([np.pi+.5, 1., 2., 1.], dtype=vf.dtype)

In [312]:
# vf_ctrl, sys_ctrl = pendulum_utils.get_value_function(3)
vf_ctrl, sys_ctrl = double_pendulum_utils.get_value_function(10)
ctrl = controllers.get_limited_lookahead_controller(vf_ctrl, vf_approx)

x_traj_sim, t_traj_sim = controllers.sim_ctrl(x0, vf.u_dim[0], sys.dx, ctrl, .1, 60)

fig = go.Figure()
for i in range(4):
    fig.add_trace(go.Scatter(
        x=t_traj_sim,
        y=x_traj_sim[i,:]
    ))
print("Baseline")
fig.show()

Baseline


In [313]:
# vf_ctrl, sys_ctrl = pendulum_utils.get_value_function(3)
vf_ctrl, sys_ctrl = double_pendulum_utils.get_value_function(10)
ctrl = controllers.get_limited_lookahead_controller(vf_ctrl)

x_traj_sim, t_traj_sim = controllers.sim_ctrl(x0, vf.u_dim[0], sys.dx, ctrl, .1, 60)

fig = go.Figure()
for i in range(4):
    fig.add_trace(go.Scatter(
        x=t_traj_sim,
        y=x_traj_sim[i,:]
    ))
print("Baseline")
fig.show()

Baseline
