In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell executes and edits to the project's .py files
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [11]:
import sys
# Extend the import search path with the parent directory and the current
# directory (both relative to the kernel's working directory).
# Presumably this is what makes `robust_value_approx` and `pendulum_utils`
# importable below — TODO confirm against the repository layout.
sys.path.append("..")
sys.path.append(".")
import torch
import numpy as np
# NOTE(review): `copy`, `pickle` and `datetime` are not used in the cells
# visible here; they may be needed by cells outside this view.
import copy
import plotly
import plotly.graph_objs as go
import pickle
from datetime import datetime
# Set up plotly's offline notebook rendering. NOTE(review): connected=True
# presumably loads plotly.js from the CDN (needs network access) — confirm.
plotly.offline.init_notebook_mode(connected=True)

# Project modules: sample generation, sample storage, value-function models,
# training-loss logging and controllers.
import robust_value_approx.samples_generator as samples_generator
import robust_value_approx.samples_buffer as samples_buffer
import robust_value_approx.value_approximation as value_approximation
import robust_value_approx.training_log as training_log
import robust_value_approx.controllers as controllers

# Notebook-local helper that builds the pendulum value function.
import pendulum_utils

In [30]:
# Build the pendulum value-function object (plus a second object bound to
# `sys`, presumably the system/dynamics model — TODO confirm) and obtain the
# callable `V` that evaluates the value function at a given state.
# NOTE(review): binding to `sys` shadows the standard-library `sys` module
# imported in the import cell. After this cell runs, `sys.path` etc. are no
# longer reachable until the import cell is re-run; consider renaming.
vf_and_system = pendulum_utils.get_value_function(10)  # 10: presumably the horizon length — TODO confirm
vf, sys = vf_and_system
V = vf.get_value_function()

In [31]:
# Evaluate the value function at the initial state [pi/2, 0].
# `V` returns a pair: the value `v` and a result mapping `res`, which is
# indexed below with the keys 'x_traj' and 'cost_to_go'.
x0 = torch.tensor([0.5 * np.pi, 0.0], dtype=vf.dtype)
value_and_result = V(x0)
v, res = value_and_result

In [52]:
# Display the entries of res['x_traj'] as a single tensor, one row per entry.
torch.stack(list(res['x_traj']), dim=0)

tensor([[1.5708, 0.0000],
        [1.6987, 1.2792],
        [2.0402, 2.1359],
        [2.4290, 1.7518],
        [2.7206, 1.1641],
        [2.9081, 0.7111],
        [3.0217, 0.4249],
        [3.0895, 0.2530],
        [3.1266, 0.1181],
        [3.1390, 0.0063]], dtype=torch.float64)

In [58]:
# Display the cost-to-go values as a column (one value per row).
# Assumes res['cost_to_go'] is a 1-D tensor, as the rendered output indicates.
res['cost_to_go'].unsqueeze(1)

tensor([[2.8772e+00],
        [2.0288e+00],
        [8.0533e-01],
        [3.0008e-01],
        [1.0183e-01],
        [3.2234e-02],
        [9.8413e-03],
        [3.1471e-03],
        [1.3231e-03],
        [5.2399e-04]], dtype=torch.float64)

In [68]:
# Draw samples with initial states taken from the box [-1, 1] x [-1, 1].
# The bounds are symmetric, so the lower bound is the negated upper bound.
x0_up = torch.tensor([1., 1.], dtype=vf.dtype)
x0_lo = -x0_up
gen = samples_generator.RandomSampleGenerator(vf, x0_lo, x0_up)
# NOTE(review): no random seed is set in the visible cells, so this output is
# presumably not reproducible across runs — confirm how the generator samples.
# The full result is displayed as the cell output (it is large).
num_samples = 30
gen.generate_samples(num_samples, show_progress=True, include_time=True)

Progress: [########################################] 100.0%


(tensor([[ 2.0000,  0.1002,  0.1032],
         [ 1.8000,  0.3957,  2.8513],
         [ 1.6000,  1.1008,  4.1998],
         [ 1.4000,  1.8575,  3.3675],
         [ 1.2000,  2.4082,  2.1398],
         [ 1.0000,  2.7469,  1.2472],
         [ 0.8000,  2.9434,  0.7173],
         [ 0.6000,  3.0567,  0.4158],
         [ 0.4000,  3.1174,  0.1913],
         [ 0.2000,  3.1375,  0.0101],
         [ 2.0000,  0.8323, -0.1823],
         [ 1.8000,  1.0025,  1.8848],
         [ 1.6000,  1.5057,  3.1463],
         [ 1.4000,  2.0832,  2.6295],
         [ 1.2000,  2.5204,  1.7419],
         [ 1.0000,  2.7998,  1.0525],
         [ 0.8000,  2.9672,  0.6219],
         [ 0.6000,  3.0662,  0.3673],
         [ 0.4000,  3.1200,  0.1707],
         [ 0.2000,  3.1379,  0.0091],
         [ 2.0000,  0.1231,  0.5855],
         [ 1.8000,  0.4845,  3.0288],
         [ 1.6000,  1.1977,  4.1027],
         [ 1.4000,  1.9284,  3.2046],
         [ 1.2000,  2.4507,  2.0183],
         [ 1.0000,  2.7699,  1.1741],
         [ 0

In [75]:
# Generate samples on a grid over the box [-1, 1] x [-1, 1] and keep them in
# `x` (inputs) and `v` (targets) for the plotting and training cells below.
# With include_time=True the inputs presumably carry a leading time column in
# front of the state columns — TODO confirm in samples_generator.
x0_up = torch.tensor([1., 1.], dtype=vf.dtype)
x0_lo = -x0_up
gen = samples_generator.GridSampleGenerator(vf, x0_lo, x0_up)
grid_shape = [10, 10]  # presumably the number of grid points per state dimension
x, v = gen.generate_samples(grid_shape, show_progress=True, include_time=True)

Progress: [########################################] 100.0%


In [77]:
# 3D scatter of the generated samples: columns 1 and 2 of `x` on the horizontal
# axes and column 0 of `v` on the vertical axis.
# Column 0 of `x` is skipped; with include_time=True it is presumably the time
# column, which would make columns 1 and 2 the two state components — TODO confirm.
fig = go.Figure()
fig.add_trace(go.Scatter3d(
    x=x[:, 1],
    y=x[:, 2],
    z=v[:, 0],
    mode='markers',
))
# Title and axis labels so the figure is readable on its own.
fig.update_layout(
    title='Sampled value function',
    scene=dict(
        xaxis_title='state[0] (x[:, 1])',
        yaxis_title='state[1] (x[:, 2])',
        zaxis_title='value (v[:, 0])',
    ),
)
fig.show()

In [79]:
# Create a buffer sized to the column counts of the samples and load the
# generated samples into it.
num_x_cols, num_v_cols = x.shape[1], v.shape[1]
buff = samples_buffer.SamplesBuffer(num_x_cols, num_v_cols, vf.dtype)
buff.add_samples(x, v)

In [102]:
# Set up the value-function approximator and the training log.
# The mean of the sampled values is passed to the model as its `scaling`.
scale = torch.mean(v)

# Model input width: one extra column on top of the state dimension
# (presumably the time column added by include_time=True — TODO confirm).
model_input_dim = 1 + vf.x_dim[0]
model = value_approximation.QuadraticModel(vf.dtype, model_input_dim, scaling=scale)
# Alternative model that can be swapped in instead of the quadratic one:
# model = value_approximation.NeuralNetworkModel(vf.dtype, 1+vf.x_dim[0], 50, 1, scaling=scale)

vf_approx = value_approximation.ValueFunctionApproximation(model)

train_log = training_log.TrainingLog(prefix="baseline")

In [108]:
# Train the approximator: each step draws a random mini-batch from the buffer,
# runs one training step on it and records the returned loss in the log.
num_train_steps = 1000
batch_size = 30
for train_step_i in range(num_train_steps):
    x_, v_ = buff.get_random_samples(batch_size)
    loss = vf_approx.train_step(x_, v_)
    train_log.add_train_loss(loss)

In [113]:
# Display the `Q` parameter of the trained model.
vf_approx.model.Q

Parameter containing:
tensor([[ 0.5269,  0.0788,  0.1871],
        [ 0.4359,  0.0570,  0.0725],
        [ 0.7818, -0.6231, -0.0215]], dtype=torch.float64, requires_grad=True)