In [1]:
# Reload imported modules automatically before each cell runs, so edits to the
# project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Extend the module search path with the parent directory and the current
# directory -- presumably so the project-local imports further down
# (`robust_value_approx`, `acrobot_utils`) resolve; confirm against repo layout.
sys.path.append("..")
sys.path.append(".")
# NOTE(review): the recorded output of this cell warns that `torch` was imported
# before `pydrake` (RTLD_GLOBAL symbol collisions). If the project modules pull
# in pydrake, consider importing them ahead of `torch`.
import torch
import numpy as np
import copy
import plotly
import plotly.graph_objs as go
# NOTE(review): `pickle` and `datetime` are not used by any cell visible in this
# notebook -- candidates for removal if that holds for the whole file.
import pickle
from datetime import datetime
# Switch plotly to its offline notebook mode so figures render inside the notebook.
plotly.offline.init_notebook_mode(connected=True)

# Project-local modules: sample storage, value-function models/training,
# training logs and controllers.
import robust_value_approx.samples_buffer as samples_buffer
import robust_value_approx.value_approximation as value_approximation
import robust_value_approx.training_log as training_log
import robust_value_approx.controllers as controllers

# Acrobot-specific helpers (provides `get_value_function`, used below).
import acrobot_utils



You may have already (directly or indirectly) imported `torch` which uses
`RTLD_GLOBAL`. Using `RTLD_GLOBAL` may cause symbol collisions which manifest
themselves in bugs like "free(): invalid pointer". Please consider importing
`pydrake` (and related C++-wrapped libraries like `cv2`, `open3d`, etc.)
*before* importing `torch`. For more details, see:
https://github.com/pytorch/pytorch/issues/3059#issuecomment-534676459




In [3]:
# Global configuration for the notebook.

# Seed torch's global RNG so that the `torch.rand`-based sampling cells below
# produce the same data on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Floating-point precision used for the sampled states and the approximator.
dtype = torch.float64
# Dimension of the state vector x (the nominal state used later has 4 entries).
x_dim = 4

In [4]:
# Random quadratic target x^T Q x + q^T x + c. Raw entries are drawn uniformly
# from [-1, 1); Q is then replaced by 0.5 * Q^T Q, which is symmetric positive
# semi-definite by construction.
# NOTE: the next cell rebinds Q, q and c to the LQR-derived values before they
# are used, so these random values only matter if that cell is skipped.
Q = torch.rand((x_dim, x_dim), dtype=dtype).sub(.5).mul(2.)
Q = torch.matmul(.5 * Q.t(), Q)
q = torch.rand(x_dim, dtype=dtype).sub(.5).mul(2.)
c = torch.rand(1, dtype=dtype).sub(.5).mul(2.)

In [5]:
# Build the acrobot value function and its system object. The system is bound
# to `acrobot_sys` (not `sys`) so the stdlib `sys` module imported at the top of
# the notebook is not shadowed for the rest of the session.
# The argument 25 is interpreted by `acrobot_utils.get_value_function`
# (not visible here) -- TODO document its meaning.
vf, acrobot_sys = acrobot_utils.get_value_function(25)

# LQR state and input cost weights (identity), and the nominal state/input the
# controller is designed around.
Q = torch.diag(torch.tensor([1., 1., 1., 1.]))
R = torch.diag(torch.tensor([1.]))
x_nom = torch.tensor([np.pi, 0., 0., 0.], dtype=vf.dtype)
u_nom = torch.zeros(vf.u_dim[0], dtype=vf.dtype)
ctrl, S = controllers.get_lqr_controller(acrobot_sys.dx, x_nom, u_nom, Q, R, vf.u_lo[0], vf.u_up[0])
S = torch.tensor(S, dtype=vf.dtype)

# Quadratic target in the form x^T Q x + q^T x + c used by the sampling cells.
# With symmetric S (assumed -- confirm against get_lqr_controller) this is the
# expansion of (x - x_nom)^T S (x - x_nom).
# NOTE: Q is deliberately rebound here from the LQR state weight to S.
Q = S
q = -2.*S@x_nom
c = x_nom@S@x_nom


No GPU/TPU found, falling back to CPU.



In [6]:
# Draw 1000 training states with entries uniform in [-1, 1).
x = (torch.rand((1000, x_dim), dtype=dtype) - .5) * 2.
# Target values v_i = x_i^T Q x_i + q^T x_i + c, stored as a (1000, 1) column.
# The quadratic term is evaluated row by row; forming x @ Q @ x.T and taking
# its diagonal would build a 1000 x 1000 matrix just to discard everything off
# the diagonal.
v = (torch.sum((x@Q)*x, dim=1) + x@q + c).unsqueeze(1)

In [7]:
# Scaling constant handed to the model: the mean of the sampled target values.
scale = v[:, 0].mean()
print(scale)

tensor(88760.2143, dtype=torch.float64)


In [8]:
# Quadratic model over the state, built with the value function's dtype and
# state dimension. `scaling` is presumably a factor applied to the model's
# parameters/output (a later cell compares `model.Q * scale` with Q) -- confirm
# in value_approximation.QuadraticModel.
model = value_approximation.QuadraticModel(
    vf.dtype,
    vf.x_dim[0],
    scaling=scale,
)

In [9]:
# Trainer object wrapping the quadratic model.
vf_approx = value_approximation.InfiniteHorizonValueFunctionApproximation(
    dtype, x_dim, model
)
# Log that receives the training losses, tagged with the "quadratic" prefix.
train_log = training_log.TrainingLog(1, prefix="quadratic")
# Sample buffer seeded with the (state, value) pairs generated above.
samples_buff = samples_buffer.SamplesBuffer(x_dim, 1, dtype)
samples_buff.add_samples(x, v)

In [11]:
# Fit the model: each iteration draws a random batch from the buffer, takes one
# training step on it and records the resulting losses.
num_train_steps, batch_size = 5000, 1000
for i in range(num_train_steps):
    x_, v_ = samples_buff.get_random_samples(batch_size)
    losses = vf_approx.train_step(x_, v_)
    train_log.add_train_loss(losses)

In [12]:
# For each coefficient print, in order: the learned parameter, the learned
# parameter multiplied by `scale`, and the target it should match.
for learned, target in (
    (vf_approx.model.Q, Q),
    (vf_approx.model.q, q),
    (vf_approx.model.c, c),
):
    print(learned)
    print(learned * scale)
    print(target)
    print("---")

Parameter containing:
tensor([[0.0981, 0.0283, 0.0443, 0.0156],
        [0.0283, 0.0083, 0.0128, 0.0046],
        [0.0443, 0.0128, 0.0200, 0.0071],
        [0.0156, 0.0046, 0.0071, 0.0025]], dtype=torch.float64,
       requires_grad=True)
tensor([[8706.5053, 2515.6380, 3932.3521, 1387.6225],
        [2515.6380,  737.7348, 1137.9721,  404.9252],
        [3932.3521, 1137.9721, 1776.6220,  627.4555],
        [1387.6225,  404.9252,  627.4555,  222.8334]], dtype=torch.float64,
       grad_fn=<MulBackward0>)
tensor([[8706.5053, 2515.6380, 3932.3521, 1387.6225],
        [2515.6380,  737.7348, 1137.9721,  404.9252],
        [3932.3521, 1137.9721, 1776.6220,  627.4555],
        [1387.6225,  404.9252,  627.4555,  222.8334]], dtype=torch.float64)
---
Parameter containing:
tensor([-0.6163, -0.1781, -0.2784, -0.0982], dtype=torch.float64,
       requires_grad=True)
tensor([-54704.5861, -15806.2200, -24707.6967,  -8718.6892],
       dtype=torch.float64, grad_fn=<MulBackward0>)
tensor([-54704.5861, -

In [16]:
# Draw 1000 fresh test states with entries uniform in [-1, 1).
x_test = (torch.rand((1000, x_dim), dtype=dtype) - .5) * 2.
# Ground-truth values v_i = x_i^T Q x_i + q^T x_i + c as a (1000, 1) column.
# The quadratic term is evaluated row by row; forming x_test @ Q @ x_test.T and
# taking its diagonal would build a 1000 x 1000 matrix just to discard
# everything off the diagonal.
v_test = (torch.sum((x_test@Q)*x_test, dim=1) + x_test@q + c).unsqueeze(1)

In [17]:
# Model predictions on the test states (detached from the autograd graph) and
# the signed residual against the ground-truth values.
v_pred = vf_approx.model(x_test).detach()
error = torch.sub(v_test, v_pred)

In [18]:
# Mean and worst-case squared error of the current `error` tensor.
squ_error = error.pow(2)
print(squ_error.mean())
print(squ_error.max())

tensor(2.0494e-22, dtype=torch.float64)
tensor(3.3881e-21, dtype=torch.float64)


In [19]:
# Sanity check: copy the trained approximator and overwrite only its Q parameter
# with the target Q divided by `scale`; q and c stay as learned.
# NOTE: this cell rebinds `v_pred` and `error`, so later cells see the residual
# of this modified copy rather than of the trained model.
vf_approx_2 = copy.deepcopy(vf_approx)
vf_approx_2.model.Q = torch.nn.Parameter(Q.div(scale))
v_pred = vf_approx_2.model(x_test).detach()
error = torch.sub(v_test, v_pred)

In [20]:
# Mean and worst-case squared error of the current `error` tensor.
squ_error = error.pow(2)
print(squ_error.mean())
print(squ_error.max())

tensor(7.4871e-22, dtype=torch.float64)
tensor(7.6233e-21, dtype=torch.float64)


In [None]:
# Scatter plot of the signed residual (v_test - v_pred) for every test sample,
# in sample order. The title and axis labels let the figure stand on its own.
fig = go.Figure()
fig.add_trace(go.Scatter(
    y=error[:, 0],
    mode='markers',
))
fig.update_layout(
    title="Prediction error on test samples",
    xaxis_title="test sample index",
    yaxis_title="v_test - v_pred",
)
fig.show()