# Solving Hamilton-Jacobi-Bellman Equations (via FBSDEs)
#### Frederik Kelbel, Imperial College London

In [1]:
import torch
import plotly.graph_objects as go
import numpy as np
from operators import div, Δ, D, mdotb, bdotm, mdotm, bdotb
from FBSDEs import FBSDESolver
from pdes import HBJ
from plotly.subplots import make_subplots
from configs import CONFIG_FBSDES as MODEL_CONFIG
from itertools import product

## Plotting

In [2]:
def plot_losses(losses, avg_over=10):
    """Plot a moving average of the training loss against the iteration index.

    Args:
        losses: One-dimensional sequence of per-iteration loss values.
        avg_over: Requested width of the moving-average window. It is clamped
            to ``[1, len(losses)]``: with a window wider than the history,
            ``np.convolve(..., 'valid')`` silently swaps its operands and the
            result is no longer a moving average of ``losses``.

    Raises:
        ValueError: If ``losses`` is empty.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    losses = np.asarray(losses, dtype=float)
    if losses.size == 0:
        raise ValueError("plot_losses needs at least one loss value")
    window = max(1, min(int(avg_over), losses.size))
    avgs = np.convolve(losses, np.ones(window), 'valid') / window

    fig = make_subplots(rows=1, cols=1)
    fig.add_trace(
        go.Scatter(x=np.arange(len(avgs)), y=avgs, mode='lines', name="Moving-average loss"),
        row=1, col=1,
    )
    fig.update_layout(
        title="Loss",
        xaxis_title="Iterations",
        yaxis_title="Loss",
        font=dict(
            family="Courier New, monospace",
            size=14
        )
    )
    fig.show()

### One-dimensional linear-quadratic control problem (Riccati equation)

Let $(\Omega, \mathcal{F}, \{\mathcal{F}_t\}_{t\in [0, T]}, \mathbb{P})$ be a filtered probability space supporting a Brownian motion $W$. We consider the controlled state dynamics
$$
\begin{cases}
dX_s = [H_s(X_s) + M_s(X_s) u_s] ds + \sigma_s dW_s, \; s \in [0, T] \\
X_0 = x > 0
\end{cases}.
$$

We aim to minimise
$$
J^u(t, x) := \mathbb{E}^{t, x} \Big[ \int_t^T \Big( X_s^T C_s X_s + \frac{1}{2}u_s^T D_s u_s \Big) ds + X_T^T R X_T\Big],
$$
with $C(t) = C \geq 0$, $R \geq 0$, and $D=D(t) > \delta > 0$ given and deterministic ($\delta > 0$ some constant).

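Writing $J(t, x)$ for the optimal value of $J^u(t, x)$ over admissible controls $u$, we write down the problem in its primal form (scalar case) as the Hamilton-Jacobi-Bellman equation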
$$
\begin{cases}
\partial_t J(t, x) + \inf_{u} \Big\{ \frac{1}{2} \sigma^2 \partial_{xx} J(t, x) + [H x + M u] \partial_x J(t, x) + C x^2 + \frac{1}{2}D u^2 \Big\} = 0 \text{ on $[0, T] \times (-\infty, \infty)$}
\\
J(T, x) = Rx^2 \text{ $\forall x \in \mathbb{R}$}
\end{cases}
$$

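The associated forward-backward SDE (FBSDE) system, with $\Gamma := M/\sigma$ and terminal cost $g(x) := R x^2$, is
$$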
\begin{cases}
        d\tilde{X}_t = (H(t, \tilde{X}_t) + \sigma(t, \tilde{X}_t) \Gamma^T \tilde{U}_t)dt + \sigma(t, \tilde{X}_t) dW_t, \quad t \in [0, T] \\
        \tilde{X}_0 = x
\end{cases}
$$
$$
\begin{cases}
        d\tilde{Y}_t = C(\tilde{X}_t) dt + \frac{1}{2}(\tilde{Z}_t^T \Gamma D^{-1} \Gamma^T \tilde{Z}_t)(t, \tilde{X}_t) dt + \tilde{Z}_t^T  dW_t, \quad t \in [0, T] \\
        \tilde{Y}_T = g(\tilde{X}_T)
\end{cases}.
$$

In [10]:
# Settings for the LQR experiment; this mapping is what gets passed to
# FBSDESolver, and LQR.__init__ reads "batch_size" from it.
LQR_MODEL_CONFIG = dict(
    batch_size=64,
    num_discretisation_steps=50,
    hidden_dim=64,
    learning_rate=5e-3,
    lr_decay=0.99,
    network_type="GRU",
    optimiser="Adam",
)

# `model` is an alias for the same dict object, not a copy.
model = LQR_MODEL_CONFIG
class LQR(HBJ):
    """Coefficients of the one-dimensional linear-quadratic control problem.

    The instance only stores coefficient callables and constants as
    attributes; how they are consumed is defined by ``HBJ`` and the solver
    (not visible from this cell).
    """

    def __init__(self, batch_size=None):
        """Build the coefficient functions.

        Args:
            batch_size: Leading dimension of the constant ``sigma`` and ``m``
                tensors. Defaults to ``model["batch_size"]`` so that
                ``LQR()`` behaves exactly as before; pass it explicitly to
                avoid depending on the notebook-global ``model``.
        """
        super().__init__()
        if batch_size is None:
            batch_size = model["batch_size"]

        # Constant diffusion and control-gain tensors of shape (batch_size, 1, 1).
        sigma = 0.3*torch.ones((batch_size, 1, 1))
        m = 2.0*torch.ones((batch_size, 1, 1))

        # Uncontrolled drift H(X, t) = 0.1 X.
        self.H = lambda X, t: 0.1*X
        # Control gain M(X, t); constant in both arguments.
        M = lambda X, t: m
        # Running state cost C(X) = 2 X^2.
        self.C = lambda X: 2.0*X**2
        # Control-cost weight D, stored as a (1, 1) tensor.
        self.D = torch.tensor([[0.2]])
        # Diffusion coefficient sigma(X, t); constant in both arguments.
        self.sigma = lambda X, t: sigma

        # Gamma = M / sigma (elementwise on the constant tensors above).
        self.Gamma = lambda X, t: M(X, t)/sigma
        # Terminal cost g(X) = 0.001 X^2.
        self.terminal_condition = lambda X: 0.001*X**2

        self.var_dim = 1
        self.terminal_time = 1
        # Affine map X -> 2 (X - 0.5), i.e. [0, 1] -> [-1, 1].
        # NOTE(review): presumably applied to uniform samples by the solver - confirm.
        self.init_sampling_func = lambda X: (X-0.5)*2

In [11]:
# Seed the global RNGs before the solver is constructed so that a
# Restart & Run All reproduces the same loss curve.
# NOTE(review): assumes FBSDESolver draws from torch's / NumPy's global
# generators - confirm against its implementation.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

NUM_ITERATIONS = 200  # argument passed to solver.train

eq = LQR()
solver = FBSDESolver(model, eq)
# solver.train(...) is iterated to collect one value per element into an array.
loss = np.array(list(solver.train(NUM_ITERATIONS)))
plot_losses(loss)

100%|██████████| 100/100 [00:17<00:00,  5.86 it/s]


In [12]:
# Surface plot of solver.u over the (x, t) grid.
fig = make_subplots(rows=1, cols=1, specs=[[{'type': 'surface'}]])
xs = np.linspace(-1, 1, 100)
# The problem is posed on [0, T]; the original grid reused the x-range
# [-1, 1] for time and therefore evaluated the network at negative times.
ts = np.linspace(0, eq.terminal_time, 100)
# Rows index t and columns index x, matching Surface's z[i][j] <-> (y[i], x[j]).
us_pred = np.array([[solver.u(x, t).item() for x in xs] for t in ts])
fig.add_trace(go.Surface(x=xs, y=ts, z=us_pred), row=1, col=1)
fig.update_layout(title='Approximation',
                  scene = dict(
                      xaxis_title="x",
                      yaxis_title="t",
                      zaxis_title="u(x, t)"),
                  margin=dict(l=50, r=50, b=50, t=50))
fig.show()

In [13]:
# Scalar control gain and diffusion coefficient for the path simulation in the
# next cell; they repeat the 2.0 and 0.3 hard-coded inside LQR.__init__.
M, sigma = 2.0, 0.3

In [14]:
# Euler-Maruyama simulation of one controlled and one uncontrolled path.
# Both paths share the same Brownian increments so they are directly comparable.
n = 50
T = eq.terminal_time
# n grid points cover [0, T] in n-1 steps, so the step size matches the
# plotted time axis and the last state really is X_T (where the terminal
# cost is charged).
dt = T/(n-1)
ts = np.linspace(0, T, n)
D = eq.D.item()  # control-cost weight as a plain float (eq.D is a (1, 1) tensor)
rng = np.random.default_rng(0)  # fixed seed: the figure is reproducible

c_xs = np.zeros(n)
c_xs[0] = 0.75
uc_xs = np.zeros(n)
uc_xs[0] = c_xs[0]
c_cum_cost = np.zeros(n)
uc_cum_cost = np.zeros(n)
for i in range(n-1):
    t = i*dt
    dW = np.sqrt(dt)*rng.standard_normal()
    # NOTE(review): solver.u is used here as the feedback control. If it
    # actually returns the value function, the control has to be derived
    # from its gradient instead - confirm against FBSDESolver.
    c = solver.u(c_xs[i], t).item()
    uc = 0
    c_xs[i+1] = c_xs[i] + (eq.H(c_xs[i], t) + M*c)*dt + sigma*dW
    uc_xs[i+1] = uc_xs[i] + (eq.H(uc_xs[i], t) + M*uc)*dt + sigma*dW
    # Left-endpoint Riemann sum of the running cost C(X) + 1/2 D u^2 from the
    # cost functional stated above; the increment must be scaled by dt.
    c_cum_cost[i+1] = c_cum_cost[i] + (eq.C(c_xs[i]) + 0.5*D*c**2)*dt
    uc_cum_cost[i+1] = uc_cum_cost[i] + (eq.C(uc_xs[i]) + 0.5*D*uc**2)*dt

# Terminal cost is charged once, on the state at time T.
c_cum_cost[-1] += eq.terminal_condition(c_xs[-1])
uc_cum_cost[-1] += eq.terminal_condition(uc_xs[-1])

fig = make_subplots(rows=1, cols=2)
fig.add_trace(go.Scatter(x=ts, y=c_xs, mode='lines', name="Controlled", line=dict(color="#00e476")), row=1, col=1)
fig.add_trace(go.Scatter(x=ts, y=uc_xs, mode='lines', name="Uncontrolled", line=dict(color="#FFe476")), row=1, col=1)
fig.add_trace(go.Scatter(x=ts, y=c_cum_cost, mode='lines', showlegend=False, line=dict(color="#00e476")), row=1, col=2)
fig.add_trace(go.Scatter(x=ts, y=uc_cum_cost, mode='lines', showlegend=False, line=dict(color="#FFe476")), row=1, col=2)
fig.update_layout(
    title="Minimise amount of X | Minimise the costs (hold both close to zero)",
    xaxis_title="t",
    yaxis_title="X",
    font=dict(
        family="Courier New, monospace",
        size=14
    )
)
# Label the right-hand (cumulative cost) panel as well.
fig.update_xaxes(title_text="t", row=1, col=2)
fig.update_yaxes(title_text="Cumulative cost", row=1, col=2)
fig.show()