# Solving Hamilton-Jacobi-Bellman Equations
#### Frederik Kelbel, Imperial College London

## Dependencies

In [1]:
import torch
import plotly.graph_objects as go
import numpy as np
from operators import div, Δ, D
from DGM import DGMSolver, DGMPIASolver
from pdes import HBJ
from scipy.integrate import quad
from plotly.subplots import make_subplots
from configs import CONFIG_HBJS as MODEL_CONFIG

## Plotting

In [2]:
def plot_losses(losses, avg_over=10):
    """Plot moving averages of the two loss columns in side-by-side panels.

    Args:
        losses: Array-like indexable as ``losses[:, 0]`` and ``losses[:, 1]``
            after conversion with ``np.asarray``. Column 0 is drawn as
            "Value Loss" (left panel), column 1 as "Control Loss" (right
            panel).
        avg_over: Length of the moving-average window. It is clamped to the
            number of rows in ``losses`` so that a run shorter than the
            window is averaged over what is available.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    losses = np.asarray(losses)

    # With mode='valid', np.convolve swaps its operands when the kernel is
    # longer than the signal; the result would then be a flat, wrongly scaled
    # series. Clamping the window avoids that case.
    window = max(1, min(avg_over, len(losses)))

    fig = make_subplots(rows=1, cols=2)
    for column, trace_name in enumerate(("Value Loss", "Control Loss")):
        smoothed = np.convolve(losses[:, column], np.ones(window), 'valid') / window
        fig.add_trace(
            go.Scatter(
                x=np.arange(len(smoothed)),
                y=smoothed,
                mode='lines',
                name=trace_name,
            ),
            row=1,
            col=column + 1,
        )

    fig.update_layout(
        title="Loss",
        font=dict(
            family="Courier New, monospace",
            size=14
        )
    )
    # xaxis_title/yaxis_title in update_layout only label the first subplot;
    # update_xaxes/update_yaxes apply the titles to every panel.
    fig.update_xaxes(title_text="Iterations")
    fig.update_yaxes(title_text="Loss")
    fig.show()

## Problem Formulation

Objective: Find the control process $u = (u_t)_{t \geq 0}$ in the admissible set $\mathcal{A}$ for an Itô process $X^u = (X_t^u)_{t \geq 0}$ satisfying:


$$
d X_t^u = \mu(t, X_t^u, u_t) dt + \sigma(t, X_t^u, u_t) d W_t, \quad X_0^u = 0.
$$

The agent's performance is assessed via:
$$
J^u(t, x) = \mathbb{E}\Big[ \int_t^T F(s, X_s^u, u_s) ds + G(X_T^u) \;\Big|\; X_t^u = x \Big]
$$

Denote $J(t, x) = \sup_{u \in \mathcal{A}} J^u(t, x)$; then this value function satisfies the following HJB equation, where $\mathscr{L}^u_t$ denotes the infinitesimal generator of $X^u$:

$$
\begin{cases}
\partial_t J(t, x) + \sup_{u \in \mathcal{A}} \{\mathscr{L}^u_t J(t, x) + F(t, x, u)\} = 0 \\
J(T, x) = G(x)
\end{cases}
$$

### The Merton Problem (Consumption Optimization)

The goal is to find the optimal wealth consumption strategy over time such that the expected discounted utility of consumption is maximized.

Let $(\Omega, \mathcal{F}, \{\mathcal{F}_t\}_{t\in [0, T]}, \mathbb{P})$ be a filtered probability space. The evolution of an investor's wealth is described via
$$
\begin{cases}
dX_t = (\mu X_t- c_t(X_t)) dt + \sigma X_t dW_t, \; t>0 \\
X_0 = x > 0
\end{cases},
$$

with $\mu$ and $\sigma$ referring to drift and volatility, respectively. Let $\beta$ denote the discount rate, i.e. the depreciation constant. The intent is to maximise the objective
$$
J^c(t, X_t) = \mathbb{E}\Big[ \int_t^T e^{-\beta s} U \circ c_s(X_s) ds \Big]
$$
subject to a utility function $U$. 

Proceeding, we will set $U(c) = \frac{c^\alpha}{\alpha}, \; \alpha \in (0, 1)$. The respective HJB equation becomes:

$$
\partial_t J(t, x) + \sup_{c_t} \Big\{ (\mu x - c_t(x)) \partial_x J(t, x) + \frac{1}{2} \sigma^2 x^2 \partial_{xx} J(t, x) + e^{-\beta t}\frac{c_t^\alpha(x)}{\alpha} \Big\} = 0
$$

In [20]:
class CONSUMPTION(HBJ):
    """Merton consumption problem with power utility ``c**α / α``.

    The constructor fixes the model parameters and attaches the callables
    (control squashing, running cost, terminal cost, differential operator)
    as instance attributes.
    NOTE(review): how the solver consumes these attributes is defined by
    ``HBJ`` / ``DGMPIASolver``, which are not visible here.
    """

    def __init__(self):
        super().__init__()

        # Model parameters.
        self.μ = 0.05  # drift
        self.σ = 0.1   # volatility
        self.α = 0.1   # exponent of the power utility
        # Discount rate: 0.1 above the non-negative part of
        # α*μ + 0.5*α*(α-1)*σ².
        growth = self.α*self.μ+0.5*self.α*(self.α-1)*self.σ**2
        self.β = max(0, growth)+0.1

        def squash(u):
            # Maps the raw control into (0, 1).
            return torch.sigmoid(u)

        def identity(x):
            return x

        def discounted_utility(c, x, t):
            # e^{-β t} * c^α / α
            return torch.exp(-self.β*t)*c**self.α/self.α

        def zero_terminal(x):
            return 0

        def generator(J, c, var, t):
            wealth = var[0]
            drift_part = (self.μ*wealth-c)*div(J, wealth)
            diffusion_part = 0.5*self.σ**2*wealth**2*Δ(J, wealth)
            return drift_part + diffusion_part

        self.control_output = squash
        self.var_dims = [1]  # wealth
        self.sampling_funcs = [identity]
        self.cost_function = discounted_utility
        self.terminal_cost = zero_terminal
        self.differential_operator = generator

In [21]:
# Consumption problem: pair the equation with the shared model configuration,
# call solver.train(200) and plot what it produced.
model = MODEL_CONFIG
eq = CONSUMPTION()
solver = DGMPIASolver(model, eq)
# Materialise everything train() yields; plot_losses reads columns 0 and 1.
loss = np.array([entry for entry in solver.train(200)])
plot_losses(loss)

100%|██████████| 200/200 [00:01<00:00, 135.52 it/s]


In [19]:
# Reference value to compare against the learned control.
# NOTE(review): presumably the closed-form Merton consumption rule; confirm
# that it applies to the horizon and terminal condition used in training.
A = 1/eq.α * (
    (eq.β - eq.μ*eq.α - 0.5*eq.α*(eq.α-1)*eq.σ**2) / (1-eq.α)
)**(eq.α-1)


def c(x):
    """Return the reference consumption at wealth ``x`` (linear in ``x``)."""
    return (A*eq.α)**(1/(eq.α-1))*x


print(c(1.0), solver.u(1.0, 1.0))

0.1111111111111111 1.02263577e-25


### The Merton Problem (With One Asset)

$$
\begin{cases}
\partial_t J(x, t, S) + \sup_{\pi \in \mathcal{A}} \Big\{ \big( (\pi ( \mu -r) + rx) \partial_x + \frac{1}{2} \sigma^2 \pi^2 \partial_{xx} + (\mu-r)S \partial_S + \frac{1}{2} \sigma^2 S^2 \partial_{SS} + \sigma \pi \partial_{xS} \big) J(x, t, S)\Big\} = 0 \\
J(x, T, S) = U(x)
\end{cases},
$$

where $U(x)=-e^{-\gamma x}$ with risk preference parameter $\gamma$. $\mu$, $\sigma$ and $r$ are the asset drift, volatility and risk-free rate. $\pi$ is the proportion of wealth invested; it is the agent's control. $x$ is the agent's wealth and $S$ is the price of the asset.

In [6]:
class MERTON(HBJ):
    """Merton problem with one asset and terminal utility ``-exp(-γ x)``.

    State variables are wealth (``var[0]``) and stock price (``var[1]``).
    There is no running cost. The constructor attaches the problem's
    callables as instance attributes.
    NOTE(review): how the solver consumes these attributes is defined by
    ``HBJ`` / ``DGMPIASolver``, which are not visible here.
    """

    def __init__(self):
        super().__init__()

        # Model parameters, captured by the closures below.
        μ = 0.05  # asset drift
        σ = 0.25  # asset volatility
        γ = 1     # risk preference
        r = 0.02  # risk-free rate

        def squash(u):
            # Maps the raw control into (0, 1).
            return torch.sigmoid(u)

        def identity(x):
            return x

        def no_running_cost(u, x, t):
            return 0

        def exponential_utility(x):
            return -torch.exp(-γ*x)

        def generator(J, u, var, t):
            wealth, stock = var[0], var[1]
            wealth_drift = (u*(μ-r) + r*wealth)*div(J, wealth)
            wealth_diffusion = 0.5*σ**2*u**2*Δ(J, wealth)
            stock_drift = (μ-r)*stock*div(J, stock)
            stock_diffusion = 0.5*σ**2*stock**2*Δ(J, stock)
            mixed = σ*u*div(div(J, wealth), stock)
            # Summed in this order to keep the same left-to-right evaluation.
            return wealth_drift + wealth_diffusion \
                + stock_drift + stock_diffusion \
                + mixed

        self.control_output = squash
        self.var_dims = [1, 1]  # wealth, stock
        self.sampling_funcs = [identity, identity]
        self.cost_function = no_running_cost
        self.terminal_cost = exponential_utility
        self.differential_operator = generator

In [7]:
# One-asset Merton problem: pair the equation with the shared model
# configuration, call solver.train(200) and plot what it produced.
model = MODEL_CONFIG
eq = MERTON()
solver = DGMPIASolver(model, eq)
# Materialise everything train() yields; plot_losses reads columns 0 and 1.
loss = np.array([entry for entry in solver.train(200)])
plot_losses(loss)

100%|██████████| 200/200 [00:02<00:00, 78.88 it/s]
