In [4]:
import numpy as np
import torch
from scipy.integrate import odeint
import matplotlib.pyplot as plt

# Define the LQR Solver class as per the provided structure and functions
class LQRSolver:
    def __init__(self, H, M, C, R, D, T, sigma):
        self.H = H
        self.M = M
        self.C = C
        self.R = R
        self.D = D
        self.T = T
        self.sigma = sigma

    def riccati_ode(self, S_flat, t):
        S = S_flat.reshape(2, 2)
        SDot = -2 * self.H.T @ S - S @ self.M @ np.linalg.inv(self.D) @ self.M.T @ S + self.C
        return SDot.flatten()

    def solve_riccati(self, time_grid):
        S0 = self.R.flatten()
        S_sol = odeint(self.riccati_ode, S0, time_grid)
        return S_sol.reshape(-1, 2, 2)

    def compute_value_function(self, t_batch, x_batch):
        S_t = self.solve_riccati(t_batch)
        values = torch.zeros(t_batch.size(0), 1)
        for i, (t, S) in enumerate(zip(t_batch, S_t)):
            x = x_batch[i]
            v = x.float().T @ torch.tensor(S).float() @ x.float()
            values[i] = v
        return values

    def compute_control_function(self, t_batch, x_batch):
        S_t = self.solve_riccati(t_batch)
        controls = torch.zeros(t_batch.size(0), 2, dtype=torch.float)
        D_tensor = torch.tensor(self.D, dtype=torch.float)
        M_tensor = torch.tensor(self.M, dtype=torch.float)  # Convert self.M to a tensor of the same type

        for i, (t, S) in enumerate(zip(t_batch, S_t)):
            x = x_batch[i].float()
            S_tensor = torch.tensor(S, dtype=torch.float)
            control = -torch.linalg.inv(D_tensor) @ M_tensor.T @ S_tensor @ x
            controls[i, :] = control.flatten()

        return controls

# Problem constants from provided parameters
H = np.array([[1.0, 0.0], [0.0, 1.0]]) * 0.1
M = np.array([[1.0, 0.0], [0.0, 1.0]])
C = np.array([[0.0, 0.0], [0.0, 0.0]])
D = np.array([[1.0, 0.0], [0.0, 1.0]])
R = np.array([[1.0, 0.0], [0.0, 1.0]]) * 10
T = 1.0  # Assuming final time T=1, this needs to be provided
sigma = 0.1  # Assuming some sigma value

# Create an instance of LQRSolver
lqr_solver = LQRSolver(H, M, C, R, D, T, sigma)

# Test the class with some dummy data
time_grid = np.linspace(0, T, 100)  # Time grid for the ODE solution
x_batch = torch.rand(10, 2)  # Dummy batch of state vectors
t_batch = torch.linspace(0, T, 10)  # Dummy batch of time points

# Solving the Riccati ODE
S_sol = lqr_solver.solve_riccati(time_grid)

# Compute value function for the batch
values = lqr_solver.compute_value_function(t_batch, x_batch)

# Compute control function for the batch
controls = lqr_solver.compute_control_function(t_batch, x_batch)

(S_sol, values, controls)  # Return the results for inspection

(array([[[10.        ,  0.        ],
         [ 0.        , 10.        ]],
 
        [[ 9.06507824,  0.        ],
         [ 0.        ,  9.06507824]],
 
        [[ 8.28859265,  0.        ],
         [ 0.        ,  8.28859265]],
 
        [[ 7.63341635,  0.        ],
         [ 0.        ,  7.63341635]],
 
        [[ 7.0731821 ,  0.        ],
         [ 0.        ,  7.0731821 ]],
 
        [[ 6.58864801,  0.        ],
         [ 0.        ,  6.58864801]],
 
        [[ 6.16544265,  0.        ],
         [ 0.        ,  6.16544265]],
 
        [[ 5.79261601,  0.        ],
         [ 0.        ,  5.79261601]],
 
        [[ 5.46167838,  0.        ],
         [ 0.        ,  5.46167838]],
 
        [[ 5.16594614,  0.        ],
         [ 0.        ,  5.16594614]],
 
        [[ 4.90008578,  0.        ],
         [ 0.        ,  4.90008578]],
 
        [[ 4.65978933,  0.        ],
         [ 0.        ,  4.65978933]],
 
        [[ 4.4415392 ,  0.        ],
         [ 0.        ,  4.4415392 ]],
 

In [8]:
#Exercise 1.2

import numpy as np
import torch
import matplotlib.pyplot as plt

# Problem constants from provided parameters
H = torch.tensor([[1.0, 0.0], [0.0, 1.0]]) * 0.1
M = torch.tensor([[1.0, 0.0], [0.0, 1.0]])
sigma = 0.001
C = torch.tensor([[0.0, 0.0], [0.0, 0.0]])
D = torch.tensor([[1.0, 0.0], [0.0, 1.0]])
R = torch.tensor([[1.0, 0.0], [0.0, 1.0]]) * 10
T = 1.0  # Assuming final time T=1, this needs to be provided

# Initial state
x0 = torch.tensor([1.0, 1.0])  # Example initial state
lqr_solver = LQRSolver(H.numpy(), M.numpy(), C.numpy(), R.numpy(), D.numpy(), T, sigma)


# Define the optimal control function based on the LQRSolver class
def optimal_control(t, x, lqr_solver):
    # Assuming lqr_solver.solve_riccati returns the matrix S for time t
    S_t = lqr_solver.solve_riccati(np.array([t])).reshape(2, 2)
    S_tensor = torch.from_numpy(S_t).float()
    control = -torch.linalg.inv(D) @ M.T @ S_tensor @ x
    return control

# Define the explicit update function
def explicit_update(x, t, dt, lqr_solver):
    dW = torch.randn(x.shape) * np.sqrt(dt)
    a = optimal_control(t, x, lqr_solver)
    x_next = x + (H @ x + M @ a) * dt + sigma * dW
    return x_next

# Define the Monte Carlo simulation function
def monte_carlo_simulation(update_function, lqr_solver, N, num_samples):
    dt = T / N
    terminal_costs = []
    for _ in range(num_samples):
        x = x0
        for step in range(N):
            t = step * dt
            x = update_function(x, t, dt, lqr_solver)
        terminal_cost = x.T @ R @ x
        terminal_costs.append(terminal_cost.item())
    mse = np.mean(np.square(terminal_costs))
    return mse

# Example usage with explicit update
#N_time_steps = 100  # Number of time steps for simulation
#N_mc_samples = 100  # Number of Monte Carlo samples for simulation
#mse_explicit = monte_carlo_simulation(explicit_update, lqr_solver, N_time_steps, N_mc_samples)


# Log-log plot code would go here
time_steps = np.array([1, 10, 50, 100, 500, 1000, 5000, 10000])
mse_values_time = [monte_carlo_simulation(explicit_update, lqr_solver, n, 100) for n in time_steps]  # Randomly generated errors

monte_carlo_samples = np.array([10,50,100, 500, 1000, 5000, 10000, 50000, 100000])
mse_values_samples = [monte_carlo_simulation(explicit_update, lqr_solver, 200, n) for n in monte_carlo_samples]  # Randomly generated errors

# Log-Log plot for MSE against time steps
plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
plt.loglog(time_steps, mse_values_time, 'o-', label='Error vs Time Steps')
plt.xlabel('Number of Time Steps')
plt.ylabel('MSE')
plt.title('Log-Log Plot of MSE vs Time Steps')
plt.legend()

# Log-Log plot for MSE against Monte Carlo samples
plt.subplot(1, 2, 2)
plt.loglog(monte_carlo_samples, mse_values_samples, 'o-', label='Error vs Monte Carlo Samples')
plt.xlabel('Number of Monte Carlo Samples')
plt.ylabel('MSE')
plt.title('Log-Log Plot of MSE vs Monte Carlo Samples')
plt.legend()

plt.tight_layout()
plt.show()