In [2]:
import numpy as np
import scipy
import matplotlib.pyplot as plt
import torch
from scipy.integrate import solve_ivp
import torch
import torch.nn as nn
import numpy as np
import os
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [None]:
class LQR:
    """Linear-quadratic regulator helper built around a matrix Riccati ODE.

    The coefficient matrices are stored as ``torch.float32`` tensors.  The
    Riccati solution ``S(t)`` satisfies

        dS/dt = -2 H^T S + S M D^{-1} M^T S - C,    S(T) = R,

    and is used both for the value function and for the feedback control.
    The state dimension is taken from ``R`` (2 in this notebook).
    """

    def __init__(self, H, M, C, D, R, T, sigma):
        """Store the problem data.

        Parameters
        ----------
        H, M, C, D, R : array-like
            Coefficient matrices; each is converted to a float32 tensor.
        T : float
            Terminal time from which the Riccati ODE is integrated backwards.
        sigma : array-like or None
            Noise coefficient.  ``None`` switches off the trace integral in
            ``control_problem_value``.
        """
        self.H = torch.tensor(H, dtype=torch.float32)
        self.M = torch.tensor(M, dtype=torch.float32)
        self.C = torch.tensor(C, dtype=torch.float32)
        self.D = torch.tensor(D, dtype=torch.float32)
        self.R = torch.tensor(R, dtype=torch.float32)
        self.T = T
        self.sigma = sigma

    def solve_ricatti_ode(self, time_grid):
        """Solve the Riccati ODE and return ``S`` on ``time_grid``.

        Parameters
        ----------
        time_grid : array-like
            Evaluation times.  The grid is reversed and handed to ``solve_ivp``
            as ``t_eval`` for an integration running from ``T`` down to ``0``,
            so it must be increasing and lie inside ``[0, T]``.

        Returns
        -------
        torch.Tensor
            float32 tensor of shape ``(len(time_grid), d, d)`` in the same
            order as ``time_grid``.

        Raises
        ------
        RuntimeError
            If the ODE solver reports failure.
        """
        H, M, C, D, R = (mat.numpy().astype(np.float64)
                         for mat in (self.H, self.M, self.C, self.D, self.R))
        dim = R.shape[0]
        # The quadratic coefficient does not depend on t or S: build it once.
        quad_coeff = M @ np.linalg.inv(D) @ M.T

        def ricatti_ode(t, S_flat):
            S = S_flat.reshape(dim, dim)
            dSdt = -2 * H.T @ S + S @ quad_coeff @ S - C
            return dSdt.flatten()

        # The right-hand side handles one flat state per call, so it is not a
        # vectorized callable and `vectorized=True` must not be requested.
        t_eval = np.flip(np.asarray(time_grid, dtype=np.float64))
        sol = solve_ivp(ricatti_ode, [self.T, 0], R.flatten(), t_eval=t_eval,
                        rtol=1e-6, atol=1e-9)
        if not sol.success:
            raise RuntimeError(f"Riccati ODE solver failed: {sol.message}")

        # sol.y is (d*d, n_times) in decreasing time; flip back to grid order.
        S_values = torch.tensor(sol.y.T.reshape(-1, dim, dim), dtype=torch.float32)
        return torch.flip(S_values, dims=[0])

    def control_problem_value(self, t, x):
        """Evaluate ``v(t_i, x_i) = x_i^T S(t_i) x_i + int_{t_i}^{T} tr(sigma sigma^T S(r)) dr``.

        Sample ``x[i]`` is paired with time ``t[i]``; the integral is a
        left-endpoint Riemann sum over the remaining points of ``t``.

        Parameters
        ----------
        t : array-like
            Time grid; also used as the quadrature grid.
        x : torch.Tensor
            States of shape ``(n, d, 1)`` with ``n <= len(t)``.

        Returns
        -------
        torch.Tensor
            float32 tensor of shape ``(n, 1)``.

        Raises
        ------
        IndexError
            If ``x`` holds more samples than ``t`` has time points.
        """
        S_values = self.solve_ricatti_ode(t)
        n_samples = x.size(0)
        if n_samples > S_values.size(0):
            raise IndexError("x holds more samples than t has time points")

        # Quadratic term for every sample in one batched product: (n, 1, 1).
        quad = x.transpose(-2, -1) @ S_values[:n_samples] @ x
        v_values = quad.reshape(n_samples, 1).to(torch.float32)

        if self.sigma is not None:
            sigma_mat = torch.as_tensor(self.sigma, dtype=torch.float32)
            sigma_sq = sigma_mat @ sigma_mat.transpose(0, 1)
            # tr(A S_j) = sum_ab A^T[b, a] * S_j[b, a]; accumulate in float64.
            traces = (sigma_sq.transpose(0, 1) * S_values).sum(dim=(1, 2)).to(torch.float64)
            steps = torch.as_tensor(np.diff(np.asarray(t, dtype=np.float64)))
            increments = traces[:-1] * steps
            # tail[i] = sum_{j >= i} increments[j]; the last grid point has an
            # empty integration range, hence the appended zero.
            tail = torch.flip(torch.cumsum(torch.flip(increments, dims=[0]), dim=0), dims=[0])
            tail = torch.cat([tail, tail.new_zeros(1)])
            v_values = v_values + tail[:n_samples].unsqueeze(1).to(torch.float32)

        return v_values

    def markov_control_function(self, t, x):
        """Return the feedback control ``a_i = -D^{-1} M^T S(t_i) x_i``.

        Parameters
        ----------
        t : array-like
            Time grid; ``t[i]`` is the time attached to ``x[i]``.
        x : torch.Tensor
            States of shape ``(n, d, 1)`` with ``n <= len(t)``.

        Returns
        -------
        torch.Tensor
            Controls with the same leading layout as ``x``
            (``(n, d, 1)`` for ``(n, d, 1)`` input).

        Raises
        ------
        IndexError
            If ``x`` holds more samples than ``t`` has time points.
        ValueError
            If ``x`` has fewer than three dimensions.
        """
        S_values = self.solve_ricatti_ode(t)
        n_samples = x.size(0)
        if n_samples > S_values.size(0):
            raise IndexError("x holds more samples than t has time points")
        if x.dim() < 3:
            raise ValueError("x must have shape (n, d, 1)")

        # The gain in front of S does not depend on the sample: invert D once.
        gain = -torch.inverse(self.D) @ self.M.T
        feedback = gain @ S_values[:n_samples]
        # Singleton axes keep feedback[i] aligned with x[i] if a sample carries
        # extra leading axes; for (n, d, 1) input this is a no-op.
        extra_axes = (1,) * (x.dim() - 3)
        feedback = feedback.reshape((n_samples,) + extra_axes + tuple(feedback.shape[-2:]))
        return feedback @ x

In [None]:
# Coefficient matrices handed to the LQR constructor (plain nested lists).
H = [[0.1, 0.0], [0.0, 0.1]]
M = [[1, 0], [0, 1]]
C = [[0.0, 0.0], [0.0, 0.0]]
D = [[1, 0], [0, 1]]
R = [[10.0, 0.0], [0.0, 10.0]]

# Time horizon and number of grid intervals.
T = 10
N = 100000

# Noise coefficient as a (2, 1) column with both entries equal to 0.001.
sigma = np.full((2, 1), 0.001)

# Uniform grid of N + 1 points on [0, T].
time_grid = np.linspace(0, T, num=N + 1)

# One initial state, stored with shape (1, 2, 1).
initial_x = torch.tensor([100.0, 50.0], dtype=torch.float32).reshape(1, 2, 1)

In [None]:
# Build the LQR problem from the parameters defined above.
lqr = LQR(H=H, M=M, C=C, D=D, R=R, T=T, sigma=sigma)

# Value at the first grid time for initial_x.  control_problem_value solves the
# Riccati ODE on time_grid internally, so no separate solve call is made here.
v = lqr.control_problem_value(time_grid, initial_x)

In [None]:
def apply_markov_control_function(model, t, X_simulated):
    """Evaluate ``model.markov_control_function`` on every simulated path.

    Parameters
    ----------
    model : object
        Anything exposing ``markov_control_function(t, x_path)``.
    t : array-like
        Time grid, forwarded unchanged to the model.
    X_simulated : torch.Tensor
        Batch of paths; the first axis indexes the path.

    Returns
    -------
    torch.Tensor
        Tensor shaped like ``X_simulated`` whose ``i``-th entry is the model
        output for path ``i``.
    """
    controls = torch.zeros_like(X_simulated)
    n_paths = X_simulated.shape[0]
    path_idx = 0
    while path_idx < n_paths:
        # One model call per path; the result is written into the matching slot.
        controls[path_idx] = model.markov_control_function(t, X_simulated[path_idx])
        path_idx += 1
    return controls

In [None]:
# Riccati solution on the full time grid, kept for later inspection.
s = lqr.solve_ricatti_ode(time_grid=time_grid)

In [None]:
def wiener_process(N, T):
    """Draw one normal increment with mean 0 and standard deviation sqrt(T/N).

    Uses NumPy's global random state and returns a single scalar sample.
    """
    step_std = np.sqrt(T / N)
    return np.random.normal(loc=0, scale=step_std)

In [None]:
def simulation(lqr, x, T, N, S_values):
    """Simulate a single state path with an explicit Euler scheme.

    Each step applies

        x_{n+1} = x_n + dt * (H x_n - M D^{-1} M^T S_n x_n) + sigma * dW_n,

    where ``dW_n`` is one scalar draw from ``wiener_process(N, T)``.

    Parameters
    ----------
    lqr : object
        Provides tensors ``H``, ``M``, ``D`` and the noise coefficient
        ``sigma`` (array-like, or ``None`` for a noise-free path).
    x : torch.Tensor
        Initial state, assignable into a ``(d, 1)`` slot
        (e.g. shape ``(d, 1)`` or ``(1, d, 1)``).
    T : float
        Time horizon.
    N : int
        Number of Euler steps.
    S_values : torch.Tensor
        Riccati solution indexed by time step; entries ``0 .. N-1`` are read.

    Returns
    -------
    torch.Tensor
        Path of shape ``(N + 1, d, 1)`` with ``d = lqr.H.shape[0]``.
    """
    dt = T / N
    state_dim = lqr.H.shape[0]
    x_values = torch.zeros((N + 1, state_dim, 1))
    x_values[0] = x

    # Loop-invariant pieces: the control gain and the noise coefficient.
    control_gain = lqr.M @ torch.inverse(lqr.D) @ lqr.M.T
    sigma = None if lqr.sigma is None else torch.as_tensor(lqr.sigma, dtype=torch.float32)

    for n in range(N):
        x_n = x_values[n]
        drift = lqr.H @ x_n - control_gain @ S_values[n] @ x_n
        x_next = x_n + dt * drift
        if sigma is not None:
            # One scalar increment per step scales the whole sigma column.
            x_next = x_next + sigma * float(wiener_process(N, T))
        x_values[n + 1] = x_next
    return x_values

In [None]:
def simulate_multiple_paths(lqr, x, T, N, S_values, n):
    '''
    Run `simulation` n times and stack the resulting paths.

    Parameters:
    lqr: LQR object forwarded to `simulation`.
    x: Initial state forwarded to `simulation`.
    T: Total time.
    N: Number of time steps.
    S_values: Riccati solution indexed by time step, forwarded to `simulation`.
    n: Number of paths to simulate.

    Returns:
    A tensor whose leading axis has length n; entry k is the k-th result of
    `simulation(lqr, x, T, N, S_values)`.
    '''
    # Every call is an independent run with the same arguments.
    paths = [simulation(lqr, x, T, N, S_values) for _ in range(n)]

    # torch.stack adds the path index as a new leading dimension.
    return torch.stack(paths)


In [None]:
def compute_J_alpha(lqr, X_paths, alpha_paths, T, N):
    """
    Monte Carlo estimate of the cost J for simulated state/control paths.

    Per path the cost is a left-endpoint Riemann sum of
    x^T C x + a^T D a over the first N time points (step T/N) plus the
    terminal cost x_T^T R x_T; the result is the mean over all paths.

    Parameters:
    lqr: Object providing the tensors C, D and R.
    X_paths: State paths, shape [number of simulations, time points, d, 1].
    alpha_paths: Control paths with the same layout as X_paths.
    T: Total time.
    N: Number of time steps (quadrature intervals).

    Returns:
    The average value of J_alpha over all simulated paths, as a Python float.
    """
    C, D, R = lqr.C, lqr.D, lqr.R
    dt = T / N

    # Running cost at every grid point: (paths, time points, 1, 1) -> (paths, time points).
    running = X_paths.transpose(2, 3) @ C @ X_paths + alpha_paths.transpose(2, 3) @ D @ alpha_paths
    running = running.squeeze(-1).squeeze(-1)

    # N intervals of width dt cover [0, T]; the terminal grid point is not part
    # of the left-endpoint sum.
    integral_part = running[:, :N].sum(dim=1) * dt

    # Terminal cost reduced to one scalar per path.  Keeping both terms 1-D
    # avoids broadcasting them into a (paths, paths) table when they are added.
    x_T = X_paths[:, -1]
    terminal_cost = (x_T.transpose(1, 2) @ R @ x_T).squeeze(-1).squeeze(-1)

    return torch.mean(integral_part + terminal_cost).item()


In [None]:
def simulate_and_compute_J_for_different_Ns(lqr, initial_x, T, Ns, fix_n):
    """
    Estimate J for several time discretisations with a fixed number of paths.

    Parameters:
    - lqr: An instance of the LQR model.
    - initial_x: Initial state vector.
    - T: Total time.
    - Ns: Sequence of step counts N to try.
    - fix_n: Number of paths simulated for every N.

    Returns:
    - Js: NumPy array with one J estimate per entry of Ns, in the same order.
    """
    Js = np.zeros(len(Ns))

    for slot, n_steps in enumerate(Ns):
        # Grid with n_steps intervals and the Riccati solution on it.
        grid = np.linspace(0, T, n_steps + 1)
        riccati = lqr.solve_ricatti_ode(grid)

        # Paths -> controls along the paths -> averaged cost.
        paths = simulate_multiple_paths(lqr, initial_x, T, n_steps, riccati, fix_n)
        controls = apply_markov_control_function(lqr, grid, paths)
        Js[slot] = compute_J_alpha(lqr, paths, controls, T, n_steps)

    return Js

In [None]:
# Time-discretisation study: keep the number of paths large and fixed,
# and vary the number of time steps.
fix_n = 1000
Ns = [1, 10, 50, 100, 500, 1000, 5000]

# Hard-coded reference value the estimates are compared against
# (presumably the result of control_problem_value for initial_x; TODO confirm).
v = 2882.2737

Js1 = simulate_and_compute_J_for_different_Ns(lqr, initial_x, T, Ns, fix_n)
error_vary = np.abs(Js1 - v)

In [None]:
# Log-log plot of the absolute error against the number of time steps.
Ns = [1, 10, 50, 100, 500, 1000, 5000]
plt.figure(figsize=(10, 6))
plt.loglog(Ns, error_vary, marker='o', label='Error')
plt.gca().set(
    xlabel='N (Number of Time Steps)',
    ylabel='Error',
    title='Error Variation with Number of Time Steps',
)
plt.grid(True)
plt.legend()
plt.show()

In [None]:
def simulate_and_compute_J_for_different_ns(lqr, initial_x, T, fix_N, ns):
    """
    Estimate J for several path counts on one fixed time discretisation.

    Parameters:
    - lqr: An instance of the LQR model.
    - initial_x: Initial state vector.
    - T: Total time.
    - fix_N: Number of time steps used for every run.
    - ns: Sequence of path counts; each entry is one Monte Carlo run.

    Returns:
    - Js: NumPy array with one J estimate per entry of ns, in the same order.
    """
    # Grid and Riccati solution are shared by all runs.
    grid = np.linspace(0, T, fix_N + 1)
    riccati = lqr.solve_ricatti_ode(grid)

    Js = np.zeros(len(ns))
    for slot, n_paths in enumerate(ns):
        # Paths -> controls along the paths -> averaged cost.
        paths = simulate_multiple_paths(lqr, initial_x, T, fix_N, riccati, n_paths)
        controls = apply_markov_control_function(lqr, grid, paths)
        Js[slot] = compute_J_alpha(lqr, paths, controls, T, fix_N)

    return Js

In [None]:
# Sample-size study: fix the time discretisation and vary the number of paths.
fix_N = 5000
ns = [10, 50, 100, 500, 1_000, 5_000, 10_000, 50_000, 100_000]
Js2 = simulate_and_compute_J_for_different_ns(lqr, initial_x, T, fix_N, ns)

In [None]:
# Absolute error of each estimate against the reference value v,
# shown on log-log axes against the number of simulated paths.
error2_vary = abs(Js2 - v)
plt.figure(figsize=(10, 6))
plt.loglog(ns, error2_vary, marker='o', label='Error')
plt.gca().set(
    xlabel='n(number of samples)',
    ylabel='Error',
    title='Error Variation with Number of samples',
)
plt.grid(True)
plt.legend()
plt.show()