# SCDAA Coursework

In [1]:
import numpy as np
import scipy
import matplotlib.pyplot as plt
import torch
from scipy.integrate import solve_ivp

Exercise1.1   

In [125]:

class LQR:
    def __init__(self, H, M, C, D, R, T,sigma):
        self.H = torch.tensor(H, dtype=torch.float32)
        self.M = torch.tensor(M, dtype=torch.float32)
        self.C = torch.tensor(C, dtype=torch.float32)
        self.D = torch.tensor(D, dtype=torch.float32)
        self.R = torch.tensor(R, dtype=torch.float32)
        self.T = T
        self.sigma = sigma 

    def solve_ricatti_ode(self, time_grid):
        H, M, C, D, R = self.H.numpy(), self.M.numpy(), self.C.numpy(), self.D.numpy(), self.R.numpy()
        T = self.T

        def ricatti_ode(t, S_flat):
            S = S_flat.reshape(2, 2)
            dSdt = -2 * H.T @ S + S @ M @ np.linalg.inv(D) @ M.T @ S - C
            return dSdt.flatten()
        
        sol = solve_ivp(ricatti_ode, [T, time_grid[0]], R.flatten(), t_eval=time_grid, vectorized=True,rtol=1e-6, atol=1e-9)
        S_values = sol.y.T.reshape(-1, 2, 2)
        S_values_tensor = torch.tensor(S_values, dtype=torch.float32)
        S_values_reversed = torch.flip(S_values_tensor, [0])
        return S_values_reversed
    
    def control_problem_value(self, t, x):
        S_values = self.solve_ricatti_ode(t)
        v_values = torch.zeros(x.size(0), 1, dtype=torch.float32)  # 初始化 v_values

        # 循环遍历批次中的样本
        for i in range(x.size(0)):
            x_i = x[i]  # x_i 形状为 (1, 2)
            S_i = S_values[i]  # S_i 形状为 (2, 2)
            v_i = torch.matmul(torch.matmul(x_i.transpose(0, 1), S_i), x_i)  # 计算 x_i * S_i * x_i'

            integral_value = 0  # 初始化积分值
            if self.sigma is not None:  # 如果指定了 sigma
                sigma_mat = torch.tensor(self.sigma, dtype=torch.float32)
                # 从当前时间点到终止时间 T 的积分
                for j in range(i, len(t) - 1):
                    S_j = S_values[j]  # 获取时间点 t_j 的 S 值
                    tr_value = torch.trace(torch.matmul(torch.matmul(sigma_mat, sigma_mat.transpose(0, 1)), S_j))
                    delta_t = t[j + 1] - t[j]  # 计算时间差
                    integral_value += tr_value * delta_t  # 累积积分影响

            v_i += integral_value  # 将积分的影响加到 v_i 上
            v_values[i] = v_i

        return v_values.squeeze()
    
    def markov_control_function(self, t, x):
        # t_grid用于解决Ricatti ODE，确保是numpy数组
        S_values = self.solve_ricatti_ode(t)  # 解决Ricatti ODE
        # 计算每个样本的控制向量a
        a_values = [-torch.inverse(self.D) @ self.M.T @ torch.tensor(S_values[i], dtype=torch.float32) @ x_[None, :].T for i, x_ in enumerate(x)]
        # 将列表中的张量堆叠成一个新的张量
        a_values = torch.stack(a_values).squeeze(2)  # 移除多余的维度，确保最终形状是批次大小×2
    
        return a_values

# Example of how to initialize and use the class (without actual matrices and T)
# H, M, C, D, R are matrices and T is a scalar
# lqr = LQR(H=[[0, 1], [-1, 0]], M=[[1, 0], [0, 1]], C=[[1, 0], [0, 1]], D=[[1, 0], [0, 1]], R=[[1, 0], [0, 1]], T=1)
# time_grid = torch.linspace(0, 1, steps=100)  # Example time grid
# x = torch.tensor([[[1.0, 2.0]]])  # Example state tensor
# print(lqr.markov_control_function(time_grid, x))

Exercise 1.2

Some assumption for the model. We set the value for matrix H, M, C, D, R and sigma as follow. And time T is  1 years.

In [83]:
H = [[0, 1], [-1, 0]]
M = [[1, 0], [0, 1]]
C = [[1, 0], [0, 1]]
D = [[1, 0], [0, 1]]
R = [[1, 0], [0, 1]]
T = 1

# Initialize the LQR class instance
sigma = torch.tensor([[0.1], [0.2]])
lqr = LQR(H=H, M=M, C=C, D=D, R=R, T=T,sigma=sigma)

# Example time grid
time_grid = torch.linspace(0, T, steps=100)

# Example state tensor
initial_x = torch.tensor([[1.0], [2.0]])  # shape: (1, 2)

# 扩展x以匹配time_grid的长度
# 我们需要的形状是(batch_size, 1, 2)，其中batch_size等于time_grid的长度
x = initial_x.repeat(time_grid.size(0), 1, 1)
# Compute the Markov control function
markov_control = lqr.markov_control_function(time_grid, x)

# Compute the S values
S_values = lqr.solve_ricatti_ode(time_grid)

# Compute the control problem value (v value)
v_values = lqr.control_problem_value(time_grid, x)

# Print the results
print("Markov Control Function:")
print(markov_control)

print("\nS Values:")
print(S_values)

print("\nControl Problem Values (v Values):")
print(v_values)

print(x)

  a_values = [-torch.inverse(self.D) @ self.M.T @ torch.tensor(S_values[i], dtype=torch.float32) @ x_[None, :].T for i, x_ in enumerate(x)]
  sigma_mat = torch.tensor(self.sigma, dtype=torch.float32)


Markov Control Function:
tensor([[[[ 1.8959],
          [-1.1855]]],


        [[[ 1.8948],
          [-1.1873]]],


        [[[ 1.8937],
          [-1.1891]]],


        [[[ 1.8926],
          [-1.1909]]],


        [[[ 1.8914],
          [-1.1927]]],


        [[[ 1.8902],
          [-1.1946]]],


        [[[ 1.8890],
          [-1.1965]]],


        [[[ 1.8878],
          [-1.1985]]],


        [[[ 1.8865],
          [-1.2005]]],


        [[[ 1.8852],
          [-1.2025]]],


        [[[ 1.8838],
          [-1.2046]]],


        [[[ 1.8825],
          [-1.2068]]],


        [[[ 1.8811],
          [-1.2089]]],


        [[[ 1.8797],
          [-1.2112]]],


        [[[ 1.8782],
          [-1.2134]]],


        [[[ 1.8767],
          [-1.2157]]],


        [[[ 1.8752],
          [-1.2181]]],


        [[[ 1.8736],
          [-1.2205]]],


        [[[ 1.8720],
          [-1.2230]]],


        [[[ 1.8703],
          [-1.2255]]],


        [[[ 1.8686],
          [-1.2281]]],


        [

In [119]:
# 模拟过程
def wiener_process(dt):   
    '''
    这个function输入 dt
    输出 dw的值
    '''            
    dW = np.random.normal(0, np.sqrt(dt))
    return dW

def simulation (lqr,x,T,N):
    '''
    这个function模拟了单个路径
    输入：
    lqr model
    初始的x
    时间网格T and N
    '''
    time_grid = torch.linspace(0, T, steps=N+1)
    dt = T/N
    x_values = torch.zeros((N+1, 2, 1))
    x_values[0] = x
    S_values = lqr.solve_ricatti_ode(time_grid)
    for n in range(N):
        x_n = x_values[n]
        S_n = S_values[n]  # 获取当前时间点的 S 值
        dW =  wiener_process(dt) # Wiener 过程的增量
            # 计算控制效应的第二项
        control_effect = lqr.M @ torch.inverse(lqr.D) @ lqr.M.T @ S_n @ x_n
    
        # 生成Wiener过程的增量dW，假设它是形状为(2, 1)的，与x_n匹配
        sigma_dW =  lqr.sigma * dW
    
        # 更新状态
        x_next = x_n + dt * (lqr.H @ x_n - control_effect) + sigma_dW
        # 存储下一个时间点的状态
        x_values[n+1] = x_next
    return x_values

def simulation_ntimes(lqr,x,T,N,n):
    time_grid = torch.linspace(0, T, steps=N+1)
    final_v = np.zeros(n)
    initial_v = lqr.control_problem_value(time_grid, simulation (lqr,x,T,N))[0]
    for i in range(n):
        final_v[i] = lqr.control_problem_value(time_grid, simulation (lqr,x,T,N))[-1]
    return initial_v,final_v
    

In [120]:
x = simulation (lqr,initial_x,T,N=100)
time_grid = torch.linspace(0, T, steps=101)
markov_control = lqr.markov_control_function(time_grid, x)

# Compute the S values
S_values = lqr.solve_ricatti_ode(time_grid)

# Compute the control problem value (v value)
v_values = lqr.control_problem_value(time_grid, x)
print(v_values)

tensor([0.6078, 0.5969, 0.5842, 0.5547, 0.5850, 0.5624, 0.5758, 0.5872, 0.6024,
        0.6110, 0.6085, 0.6498, 0.6404, 0.6098, 0.6384, 0.6415, 0.6507, 0.6418,
        0.6928, 0.7080, 0.7281, 0.7213, 0.7316, 0.7487, 0.7711, 0.8232, 0.8710,
        0.8814, 0.8805, 0.8907, 0.8765, 0.9690, 0.9290, 0.9346, 0.9563, 0.9546,
        1.0208, 1.0402, 1.0790, 1.1019, 1.1282, 1.1347, 1.1140, 1.1386, 1.1905,
        1.2765, 1.2602, 1.3087, 1.4098, 1.4143, 1.3950, 1.4270, 1.4602, 1.5183,
        1.5464, 1.5755, 1.5937, 1.6022, 1.6543, 1.6666, 1.7175, 1.6820, 1.9215,
        1.9945, 1.9202, 2.0309, 2.1223, 2.2340, 2.2389, 2.2203, 2.2598, 2.2852,
        2.2869, 2.2864, 2.2873, 2.3166, 2.2909, 2.3599, 2.6077, 2.6049, 2.7966,
        2.6197, 2.4963, 2.4013, 2.3325, 2.2910, 2.2679, 2.2296, 2.2000, 2.1948,
        2.1319, 1.9477, 1.8373, 1.8459, 1.6471, 1.4233, 1.6141, 1.5466, 1.3131,
        1.0997, 0.9425])


  a_values = [-torch.inverse(self.D) @ self.M.T @ torch.tensor(S_values[i], dtype=torch.float32) @ x_[None, :].T for i, x_ in enumerate(x)]
  sigma_mat = torch.tensor(self.sigma, dtype=torch.float32)


In [124]:
initial_v,final_v= simulation_ntimes(lqr,initial_x,T,N=100,n=2000)
print(np.mean(final_v),initial_v)

  sigma_mat = torch.tensor(self.sigma, dtype=torch.float32)


1.1298177636936306 tensor(0.6078)


In [117]:
print(np.mean(final_v),initial_v)

1.10738555341959 tensor(0.6078)


Define the error.

Simulation