# SCDAA Coursework

In [1]:
import numpy as np
import scipy
import matplotlib.pyplot as plt
import torch
from scipy.integrate import solve_ivp

Exercise 1.1

In [58]:

class LQR:
    """Finite-horizon linear-quadratic regulator built on a matrix Riccati ODE.

    The matrices H, M, C, D, R are stored as float32 tensors. S(t) is obtained
    by integrating

        S'(t) = -2 Hᵀ S(t) + S(t) M D⁻¹ Mᵀ S(t) - C,    S(T) = R,

    backwards from T. The value function is evaluated as xᵀ S(t) x (plus a
    trace integral when ``sigma`` is set) and the Markov control as
    -D⁻¹ Mᵀ S(t) x.

    Parameters
    ----------
    H, M, C, D, R : array-like
        Matrices of the problem (nested lists, numpy arrays or tensors).
    T : float
        Terminal time at which S equals R.
    sigma : array-like, optional
        Diffusion matrix. When left as ``None`` the value function omits the
        trace term. It may also be assigned later through ``lqr.sigma``.
    """

    def __init__(self, H, M, C, D, R, T, sigma=None):
        self.H = self._as_matrix(H)
        self.M = self._as_matrix(M)
        self.C = self._as_matrix(C)
        self.D = self._as_matrix(D)
        self.R = self._as_matrix(R)
        self.T = T
        # Kept as given; converted to a tensor only where it is used.
        self.sigma = sigma

    @staticmethod
    def _as_matrix(value):
        """Return an independent float32 tensor copy of ``value``."""
        return torch.as_tensor(value, dtype=torch.float32).detach().clone()

    @staticmethod
    def _check_batch(batch, available):
        """Fail early when there are more state samples than time points."""
        if batch > available:
            raise IndexError(
                f"a batch of {batch} states needs at least {batch} time points, got {available}"
            )

    def solve_ricatti_ode(self, time_grid):
        """Integrate the Riccati ODE backwards from T and sample it on ``time_grid``.

        Parameters
        ----------
        time_grid : torch.Tensor
            1-D tensor of increasing times; ``time_grid[0]`` is the far end of
            the integration interval and every entry must lie within
            ``[time_grid[0], T]``.

        Returns
        -------
        torch.Tensor
            float32 tensor of shape ``(len(time_grid), d, d)`` where ``S[k]``
            is the solution at ``time_grid[k]`` and ``d`` is the size of R.

        Raises
        ------
        RuntimeError
            If the ODE solver reports that the integration failed.
        """
        H, M, C, D, R = (m.numpy() for m in (self.H, self.M, self.C, self.D, self.R))
        shape = R.shape  # state dimension comes from R rather than a hard-coded 2x2
        feedback = M @ np.linalg.inv(D) @ M.T  # constant in time, so computed once

        def ricatti_ode(t, S_flat):
            S = S_flat.reshape(shape)
            dSdt = -2 * H.T @ S + S @ feedback @ S - C
            return dSdt.flatten()

        grid = torch.as_tensor(time_grid)
        # The solver runs from T down to grid[0], so it needs decreasing sample times.
        t_eval = torch.flip(grid, dims=[0]).detach().cpu().numpy()
        sol = solve_ivp(ricatti_ode, [float(self.T), float(grid[0])], R.flatten(), t_eval=t_eval)
        if not sol.success:
            # A partial solution would silently misalign S with the time grid.
            raise RuntimeError(f"Riccati ODE integration failed: {sol.message}")

        S_values = torch.tensor(sol.y.T.reshape(-1, *shape), dtype=torch.float32)
        # Flip back so that index k corresponds to time_grid[k].
        return torch.flip(S_values, [0])

    def _trace_integral(self, t, S_values):
        """Trapezoid estimate of the integral of tr(σσᵀ S(r)) from each t[k] to T.

        The part between the last grid point and T is covered by a single
        trapezoid that uses S(T) = R; it vanishes when the grid ends at T.
        """
        sigma = torch.as_tensor(self.sigma, dtype=torch.float32)
        cov = sigma @ sigma.T
        times = torch.as_tensor(t, dtype=torch.float32)
        # tr(cov @ S_k) for every grid point k.
        integrand = torch.einsum('ij,kji->k', cov, S_values)
        areas = 0.5 * (integrand[1:] + integrand[:-1]) * (times[1:] - times[:-1])
        tail = 0.5 * (integrand[-1] + torch.trace(cov @ self.R)) * (self.T - times[-1])
        # Reverse cumulative sum: entry k adds up the segments from t[k] to t[-1].
        to_grid_end = torch.flip(torch.cumsum(torch.flip(areas, [0]), 0), [0])
        return torch.cat([to_grid_end, areas.new_zeros(1)]) + tail

    def control_problem_value(self, t, x):
        """Evaluate the value function, pairing sample ``x[i]`` with time ``t[i]``.

        Parameters
        ----------
        t : torch.Tensor
            Increasing time grid, passed to ``solve_ricatti_ode``.
        x : torch.Tensor
            Batch of column-vector states of shape ``(batch, d, 1)`` with
            ``batch <= len(t)``.

        Returns
        -------
        torch.Tensor
            float32 tensor of shape ``(batch,)`` holding ``x_iᵀ S(t_i) x_i``,
            plus the trace integral from ``t_i`` to T when ``sigma`` is set.
        """
        S_values = self.solve_ricatti_ode(t)
        batch = x.size(0)
        self._check_batch(batch, S_values.size(0))

        quadratic = torch.matmul(torch.matmul(x.transpose(1, 2), S_values[:batch]), x)
        values = quadratic.reshape(batch)
        if self.sigma is not None:
            values = values + self._trace_integral(t, S_values)[:batch]
        return values.detach().to(torch.float32)

    def markov_control_function(self, t, x):
        """Evaluate the feedback control ``-D⁻¹ Mᵀ S(t_i) x_i`` for each sample.

        Parameters
        ----------
        t : torch.Tensor
            Increasing time grid, passed to ``solve_ricatti_ode``.
        x : torch.Tensor
            Batch of states; sample ``x[i]`` is paired with time ``t[i]``, so
            ``len(x) <= len(t)`` is required.

        Returns
        -------
        torch.Tensor
            The per-sample controls stacked along dimension 0.
        """
        S_values = self.solve_ricatti_ode(t)
        self._check_batch(len(x), S_values.size(0))
        gain = -torch.inverse(self.D) @ self.M.T  # same for every sample
        a_values = torch.stack([gain @ S_i @ x_i for S_i, x_i in zip(S_values, x)])
        # squeeze(dim=1) only has an effect when dimension 1 has length 1.
        return a_values.squeeze(dim=1)

# Example of how to initialize and use the class.
# H, M, C, D, R are matrices and T is a scalar (the terminal time).
# lqr = LQR(H=[[0, 1], [-1, 0]], M=[[1, 0], [0, 1]], C=[[1, 0], [0, 1]], D=[[1, 0], [0, 1]], R=[[1, 0], [0, 1]], T=1)
# time_grid = torch.linspace(0, 1, steps=100)  # Example time grid
# x = torch.tensor([[[1.0], [2.0]]])  # Example state tensor: one column vector, shape (1, 2, 1)
# print(lqr.markov_control_function(time_grid, x))


Exercise 1.2

Model assumptions: we fix the matrices H, M, C, D, R and sigma as follows, and take the time horizon T to be 1 year.

In [59]:
# Problem data: a rotation-type H, identity M, C, D and R, and terminal time T.
T = 1
H = [[0, 1], [-1, 0]]
M, C, D, R = ([[1, 0], [0, 1]] for _ in range(4))  # four independent identity matrices

# Solver for this parameter set.
lqr = LQR(H, M, C, D, R, T)

# 100 equally spaced times on [0, T] and a single column-vector state.
time_grid = torch.linspace(0, T, steps=100)
x = torch.tensor([[[1.0], [2.0]]])

# Feedback control, Riccati solution and value function on that grid.
markov_control = lqr.markov_control_function(time_grid, x)
S_values = lqr.solve_ricatti_ode(time_grid)
v_values = lqr.control_problem_value(time_grid, x)

# Report each result under its heading.
print("Markov Control Function:", markov_control, sep="\n")
print("\nS Values:", S_values, sep="\n")
print("\nControl Problem Values (v Values):", v_values, sep="\n")

Markov Control Function:
tensor([[[ 1.0007],
         [-2.0001]]])

S Values:
tensor([[[ 0.5999, -0.8003],
         [ 0.8003,  0.5999]],

        [[ 0.6031, -0.7978],
         [ 0.7978,  0.6031]],

        [[ 0.6064, -0.7954],
         [ 0.7954,  0.6064]],

        [[ 0.6097, -0.7928],
         [ 0.7928,  0.6097]],

        [[ 0.6131, -0.7903],
         [ 0.7903,  0.6131]],

        [[ 0.6164, -0.7876],
         [ 0.7876,  0.6164]],

        [[ 0.6198, -0.7850],
         [ 0.7850,  0.6198]],

        [[ 0.6233, -0.7822],
         [ 0.7822,  0.6233]],

        [[ 0.6267, -0.7795],
         [ 0.7795,  0.6267]],

        [[ 0.6302, -0.7767],
         [ 0.7767,  0.6302]],

        [[ 0.6337, -0.7738],
         [ 0.7738,  0.6337]],

        [[ 0.6373, -0.7709],
         [ 0.7709,  0.6373]],

        [[ 0.6409, -0.7679],
         [ 0.7679,  0.6409]],

        [[ 0.6445, -0.7649],
         [ 0.7649,  0.6445]],

        [[ 0.6482, -0.7618],
         [ 0.7618,  0.6482]],

        [[ 0.6518, -0.

Define the error.

In [46]:
def calculate_absolute_error(predicted_values, true_values):
    """
    Compute the element-wise absolute error between predictions and true values.

    Parameters:
    predicted_values: predictions, given as a number, list, NumPy array or PyTorch tensor.
    true_values: reference values, in a form that can be subtracted from predicted_values.
    Returns:
    A tensor holding |predicted_values - true_values|.
    """
    # Anything that is not already a tensor is converted with torch.tensor.
    predicted, actual = (
        value if isinstance(value, torch.Tensor) else torch.tensor(value)
        for value in (predicted_values, true_values)
    )
    return (predicted - actual).abs()

def calculate_max_error(predicted_values, true_values):
    """
    Return the largest absolute error between predictions and true values.
    """
    return calculate_absolute_error(predicted_values, true_values).max()

def calculate_mean_error(predicted_values, true_values):
    """
    Return the mean absolute error between predictions and true values.

    Parameters:
    predicted_values: predictions, given as a number, list, NumPy array or PyTorch tensor.
    true_values: reference values, in a form that can be subtracted from predicted_values.
    Returns:
    A zero-dimensional floating-point tensor with the average of the absolute errors.
    """
    absolute_error = calculate_absolute_error(predicted_values, true_values)
    # torch.mean rejects integer tensors, which is what integer lists or arrays produce.
    if not absolute_error.is_floating_point():
        absolute_error = absolute_error.to(torch.get_default_dtype())
    return torch.mean(absolute_error)

Simulation

In [47]:
def fit_monte_carlo_simulation(H, M, C, D, R, sigma, x0, S_values, T, N, num_simulations):
    """Simulate the controlled linear SDE and average the cost at every time step.

    Each path follows the Euler-Maruyama scheme

        x_{k+1} = x_k + (H x_k + M a_k) dt + sigma sqrt(dt) Z_k,
        a_k = -D⁻¹ Mᵀ S_k x_k,

    with Z_k standard normal. All paths are advanced together.

    Parameters
    ----------
    H, M, C, D, R : array-like
        Drift matrices (H, M) and cost matrices (C, D, R).
    sigma : array-like
        Diffusion matrix with one row per state component.
    x0 : array-like
        Initial state; any shape holding one value per state component.
    S_values : array-like
        Riccati solution of shape ``(K, d, d)``.
        NOTE(review): assumed to be sampled on a uniform grid over [0, T];
        step k uses the entry nearest to time k*dt. Confirm against the caller.
    T : float
        Time horizon.
    N : int
        Number of time steps.
    num_simulations : int
        Number of simulated paths.

    Returns
    -------
    torch.Tensor
        Tensor of shape ``(N + 1, 1)``. Row k < N is the path average of the
        running cost rate ``x_kᵀ C x_k + a_kᵀ D a_k``; row N is the path average
        of the terminal cost ``x_Nᵀ R x_N``. The expected total cost is
        approximately ``dt * out[:-1].sum() + out[-1]``.

    Raises
    ------
    ValueError
        If ``N`` or ``num_simulations`` is smaller than 1.
    """
    if N < 1 or num_simulations < 1:
        raise ValueError("N and num_simulations must both be at least 1")

    H, M, C, D, R, sigma, S_values = (
        torch.as_tensor(m, dtype=torch.float32) for m in (H, M, C, D, R, sigma, S_values)
    )
    dt = T / N
    sqrt_dt = dt ** 0.5
    dim = H.shape[0]
    gain = -torch.inverse(D) @ M.T  # a = gain @ S(t) @ x
    last_index = S_values.shape[0] - 1

    # One row per simulated path; states are stored as row vectors.
    state = torch.as_tensor(x0, dtype=torch.float32).reshape(1, dim).repeat(num_simulations, 1)
    average_cost_per_time = torch.zeros(N + 1, 1)

    for i in range(N):
        # Riccati matrix closest to the current time i*dt.
        S_now = S_values[round(i * last_index / N)]
        control = state @ (gain @ S_now).T

        # Running cost rate of every path at this step.
        running = ((state @ C) * state).sum(dim=1) + ((control @ D) * control).sum(dim=1)
        average_cost_per_time[i, 0] = running.mean()

        # Brownian increments scale with sqrt(dt), not dt.
        noise = torch.randn(num_simulations, sigma.shape[1])
        state = state + (state @ H.T + control @ M.T) * dt + (noise @ sigma.T) * sqrt_dt

    # Terminal cost on the final states.
    average_cost_per_time[N, 0] = ((state @ R) * state).sum(dim=1).mean()
    return average_cost_per_time