This code performs a Monte Carlo simulation to compare two optimization strategies for selecting diffusion weights in a Diffusion Kalman Filter (DKF) network.

The two strategies are minimizing the trace of the covariance matrix (which corresponds to the Mean Squared Error) and minimizing the Bhattacharyya distance (a measure of the similarity between two distributions). The results help identify the set of weights that either minimizes the estimation error of the DKF or makes the DKF's error distribution closest to that of a centralized Kalman Filter (CKF).

In [None]:
!git clone https://github.com/RIPS-2024-Aerospace/Aerospace-Project.git

Cloning into 'Aerospace-Project'...
remote: Enumerating objects: 410, done.[K
remote: Counting objects: 100% (141/141), done.[K
remote: Compressing objects: 100% (87/87), done.[K
remote: Total 410 (delta 100), reused 68 (delta 54), pack-reused 269[K
Receiving objects: 100% (410/410), 26.23 MiB | 6.81 MiB/s, done.
Resolving deltas: 100% (190/190), done.


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp

np.random.seed(163)

# replace with file paths
# %run "DiffKf.ipynb"
# %run "KF.ipynb"
%run "/content/Aerospace-Project/Standard Filters/DiffKf.ipynb"
%run "/content/Aerospace-Project/Standard Filters/KF.ipynb"

MONTE CARLO: COMPARISON OF MINIMIZING TRACE VS MINIMIZING B DISTANCE

In [None]:
from scipy.optimize import minimize

# Bhattacharyya distance function
# def bhattacharyya_distance(mu1, mu2, Sigma1, Sigma2):
#     Sigma = (Sigma1 + Sigma2) / 2
#     inv_Sigma = np.linalg.inv(Sigma)
#     term1 = 1/8 * np.dot(np.dot((mu1 - mu2).T, inv_Sigma), (mu1 - mu2))
#     term2 = 1/2 * np.log(np.linalg.det(Sigma) / np.sqrt(np.linalg.det(Sigma1) * np.linalg.det(Sigma2)))
#     return term1 + term2


# since the means are 0, I'm getting rid of the first term
def bhattacharyya_distance(Sigma1, Sigma2):
    """Return the Bhattacharyya distance between two zero-mean Gaussians.

    Only the covariance term of the distance is evaluated (the mean term
    vanishes when both means are zero):

        0.5 * ln( det((Sigma1 + Sigma2) / 2) / sqrt(det(Sigma1) * det(Sigma2)) )

    The determinants are evaluated in log space so that large or badly
    scaled covariance matrices do not underflow/overflow to 0 or inf, which
    would turn the ratio into nan.

    Args:
        Sigma1: Square covariance matrix of the first distribution.
        Sigma2: Square covariance matrix of the second distribution, same
            shape as ``Sigma1``.

    Returns:
        The distance as a float. ``nan`` is returned when any of the three
        determinants is negative, i.e. the input is not a valid covariance.
    """
    Sigma_mean = (Sigma1 + Sigma2) / 2

    sign_mean, logdet_mean = np.linalg.slogdet(Sigma_mean)
    sign_1, logdet_1 = np.linalg.slogdet(Sigma1)
    sign_2, logdet_2 = np.linalg.slogdet(Sigma2)

    # A negative determinant means the matrix is not positive semi-definite;
    # the logarithm of the determinant ratio is then undefined.
    if min(sign_mean, sign_1, sign_2) < 0:
        return np.nan

    # ln(det_mean / sqrt(det_1 * det_2)) / 2, expanded in log space.
    return (logdet_mean - (logdet_1 + logdet_2) / 2) / 2

# Function to get diffusion covariance
def get_diff_cov(prev_cov, Station_cov, dkf, num_stns, A, H, Q, R, C, C_unweighted, G):
    """Advance the stacked (all-stations) covariance of the diffusion filter.

    Builds block-diagonal / Kronecker "full network" versions of the
    per-station matrices and returns

        F_i @ prev_cov @ F_i.T + G_i @ (ones ⊗ Q) @ G_i.T + D_i @ R_full @ D_i.T

    Args:
        prev_cov: Stacked covariance from the previous call.
        Station_cov: Sequence of per-station covariance matrices; entry ``k``
            becomes the k-th diagonal block of ``P_full``.
        dkf: Object exposing ``nodes``; each node exposes ``nbhrs``, whose
            members expose ``H`` and ``R``.
        num_stns: Number of stations (diagonal blocks).
        A: Per-station state transition matrix.
        H: Per-station measurement matrix.
        Q: Per-station process noise covariance.
        R: Per-station measurement noise covariance.
        C: Weighted diffusion matrix.
        C_unweighted: 0/1 pattern of ``C``.
        G: Sequence of process-noise input matrices; only ``G[0]`` is used.

    Returns:
        The updated stacked covariance matrix.
    """
    station_eye = np.eye(num_stns)
    state_eye = np.eye(A.shape[0])

    def neighbourhood_information(idx):
        # Sum of H^T R^-1 H over every neighbour of node ``idx``.
        contributions = [
            nbr.H.T @ np.linalg.inv(nbr.R) @ nbr.H for nbr in dkf.nodes[idx].nbhrs
        ]
        return np.sum(contributions, axis=0)

    def block_diagonal(diag_block, zero_shape):
        # Assemble a num_stns x num_stns block matrix with zero off-diagonals.
        rows = []
        for row in range(num_stns):
            rows.append([
                diag_block(col) if row == col else np.zeros(zero_shape)
                for col in range(num_stns)
            ])
        return np.block(rows)

    S_full = block_diagonal(neighbourhood_information, A.shape)
    H_full = np.kron(station_eye, H)
    P_full = block_diagonal(lambda k: Station_cov[k], Station_cov[0].shape)
    R_full = np.kron(station_eye, R)

    C_full = np.kron(C, state_eye)
    A_full = np.kron(C_unweighted, state_eye)

    # Factor shared by F_i and G_i: C_full^T (I - P_full S_full).
    weighted_residual = C_full.T @ (np.eye(S_full.shape[1]) - (P_full @ S_full))

    F_i = weighted_residual @ np.kron(station_eye, A)
    G_i = weighted_residual @ np.kron(station_eye, G[0])
    D_i = C_full.T @ P_full @ A_full.T @ H_full.T @ np.linalg.inv(R_full)

    propagated_part = F_i @ prev_cov @ F_i.T
    process_part = G_i @ np.kron(np.ones((num_stns, num_stns)), Q) @ G_i.T
    measurement_part = D_i @ R_full @ D_i.T

    return propagated_part + process_part + measurement_part


# Function to run filters and return covariances
def run_filters(W):
    """Run the centralized KF and the diffusion KF for one weight matrix.

    A 5-station network is simulated for 60 iterations. Every station uses
    the same 4-state model ``A`` with 2 measurements ``H``; the centralized
    filter works on the stacked (Kronecker) system. After each measurement
    update the centralized covariance ``kf.P`` and the stacked diffusion
    covariance from ``get_diff_cov`` are recorded.

    Changes relative to the earlier version of this function:
      * the true state is propagated from the previous true state
        (``A_kf @ truth[i]``) instead of always from the initial state;
      * the centralized filter is started from the same initial estimate as
        the diffusion filter instead of from the true initial state
        (assumes ``KalmanFilter``'s ``x0`` is the initial estimate);
      * the Cholesky factors of the noise covariances are computed once;
      * history arrays that were filled but never read were removed.

    Args:
        W: Diffusion weights, anything reshapeable to the 5x5 adjacency
            pattern. Entries outside the pattern are masked to zero.

    Returns:
        Tuple ``(P_kf, P_dkf_full)``: the centralized KF covariance and the
        stacked diffusion covariance recorded at loop iteration 40.
    """
    dt = 10
    iters = 60
    snapshot_iter = 40  # loop iteration whose covariances are returned

    # Adjacency pattern: each station is linked to itself and two others.
    C_adj = np.array([[1, 1, 0, 0, 1],
                      [1, 1, 1, 0, 0],
                      [0, 1, 1, 1, 0],
                      [0, 0, 1, 1, 1],
                      [1, 0, 0, 1, 1]])
    C = C_adj * np.reshape(W, C_adj.shape)
    C_unweighted = (C != 0).astype(int)
    num_stns = C.shape[1]

    A = np.array([[1, dt, 0, 0], [0, 1, 0, 0], [0, 0, 1, dt], [0, 0, 0, 1]])
    H = np.array([[1, 0, 0, 0], [0, 0, 1, 0]])
    state_dim = A.shape[0]
    measure_dim = H.shape[0]

    # change q to see how it affects the trace and B distance
    q = 0.004
    Q = q * np.array([[(dt**3) / 3, (dt**2) / 2, 0, 0],
                      [(dt**2) / 2, dt, 0, 0],
                      [0, 0, (dt**3) / 3, (dt**2) / 2],
                      [0, 0, (dt**2) / 2, dt]])
    R = np.array([[4, 0], [0, 4]])

    # Stacked system used by the centralized filter.
    A_kf = np.kron(np.eye(num_stns), A)
    H_kf = np.kron(np.eye(num_stns), H)
    Q_kf = np.kron(np.eye(num_stns), Q)
    R_kf = np.kron(np.eye(num_stns), R)

    kf_state_size = A_kf.shape[0]
    kf_measure_size = R_kf.shape[0]

    # Per-station model lists handed to the diffusion filter.
    F = [A for _ in range(num_stns)]
    G = [np.eye(state_dim) for _ in range(num_stns)]
    H_dkf = [H for _ in range(num_stns)]
    Q_dkf = [Q for _ in range(num_stns)]
    R_dkf = [R for _ in range(num_stns)]

    # The covariances never change, so factor them once instead of per draw.
    chol_Q = np.linalg.cholesky(Q_kf)
    chol_R = np.linalg.cholesky(R_kf)

    def procc_noise_kf():
        return chol_Q @ np.random.normal(size=(kf_state_size, 1))

    def measure_noise_kf():
        return chol_R @ np.random.normal(size=(kf_measure_size, 1))

    def split_per_station(stacked, block_size):
        # Cut a stacked column vector into one independent column per station.
        return [stacked[block_size * k:block_size * (k + 1)].copy()
                for k in range(num_stns)]

    # True Initial
    x0_kf = np.array([[np.random.normal(0, np.sqrt(Q_kf[i, i]))
                       for i in range(kf_state_size)]]).T

    # Initial Estimate (shared by both filters)
    x_kf = np.array([[np.random.normal(0, 5) for _ in range(kf_state_size)]]).T
    x_dkf = split_per_station(x_kf, state_dim)

    P_kf = 10 * np.copy(Q_kf)
    P_dkf = [10 * np.copy(Q) for _ in range(num_stns)]

    # Start the centralized filter from the estimate, not from the truth.
    kf = KalmanFilter(A=A_kf, H=H_kf, Q=Q_kf, R=R_kf, P=P_kf, x0=x_kf)
    dkf = DiffKF(C, F, G, H_dkf, R_dkf, Q_dkf, x_dkf, P_dkf)

    truth = np.zeros((iters + 1, kf_state_size, 1))
    truth[0] = x0_kf

    measurements = np.zeros((iters + 1, kf_measure_size, 1))
    measurements[0] = (H_kf @ x0_kf) + measure_noise_kf()

    P_hist_kf = np.zeros((iters, kf_state_size, kf_state_size))
    full_system_P_hist = np.zeros((iters, kf_state_size, kf_state_size))
    prev_cov = np.block([[np.zeros(P_dkf[0].shape) if i != j else dkf.nodes[i].P
                          for j in range(num_stns)] for i in range(num_stns)])

    for i in range(iters):
        kf.update(measurements[i])
        dkf.update(split_per_station(measurements[i], measure_dim))

        station_covs = [dkf.nodes[j].P for j in range(num_stns)]
        prev_cov = get_diff_cov(prev_cov, station_covs, dkf, num_stns,
                                A, H, Q, R, C, C_unweighted, G)
        full_system_P_hist[i] = prev_cov
        P_hist_kf[i] = kf.P

        kf.predict()
        dkf.predict()

        # Propagate from the previous true state, not from the initial one.
        truth[i + 1] = A_kf @ truth[i] + procc_noise_kf()
        measurements[i + 1] = H_kf @ truth[i + 1] + measure_noise_kf()

    return (P_hist_kf[snapshot_iter], full_system_P_hist[snapshot_iter])

# Function to generate a random row-stochastic matrix based on a template
def generate_random_row_stochastic_matrix_template(template):
    """Return a random row-stochastic matrix with the template's zero pattern.

    Every non-zero entry of ``template`` is replaced by a uniform random
    weight, and each row is normalized so its non-zero entries sum to 1.
    Zero entries stay zero. Rows without any non-zero entry are left as all
    zeros.

    Args:
        template: 2-D array-like whose non-zero entries mark the positions
            that receive a weight. It is not modified.

    Returns:
        A new float ndarray of the same shape as ``template``.
    """
    # Force a float copy: with an integer template (e.g. a 0/1 adjacency
    # matrix) a plain .copy() would truncate the random weights to 0 when
    # they are written back.
    matrix = np.array(template, dtype=float)
    for i in range(matrix.shape[0]):
        pos_indices = np.flatnonzero(matrix[i])
        random_values = np.random.rand(len(pos_indices))
        random_values /= random_values.sum()  # Normalize to make row sum to 1
        matrix[i, pos_indices] = random_values
    return matrix

# Function to generate multiple row-stochastic matrices based on a template
def generate_multiple_row_stochastic_matrices_template(n, template):
    """Return a list of ``n`` random row-stochastic matrices.

    Each matrix is produced by an independent call to
    ``generate_random_row_stochastic_matrix_template(template)``.
    """
    matrices = []
    for _ in range(n):
        matrices.append(generate_random_row_stochastic_matrix_template(template))
    return matrices


# Baseline weight matrix; its non-zero pattern is reused by every random draw
template_matrix = np.array([
    [0.34, 0.33, 0, 0, 0.33],
    [0.33, 0.34, 0.33, 0, 0],
    [0, 0.33, 0.34, 0.33, 0],
    [0, 0, 0.33, 0.34, 0.33],
    [0.33, 0, 0, 0.33, 0.34],
])

# Draw 100 random row-stochastic matrices that share the template's pattern
random_matrices = generate_multiple_row_stochastic_matrices_template(100, template_matrix)

# Candidate list: the template comes first, the random draws follow
all_matrices = [template_matrix, *random_matrices]

# For every candidate C record the Bhattacharyya distance between the two
# returned covariances and the trace of each of them
distances = []
traces = []
traces_kf = []
for C in all_matrices:
    reference_cov, cov = run_filters(C)
    distances.append(bhattacharyya_distance(cov, reference_cov))
    traces.append(np.trace(cov))
    traces_kf.append(np.trace(reference_cov))

# Plot the results
# plt.figure(figsize=(10, 6))
# plt.plot(range(len(all_matrices)), distances, marker='o', label='Random Matrices')
# plt.axhline(distances[0], color='r', linestyle='--', label='Template Matrix')
# plt.xlabel('Matrix Index')
# plt.ylabel('Bhattacharyya Distance')
# plt.title('Bhattacharyya Distance vs. C Matrix Variations')
# plt.legend()
# plt.grid(True)
# plt.show()

# Position of the best candidate under each criterion
best_by_trace = np.argmin(traces)        # smallest trace (mean squared error)
best_by_distance = np.argmin(distances)  # smallest Bhattacharyya distance

print("Optimal weights by minimizing trace")
print(all_matrices[best_by_trace])
print()
print("Optimal weights by minimizing Bhattacharya Distance")
print(all_matrices[best_by_distance])
print()
print(traces_kf)  # traces of the first covariance returned by run_filters (CKF)
print(traces)  # traces of the second covariance returned by run_filters (DKF)

Optimal weights by minimizing trace
[[0.34 0.33 0.   0.   0.33]
 [0.33 0.34 0.33 0.   0.  ]
 [0.   0.33 0.34 0.33 0.  ]
 [0.   0.   0.33 0.34 0.33]
 [0.33 0.   0.   0.33 0.34]]

Optimal weights by minimizing Bhattacharya Distance
[[0.10404715 0.88679962 0.         0.         0.00915323]
 [0.62430303 0.29078548 0.08491149 0.         0.        ]
 [0.         0.14320133 0.19211991 0.66467876 0.        ]
 [0.         0.         0.38717682 0.2128951  0.39992809]
 [0.07824148 0.         0.         0.16561947 0.75613906]]

[30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.683245687002977, 30.68324568700297

PARTICLE SWARM OPTIMIZATION - poor performance

BFGS