In [5]:
import numpy as np

# Generate symmetric positive-definite test matrices (used below as synthetic
# Fisher matrices for the KL-divergence experiments).
def generate_positive_definite_matrix(size, random_seed=None):
    """Return a random symmetric positive-definite ``size x size`` matrix.

    The matrix is built as ``A @ A.T + size * I`` where ``A`` has entries
    drawn uniformly from [0, 1).

    Args:
        size: Number of rows/columns of the matrix.
        random_seed: Optional seed. When not ``None`` (``0`` is a valid seed)
            the global NumPy RNG is re-seeded via ``np.random.seed`` before
            sampling, which makes the result reproducible.

    Returns:
        A ``(size, size)`` float ndarray.
    """
    # Compare against None explicitly: a plain truthiness test would silently
    # ignore a seed of 0 and return a non-reproducible matrix.
    if random_seed is not None:
        np.random.seed(random_seed)
    A = np.random.rand(size, size)
    matrix = np.dot(A, A.T)  # A @ A.T is symmetric positive semi-definite
    matrix += np.eye(size) * size  # diagonal shift makes it strictly positive definite
    return matrix

# Build three 9x9 positive-definite matrices, one per fixed seed (1, 2, 3).
fisher_matrix_1, fisher_matrix_2, fisher_matrix_3 = (
    generate_positive_definite_matrix(9, random_seed=seed) for seed in (1, 2, 3)
)

# Last expression of the cell: display all three matrices.
fisher_matrix_1, fisher_matrix_2, fisher_matrix_3

(array([[10.12577294,  1.21069882,  1.11011547,  0.78228318,  1.69442443,
          1.25410396,  0.87661814,  1.04821575,  1.59912196],
        [ 1.21069882, 11.68501862,  2.20794667,  1.9325482 ,  2.34956545,
          1.68875682,  1.87663063,  2.07209697,  2.84363901],
        [ 1.11011547,  2.20794667, 12.7908319 ,  2.74572456,  2.79125288,
          1.61761777,  1.86969273,  3.28915848,  2.57211775],
        [ 0.78228318,  1.9325482 ,  2.74572456, 11.76856726,  2.11964372,
          1.12338014,  2.02543162,  2.66332528,  2.41304543],
        [ 1.69442443,  2.34956545,  2.79125288,  2.11964372, 12.98079889,
          1.81494944,  2.03603196,  2.94135971,  2.9529149 ],
        [ 1.25410396,  1.68875682,  1.61761777,  1.12338014,  1.81494944,
         10.78857999,  0.91111985,  1.29037602,  1.98553776],
        [ 0.87661814,  1.87663063,  1.86969273,  2.02543162,  2.03603196,
          0.91111985, 11.09668109,  2.10529155,  2.14486807],
        [ 1.04821575,  2.07209697,  3.28915848, 

In [6]:
# Dynamic regularization strength.
def get_dynamic_epsilon(matrix):
    """Choose a regularization epsilon from the matrix's conditioning.

    The condition number and the standard deviation of the eigenvalues are
    compared against tiers of thresholds, checked from the most severe to
    the mildest; the first tier that matches decides the result.

    Args:
        matrix: Square 2-D array.

    Returns:
        One of ``1e-2``, ``1e-3``, ``1e-4`` or ``1e-6`` (the fallback).
    """
    condition = np.linalg.cond(matrix)
    spread = np.std(np.linalg.eigvals(matrix))

    # (condition-number limit, eigenvalue-std limit, epsilon).
    # The last tier has no eigenvalue-std criterion, expressed as an
    # infinite limit that can never be exceeded.
    tiers = (
        (1e20, 1e2, 1e-2),
        (1e16, 1e1, 1e-3),
        (1e12, np.inf, 1e-4),
    )
    for condition_limit, spread_limit, epsilon in tiers:
        if condition > condition_limit or spread > spread_limit:
            return epsilon
    return 1e-6

# Eigenvalue clipping: force the spectrum into a fixed, well-conditioned range.
def ensure_positive_definite(matrix, matrix_name=""):
    """Rebuild ``matrix`` with its eigenvalues clipped to ``[1e-2, 1e3]``.

    The matrix is decomposed with ``np.linalg.eigh``, the eigenvalues are
    clipped, and the matrix is reassembled from the clipped spectrum and the
    original eigenvectors. Eigenvalues are printed before and after clipping
    for debugging.

    Args:
        matrix: Square 2-D array, passed to ``np.linalg.eigh``.
        matrix_name: Label used as a prefix in the debug output.

    Returns:
        The reconstructed matrix.
    """
    eigvals, basis = np.linalg.eigh(matrix)
    print(f"{matrix_name} Eigenvalues before clipping: {eigvals}")

    # Keep every eigenvalue within [1e-2, 1e3] to avoid extreme values.
    eigvals_clipped = eigvals.clip(1e-2, 1e3)
    print(f"{matrix_name} Eigenvalues after clipping: {eigvals_clipped}")

    # Reassemble V * diag(lambda) * V^T from the clipped spectrum.
    scaled_basis = basis @ np.diag(eigvals_clipped)
    return scaled_basis @ basis.T

# Matrix regularization.
def regularize_matrix(matrix, matrix_name=""):
    """Symmetrize ``matrix``, add a diagonal shift, and clip its eigenvalues.

    The shift ``epsilon`` comes from ``get_dynamic_epsilon(matrix)`` and is
    always applied; the debug message is only printed when the condition
    number of the input exceeds ``1e12``.

    Args:
        matrix: Square 2-D array.
        matrix_name: Label forwarded to ``ensure_positive_definite`` for its
            debug output.

    Returns:
        The result of ``ensure_positive_definite`` on the shifted,
        symmetrized matrix.
    """
    epsilon = get_dynamic_epsilon(matrix)

    # Report badly conditioned inputs for debugging.
    cond_number = np.linalg.cond(matrix)
    if cond_number > 1e12:
        print(f"Matrix condition number is high ({cond_number}), adding regularization with epsilon {epsilon}.")

    # Average with the transpose to enforce symmetry, then shift the diagonal.
    dimension = matrix.shape[0]
    symmetric_part = (matrix + matrix.T) / 2
    shifted = symmetric_part + epsilon * np.eye(dimension)
    return ensure_positive_definite(shifted, matrix_name)

In [7]:
# Covariance matrix computation.
def compute_covariance(fisher_matrix, matrix_name=""):
    """Return the pseudo-inverse of the regularized Fisher matrix.

    Args:
        fisher_matrix: Square 2-D array, regularized with
            ``regularize_matrix`` before inversion.
        matrix_name: Label forwarded to ``regularize_matrix`` for its debug
            output.

    Returns:
        ``np.linalg.pinv`` of the regularized matrix.
    """
    return np.linalg.pinv(regularize_matrix(fisher_matrix, matrix_name))

cov_matrix_1, cov_matrix_2, cov_matrix_3 = (
    compute_covariance(fisher, label)
    for fisher, label in (
        (fisher_matrix_1, "matrix_1"),
        (fisher_matrix_2, "matrix_2"),
        (fisher_matrix_3, "matrix_3"),
    )
)

# Sanity check: print the shapes of one Fisher matrix and its covariance matrix.
print("Shape of fisher_matrix_1:", fisher_matrix_1.shape)
print("Shape of cov_matrix_1:", cov_matrix_1.shape)

matrix_1 Eigenvalues before clipping: [ 9.01484057  9.03633041  9.15965059  9.33741713  9.38954927 10.03748611
 10.41436269 11.43293203 28.36099243]
matrix_1 Eigenvalues after clipping: [ 9.01484057  9.03633041  9.15965059  9.33741713  9.38954927 10.03748611
 10.41436269 11.43293203 28.36099243]
matrix_2 Eigenvalues before clipping: [ 9.00451524  9.0767807   9.20771987  9.39466887  9.69478936  9.87701605
 10.04183192 10.24125308 27.33613554]
matrix_2 Eigenvalues after clipping: [ 9.00451524  9.0767807   9.20771987  9.39466887  9.69478936  9.87701605
 10.04183192 10.24125308 27.33613554]
matrix_3 Eigenvalues before clipping: [ 9.00513897  9.04038397  9.07541236  9.24475753  9.5273514   9.71937991
  9.86588202 10.50280244 26.38586411]
matrix_3 Eigenvalues after clipping: [ 9.00513897  9.04038397  9.07541236  9.24475753  9.5273514   9.71937991
  9.86588202 10.50280244 26.38586411]
Shape of fisher_matrix_1: (9, 9)
Shape of cov_matrix_1: (9, 9)


In [4]:
# Mean vectors: the column-wise average of each covariance matrix.
# NOTE(review): these are column means of a covariance matrix, not means of a
# sampled distribution; confirm this is the intended "mean" for the KL term.
mean1 = cov_matrix_1.mean(axis=0)
mean2 = cov_matrix_2.mean(axis=0)
mean3 = cov_matrix_3.mean(axis=0)

# Cap applied to the trace and mean-difference terms of the KL divergence.
def dynamic_clip(value, threshold=250):
    """Return ``value`` capped from above at ``threshold``."""
    return threshold if threshold < value else value

def kl_divergence(cov1, mean1, cov2, mean2, matrix_name=""):
    """
    Compute the KL divergence KL(N(mean1, cov1) || N(mean2, cov2)) between
    two multivariate Gaussian distributions.

    The value computed is
    ``0.5 * (tr(cov2^-1 cov1) + ln(det cov2 / det cov1)
             + (mean2 - mean1)^T cov2^-1 (mean2 - mean1) - k)``
    with ``k = cov1.shape[0]``, subject to these adjustments made by this
    implementation:

    * ``cov2`` is passed through ``regularize_matrix`` first; ``cov1`` is
      used as given.
    * The trace term and the mean-difference term are each capped at 250 via
      ``dynamic_clip``, so the result is not the exact KL divergence whenever
      a cap is hit.
    * The final result is floored at 0.

    Intermediate terms are printed for debugging.

    Args:
        cov1: Covariance matrix of the first distribution (square 2-D array).
        mean1: Mean vector of the first distribution.
        cov2: Covariance matrix of the second distribution (square 2-D array).
        mean2: Mean vector of the second distribution.
        matrix_name: Label used as a prefix in the debug output.

    Returns:
        ``max(result, 0)``, or ``np.nan`` when the regularized ``cov2`` has a
        non-positive eigenvalue, cannot be inverted, or either determinant
        is non-positive.
    """
    # Regularize cov2 so that it is positive definite.
    cov2_regularized = regularize_matrix(cov2, matrix_name)

    # Check the eigenvalues. The regularized matrix is symmetric up to
    # round-off, so use eigvalsh: it always returns real eigenvalues, whereas
    # the general eigvals may return complex values for a slightly asymmetric
    # input, which would make the `<= 0` test unreliable.
    eigvals_cov2 = np.linalg.eigvalsh(cov2_regularized)
    if np.any(eigvals_cov2 <= 0):
        print(f"Matrix {matrix_name} has non-positive eigenvalues, skipping KL computation.")
        return np.nan

    # Use the standard matrix inverse.
    try:
        inv_cov2 = np.linalg.inv(cov2_regularized)
    except np.linalg.LinAlgError:
        print(f"Matrix {matrix_name} inversion failed, skipping KL computation.")
        return np.nan

    # Trace term, capped at 250.
    term1 = np.trace(inv_cov2 @ cov1)
    term1 = dynamic_clip(term1, 250)
    print(f"{matrix_name} KL term1 (Trace): {term1}")

    # Log-determinant difference; slogdet avoids overflow/underflow in det.
    sign1, logdet1 = np.linalg.slogdet(cov1)
    sign2, logdet2 = np.linalg.slogdet(cov2_regularized)

    if sign1 <= 0 or sign2 <= 0:
        print(f"Matrix {matrix_name} determinant is non-positive.")
        return np.nan

    term2 = logdet2 - logdet1
    print(f"{matrix_name} KL term2 (Log determinant difference): {term2}")

    # Mean-difference (quadratic form) term, capped at 250.
    mean_diff = mean2 - mean1
    term3 = mean_diff.T @ inv_cov2 @ mean_diff
    term3 = dynamic_clip(term3, 250)
    print(f"{matrix_name} KL term3 (Mean difference): {term3}")

    # Combine the terms into the KL divergence.
    result = 0.5 * (term1 + term2 + term3 - cov1.shape[0])
    print(f"{matrix_name} KL result: {result}")

    # Floor at zero so small negative values are not reported.
    return max(result, 0)

# Compute the KL divergence for each pair of matrices, then print the results.
kl_12, kl_13, kl_23 = (
    kl_divergence(cov_a, mean_a, cov_b, mean_b, label)
    for cov_a, mean_a, cov_b, mean_b, label in (
        (cov_matrix_1, mean1, cov_matrix_2, mean2, "matrix_12"),
        (cov_matrix_1, mean1, cov_matrix_3, mean3, "matrix_13"),
        (cov_matrix_2, mean2, cov_matrix_3, mean3, "matrix_23"),
    )
)

print(f"KL Divergence between matrix 1 and matrix 2: {kl_12}")
print(f"KL Divergence between matrix 1 and matrix 3: {kl_13}")
print(f"KL Divergence between matrix 2 and matrix 3: {kl_23}")

matrix_12 Eigenvalues before clipping: [0.03658262 0.0976453  0.09958442 0.10124615 0.10314919 0.10644435
 0.10860552 0.11017222 0.1110564 ]
matrix_12 Eigenvalues after clipping: [0.03658262 0.0976453  0.09958442 0.10124615 0.10314919 0.10644435
 0.10860552 0.11017222 0.1110564 ]
matrix_12 KL term1 (Trace): 8.982331725398039
matrix_12 KL term2 (Log determinant difference): 0.1528670626168207
matrix_12 KL term3 (Mean difference): 0.0004177117270647277
matrix_12 KL result: 0.06780824987096246
matrix_13 Eigenvalues before clipping: [0.03790008 0.09521368 0.10136041 0.10288822 0.10496197 0.10817041
 0.11018883 0.11061577 0.1110487 ]
matrix_13 Eigenvalues after clipping: [0.03790008 0.09521368 0.10136041 0.10288822 0.10496197 0.10817041
 0.11018883 0.11061577 0.1110487 ]
matrix_13 KL term1 (Trace): 8.934448327105088
matrix_13 KL term2 (Log determinant difference): 0.2487237663239199
matrix_13 KL term3 (Mean difference): 0.000627718971181421
matrix_13 KL result: 0.09189990620009425
matrix_23