In [11]:
import pandas as pd
import numpy as np
from sklearn.cross_decomposition import CCA
from scipy.stats import chi2

# 读取数据
data = pd.read_csv("table7_1.txt", sep="\t")

# 提取 (y1, y2) 和 (x1, x2, ..., x8)
Y = data[["y1", "y2"]]
X = data[["x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8"]]

# (a) 计算典型相关系数
cca = CCA(n_components=2)
cca.fit(X, Y)
X_c, Y_c = cca.transform(X, Y)

# 输出典型相关系数
canonical_correlations = [np.corrcoef(X_c[:, i], Y_c[:, i])[0, 1] for i in range(2)]
print("Canonical Correlations:", canonical_correlations)

# (b) 计算典型变量的标准化系数
x_weights = cca.x_weights_
y_weights = cca.y_weights_
print("\nStandardized Coefficients for X:")
print(x_weights)
print("\nStandardized Coefficients for Y:")
print(y_weights)

# (c) 测试每个典型相关系数的显著性
n, p, q = X.shape[0], X.shape[1], Y.shape[1]  # 样本数、X维度、Y维度
min_dim = min(p, q)
wilks_lambdas = np.cumprod([1 - corr**2 for corr in canonical_correlations])

print("\nSignificance Tests:")
for i, wilks_lambda in enumerate(wilks_lambdas):
    df = (p - i) * (q - i)
    chi_square = -(n - 1 - (p + q + 1) / 2) * np.log(wilks_lambda)
    p_value = 1 - chi2.cdf(chi_square, df)
    print(f"Canonical Correlation {i + 1}: Chi-Square = {chi_square:.4f}, p-value = {p_value:.4f}")


Canonical Correlations: [0.6207894352038877, 0.4946498167125937]

Standardized Coefficients for X:
[[ 0.67549159 -0.18697397]
 [ 0.1364265  -0.04131951]
 [-0.02873823  0.78138689]
 [ 0.4068809   0.22912107]
 [ 0.5540905   0.03012205]
 [-0.07873741  0.40707732]
 [-0.16275543 -0.2282434 ]
 [-0.13789773 -0.28558215]]

Standardized Coefficients for Y:
[[ 0.7967089   0.60436324]
 [-0.60436324  0.7967089 ]]

Significance Tests:
Canonical Correlation 1: Chi-Square = 11.4386, p-value = 0.7816
Canonical Correlation 2: Chi-Square = 18.0330, p-value = 0.0118
