In [22]:
import numpy as np
import pandas as pd

In [23]:
data = pd.read_csv("./data/T9-12.DAT", sep="\s+", header=None,
            names=['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])

In [24]:
data_standard = (data - data.mean()) / data.std()

In [25]:
from sklearn.decomposition import FactorAnalysis

fa2 = FactorAnalysis(n_components=2)
fa3 = FactorAnalysis(n_components=3)
fa2.fit(data_standard)
fa3.fit(data_standard)

In [26]:
np.around(fa2.components_.T, 2)

array([[ 0.95,  0.03],
       [ 0.95, -0.02],
       [ 0.9 ,  0.21],
       [ 0.57,  0.77],
       [ 0.67,  0.28],
       [ 0.57, -0.17],
       [ 0.96, -0.19]])

In [27]:
np.around(fa2.noise_variance_, 2)

array([0.07, 0.07, 0.12, 0.06, 0.45, 0.63, 0.02])

In [28]:
fa2.components_.T

array([[ 0.95301314,  0.02521284],
       [ 0.95437358, -0.01853707],
       [ 0.90339518,  0.21063999],
       [ 0.56982344,  0.77041681],
       [ 0.67219252,  0.27573925],
       [ 0.56913034, -0.17198317],
       [ 0.96167478, -0.1868867 ]])

In [29]:
np.around(fa3.components_.T, 2)

array([[-0.89, -0.28, -0.26],
       [-0.82, -0.15, -0.5 ],
       [-0.93, -0.14, -0.09],
       [-0.83,  0.52,  0.14],
       [-0.72,  0.01, -0.16],
       [-0.59, -0.74,  0.28],
       [-0.77, -0.32, -0.49]])

In [30]:
np.around(fa3.noise_variance_, 2)

array([0.04, 0.03, 0.09, 0.01, 0.44, 0.01, 0.04])

In [31]:
fa2_rotate = FactorAnalysis(n_components=2, rotation='varimax')
fa2_rotate.fit(data_standard)
fa3_rotate = FactorAnalysis(n_components=3, rotation='varimax')
fa3_rotate.fit(data_standard)

In [32]:
np.around(fa2_rotate.components_.T, 2)

array([[0.88, 0.37],
       [0.9 , 0.33],
       [0.77, 0.52],
       [0.25, 0.92],
       [0.53, 0.5 ],
       [0.59, 0.05],
       [0.96, 0.17]])

In [33]:
np.around(fa2_rotate.noise_variance_, 2)

array([0.07, 0.07, 0.12, 0.06, 0.45, 0.63, 0.02])

In [34]:
np.around(fa3_rotate.components_.T, 2)

array([[-0.36, -0.44, -0.78],
       [-0.31, -0.19, -0.9 ],
       [-0.53, -0.44, -0.64],
       [-0.95, -0.03, -0.26],
       [-0.46, -0.21, -0.54],
       [-0.04, -0.94, -0.29],
       [-0.17, -0.3 , -0.91]])

In [35]:
np.around(fa3_rotate.noise_variance_, 2)

array([0.04, 0.03, 0.09, 0.01, 0.44, 0.01, 0.04])

In [37]:
for i in range(7):
    print(" & "
          .join(
        [str(np.around(fa2_rotate.components_.T[i][j], 2)) for j in range(2)] +
        [str(np.around(fa3_rotate.components_.T[i][j], 2)) for j in range(3)]
    ))

0.88 & 0.37 & -0.36 & -0.44 & -0.78
0.9 & 0.33 & -0.31 & -0.19 & -0.9
0.77 & 0.52 & -0.53 & -0.44 & -0.64
0.25 & 0.92 & -0.95 & -0.03 & -0.26
0.53 & 0.5 & -0.46 & -0.21 & -0.54
0.59 & 0.05 & -0.04 & -0.94 & -0.29
0.96 & 0.17 & -0.17 & -0.3 & -0.91


In [38]:
fa2.transform(data_standard)

array([[-0.9739665 , -0.02915019],
       [-1.46826049, -0.24536029],
       [-0.55347387, -0.65121202],
       [ 0.09515628,  0.59973273],
       [ 0.18535585, -0.43821143],
       [-0.76408395,  0.20445631],
       [-0.58140362, -0.33600054],
       [ 2.07302098,  0.60121101],
       [ 0.18784636, -0.3256912 ],
       [ 0.95297479,  0.0659501 ],
       [ 0.32648043,  0.10576176],
       [ 0.08899151, -0.38373239],
       [ 0.6353074 ,  1.03246205],
       [ 0.10416132, -1.14735972],
       [ 0.26580757,  0.11025924],
       [-1.60657592, -0.36206716],
       [ 0.13348251, -0.23107591],
       [ 0.44338555, -0.39268966],
       [-0.24961167, -1.55683002],
       [ 0.08339221,  1.73650279],
       [-1.36258431,  0.5037334 ],
       [ 0.6322209 , -2.37429702],
       [-1.41766083,  0.15171578],
       [ 0.71124037, -0.22979079],
       [ 1.15837543,  0.76066019],
       [-0.68332041,  0.05827651],
       [ 1.00655304,  0.18827026],
       [ 1.40430854, -1.53882066],
       [-1.20827041,

In [48]:
print(" & "
      .join(
    [str(np.around((fa2.components_ ** 2).sum(axis=0), 2)[i]) for i in range(7)]
))

0.91 & 0.91 & 0.86 & 0.92 & 0.53 & 0.35 & 0.96


In [71]:
sigma2 = fa2.components_.T @ fa2.components_ + fa2.noise_variance_.T

In [72]:
for i in range(7):
    print(
        " & ".join(
            [str(np.around(sigma2[i][j], 2)) for j in range(7)]
        )
    )

0.98 & 0.98 & 0.99 & 0.63 & 1.1 & 1.16 & 0.93
0.98 & 0.98 & 0.98 & 0.59 & 1.09 & 1.17 & 0.94
0.94 & 0.93 & 0.98 & 0.74 & 1.12 & 1.1 & 0.85
0.63 & 0.6 & 0.8 & 0.98 & 1.05 & 0.82 & 0.42
0.72 & 0.71 & 0.78 & 0.66 & 0.98 & 0.96 & 0.62
0.61 & 0.62 & 0.6 & 0.25 & 0.79 & 0.98 & 0.6
0.98 & 0.99 & 0.95 & 0.47 & 1.05 & 1.21 & 0.98


In [57]:
print(" & "
      .join(
    [str(np.around((fa3.components_ ** 2).sum(axis=0), 2)[i]) for i in range(7)]
))

0.94 & 0.95 & 0.89 & 0.97 & 0.54 & 0.97 & 0.94


In [59]:
print(" & "
      .join(
    [str(np.around(fa3.noise_variance_, 2)[i]) for i in range(7)]
))

0.04 & 0.03 & 0.09 & 0.01 & 0.44 & 0.01 & 0.04


In [55]:
sigma3 = fa3.components_.T @ fa3.components_ + fa3.noise_variance_

In [56]:
for i in range(7):
    print(
        " & ".join(
            [str(np.around(sigma3[i][j], 2)) for j in range(7)]
        )
    )

0.98 & 0.94 & 0.98 & 0.57 & 1.12 & 0.67 & 0.94
0.94 & 0.98 & 0.92 & 0.54 & 1.1 & 0.47 & 0.97
0.93 & 0.86 & 0.98 & 0.69 & 1.12 & 0.64 & 0.85
0.6 & 0.56 & 0.77 & 0.98 & 1.01 & 0.16 & 0.44
0.72 & 0.7 & 0.77 & 0.59 & 0.98 & 0.39 & 0.67
0.7 & 0.49 & 0.71 & 0.15 & 0.81 & 0.98 & 0.59
0.94 & 0.96 & 0.9 & 0.41 & 1.07 & 0.57 & 0.98


In [61]:
data_standard.corr().values

array([[1.        , 0.92607578, 0.88400227, 0.5720363 , 0.70807381,
        0.67440731, 0.92731157],
       [0.92607578, 1.        , 0.84252323, 0.54150803, 0.74590974,
        0.46538802, 0.94429598],
       [0.88400227, 0.84252323, 1.        , 0.70036303, 0.63747116,
        0.64108861, 0.85256816],
       [0.5720363 , 0.54150803, 0.70036303, 1.        , 0.59073605,
        0.14690742, 0.41263945],
       [0.70807381, 0.74590974, 0.63747116, 0.59073605, 1.        ,
        0.38595021, 0.57455327],
       [0.67440731, 0.46538802, 0.64108861, 0.14690742, 0.38595021,
        1.        , 0.56637215],
       [0.92731157, 0.94429598, 0.85256816, 0.41263945, 0.57455327,
        0.56637215, 1.        ]])

In [62]:
for i in range(7):
    print(
        " & ".join(
            [str(np.around(data_standard.corr().values[i][j], 2)) for j in range(7)]
        )
    )

1.0 & 0.93 & 0.88 & 0.57 & 0.71 & 0.67 & 0.93
0.93 & 1.0 & 0.84 & 0.54 & 0.75 & 0.47 & 0.94
0.88 & 0.84 & 1.0 & 0.7 & 0.64 & 0.64 & 0.85
0.57 & 0.54 & 0.7 & 1.0 & 0.59 & 0.15 & 0.41
0.71 & 0.75 & 0.64 & 0.59 & 1.0 & 0.39 & 0.57
0.67 & 0.47 & 0.64 & 0.15 & 0.39 & 1.0 & 0.57
0.93 & 0.94 & 0.85 & 0.41 & 0.57 & 0.57 & 1.0


In [63]:
u = (2/49-1/98) * ((2*7*7+3*7-1)/(6*8))

In [65]:
s1 = data_standard.cov().values

In [74]:
m = 98 * np.log(np.linalg.det(1/2 * (sigma2 + s1))) - 49 * np.log(np.linalg.det(s1)) - 49 * np.log(np.abs(np.linalg.det(sigma2)))

In [76]:
chi = (1-u) * 49 * m
print(chi)

297098.0691864721


In [77]:
m = 98 * np.log(np.linalg.det(1/2 * (sigma3 + s1))) - 49 * np.log(np.linalg.det(s1)) - 49 * np.log(np.abs(np.linalg.det(sigma3)))

In [78]:
chi = (1-u) * 49 * m
print(chi)

212435.7030203306
