# Centered only

In [7]:
import numpy as np
from sklearn.decomposition import PCA

# Two raw data columns.
x = np.array([8.895, 8.960, 8.630, 9.500, 8.370])
y = np.array([40, 40, 110, 100, 505])

# Stack the columns side by side: one row per sample, one column per variable.
data = np.vstack((x, y)).T

# 1. Center the data by subtracting each column's mean.
data_mean = data.mean(axis=0)
data_centered = data - data_mean

# 2. Covariance matrix of the centered data (columns are the variables).
cov_matrix_centered = np.cov(data_centered, rowvar=False)
print("中心化后的协方差矩阵：\n", cov_matrix_centered)

# 3. Fit PCA on the centered data.
pca = PCA().fit(data_centered)

# 4. Pull out the fitted components and their explained variances.
pca_components, pca_variance = pca.components_, pca.explained_variance_

print("PCA向量：\n", pca_components)
print("PCA方差：\n", pca_variance)

# 5. Coordinates of the centered data in the PCA space.
pca_projection = pca.transform(data_centered)
print("投影后的坐标：\n", pca_projection)


中心化后的协方差矩阵：
 [[ 1.783050e-01 -5.302375e+01]
 [-5.302375e+01  3.848000e+04]]
PCA向量：
 [[-0.00137796  0.99999905]
 [ 0.99999905  0.00137796]]
PCA方差：
 [3.84800731e+04 1.05240401e-01]
投影后的坐标：
 [[-1.18999920e+02 -1.39977089e-01]
 [-1.19000010e+02 -7.49771509e-02]
 [-4.89996214e+01 -3.08519740e-01]
 [-5.90008107e+01  5.47699849e-01]
 [ 3.46000362e+02 -2.42258690e-02]]


# Standardized 

In [14]:
import numpy as np
from sklearn.decomposition import PCA

# Two raw data columns.
x = np.array([8.895, 8.960, 8.630, 9.500, 8.370])
y = np.array([40, 40, 110, 100, 505])

# Stack the columns side by side: one row per sample, one column per variable.
data = np.vstack((x, y)).T

# 1. Standardize: subtract each column's mean and divide by its standard
#    deviation. np.std is called with its default ddof (0), i.e. it divides by n.
data_mean = np.mean(data, axis=0)
data_std = np.std(data, axis=0)
data_standardized = (data - data_mean) / data_std

# 2. Covariance matrix of the standardized data.
#    np.cov divides by (n - 1) by default while the standardization above
#    divided by n, so the raw diagonal comes out as n / (n - 1) instead of 1.
#    Derive that factor from the sample count rather than hard-coding 1.25
#    (which is only correct for n == 5), and divide it out when printing so
#    the displayed matrix has a unit diagonal.
n_samples = data_standardized.shape[0]
ddof_mismatch = n_samples / (n_samples - 1)
cov_matrix_standardized = np.cov(data_standardized, rowvar=False)
print("标准化后数据的协方差矩阵：\n", cov_matrix_standardized / ddof_mismatch)

# 3. Fit PCA on the standardized data.
pca = PCA()
pca.fit(data_standardized)

# 4. Pull out the fitted components and their explained variances.
pca_components = pca.components_
pca_variance = pca.explained_variance_

print("PCA向量：\n", pca_components)
print("PCA方差：\n", pca_variance)

# 5. Coordinates of the standardized data in the PCA space.
pca_projection = pca.transform(data_standardized)
print("投影后的坐标：\n", pca_projection)


标准化后数据的协方差矩阵：
 [[ 1.         -0.64013437]
 [-0.64013437  1.        ]]
PCA向量：
 [[-0.70710678  0.70710678]
 [ 0.70710678  0.70710678]]
PCA方差：
 [2.05016796 0.44983204]
投影后的坐标：
 [[-0.52452267 -0.43465583]
 [-0.64621735 -0.31296114]
 [ 0.25372851 -0.64868437]
 [-1.41540963  0.93985054]
 [ 2.33242114  0.45645079]]


# PCA 默认中心化，这里我们手动实现

# Normalize, then center

In [21]:
import numpy as np
from sklearn.decomposition import PCA

# Two raw data columns.
x = np.array([8.895, 8.960, 8.630, 9.500, 8.370])
y = np.array([40, 40, 110, 100, 505])

# Stack the columns side by side: one row per sample, one column per variable.
data = np.vstack((x, y)).T

# 1. Min-max normalize each column to the [0, 1] range.
data_min = data.min(axis=0)
data_max = data.max(axis=0)
data_normalized = (data - data_min) / (data_max - data_min)

# 2. Center the normalized data by subtracting its column means.
data_centered = data_normalized - data_normalized.mean(axis=0)

# 3. Covariance matrix of the normalized-and-centered data.
cov_matrix_centered = np.cov(data_centered, rowvar=False)
print("归一化并中心化后的协方差矩阵：\n", cov_matrix_centered)

# 4. Fit PCA on the normalized-and-centered data.
pca = PCA().fit(data_centered)

# 5. Pull out the fitted components and their explained variances.
pca_components, pca_variance = pca.components_, pca.explained_variance_

print("PCA向量：\n", pca_components)
print("PCA方差：\n", pca_variance)

# 6. Coordinates of the data in the PCA space.
pca_projection = pca.transform(data_centered)
print("投影后的坐标：\n", pca_projection)


归一化并中心化后的协方差矩阵：
 [[ 0.13963897 -0.10091112]
 [-0.10091112  0.17796277]]
PCA向量：
 [[-0.63774788  0.77024518]
 [ 0.77024518  0.63774788]]
PCA方差：
 [0.2615152  0.05608654]
投影后的坐标：
 [[-0.2106616  -0.14684941]
 [-0.24734621 -0.10254327]
 [ 0.05484963 -0.23147707]
 [-0.45272421  0.34782855]
 [ 0.85588238  0.1330412 ]]


# Center, then scale

In [22]:
import numpy as np
from sklearn.decomposition import PCA

# Two raw data columns.
x = np.array([8.895, 8.960, 8.630, 9.500, 8.370])
y = np.array([40, 40, 110, 100, 505])

# Stack the columns side by side: one row per sample, one column per variable.
data = np.vstack((x, y)).T

# 1. Center the data by subtracting each column's mean.
data_mean = data.mean(axis=0)
data_centered = data - data_mean

# 2. Scale each centered column by dividing it by that column's maximum.
# NOTE(review): data_min is computed but not used by the scaling below;
# confirm whether dividing by (data_max - data_min) was intended.
data_min = data_centered.min(axis=0)
data_max = data_centered.max(axis=0)
data_normalized = data_centered / data_max

# 3. Covariance matrix of the centered-and-scaled data.
cov_matrix_normalized = np.cov(data_normalized, rowvar=False)
print("中心化并归一化后的协方差矩阵：\n", cov_matrix_normalized)

# 4. Fit PCA on the centered-and-scaled data.
pca = PCA().fit(data_normalized)

# 5. Pull out the fitted components and their explained variances.
pca_components, pca_variance = pca.components_, pca.explained_variance_

print("PCA向量：\n", pca_components)
print("PCA方差：\n", pca_variance)

# 6. Coordinates of the data in the PCA space.
pca_projection = pca.transform(data_normalized)
print("投影后的坐标：\n", pca_projection)


中心化并归一化后的协方差矩阵：
 [[ 0.45067372 -0.24363725]
 [-0.24363725  0.32142738]]
PCA向量：
 [[-0.79258376  0.60976305]
 [ 0.60976305  0.79258376]]
PCA方差：
 [0.63811257 0.13398852]
投影后的坐标：
 [[-0.23995786 -0.24932784]
 [-0.32186239 -0.18631576]
 [ 0.21732304 -0.34587393]
 [-0.8965607   0.47461148]
 [ 1.24105791  0.30690604]]
