In [17]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# สร้าง DataFrame
data = {
    'Daily Steps': [8000, 6000, 10000, 7500, 9000],
    'Hours of Sleep': [7, 6, 8, 7, 6],
    'Daily Calorie Intake': [2200, 2000, 2500, 2300, 2400]
}

df = pd.DataFrame(data)

# Standardize ข้อมูล
scaler = StandardScaler()
df_standardized = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
mean_values = scaler.mean_
std_dev_values = np.sqrt(scaler.var_)

# แสดงผลลัพธ์
print("Standardized Data:")
print(df_standardized)


Standardized Data:
   Daily Steps  Hours of Sleep  Daily Calorie Intake
0    -0.073721        0.267261             -0.464991
1    -1.548141       -1.069045             -1.627467
2     1.400699        1.603567              1.278724
3    -0.442326        0.267261              0.116248
4     0.663489       -1.069045              0.697486


In [18]:
from sklearn.decomposition import PCA

# ทำ PCA
pca = PCA()
pca.fit(df_standardized)

# คำนวณ eigenvalue
eigenvalues = pca.explained_variance_

# คำนวณ percentage of eigenvalue
percentage_of_eigenvalues = eigenvalues / sum(eigenvalues) * 100

# แสดงผลลัพธ์
print("Eigenvalues:")
print(eigenvalues)
print("\nPercentage of Eigenvalues:")
print(percentage_of_eigenvalues)


Eigenvalues:
[3.06132691 0.6282033  0.06046978]

Percentage of Eigenvalues:
[81.6353844  16.75208811  1.61252749]


In [19]:
# ทำ PCA โดยกำหนด n_components=3
pca_3d = PCA(n_components=3)
pca_result_3d = pca_3d.fit_transform(df_standardized)

# แสดงผลลัพธ์
print("PCA Result (n_components=3):")
df_pca_3d = pd.DataFrame(data=pca_result_3d, columns=['PC1', 'PC2', 'PC3'])
print(df_pca_3d)


PCA Result (n_components=3):
        PC1       PC2       PC3
0  0.192642 -0.430669 -0.265505
1  2.477648 -0.214868 -0.057761
2 -2.445685 -0.427375 -0.067049
3  0.064978 -0.336170  0.404181
4 -0.289583  1.409081 -0.013865


In [20]:
# แสดง Percentage of Variance Explained
percentage_of_variance_explained = pca_3d.explained_variance_ratio_
print("Percentage of Variance Explained (n_components=3):")
print(percentage_of_variance_explained)


Percentage of Variance Explained (n_components=3):
[0.81635384 0.16752088 0.01612527]


In [21]:
# ทำ PCA โดยกำหนด n_components=2
pca_2d = PCA(n_components=2)
pca_result_2d = pca_2d.fit_transform(df_standardized)
print(df_standardized,pca_result_2d)

# แสดงผลลัพธ์
df_pca_2d = pd.DataFrame(data=pca_result_2d, columns=['PC1', 'PC2'])
print(df_pca_2d)


   Daily Steps  Hours of Sleep  Daily Calorie Intake
0    -0.073721        0.267261             -0.464991
1    -1.548141       -1.069045             -1.627467
2     1.400699        1.603567              1.278724
3    -0.442326        0.267261              0.116248
4     0.663489       -1.069045              0.697486 [[ 0.19264239 -0.4306685 ]
 [ 2.47764812 -0.21486839]
 [-2.44568512 -0.42737451]
 [ 0.06497801 -0.33616995]
 [-0.28958339  1.40908134]]
        PC1       PC2
0  0.192642 -0.430669
1  2.477648 -0.214868
2 -2.445685 -0.427375
3  0.064978 -0.336170
4 -0.289583  1.409081


In [22]:
# # สร้าง inverse_transformer จาก PCA object ที่ component=3
# inverse_transformer_3 = pca_3d.inverse_transform(pca_result_3d)
# print(inverse_transformer_3,"\n")

# # Inverse Standardization
# inverse_transformed_data_3 = inverse_transformer_3 * std_dev_values + mean_values

# print(inverse_transformed_data_3)


In [23]:
import numpy as np

# Assuming you have PCA object 'pca_3d' with n_components=3
# Assuming you have mean_values and std_dev_values used during standardization

# Inverse PCA
inverse_transformer_3 = pca_3d.inverse_transform(pca_result_3d)  # Replace 'X_pca' with your PCA result
print("Inverse PCA:\n", inverse_transformer_3, "\n")

# Inverse Standardization
inverse_transformed_data_3 = inverse_transformer_3 * std_dev_values + mean_values
print("Inverse Standardization:\n", inverse_transformed_data_3)


Inverse PCA:
 [[-0.07372098  0.26726124 -0.46499055]
 [-1.54814054 -1.06904497 -1.62746694]
 [ 1.40069858  1.60356745  1.27872403]
 [-0.44232587  0.26726124  0.11624764]
 [ 0.6634888  -1.06904497  0.69748583]] 

Inverse Standardization:
 [[8.0e+03 7.0e+00 2.2e+03]
 [6.0e+03 6.0e+00 2.0e+03]
 [1.0e+04 8.0e+00 2.5e+03]
 [7.5e+03 7.0e+00 2.3e+03]
 [9.0e+03 6.0e+00 2.4e+03]]


In [24]:
# สร้าง inverse_transformer จาก PCA object ที่ component=2
inverse_transformer_2 = pca_2d.inverse_transform(pca_result_2d)
print(inverse_transformer_2,"\n")
# Inverse Standardization
inverse_transformed_data_2 = inverse_transformer_2 * std_dev_values + mean_values
print(inverse_transformed_data_2)


[[-0.2635086   0.27397534 -0.27944083]
 [-1.58942891 -1.06758431 -1.58710052]
 [ 1.35277092  1.60526298  1.32558148]
 [-0.15341101  0.25704033 -0.16621585]
 [ 0.6535776  -1.06869434  0.70717572]] 

[[7.74255954e+03 7.00502437e+00 2.23192318e+03]
 [5.94399372e+03 6.00109305e+00 2.00694490e+03]
 [9.93498775e+03 8.00126882e+00 2.50806166e+03]
 [7.89190319e+03 6.99235137e+00 2.25140314e+03]
 [8.98655579e+03 6.00026239e+00 2.40166711e+03]]


In [25]:
from sklearn.metrics import mean_squared_error

# คำนวณ Mean Square Error (MSE) ระหว่างผลลัพธ์การประมาณกลับข้อมูล และข้อมูลต้นฉบับ
mse = mean_squared_error(df_standardized, inverse_transformed_data_3)  # หรือ inverse_transformed_data_2
print("Mean Square Error (MSE):", mse)

Mean Square Error (MSE): 24224997.09277771
