In [10]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Step 1: Generate a synthetic dataset with 10 variables
np.random.seed(0)  # For reproducibility
n_samples = 100

# Every column is drawn independently, so the variables are uncorrelated by
# construction and differ only in their standard deviation (Variable 4 is the
# sum of two independent draws). Any eigenvalue spread found below therefore
# comes from sampling noise, not from real shared structure.
data = np.column_stack([
    np.random.normal(0, 1, n_samples),   # Variable 1
    np.random.normal(0, 2, n_samples),   # Variable 2
    np.random.normal(0, 1.5, n_samples), # Variable 3
    np.random.normal(0, 1, n_samples) * 2 + np.random.normal(0, 1, n_samples) * 0.5,  # Variable 4
    np.random.normal(0, 2, n_samples),   # Variable 5
    np.random.normal(0, 1.2, n_samples), # Variable 6
    np.random.normal(0, 1.8, n_samples), # Variable 7
    np.random.normal(0, 1.5, n_samples), # Variable 8
    np.random.normal(0, 1, n_samples),   # Variable 9
    np.random.normal(0, 2.2, n_samples)  # Variable 10
])

# Step 2: Standardize the data
# StandardScaler divides by the population standard deviation (ddof=0).
scaler = StandardScaler()
data_normalized = scaler.fit_transform(data)

# Step 3: Perform PCA (keep every component so the full spectrum is available)
pca = PCA(n_components=data.shape[1])
pca.fit(data_normalized)

# Eigenvalues
# explained_variance_ is based on the sample covariance (divides by
# n_samples - 1). Combined with the ddof=0 standardization above, the
# eigenvalues average n_samples / (n_samples - 1) instead of exactly 1.
eigenvalues = pca.explained_variance_

# Step 4: Apply Kaiser's Criterion
# Kaiser's rule retains components whose eigenvalue exceeds the average
# eigenvalue. That average is 1 only on the correlation-matrix scale, so compare
# against the actual mean rather than a hard-coded 1; otherwise the cutoff is
# too permissive by a factor of n_samples / (n_samples - 1).
kaiser_threshold = eigenvalues.mean()
factors_to_retain = int(np.count_nonzero(eigenvalues > kaiser_threshold))

# Step 5: Display a table of eigenvalues
eigenvalue_table = pd.DataFrame({
    'Factor': range(1, len(eigenvalues) + 1),
    'Eigenvalue': eigenvalues
})
print("Table of Eigenvalues:\n")
print(eigenvalue_table)

# Step 6: Print the number of factors to retain
print(f"\nNumber of factors to retain based on Kaiser's Criterion: {factors_to_retain}")

Table of Eigenvalues:

   Factor  Eigenvalue
0       1    1.666635
1       2    1.342876
2       3    1.161448
3       4    1.107718
4       5    1.027012
5       6    0.959039
6       7    0.905032
7       8    0.809215
8       9    0.620485
9      10    0.501549

Number of factors to retain based on Kaiser's Criterion: 5
