In [5]:
import numpy as np
from sklearn.decomposition import PCA, IncrementalPCA

# Synthetic data: 1000 samples x 50 features of i.i.d. standard-normal noise.
# The global legacy RNG is seeded so the draw is reproducible.
np.random.seed(42)
X = np.random.standard_normal(size=(1000, 50))

# Standard PCA: fit the top 5 principal components on the full matrix at once.
# fit() returns the estimator itself, so construction and fitting can be chained.
pca = PCA(n_components=5).fit(X)

# Incremental PCA with many small batches
# Each batch holds only `batch_size` rows (here equal to n_components), and the
# model is updated one batch at a time via partial_fit, in row order.
batch_size = 5
ipca = IncrementalPCA(n_components=5)
batches = (X[lo:lo + batch_size] for lo in range(0, len(X), batch_size))
for batch in batches:
    ipca.partial_fit(batch)

# Compare explained variance
# Print both models' per-component explained-variance ratios, rounded to six
# decimals; the labels are padded so the two arrays line up in the output.
for label, model in (
    ("Explained variance ratio (PCA):  ", pca),
    ("Explained variance ratio (IPCA): ", ipca),
):
    print(label, np.round(model.explained_variance_ratio_, 6))

# Compare components
# A principal axis is only defined up to sign: v and -v describe the same
# direction. Differencing the raw component matrices can therefore report a
# large gap for axes that are merely mirrored. Flip each IPCA component so it
# points the same way as its PCA counterpart (non-negative dot product) before
# taking the element-wise difference.
# NOTE: because of this alignment the printed value may differ from outputs
# recorded by earlier runs that compared the unaligned matrices.
alignment = np.sum(pca.components_ * ipca.components_, axis=1, keepdims=True)
signs = np.where(alignment < 0, -1.0, 1.0)
diff = np.abs(pca.components_ - signs * ipca.components_).max()
print(f"\nMaximum absolute difference between components: {diff:.2e}")

# Compare reconstruction error
# Project X onto each model's 5 components and map it back to feature space,
# then measure the mean squared gap between the two reconstructions. This
# compares the models with each other, not either reconstruction against X.
Z_pca = pca.transform(X)
Z_ipca = ipca.transform(X)
X_pca = pca.inverse_transform(Z_pca)
X_ipca = ipca.inverse_transform(Z_ipca)
residual = X_pca - X_ipca
reconstruction_diff = np.mean(residual**2)
print(f"Mean squared reconstruction difference: {reconstruction_diff:.2e}")



Explained variance ratio (PCA):   [0.02969  0.028286 0.027658 0.027259 0.026527]
Explained variance ratio (IPCA):  [0.025853 0.024345 0.023182 0.022021 0.021579]

Maximum absolute difference between components: 5.50e-01
Mean squared reconstruction difference: 1.79e-01
