In [1]:
import numpy as np
import time
from sklearn.decomposition import PCA

# Load the input data, covariance matrix and timing written by the C++ run.
cpp_data = np.loadtxt('cpp_pca_data.txt')
cpp_cov = np.loadtxt('cpp_pca_cov.txt')
# Read the timing through a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
# NOTE(review): the value is reported below as milliseconds -- confirm the
# C++ side writes the same unit.
with open('cpp_pca_time.txt') as time_file:
    cpp_time = float(time_file.read())

# Time sklearn PCA on the same input data.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() can jump when the system clock is adjusted.
start = time.perf_counter()
pca = PCA()
pca.fit(cpp_data)
py_time = (time.perf_counter() - start) * 1000  # seconds -> ms

# Compare results
print("\nPerformance Comparison:")
print(f"C++ Time: {cpp_time:.2f} ms")
print(f"Python Time: {py_time:.2f} ms")
# The ratio is Python time over C++ time: > 1 means the C++ run was faster,
# < 1 means it was slower. Significant-digit formatting keeps very small
# ratios readable (a fixed ".1f" rounds e.g. 0.0007 down to "0.0x").
if cpp_time > 0:
    print(f"Speedup: {py_time / cpp_time:.4g}x")
else:
    print("Speedup: n/a (C++ time is not positive)")

# Verify correctness
# ndmin=1 keeps the slice below valid even if the file holds a single value.
# NOTE(review): assumes the file lists eigenvalues in descending order, like
# pca.explained_variance_ -- confirm against the C++ writer.
cpp_eigenvalues = np.loadtxt('parallel_pca_eigenvalues.txt', ndmin=1)
print("\nTop 5 Eigenvalues Comparison:")
print("C++ Eigenvalues:", cpp_eigenvalues[:5])
print("Python Eigenvalues:", pca.explained_variance_[:5])

# Covariance matrix comparison
# np.cov expects variables in rows, hence the transpose; bias=False selects
# the (N - 1)-normalised sample covariance.
print("\nCovariance Matrix Difference (Frobenius norm):")
print(np.linalg.norm(cpp_cov - np.cov(cpp_data.T, bias=False)))


Performance Comparison:
C++ Time: 67412.00 ms
Python Time: 48.76 ms
Speedup: 0.0x

Top 5 Eigenvalues Comparison:
C++ Eigenvalues: [1.71938 1.68082 1.64598 1.60192 1.59035]
Python Eigenvalues: [1.7193807  1.68082032 1.64598227 1.60192144 1.59034892]

Covariance Matrix Difference (Frobenius norm):
2.2972332255409115e-05
