<a href="https://colab.research.google.com/github/Dev-180Memes/pca-project/blob/main/PCA_Experiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf
from sklearn.decomposition import PCA, SparsePCA, KernelPCA
from sklearn.preprocessing import StandardScaler
import time
import psutil
import os
import matplotlib.pyplot as plt

In [None]:
def get_memory_usage():
    """Return the resident set size (RSS) of the current process, in MiB."""
    bytes_per_mib = 1024 * 1024
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / bytes_per_mib

In [None]:
def reconstruction_error(original, reconstructed):
    """Mean squared difference between `original` and `reconstructed`, averaged over every element."""
    residual = original - reconstructed
    return np.mean(residual * residual)

def sparsity_level(matrix):
    """Return the fraction of entries in `matrix` that are exactly zero.

    Parameters
    ----------
    matrix : array-like
        Any input accepted by ``np.asarray`` (ndarray, nested lists, ...).

    Returns
    -------
    float
        ``1.0`` when every entry is zero, ``0.0`` when none is.

    Raises
    ------
    ValueError
        If `matrix` has no elements; the ratio is undefined in that case.
    """
    # Coerce first so plain lists work too (they have no `.size` attribute).
    values = np.asarray(matrix)
    if values.size == 0:
        raise ValueError("sparsity_level is undefined for an empty matrix")
    return 1.0 - np.count_nonzero(values) / float(values.size)

In [None]:
def visualize_samples(original, reconstructions, method_names, n_samples=5,
                      image_shape=(28, 28), save_path='mnist_reconstructions.png'):
    """Plot original images next to their reconstructions and save the grid to disk.

    The grid has one row per sample and one column per image source: the
    first column shows `original`, the following columns show each entry of
    `reconstructions` under the matching title from `method_names`.

    Parameters
    ----------
    original : indexable of arrays
        `original[i]` must be reshapeable to `image_shape`.
    reconstructions : sequence
        One indexable per method, laid out like `original`.
    method_names : sequence of str
        Column titles; paired with `reconstructions` positionally.
    n_samples : int, default 5
        Number of rows to draw; every input must hold at least this many items.
    image_shape : tuple of int, default (28, 28)
        Shape each flattened sample is reshaped to before drawing.
    save_path : str, default 'mnist_reconstructions.png'
        File the figure is written to.

    Returns
    -------
    matplotlib.figure.Figure
        The figure that was saved. It is already closed in pyplot's registry;
        the object is returned so the caller can display or inspect it.
    """
    n_cols = len(method_names) + 1
    # squeeze=False keeps `axes` two-dimensional even when there is a single
    # row, so the axes[row, col] indexing below works for n_samples == 1.
    fig, axes = plt.subplots(n_samples, n_cols, figsize=(15, 2*n_samples), squeeze=False)

    columns = [('Original', original)]
    columns.extend((name, recon) for recon, name in zip(reconstructions, method_names))

    for row in range(n_samples):
        for col, (title, images) in enumerate(columns):
            ax = axes[row, col]
            ax.imshow(images[row].reshape(image_shape), cmap='gray')
            ax.axis('off')
            if row == 0:
                # Column titles only on the top row.
                ax.set_title(title)

    fig.tight_layout()
    fig.savefig(save_path)
    # Close this specific figure rather than whichever one pyplot considers current.
    plt.close(fig)
    return fig

In [None]:
def visualize_metrics(metrics, save_path='pca_metrics_comparison.png'):
    """Draw one bar chart per quality metric and save the comparison figure.

    Parameters
    ----------
    metrics : dict
        Maps a method name to a dict that contains at least the keys
        'evr', 'recon_error' and 'sparsity'.
    save_path : str, default 'pca_metrics_comparison.png'
        File the figure is written to.

    Returns
    -------
    matplotlib.figure.Figure
        The figure that was saved. It is already closed in pyplot's registry;
        the object is returned so the caller can display or inspect it.
    """
    methods = list(metrics.keys())
    palette = ['#1f77b4', '#ff7f0e', '#2ca02c']

    # (metric key, panel title, whether the y-axis is pinned to [0, 1])
    panels = [
        ('evr', 'Explained Variance Ratio', True),
        ('recon_error', 'Reconstruction Error', False),
        ('sparsity', 'Sparsity Level', True),
    ]
    # Collect all values before creating the figure so a missing key fails
    # without leaving a half-built figure open.
    values = {key: [metrics[m][key] for m in methods] for key, _, _ in panels}

    fig, axes = plt.subplots(1, len(panels), figsize=(15, 5))
    for ax, (key, title, unit_range) in zip(axes, panels):
        ax.bar(methods, values[key], color=palette)
        ax.set_title(title)
        if unit_range:
            ax.set_ylim(0, 1)
        ax.tick_params(axis='x', rotation=45)

    fig.tight_layout()
    fig.savefig(save_path)
    # Close this specific figure rather than whichever one pyplot considers current.
    plt.close(fig)
    return fig

In [None]:
# Load MNIST and keep only the training images; labels and the test split are unused.
(x_train, _), (_, _) = tf.keras.datasets.mnist.load_data()

# Flatten every image into a 784-element row, cast to float32 and divide by 255.
X = np.reshape(x_train, (-1, 784)).astype(np.float32) / 255.0

# Standardise each pixel column. The fitted scaler is kept because later cells
# call scaler.inverse_transform on the reconstructions.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Every method starts from the same zeroed record; each method gets its own dict.
metrics = {
    method: dict.fromkeys(('evr', 'recon_error', 'sparsity', 'time', 'memory'), 0)
    for method in ('Traditional', 'Sparse')
}
# Reconstructions are appended here by the cells that run each method.
reconstructions = list()

In [None]:
# Number of components requested from each decomposition (passed to both PCA and SparsePCA).
n_components=50

In [None]:
# Fit standard PCA on the standardised data, reconstruct it, and record cost
# and quality metrics under metrics['Traditional'].
# NOTE: this cell appends to `reconstructions`; re-run the cell that resets
# that list before re-running this one, otherwise the entry is duplicated.
start_time = time.perf_counter()  # monotonic timer: immune to system clock adjustments
start_memory = get_memory_usage()
# Fixed seed: PCA may select a randomized SVD solver for data of this size,
# in which case an unseeded run is not exactly reproducible.
pca = PCA(n_components=n_components, random_state=42)
X_pca = pca.fit_transform(X_scaled)
X_reconstructed = pca.inverse_transform(X_pca)

metrics['Traditional']['time'] = time.perf_counter() - start_time
# NOTE(review): difference of two RSS samples; it may not isolate the memory
# used by this step alone (allocator reuse, garbage collection).
metrics['Traditional']['memory'] = get_memory_usage() - start_memory
metrics['Traditional']['evr'] = np.sum(pca.explained_variance_ratio_)
metrics['Traditional']['recon_error'] = reconstruction_error(X_scaled, X_reconstructed)
metrics['Traditional']['sparsity'] = sparsity_level(pca.components_)
# Undo the standardisation so the reconstruction is in the same units as X.
reconstructions.append(scaler.inverse_transform(X_reconstructed))

In [None]:
# Fit SparsePCA on the standardised data, reconstruct it, and record cost and
# quality metrics under metrics['Sparse'].
# NOTE: this cell appends to `reconstructions`; re-run the cell that resets
# that list before re-running this one, otherwise the entry is duplicated.
start_time = time.perf_counter()  # monotonic timer: immune to system clock adjustments
start_memory = get_memory_usage()
# Fixed seed so repeated runs of the solver give the same components.
sparse_pca = SparsePCA(n_components=n_components, alpha=1.0, ridge_alpha=0.01, random_state=42)
X_sparse = sparse_pca.fit_transform(X_scaled)
# Manual reconstruction: codes times components, plus the mean removed during fitting.
X_reconstructed_sparse = np.dot(X_sparse, sparse_pca.components_) + sparse_pca.mean_

# Stop the clock right after fit + reconstruction, as the traditional PCA cell
# does, so the two timings cover the same work and exclude the bookkeeping below.
metrics['Sparse']['time'] = time.perf_counter() - start_time
# NOTE(review): difference of two RSS samples; it may not isolate the memory
# used by this step alone (allocator reuse, garbage collection).
metrics['Sparse']['memory'] = get_memory_usage() - start_memory

# NOTE(review): this "evr" is the ratio of reconstructed variance to total
# variance, used as a stand-in for PCA's explained_variance_ratio_. Sparse
# components are generally not orthogonal, so the two numbers are presumably
# not strictly comparable -- confirm this is the intended metric.
total_var = np.var(X_scaled, axis=0).sum()
recon_var = np.var(X_reconstructed_sparse, axis=0).sum()
metrics['Sparse']['evr'] = recon_var / total_var if total_var > 0 else 0
metrics['Sparse']['recon_error'] = reconstruction_error(X_scaled, X_reconstructed_sparse)
metrics['Sparse']['sparsity'] = sparsity_level(sparse_pca.components_)
# Undo the standardisation so the reconstruction is in the same units as X.
reconstructions.append(scaler.inverse_transform(X_reconstructed_sparse))



In [None]:
# Render the first five digits beside each method's reconstruction, then chart the metrics.
samples_viz = visualize_samples(
    original=X[:5],
    reconstructions=[recon[:5] for recon in reconstructions],
    method_names=['Traditional PCA', 'Sparse PCA'],
)
metrics_viz = visualize_metrics(metrics=metrics)

In [None]:
# Bare expression: the notebook displays whatever visualize_samples returned
# (nothing is shown if that value is None).
samples_viz

In [None]:
# Plain-text summary of every recorded metric, one section per method.
for method, stats in metrics.items():
    print(f"\n{method}:")
    print(f"Explained Variance Ratio: {stats['evr']:.4f}")
    print(f"Reconstruction Error: {stats['recon_error']:.4f}")
    print(f"Sparsity Level: {stats['sparsity']:.4f}")
    print(f"Execution Time: {stats['time']:.2f} seconds")
    print(f"Memory Utilization: {stats['memory']:.2f} MB")


Traditional:
Explained Variance Ratio: 0.5507
Reconstruction Error: 0.4110
Sparsity Level: 0.0514
Execution Time: 2.14 seconds
Memory Utilization: 232.69 MB

Sparse:
Explained Variance Ratio: 0.5395
Reconstruction Error: 0.4114
Sparsity Level: 0.3049
Execution Time: 1034.23 seconds
Memory Utilization: 229.85 MB
