In [1]:
import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import fashion_mnist
from sklearn.preprocessing import StandardScaler

# Shared classifier settings, so the baseline and PCA runs are configured identically.
N_ESTIMATORS = 10
SEED = 42

# Load Fashion MNIST dataset
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Flatten images from 28x28 to 784
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Normalize the data: the scaler is fitted on the training split only and the
# same fitted statistics are then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


def fit_and_score(model, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model``, predict on the evaluation split, and time both steps.

    Durations are measured with ``time.perf_counter()`` (a monotonic,
    high-resolution clock) rather than ``time.time()``, which follows the
    adjustable wall clock and may be too coarse for sub-second intervals
    such as the inference step.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``
        Fitted in place by this call.
    X_fit, y_fit : features and labels used for fitting.
    X_eval, y_eval : features and labels used for evaluation.

    Returns
    -------
    tuple
        ``(predictions, train_seconds, inference_seconds, accuracy)``.
    """
    fit_started = time.perf_counter()
    model.fit(X_fit, y_fit)
    train_seconds = time.perf_counter() - fit_started

    predict_started = time.perf_counter()
    predictions = model.predict(X_eval)
    inference_seconds = time.perf_counter() - predict_started

    return predictions, train_seconds, inference_seconds, accuracy_score(y_eval, predictions)


# Train and evaluate a baseline model on the full scaled feature set
clf = RandomForestClassifier(n_estimators=N_ESTIMATORS, random_state=SEED)
y_pred, baseline_train_time, baseline_inference_time, baseline_accuracy = fit_and_score(
    clf, X_train, y_train, X_test, y_test
)

print(f"Baseline Model - Accuracy: {baseline_accuracy:.4f}, Training Time: {baseline_train_time:.2f}s, Inference Time: {baseline_inference_time:.4f}s")

# Apply PCA (fitted on the training split only, then applied to the test split)
pca = PCA(n_components=0.95)  # Retain 95% variance
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

print(f"Reduced Dimensionality: {X_train_pca.shape[1]}")

# Train and evaluate the same classifier configuration on PCA-transformed data.
# NOTE: the timings below cover only the classifier's fit/predict calls; the cost
# of fitting PCA and projecting the data above is not included in them.
clf_pca = RandomForestClassifier(n_estimators=N_ESTIMATORS, random_state=SEED)
y_pred_pca, pca_train_time, pca_inference_time, pca_accuracy = fit_and_score(
    clf_pca, X_train_pca, y_train, X_test_pca, y_test
)

print(f"PCA Model - Accuracy: {pca_accuracy:.4f}, Training Time: {pca_train_time:.2f}s, Inference Time: {pca_inference_time:.4f}s")

# Compare Performance
# Each figure is (baseline - PCA): a positive value means the PCA variant scored
# lower / ran faster, while a negative "reduction" means the PCA variant was slower.
print("\nPerformance Comparison:")
print(f"Accuracy Drop: {baseline_accuracy - pca_accuracy:.4f}")
print(f"Training Time Reduction: {baseline_train_time - pca_train_time:.2f}s")
print(f"Inference Time Reduction: {baseline_inference_time - pca_inference_time:.4f}s")


Baseline Model - Accuracy: 0.8551, Training Time: 7.38s, Inference Time: 0.0417s
Reduced Dimensionality: 256
PCA Model - Accuracy: 0.8171, Training Time: 13.45s, Inference Time: 0.0243s

Performance Comparison:
Accuracy Drop: 0.0380
Training Time Reduction: -6.07s
Inference Time Reduction: 0.0174s
