In [3]:
import time
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Benchmark: multinomial logistic regression on MNIST, trained once on the
# standardized raw pixels and once on a PCA projection of them. For each run
# we report test accuracy and the elapsed time of fit + predict + scoring.

# Load the MNIST dataset (downloaded from OpenML on first use)
mnist = fetch_openml('mnist_784', version=1, return_X_y=True)
X, y = mnist

# Split the data into training and testing sets (80/20, fixed seed so the
# two models are compared on exactly the same split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data. The scaler is fitted on the training split only and
# then applied to the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Logistic Regression without PCA
# time.perf_counter() is used instead of time.time(): it is monotonic and
# high-resolution, so the measured interval cannot be distorted by system
# clock adjustments.
start_time_no_pca = time.perf_counter()
logisticRegr_no_pca = LogisticRegression(solver='lbfgs', max_iter=1000)
logisticRegr_no_pca.fit(X_train_scaled, y_train)
y_pred_no_pca = logisticRegr_no_pca.predict(X_test_scaled)
accuracy_no_pca = accuracy_score(y_test, y_pred_no_pca)
time_no_pca = time.perf_counter() - start_time_no_pca

# Apply PCA. A float n_components asks scikit-learn to keep as many
# components as are needed to explain that fraction (95%) of the variance.
# Like the scaler, PCA is fitted on the training split only.
pca = PCA(n_components=0.95)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Logistic Regression with PCA
# NOTE: the timer starts after the PCA step above, so the "With PCA" time
# covers only the classifier (fit + predict + scoring) and does not include
# the cost of fitting PCA or projecting the data.
start_time_with_pca = time.perf_counter()
logisticRegr_with_pca = LogisticRegression(solver='lbfgs', max_iter=1000)
logisticRegr_with_pca.fit(X_train_pca, y_train)
y_pred_with_pca = logisticRegr_with_pca.predict(X_test_pca)
accuracy_with_pca = accuracy_score(y_test, y_pred_with_pca)
time_with_pca = time.perf_counter() - start_time_with_pca

# Output results
print("Without PCA:")
print(f"Accuracy: {accuracy_no_pca:.4f}")
print(f"Time taken: {time_no_pca:.4f} seconds")

print("\nWith PCA:")
print(f"Accuracy: {accuracy_with_pca:.4f}")
print(f"Time taken: {time_with_pca:.4f} seconds")

Without PCA:
Accuracy: 0.9159
Time taken: 6.1767 seconds

With PCA:
Accuracy: 0.9221
Time taken: 4.8411 seconds
