In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml


In [None]:
# Fetch MNIST (OpenML 'mnist_784', version 1); as_frame=False requests
# array output instead of pandas objects.
print("Loading the MNIST dataset...")
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
# Pull the features and the labels out of the returned bunch.
X = mnist.data
y = mnist.target
print("MNIST dataset loaded successfully.")


In [None]:
# Standardize the features with a default StandardScaler.
print("Scaling the dataset...")
scaler = StandardScaler()
# Learn the scaling statistics from X, then apply them to that same X
# (the explicit two-step form of fit_transform).
scaler.fit(X)
X_scaled = scaler.transform(X)


In [None]:
# Project the standardized data onto its first two principal components.
print("Applying PCA...")
# random_state is pinned so the projection is reproducible across runs:
# depending on the installed scikit-learn version, the default
# svd_solver='auto' may select a randomized solver for large inputs, and
# that solver is otherwise unseeded.
pca = PCA(n_components=2, random_state=42)
X_pca = pca.fit_transform(X_scaled)
# One ratio per retained component.
print(f"Explained variance ratio: {pca.explained_variance_ratio_}")


In [None]:
# Build the plotting frame in one expression: the two PCA coordinates as
# PC1/PC2, plus a Label column holding y.
pca_df = pd.DataFrame(X_pca, columns=['PC1', 'PC2']).assign(Label=y)


In [None]:
# Scatter the samples in the PC1/PC2 plane, colored by their label.
print("Visualizing the results...")
fig, ax = plt.subplots(figsize=(10, 7))
# Pin the hue order explicitly. For non-numeric labels seaborn orders hue
# levels by first appearance in the data, which would leave both the legend
# entries and the label-to-color assignment in arbitrary dataset order.
# NOTE(review): assumes the labels are mutually comparable (all str or all
# numeric) so that sorted() is well defined.
label_order = sorted(pca_df['Label'].unique())
sns.scatterplot(
    data=pca_df,
    x='PC1',
    y='PC2',
    hue='Label',
    hue_order=label_order,
    palette='tab10',
    s=10,
    alpha=0.7,
    ax=ax,
)
ax.set_title('PCA on MNIST Dataset')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
# Anchor the legend just outside the right edge of the axes so it does not
# cover the points.
ax.legend(title='Digit', bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()
plt.show()
