In [None]:
# Import necessary libraries
import os

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix
from sklearn.metrics.cluster import contingency_matrix
from sklearn.preprocessing import StandardScaler
# Cap the CPU count seen by joblib's loky backend at a single core.
# LOKY_MAX_CPU_COUNT is a core *count*, not an n_jobs-style value, so use an
# explicit positive integer instead of the '-1' sentinel. Raise the number to
# allow more parallel workers.
os.environ['LOKY_MAX_CPU_COUNT'] = '1'


In [None]:
# Load the MNIST dataset.
# as_frame=False pins the return type to NumPy arrays; without it the type
# (DataFrame vs. ndarray) depends on the installed scikit-learn version.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = mnist.data.astype('float32') / 255.0  # Scale pixel intensities to [0, 1]
y = mnist.target.astype(int)  # Labels arrive as strings; convert to integers

# Standardize every pixel column to zero mean / unit variance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [None]:
# Reduce dimensions with PCA (50 components) before clustering.
# random_state pins the projection when PCA picks a randomized SVD solver, so
# the features (and the clustering built on them) are identical across runs.
pca = PCA(n_components=50, random_state=42)
X_pca = pca.fit_transform(X_scaled)

print(f"Shape after PCA reduction: {X_pca.shape}")


In [None]:
# Mean Shift with default arguments estimates its bandwidth from *all* samples
# (at least quadratic in the number of rows) and starts one seed per sample,
# which is impractical on the full MNIST matrix.  Estimate the bandwidth on a
# seeded random subsample instead, and seed from occupied grid bins.
BANDWIDTH_SAMPLES = 2_000  # rows used for the bandwidth estimate
bandwidth = estimate_bandwidth(
    X_pca,
    quantile=0.3,  # same quantile MeanShift uses for its internal estimate
    n_samples=min(BANDWIDTH_SAMPLES, X_pca.shape[0]),
    random_state=42,
)

# Initialize Mean Shift Clustering model
mean_shift = MeanShift(bandwidth=bandwidth, bin_seeding=True)

# Fit and predict cluster labels
cluster_labels = mean_shift.fit_predict(X_pca)

# Report how many distinct clusters were found
print(f"Number of clusters: {len(np.unique(cluster_labels))}")


In [None]:
# Cross-tabulate the true digit labels against the cluster ids.
# A contingency matrix is used rather than confusion_matrix: cluster ids are
# arbitrary and their number need not match the number of digit classes, so
# treating them as predicted class labels yields a misleading square matrix.
conf_mat = contingency_matrix(y, cluster_labels)
digit_ids = np.unique(y)                 # row order of conf_mat
cluster_ids = np.unique(cluster_labels)  # column order of conf_mat

# Visualize the table; per-cell counts are only legible for few clusters
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(
    conf_mat,
    annot=cluster_ids.size <= 20,
    fmt='d',
    cmap='viridis',
    xticklabels=cluster_ids,
    yticklabels=digit_ids,
    ax=ax,
)
ax.set_title('Confusion Matrix for Mean Shift Clustering on MNIST')
ax.set_xlabel('Cluster Labels')
ax.set_ylabel('True Labels')
plt.show()


In [None]:
# NOTE(review): the preceding cell plots the same true-label / cluster
# comparison; consider keeping only one of the two.
#
# Count how many samples of each true digit fall into each cluster.
# contingency_matrix keeps rows = digit classes and columns = cluster ids,
# whereas confusion_matrix would merge both label sets into one square matrix
# even though cluster ids carry no class meaning.
conf_mat = contingency_matrix(y, cluster_labels)
row_labels = np.unique(y)
col_labels = np.unique(cluster_labels)
show_counts = col_labels.size <= 20  # annotations become unreadable beyond this

# Visualize the table as a heatmap
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(
    conf_mat,
    annot=show_counts,
    fmt='d',
    cmap='viridis',
    xticklabels=col_labels,
    yticklabels=row_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix for Mean Shift Clustering on MNIST')
ax.set_xlabel('Cluster Labels')
ax.set_ylabel('True Labels')
plt.show()


In [None]:
# Project the standardized data to 2D with PCA for plotting only; the
# clustering itself was computed on the 50-component projection.
# random_state keeps the projection reproducible if a randomized solver is used.
pca_2d = PCA(n_components=2, random_state=42)
X_2d = pca_2d.fit_transform(X_scaled)

# 'tab10' has only ten distinct colours, so it is used only when it suffices;
# otherwise build a palette with exactly one colour per cluster.
n_clusters = len(np.unique(cluster_labels))
palette = sns.color_palette(
    'tab10' if n_clusters <= 10 else 'husl',
    n_colors=n_clusters,
)

# Plot the clustering results in 2D
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=X_2d[:, 0],
    y=X_2d[:, 1],
    hue=cluster_labels,
    palette=palette,
    s=10,
    legend='full' if n_clusters <= 20 else False,  # avoid an oversized legend
    ax=ax,
)
ax.set_title('Mean Shift Clustering on MNIST (2D PCA Projection)')
ax.set_xlabel('PCA Component 1')
ax.set_ylabel('PCA Component 2')
plt.show()
