
# Step-by-Step K-Means Clustering Visualization

This notebook demonstrates the K-Means clustering process through a sequence of static plots, one for each stage of the algorithm:

1. Initial Random Centroids
2. Data Points Assigned to Nearest Centroid (color-coded)
3. Centroids Updated to New Mean Position
4. Repeat Assignments & Updates Until Convergence


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

In [None]:
# Build the toy dataset: 300 points scattered around 4 blob centers.
# The ground-truth blob labels are discarded because K-Means never sees them.
X, _ = make_blobs(
    n_samples=300,
    centers=4,
    cluster_std=1.5,
    random_state=40,
)

# Seed NumPy's global RNG so later np.random calls in this notebook are repeatable.
np.random.seed(40)

In [None]:
# Step 1: Pick k distinct data points at random to act as the starting centroids
k = 4
seed_rows = np.random.choice(X.shape[0], k, replace=False)
initial_centroids = X[seed_rows]

fig, ax = plt.subplots(figsize=(8, 6))
# All points are drawn in gray because no cluster assignment exists yet.
ax.scatter(X[:, 0], X[:, 1], s=30, color='gray', alpha=0.5)
ax.scatter(
    initial_centroids[:, 0],
    initial_centroids[:, 1],
    c='red',
    marker='X',
    s=200,
    label='Initial Centroids',
)
ax.set_title("Step 1: Randomly Initialized Centroids")
ax.legend()
plt.show()

In [None]:
# Step 2: Assign points to nearest centroid
from scipy.spatial.distance import cdist

def assign_clusters(X: np.ndarray, centroids: np.ndarray) -> np.ndarray:
    """Label every point with the index of its closest centroid.

    Parameters
    ----------
    X : array-like of shape (n_points, n_features)
        The points to assign.
    centroids : array-like of shape (n_centroids, n_features)
        Current centroid positions.

    Returns
    -------
    np.ndarray of shape (n_points,)
        For each row of ``X``, the row index in ``centroids`` with the
        smallest Euclidean distance. Ties go to the lowest index.
    """
    # cdist yields an (n_points, n_centroids) table of pairwise distances;
    # the per-row argmin selects the nearest centroid for each point.
    return cdist(X, centroids, metric="euclidean").argmin(axis=1)

# Label each point with the index of the closest initial centroid.
labels = assign_clusters(X, initial_centroids)

fig, ax = plt.subplots(figsize=(8, 6))
# One scatter call per cluster so each gets its own color and legend entry.
for i in range(k):
    members = X[labels == i]
    ax.scatter(members[:, 0], members[:, 1], s=30, label=f'Cluster {i+1}')
ax.scatter(
    initial_centroids[:, 0],
    initial_centroids[:, 1],
    c='black',
    marker='X',
    s=200,
    label='Centroids',
)
ax.set_title("Step 2: Points Assigned to Nearest Centroid")
ax.legend()
plt.show()

In [None]:
# Step 3: Move each centroid to the mean of the points currently assigned to it
cluster_means = [X[labels == i].mean(axis=0) for i in range(k)]
new_centroids = np.array(cluster_means)

fig, ax = plt.subplots(figsize=(8, 6))
for i in range(k):
    members = X[labels == i]
    ax.scatter(members[:, 0], members[:, 1], s=30, label=f'Cluster {i+1}')

# Show both centroid generations so the shift caused by the update is visible.
ax.scatter(
    initial_centroids[:, 0],
    initial_centroids[:, 1],
    c='black',
    marker='X',
    s=200,
    label='Old Centroids',
)
ax.scatter(
    new_centroids[:, 0],
    new_centroids[:, 1],
    c='red',
    marker='X',
    s=200,
    label='Updated Centroids',
)
ax.set_title("Step 3: Centroids Updated to Cluster Means")
ax.legend()
plt.show()

In [None]:
# Step 4: Repeat assignment and update until the centroids stop moving
max_iterations = 100  # safety cap so the loop always terminates
centroids = initial_centroids.copy()
for _ in range(max_iterations):
    labels = assign_clusters(X, centroids)
    # A cluster that ends up with no points keeps its previous centroid;
    # averaging an empty selection would otherwise yield NaN coordinates
    # that poison every later distance computation.
    updated_centroids = np.array([
        X[labels == i].mean(axis=0) if np.any(labels == i) else centroids[i]
        for i in range(k)
    ])
    # K-Means has converged once an update no longer moves any centroid.
    has_converged = np.allclose(updated_centroids, centroids)
    centroids = updated_centroids
    if has_converged:
        break
else:
    # Only reached when the loop exhausted the cap without breaking.
    print(f"Warning: centroids still moving after {max_iterations} iterations")

fig, ax = plt.subplots(figsize=(8, 6))
for i in range(k):
    members = X[labels == i]
    ax.scatter(members[:, 0], members[:, 1], s=30, label=f'Cluster {i+1}')
ax.scatter(centroids[:, 0], centroids[:, 1], c='red', marker='X', s=200, label='Final Centroids')
ax.set_title("Step 4: Final Clusters After Convergence")
ax.legend()
plt.show()