In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

In [None]:
# Distance Function
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between ``a`` and ``b``.

    Parameters
    ----------
    a, b : array_like
        Inputs of matching (or broadcastable) shape. Both are passed through
        ``np.asarray`` so plain lists and tuples work as well as arrays.

    Returns
    -------
    float
        ``np.linalg.norm`` of the element-wise difference ``a - b``.
    """
    # Python sequences do not support element-wise subtraction, so coerce first.
    return np.linalg.norm(np.asarray(a) - np.asarray(b))

In [None]:
# Initializing Centroids (random sample of k data points)
def initialize_centroids(X, k, random_state=None):
    """Pick ``k`` distinct rows of ``X`` to serve as the starting centroids.

    Parameters
    ----------
    X : array_like
        Data set indexed along its first axis (one sample per row).
    k : int
        Number of centroids to draw. Sampling is without replacement, so
        ``np.random`` raises ``ValueError`` when ``k`` exceeds ``len(X)``.
    random_state : None, int or numpy.random.Generator, optional
        ``None`` (the default) draws from NumPy's global RNG, so an earlier
        ``np.random.seed(...)`` call still controls the result. Any other
        value is handed to ``np.random.default_rng`` for a self-contained,
        repeatable draw.

    Returns
    -------
    numpy.ndarray
        The ``k`` selected rows of ``X``.
    """
    X = np.asarray(X)  # fancy indexing below needs an array, not a list
    # The ``np.random`` module and a ``Generator`` both expose ``choice``.
    rng = np.random if random_state is None else np.random.default_rng(random_state)
    indices = rng.choice(len(X), k, replace=False)
    return X[indices]

In [None]:
# Assigning Each Point to Its Nearest Centroid
def assign_clusters(X, centroids):
    """Label every sample in ``X`` with the index of its nearest centroid.

    Parameters
    ----------
    X : array_like
        Samples, one per entry along the first axis.
    centroids : array_like
        Candidate centres, one per entry along the first axis, each with the
        same shape as a single sample.

    Returns
    -------
    numpy.ndarray
        Integer array of length ``len(X)``; entry ``j`` is the index of the
        centroid with the smallest Euclidean distance to ``X[j]``. Ties go
        to the lowest index. An empty ``X`` yields an empty integer array.

    Raises
    ------
    ValueError
        If ``X`` is non-empty and ``centroids`` is empty.
    """
    X = np.asarray(X)
    centroids = np.asarray(centroids)
    if len(X) == 0:
        # Keep an integer dtype so the result is still usable as labels.
        return np.empty(0, dtype=np.intp)
    if len(centroids) == 0:
        raise ValueError("assign_clusters requires at least one centroid")

    # Flatten each sample/centroid so the norm below is taken per row.
    points = X.reshape(len(X), -1)
    centers = centroids.reshape(len(centroids), -1)

    # One vectorised pass per centroid: column i holds every point's distance
    # to centroid i, avoiding a Python-level loop over the samples.
    distances = np.stack(
        [np.linalg.norm(points - center, axis=1) for center in centers],
        axis=1,
    )
    return distances.argmin(axis=1)

In [None]:
# Updating Centroids
def update_centroids(X, labels, k):
    """Recompute the ``k`` centroids from the current cluster assignment.

    Each centroid becomes the mean (over axis 0) of the rows of ``X`` whose
    label equals its index. A cluster with no members is re-seeded with a
    row of ``X`` chosen via ``np.random.randint``.

    Returns an array built from the ``k`` centroids, in cluster-index order.
    """

    def _center_of(cluster_id):
        members = X[labels == cluster_id]
        if len(members) > 0:
            return members.mean(axis=0)
        # Empty cluster: fall back to a randomly chosen data point.
        return X[np.random.randint(len(X))]

    return np.array([_center_of(cluster_id) for cluster_id in range(k)])

In [None]:
# K Means 
def kmeans(X, k, max_iters=100, tol=1e-4):
    """Cluster ``X`` into ``k`` groups by alternating assignment and update steps.

    Parameters
    ----------
    X : numpy.ndarray
        Two-dimensional data, one sample per row.
    k : int
        Number of clusters.
    max_iters : int, optional
        Upper bound on the number of assign/update rounds. ``0`` skips the
        refinement and returns the initial centroids with their assignment.
    tol : float, optional
        Iteration stops early once every centroid moved by less than ``tol``
        (Euclidean norm) in a round.

    Returns
    -------
    centroids : numpy.ndarray
        Final centroid of each cluster, one per row.
    labels : numpy.ndarray
        Cluster index of every sample, computed against the returned
        ``centroids``.
    """
    centroids = initialize_centroids(X, k)
    for _ in range(max_iters):
        old_centroids = centroids
        labels = assign_clusters(X, old_centroids)
        centroids = update_centroids(X, labels, k)
        # Converged when no centroid moved by `tol` or more.
        if np.all(np.linalg.norm(centroids - old_centroids, axis=1) < tol):
            break
    # The labels from the loop were computed against the previous centroids
    # (and do not exist at all when max_iters == 0); re-assign once so the
    # returned labels always match the returned centroids.
    labels = assign_clusters(X, centroids)
    return centroids, labels

In [None]:
# Synthetic dataset: 300 samples around 4 blob centers (generated labels are discarded)
X, _ = make_blobs(
    n_samples=300,
    centers=4,
    cluster_std=0.6,
    random_state=42,
)

In [None]:
# Testing out K Means Implementation
# kmeans picks its starting centroids from NumPy's global RNG; seed it so this
# cell prints the same centroids after every Restart & Run All.
np.random.seed(42)
centroids, labels = kmeans(X, k=4)
print(f"These are the centroids for each of the clusters: \n{centroids}")

In [None]:
# Visualization of Results: samples coloured by assigned cluster, centroids as red X markers
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis')
ax.scatter(centroids[:, 0], centroids[:, 1], marker='X', c='red', s=200, label="Centroids")
ax.set_title("K-Means from Scratch")
# Label the axes so the figure can be read on its own.
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.legend()
plt.show()

In [None]:
from sklearn.datasets import make_moons

# NOTE: this cell rebinds X, centroids and labels, so any later cell that reads X
# operates on the two-moons data rather than the blobs generated earlier.
np.random.seed(42)  # make the random centroid initialisation inside kmeans repeatable
X, _ = make_moons(n_samples=300, noise=0.1, random_state=42)  # fixed seed -> same moons every run
centroids, labels = kmeans(X, k=2)

plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='coolwarm')
plt.title("K-Means on Two Moons (Fails)")
plt.show()

In [None]:
def compute_inertia(X, centroids, labels):
    """Return the total squared deviation of the samples from their centroids.

    For each centroid index ``i`` the rows of ``X`` with ``labels == i`` are
    compared with ``centroids[i]``; the squared differences are summed over
    all clusters.
    """
    total = 0
    for cluster_id, center in enumerate(centroids):
        residuals = X[labels == cluster_id] - center
        total += np.sum(residuals ** 2)
    return total

# Elbow method: fit K-Means for a range of K values and plot the inertia of each fit.
np.random.seed(42)  # kmeans draws its initial centroids from NumPy's global RNG
k_values = range(1, 10)  # defined once so the loop and the x-axis cannot drift apart

inertias = []
for k in k_values:
    # Loop-local names leave the `centroids` / `labels` of earlier cells untouched.
    k_centroids, k_labels = kmeans(X, k)
    inertias.append(compute_inertia(X, k_centroids, k_labels))

plt.plot(k_values, inertias, marker='o')
plt.title("Elbow Method to Choose K")
plt.xlabel("Number of Clusters (K)")
plt.ylabel("Inertia")
plt.show()
