<a href="https://colab.research.google.com/github/B34R-e/Basic-Machine-Learning/blob/main/Implementation_of_K_means_Clustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [107]:
import random
import numpy as np
from numpy.linalg import norm
# Initialize k centroids by sampling k distinct observations from the data
def initialize_centroids(X, k):
  """Choose k observations of X to act as the starting centroids.

  Args:
    X: Sized, indexable collection of observations.
    k: Number of centroids to draw.

  Returns:
    A list holding k entries of X taken from k distinct positions, selected
    with `random.sample` (sampling without replacement).
  """
  starting_centroids = []
  for row_index in random.sample(range(len(X)), k):
    starting_centroids.append(X[row_index])
  return starting_centroids

In [108]:
# Compute the Euclidean distance between two points (e.g. a data observation and a centroid)
def euclidean_distance(p1, p2):
  """Return the Euclidean (L2) distance between two points.

  Both points are converted to floating-point arrays before they are
  subtracted. Without the conversion, unsigned-integer inputs (for example
  uint8 arrays) wrap around on subtraction and yield a wrong distance.

  Args:
    p1: First point, an array-like of real numbers.
    p2: Second point, with a shape that broadcasts against p1.

  Returns:
    The norm of (p1 - p2) as computed by `numpy.linalg.norm`.
  """
  difference = np.asarray(p1, dtype=float) - np.asarray(p2, dtype=float)
  return norm(difference)

In [109]:
# Assign each data observation to the closest centroid
def assign_points_to_clusters(X, centroids):
  """Label every observation with the index of its nearest centroid.

  Args:
    X: Iterable of observations.
    centroids: Sequence of centroid coordinates.

  Returns:
    A list with one integer per observation, in the order of X; each integer
    is the position in `centroids` of the closest centroid according to
    `euclidean_distance`. When several centroids are equally close, the one
    with the lowest position is chosen.
  """
  def nearest_centroid_index(observation):
    gaps = [euclidean_distance(observation, center) for center in centroids]
    # list.index returns the first occurrence, so ties go to the lowest index.
    return gaps.index(min(gaps))

  return [nearest_centroid_index(observation) for observation in X]

In [110]:
# Update each centroid by taking the average of all data observations assigned to it
def calculate_mean(points):
  """Compute the coordinate-wise average of a collection of points.

  Args:
    points: Sized, indexable collection of points; the dimensionality is
      taken from the first point.

  Returns:
    A list with one averaged value per coordinate, or an empty list when
    `points` contains no points.
  """
  count = len(points)
  if not count:
    return []
  width = len(points[0])
  totals = [0] * width
  for row in points:
    # Add this point's coordinates onto the running per-coordinate totals.
    totals = [running + row[axis] for axis, running in enumerate(totals)]
  return [total / count for total in totals]

def update_centroids(X, clusters, k):
  """Recompute the k centroids from the current cluster assignment.

  Args:
    X: Indexable collection of observations.
    clusters: Cluster index for each observation, aligned with X.
    k: Number of clusters.

  Returns:
    A list of k centroids, where entry j is `calculate_mean` applied to the
    observations labelled j (so a cluster without members yields whatever
    `calculate_mean` returns for an empty list).
  """
  members = [[] for _ in range(k)]
  for row_index, label in enumerate(clusters):
    members[label].append(X[row_index])
  return list(map(calculate_mean, members))

In [111]:
import random
import math
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

def has_converged(old_centroids, new_centroids, epsilon = 6):
  """Report whether every centroid moved by less than epsilon.

  Args:
    old_centroids: Centroids before the update step.
    new_centroids: Centroids after the update step, in the same order.
    epsilon: Strict upper bound on the distance a centroid may have moved.

  Returns:
    True if, for every index of old_centroids, the Euclidean distance between
    the old and the new centroid is below epsilon; False otherwise.
  """
  # NOTE(review): the default of 6 is an absolute distance in data units, which
  # looks very loose -- confirm whether something like 1e-6 was intended.
  for position in range(len(old_centroids)):
    shift = euclidean_distance(old_centroids[position], new_centroids[position])
    if not (shift < epsilon):
      return False
  return True

def kmeans(X, k, max_iters = 100, epsilon = None):
  """Cluster X into k groups by alternating assignment and centroid updates.

  Args:
    X: Sized, indexable collection of observations.
    k: Number of clusters.
    max_iters: Maximum number of centroid updates. With a value <= 0 the
      initial centroids and the assignment to them are returned.
    epsilon: Optional convergence threshold forwarded to `has_converged`.
      When None, `has_converged` is called with its own default.

  Returns:
    A tuple (clusters, centroids): `clusters` holds one cluster index per
    observation and is always the assignment computed against the returned
    `centroids`.
  """
  centroids = initialize_centroids(X, k)

  # Assign once up front so that a valid result exists even when the loop body
  # never runs (max_iters <= 0).
  clusters = assign_points_to_clusters(X, centroids)

  for _ in range(max_iters):
    # Update centroids from the current assignment
    new_centroids = update_centroids(X, clusters, k)

    # A cluster without members comes back as an empty centroid, which cannot
    # be compared against a real point; keep its previous position instead.
    new_centroids = [
      updated if len(updated) > 0 else previous
      for previous, updated in zip(centroids, new_centroids)
    ]

    # Check convergence
    if epsilon is None:
      converged = has_converged(centroids, new_centroids)
    else:
      converged = has_converged(centroids, new_centroids, epsilon)
    if converged:
      break

    centroids = new_centroids

    # Re-assign against the centroids just adopted so the labels never lag
    # behind the centroids that are returned.
    clusters = assign_points_to_clusters(X, centroids)

  return (clusters, centroids)

In [112]:
# Example usage:

# initialize_centroids draws from the stdlib `random` module, so seed it too;
# otherwise only the generated data (random_state = 42) is reproducible and
# the clustering result changes from run to run.
random.seed(42)

X, _ = make_blobs(n_samples = 200, centers = 3, random_state = 42)
(clusters, centroids) = kmeans(X, k = 3)