In [9]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_blobs

In [12]:
# Synthetic dataset: 100 points drawn around 6 blob centers (std 1.0),
# with a fixed random_state so the same points are generated on every run.
X, y_true = make_blobs(
  n_samples=100,
  centers=6,
  cluster_std=1.0,
  random_state=43,
)

In [13]:
# Display the generated sample matrix returned by make_blobs.
X

array([[ 2.45852157e+00,  1.35834361e+00],
       [ 2.73758913e+00,  7.52937748e-01],
       [-7.30400204e+00,  3.01853581e+00],
       [-7.92713639e+00, -5.76947714e+00],
       [-2.28798468e+00,  6.86487487e+00],
       [ 3.35754660e+00, -1.37271837e+00],
       [-5.76377238e+00,  1.56288813e+00],
       [-5.54125836e+00,  7.28687147e+00],
       [ 3.23100919e+00, -1.83538325e-03],
       [-7.32697503e+00, -4.49667569e+00],
       [-1.89570644e+00,  4.81730264e+00],
       [-9.43459093e+00,  4.71540204e+00],
       [-7.98807486e+00, -4.26305875e+00],
       [-6.48295876e+00,  5.93712360e+00],
       [ 3.94864303e+00,  2.10546399e-01],
       [-7.64533429e+00,  5.42801225e+00],
       [-1.63356975e+00,  6.07764339e+00],
       [-2.62432672e+00,  8.15686940e+00],
       [-3.36028712e+00,  6.99436547e+00],
       [-8.40682102e+00,  4.71801167e+00],
       [-4.62092130e+00,  7.75168931e+00],
       [ 2.08900104e+00,  7.49119707e-01],
       [-8.81294982e+00,  4.01027293e+00],
       [-7.

In [14]:
def initialize_centroid(X,k,random_state = 42):
  """Pick k distinct rows of X to serve as the initial centroids.

  Parameters
  ----------
  X : ndarray of shape (n_samples, n_features)
      Data points to choose from.
  k : int
      Number of centroids to pick; must satisfy 0 <= k <= n_samples.
  random_state : int, optional
      Seed for the private generator used to shuffle the row indices.
      The default of 42 selects the same rows as seeding the global
      NumPy generator with 42 and calling np.random.permutation would.

  Returns
  -------
  ndarray of shape (k, n_features)
      The selected rows of X (a copy, since integer-array indexing is used).

  Raises
  ------
  ValueError
      If k is negative or larger than the number of rows in X.
  """
  n_samples = X.shape[0]
  if not 0 <= k <= n_samples:
    raise ValueError(f"k must be between 0 and {n_samples}, got {k}")
  # A private RandomState keeps the draw reproducible without reseeding the
  # process-wide np.random generator as a side effect of every call.
  rng = np.random.RandomState(random_state)
  random_indices = rng.permutation(n_samples)[:k]
  return X[random_indices]

In [15]:
def assign_clusters(X,centroids):
  """Label every point in X with the index of its nearest centroid.

  Parameters
  ----------
  X : array-like of shape (n_samples, n_features)
      Points to assign.
  centroids : array-like of shape (n_centroids, n_features)
      Current centroid positions.

  Returns
  -------
  ndarray of shape (n_samples,)
      For each point, the row index of the centroid with the smallest
      Euclidean distance. Ties resolve to the lowest index (np.argmin).
  """
  X = np.asarray(X)
  centroids = np.asarray(centroids)
  # Broadcast to (n_samples, n_centroids, n_features) so that all
  # point-to-centroid distances are computed in a single call.
  diffs = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]
  distances = np.linalg.norm(diffs, axis = 2)
  return np.argmin(distances, axis = 1)

In [16]:
def update_centroids(X,clusters,k):
  """Recompute each centroid as the mean of the points assigned to it.

  Parameters
  ----------
  X : ndarray of shape (n_samples, n_features)
      Data points.
  clusters : ndarray of shape (n_samples,)
      Cluster index of every point in X.
  k : int
      Number of clusters.

  Returns
  -------
  ndarray of shape (k, n_features)
      Row i holds the mean of the points labelled i. A cluster with no
      assigned points keeps the all-zero row it was initialised with.
  """
  n_features = X.shape[1]
  new_centers = np.zeros((k, n_features))
  for label in range(k):
    members = X[clusters == label]
    if len(members) == 0:
      # Nothing assigned to this cluster: leave its row at zero.
      continue
    new_centers[label] = members.mean(axis = 0)
  return new_centers

In [17]:
def kmeans(X,k,max_iters = 50, tol = 1e-4):
  """Cluster X into k groups by alternating assignment and centroid updates.

  Parameters
  ----------
  X : ndarray of shape (n_samples, n_features)
      Data points to cluster.
  k : int
      Number of clusters.
  max_iters : int, optional
      Maximum number of centroid updates to perform.
  tol : float, optional
      The loop stops once every centroid coordinate moves by less than
      this amount (element-wise absolute difference) in one update.

  Returns
  -------
  clusters : ndarray
      Cluster index of every point, computed against the returned centroids.
  centroids : ndarray of shape (k, n_features)
      Final centroid positions.
  """
  centroids = initialize_centroid(X,k)
  # Assign once up front so `clusters` is defined even when max_iters <= 0.
  clusters = assign_clusters(X,centroids)
  for _ in range(max_iters):
    new_centroids = update_centroids(X,clusters,k)
    converged = np.all(np.abs(new_centroids - centroids) < tol)
    centroids = new_centroids
    # Re-assign after every update so the returned labels always match the
    # returned centroids, whether the loop converged or ran out of iterations.
    clusters = assign_clusters(X,centroids)
    if converged:
      break
  return clusters,centroids

In [None]:
# Number of clusters to fit, kept in a name so later cells can refer to it.
# NOTE(review): the blobs were generated with centers = 6; k = 3 is the
# value used originally — confirm it is intended.
k = 3
clusters, centroids = kmeans(X,k=k)

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.figure(figsize= (10,10))
# Take the cluster count from the fitted centroids so this cell does not
# depend on a separately defined `k`.
n_clusters = len(centroids)
for i in range(n_clusters):
  members = X[clusters == i]
  # Label only the first scatter so the legend shows a single 'Clusters' entry.
  plt.scatter(members[:,0], members[:,1], s = 200, c= 'green', label = 'Clusters' if i == 0 else None)
# Draw the centroids once, after the points, so they sit on top.
plt.scatter(centroids[:,0], centroids[:,1],s= 200, c = 'blue',label = 'Centroids')
plt.legend()
plt.show()
