<a href="https://colab.research.google.com/github/Saadkhalid913/K-Means-Cluster-Model/blob/main/Tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from math import sqrt  
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs 
from sklearn.metrics import accuracy_score, confusion_matrix 
from sklearn.preprocessing import MinMaxScaler

class KMeans():
  """Minimal K-means clusterer that works on min-max scaled data.

  Typical use is to alternate ``MapClosestCentre()`` (assign every sample to
  its nearest centre) and ``ChangeCenters()`` (move every centre to the mean
  of its members) for a fixed number of iterations.

  Attributes set by the constructor:
    X          -- the samples after min-max scaling, one row per sample.
    K          -- number of clusters.
    centres    -- array of shape (K, n_features) holding the current centres.
    Classes    -- None until the first assignment, afterwards a column vector
                  of shape (n_samples, 1) with the centre index per sample.
    n_samples  -- number of rows in X.
  """

  def __init__(self, X: np.ndarray, K: int):
    """Store the data, draw K random initial centres and scale the data.

    Args:
      X: 2-D array of samples, one row per sample and one column per feature.
      K: number of clusters to fit.
    """
    self.X = X
    self.K = K
    # Centres are drawn uniformly from [0, 1) per feature; Normalize() below
    # rescales the data with MinMaxScaler so both live on a comparable scale.
    self.centres = np.random.rand(K, X.shape[1])
    self.Normalize()
    self.Classes = None
    self.n_samples = X.shape[0]

  def GetEuclideanDistance(self, v1, v2):
    """Return the Euclidean distance between vectors v1 and v2 as a float."""
    return sqrt(np.sum((v1-v2)**2))

  def Normalize(self):
    """Replace self.X with its MinMaxScaler-transformed version."""
    M = MinMaxScaler()
    self.X = M.fit_transform(self.X)

  def FindClosestCentre(self, v):
    """Return the index of the centre nearest to sample v.

    Squared distances are compared because the square root is monotonic and
    therefore does not change which centre is nearest. On ties the lowest
    index wins.
    """
    squared_distances = np.sum((self.centres[:self.K] - v) ** 2, axis=1)
    return int(np.argmin(squared_distances))

  def MapClosestCentre(self):
    """Assign every sample to its nearest centre and store it in self.Classes.

    self.Classes becomes an (n_samples, 1) column vector of centre indices.
    """
    # Broadcast to (n_samples, K, n_features) so all sample/centre pairs are
    # evaluated in one vectorised pass instead of nested Python loops.
    differences = self.X[:, None, :] - self.centres[None, :self.K, :]
    squared_distances = np.sum(differences ** 2, axis=2)
    self.Classes = np.argmin(squared_distances, axis=1).reshape(-1, 1)

  def ChangeCenters(self):
    """Move each centre to the mean of the samples currently assigned to it.

    A centre with no assigned samples is left where it is: averaging an empty
    set would be 0/0 and turn the centre into NaN, after which it could never
    be selected as the closest centre again.
    """
    if self.Classes is None:
      # Nothing has been assigned yet; derive the labels from the current
      # centres rather than failing on a meaningless mask.
      self.MapClosestCentre()

    labels = np.asarray(self.Classes).reshape(-1)
    for i in range(self.K):
      members = self.X[labels == i]
      if len(members) == 0:
        continue
      self.centres[i] = np.sum(members, axis=0) / len(members)

# Demo: cluster synthetic blobs and compare the found clusters with the true
# blob membership.
if __name__ == "__main__":
  k = 4
  n_iterations = 50
  TrainingX, TrainingY = make_blobs(n_samples=500, n_features=10, cluster_std=10, centers=k)

  km = KMeans(TrainingX, k)

  # Alternate assignment and centre update for a fixed number of rounds.
  for _ in range(n_iterations):
    km.MapClosestCentre()
    km.ChangeCenters()

  # The loop ends on a centre update, so assign once more to make the
  # reported labels correspond to the final centres.
  km.MapClosestCentre()

  # Optional visualisation of the first two (scaled) features:
  # plt.scatter(km.X[ : , [0]], km.X[ : , [1]], c="r")
  # plt.scatter(km.centres[ : , [0]], km.centres[ : , [1]], c="b")
  # plt.show()

  # Cluster indices are arbitrary, so a good fit shows up as one dominant
  # entry per row/column rather than as a populated diagonal.
  print(confusion_matrix(TrainingY, km.Classes.reshape(-1)))

[[  0 124   0   1]
 [  0   0 125   0]
 [  0   0   0 125]
 [125   0   0   0]]
