# K-Means Algorithm from scratch

#### 1) Decide the number of clusters
#### 2) Select random centroids
#### 3) Assign clusters
#### 4) Move centroids
#### 5) Check if done

In [3]:
import random
import numpy

We compute the distances in matrix (vector) form because it generalizes well to higher dimensions.

In [5]:
import numpy as np
class KMeans:
    """Minimal k-means clustering (Lloyd's algorithm) on NumPy arrays.

    Parameters
    ----------
    n_clusters : int, default 2
        Number of clusters (and therefore centroids) to form.
    max_iter : int, default 100
        Upper bound on the number of centroid updates performed by
        ``fit_predict``.

    Attributes
    ----------
    centroids : numpy.ndarray or None
        Current centroid positions, one per row; ``None`` until
        ``fit_predict`` has been called.
    """

    def __init__(self, n_clusters=2, max_iter=100):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.centroids = None

    def fit_predict(self, X):
        """Cluster the rows of ``X`` and return one label per row.

        Initial centroids are ``n_clusters`` distinct rows of ``X`` picked
        with ``random.sample``, so seeding the ``random`` module makes the
        run reproducible.

        Parameters
        ----------
        X : array-like
            Samples, one per row.

        Returns
        -------
        numpy.ndarray
            Index of the nearest centroid for every row of ``X``. The labels
            always correspond to the final value of ``self.centroids``.

        Raises
        ------
        ValueError
            Propagated from ``random.sample`` when ``n_clusters`` exceeds
            the number of rows in ``X``.
        """
        X = np.asarray(X)

        # Select random points as the initial centroids.
        random_index = random.sample(range(X.shape[0]), self.n_clusters)
        self.centroids = X[random_index]

        # Assigning before the loop keeps the result defined for max_iter == 0.
        cluster_group = self.assign_clusters(X)
        for _ in range(self.max_iter):
            old_centroids = self.centroids
            self.centroids = self.move_centroids(X, cluster_group)
            # Unchanged centroids mean the labels cannot change any more.
            if np.array_equal(old_centroids, self.centroids):
                break
            # Re-label against the moved centroids so that the returned
            # labels never lag behind the stored centroids.
            cluster_group = self.assign_clusters(X)

        return cluster_group

    def assign_clusters(self, X):
        """Return, for each row of ``X``, the index of its nearest centroid.

        Distances are Euclidean. When two centroids are equally close, the
        one with the lower index wins.
        """
        X = np.asarray(X)
        n_samples = X.shape[0]
        if n_samples == 0:
            return np.array([], dtype=int)

        # Flatten to 2-D so that plain 1-D input (scalar samples) works too.
        points = X.reshape(n_samples, -1)
        centroids = np.asarray(self.centroids)
        centroids = centroids.reshape(centroids.shape[0], -1)

        # (n_samples, n_centroids) distance matrix built via broadcasting.
        offsets = points[:, np.newaxis, :] - centroids[np.newaxis, :, :]
        distances = np.linalg.norm(offsets, axis=2)
        return np.argmin(distances, axis=1)

    def move_centroids(self, X, cluster_group):
        """Return new centroids: the mean of the samples in each cluster.

        A cluster that received no samples keeps its previous centroid, so
        the result always has one row per existing centroid and row ``k``
        always belongs to label ``k``. If no centroids have been set yet
        (``self.centroids is None``), one centroid is returned per distinct
        label found in ``cluster_group``.
        """
        X = np.asarray(X)
        cluster_group = np.asarray(cluster_group)

        if self.centroids is None:
            # No previous positions to fall back on.
            return np.array(
                [X[cluster_group == label].mean(axis=0)
                 for label in np.unique(cluster_group)]
            )

        new_centroids = []
        for label, previous in enumerate(self.centroids):
            members = X[cluster_group == label]
            # The mean of an empty selection would be NaN; keep the old spot.
            new_centroids.append(members.mean(axis=0) if len(members) else previous)

        return np.array(new_centroids)