In [13]:
from sklearn.datasets import make_blobs, make_classification
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [32]:
# Synthetic 3-feature dataset used throughout the notebook.
# random_state is pinned so that "Restart Kernel -> Run All" regenerates the
# exact same points (and therefore the same plots) on every run.
X, y = make_blobs(
    n_samples=100,
    n_features=3,
    centers=None,
    cluster_std=1.0,
    center_box=(-8.0, 8.0),
    shuffle=True,
    random_state=42,
    return_centers=False,
)

In [34]:
# Interactive 3-D scatter of the raw samples: one plot axis per feature column of X.
fig = px.scatter_3d(
    x=X[:, 0],
    y=X[:, 1],
    z=X[:, 2],
    title='3d Overview of data distribution',
)
fig.show()

In [96]:
class K_Means:
    """Minimal k-means clusterer (assign / update iterations) built on NumPy.

    Parameters
    ----------
    n_clusters : int, default 2
        Number of clusters (and centroids) to form.
    max_iter : int, default 100
        Upper bound on the number of centroid-update iterations.
    random_state : int or None, default None
        Seed for choosing the initial centroids. ``None`` draws from NumPy's
        global random state (so ``np.random.seed`` is still honoured); an
        integer makes ``fit`` reproducible on its own.

    Attributes (set by ``fit``)
    ---------------------------
    centroids_ : ndarray of shape (n_clusters, n_features)
        Final centroid coordinates.
    labels_ : ndarray of shape (n_samples,)
        Index of the nearest centroid for every row passed to ``fit``.
    """

    def __init__(self, n_clusters=2, max_iter=100, random_state=None):
        self.n_clusters = n_clusters      # number of clusters to form
        self.max_iter = max_iter          # iteration cap for the update loop
        self.random_state = random_state  # seed for the initial centroid draw

    def fit(self, X):
        """Cluster the rows of ``X`` and return one cluster index per row.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
            Cluster index in ``[0, n_clusters)`` for each row of ``X``.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``n_clusters`` is not in ``[1, n_samples]``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        n_samples = X.shape[0]
        if not 1 <= self.n_clusters <= n_samples:
            raise ValueError(
                "n_clusters must be between 1 and the number of samples "
                f"({n_samples}), got {self.n_clusters}"
            )

        # Pick distinct rows of X as the starting centroids.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)
        centroids = X[rng.choice(n_samples, self.n_clusters, replace=False)]

        clusters = self._get_clusters(X, centroids)

        for _ in range(self.max_iter):
            new_centroids = self._move_centroids(X, clusters, centroids)
            # Centroids that no longer move mean the assignment is stable.
            if np.array_equal(new_centroids, centroids):
                break
            centroids = new_centroids
            clusters = self._get_clusters(X, centroids)

        self.centroids_ = centroids
        self.labels_ = clusters
        return clusters

    def _move_centroids(self, X, clusters, centroids=None):
        """Return centroids recomputed as the mean of each cluster's members.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
        clusters : ndarray of shape (n_samples,)
            Current cluster index of every row of ``X``.
        centroids : ndarray of shape (n_clusters, n_features), optional
            Current centroids. When given, the result has the same shape and a
            cluster with no members keeps its current centroid, so row ``i``
            always corresponds to label ``i``. When omitted, one row is
            returned per label present in ``clusters`` (in sorted label order).
        """
        X = np.asarray(X, dtype=float)
        clusters = np.asarray(clusters)

        if centroids is None:
            return np.array(
                [X[clusters == label].mean(axis=0) for label in np.unique(clusters)]
            )

        # Work on a copy so the caller's array can still be compared against.
        new_centroids = np.array(centroids, dtype=float)
        for label in range(len(new_centroids)):
            members = X[clusters == label]
            if len(members):
                new_centroids[label] = members.mean(axis=0)
        return new_centroids

    def _get_clusters(self, X, centroids):
        """Return, for every row of ``X``, the index of its nearest centroid.

        Distances are Euclidean; on an exact tie the lowest centroid index wins.
        """
        X = np.asarray(X, dtype=float)
        centroids = np.asarray(centroids, dtype=float)
        # (n_samples, 1, n_features) - (1, n_clusters, n_features)
        #   -> (n_samples, n_clusters, n_features)
        offsets = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]
        distances = np.linalg.norm(offsets, axis=2)
        return np.argmin(distances, axis=1)


In [99]:
# Build a 3-cluster model and fit it; fit() returns one cluster index per row of X.
Model = K_Means(n_clusters=3)
pred_y = Model.fit(X=X)

In [101]:
# Same 3-D scatter as the raw-data overview, now coloured by the predicted cluster index.
fig = px.scatter_3d(
    x=X[:, 0],
    y=X[:, 1],
    z=X[:, 2],
    color=pred_y,
    title='3d Overview of K Means Clustering',
)
fig.show()