#  K-Means Clustering

## Prepare input data

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go

# Number of clusters: used both to generate the blobs and to fit K-means.
K = 5

# Synthetic data set with three features per sample; `t` receives the second
# value returned by make_blobs (the generating blob label of each sample).
X, t = make_blobs(n_features=3, centers=K, random_state=3, cluster_std=1)
# Number of samples.
N = X.shape[0]

# Colour palette for plotting; only the first K entries are used, one per cluster.
all_color = ["blue", "green", "red", "yellow", "magenta", "cyan", "black"]
color = all_color[0:K]

## Visualize the data in 3D space

In [None]:
# Wrap the samples in a DataFrame so each coordinate gets a named column.
df = pd.DataFrame(X, columns=["x1", "x2", "x3"])

# One small black marker per sample.
samples_trace = go.Scatter3d(
  x=df.x1,
  y=df.x2,
  z=df.x3,
  mode='markers',
  marker_size=2,
  marker_color="black",
)

fig = go.Figure(data=[samples_trace])
fig.update_coloraxes(showscale=False)
fig.update_layout(legend_itemsizing="constant")
fig.show()

## Pick random initial cluster centers

In [None]:
# Draw K initial cluster centers uniformly at random inside the axis-aligned
# bounding box of the data. `low` and `high` are per-feature vectors that
# broadcast against `size`, so every column of Mu is sampled within the range
# of the matching column of X, whatever the number of features.
Mu = np.random.uniform(
  low=X.min(axis=0),
  high=X.max(axis=0),
  size=(K, X.shape[1]),
)

## Visualize random centers

In [None]:
# Overlay the current cluster centers on the existing figure, one colour per center.
centers_trace = go.Scatter3d(
  x=Mu[:,0],
  y=Mu[:,1],
  z=Mu[:,2],
  marker_size=2,
  marker_color=color,
  mode='markers',
)
fig.add_trace(centers_trace)
fig.show()

## Distance function (squared Euclidean distance)

In [None]:
def distance(p1, p2):
  """Return the squared Euclidean distance between two points.

  The square root is deliberately omitted: the value is only used to find
  the nearest center, and squaring does not change which one is nearest.

  Args:
    p1: Coordinates of the first point (any 1-D sequence of numbers).
    p2: Coordinates of the second point, same length as `p1`.

  Returns:
    The sum of squared coordinate differences, as a Python float.
  """
  # Plain array arithmetic instead of `np.math.pow`: the `np.math` alias was
  # removed in NumPy 2.0, and this form works for any number of dimensions.
  diff = np.asarray(p1, dtype=float) - np.asarray(p2, dtype=float)
  return float(np.dot(diff, diff))

## Assign function: assign a data point to the closest cluster center

In [None]:
def assign(point, Mu, K):
  """Return the index of the cluster center closest to `point`.

  Closeness is measured by squared Euclidean distance. When several centers
  are equally close, the lowest index wins.

  Args:
    point: Coordinates of one data point (1-D sequence of numbers).
    Mu: Cluster centers, one per row.
    K: Number of centers to consider; only the first K rows of `Mu` are used.

  Returns:
    Integer index (0 <= index < K) of the nearest center.
  """
  centers = np.asarray(Mu, dtype=float)[:K]
  # Broadcasting subtracts the point from every center in one step.
  offsets = centers - np.asarray(point, dtype=float)
  sq_dist = np.sum(offsets ** 2, axis=1)
  return np.argmin(sq_dist)

## Move function: change the cluster centers to the average of their assigned points

In [None]:
def move(data, idx, Mu):
  """Return the new position of a single cluster center.

  Args:
    data: Array of data points, one per row.
    idx: Index (e.g. a boolean mask) selecting the rows of `data` that are
      assigned to this cluster.
    Mu: Current position of this cluster's center.

  Returns:
    The column-wise mean of the selected rows, or `Mu` itself when the
    selection is empty.
  """
  members = data[idx]
  if members.size != 0:
    return np.average(members, axis=0)
  # No points assigned: keep the center where it is.
  return Mu

## Iteration

In [None]:
# Lloyd's K-means iteration: alternate between assigning every point to its
# nearest center and moving every center to the mean of its assigned points,
# until a full pass leaves the centers unchanged.

# Cluster index of every sample; kept 1-D so C[i] is always a scalar.
C = np.zeros(N, dtype=int)
# Snapshot of the centers after each pass (initial position first). Collected
# in a list and stacked once at the end rather than re-allocating the whole
# array with np.append on every pass.
Mu_history = [Mu.copy()]

while True:
  Mu_prev = Mu.copy()

  # Step 1: assign each sample to its closest center.
  for n in range(N):
    C[n] = assign(X[n], Mu, K)

  # Step 2: move each center to the mean of its samples (centers with no
  # samples stay where they are).
  for k in range(K):
    Mu[k] = move(X, C == k, Mu[k])

  Mu_history.append(Mu.copy())

  # Converged once a pass changes nothing. Testing at the end of the pass
  # guarantees at least one assignment pass regardless of the initial Mu.
  if np.array_equal(Mu, Mu_prev):
    break

# Shape (passes + 1, K, n_features): the path of every center over time.
Mu_trace = np.stack(Mu_history)

## Visualize final assignment and centers

In [None]:
# Colour every sample according to the cluster it ended up in.
color_assign = [color[C[i]] for i in range(N)]

assigned_trace = go.Scatter3d(
  x=df.x1,
  y=df.x2,
  z=df.x3,
  mode='markers',
  marker_size=2,
  marker_color=color_assign,
  opacity=0.4,
)
fig = go.Figure(data=[assigned_trace])
fig.update_coloraxes(showscale=False)
fig.update_layout(legend_itemsizing="constant")

# One extra trace per cluster showing the recorded positions of its center
# across iterations, drawn in that cluster's colour.
for i in range(K):
  path = Mu_trace[:, i, :]
  fig.add_trace(go.Scatter3d(x=path[:, 0], y=path[:, 1], z=path[:, 2], marker_size=3, marker_color=color[i]))
fig.show()