### K-Means Clustering Algorithm
Given an initial set of k means $m_1^{(1)}, \ldots, m_k^{(1)}$ (see below), the algorithm proceeds by alternating between two steps:

* <b>Assignment step:</b> Assign each observation to the cluster based on the closest centroid
* <b>Update step:</b> Calculate the new means (centroids) of the observations in the new clusters

The algorithm has converged when the assignments no longer change. The algorithm is not guaranteed to find the optimum.

In this example we will look at an animation of K-Means clustering using `bqplot` and `ipywidgets`. The UI provides the following controls:
* Slider for choosing the number of points (n)
* Slider for choosing the number of clusters (K)
* Slider for choosing the cluster standard deviation

Upon clicking the 'GO' button, the animation starts. Lloyd's algorithm is a heuristic algorithm: it is not guaranteed to find the optimum and can sometimes get stuck in a local minimum. Clicking the 'RETRY' button reassigns the centroids randomly and runs the algorithm again on the same points.

In [None]:
from time import sleep

import numpy as np
from sklearn.datasets import make_blobs

from ipywidgets import *

from bqplot import OrdinalColorScale, CATEGORY10
import bqplot.pyplot as plt

In [None]:
# --- UI controls -----------------------------------------------------------
# Sliders whose values are read by start_animation() each time it runs:
# number of points, number of clusters (K) and the blob standard deviation.
n_slider = IntSlider(description='points', value=150, min=20, max=300, step=10)
k_slider = IntSlider(description='K', value=3, min=2, max=10)
cluster_std_slider = FloatSlider(description='cluster std', value=.8, min=.5, max=3)

# Label showing the current iteration number; starts with an empty count
# and is re-formatted from the template on every iteration.
iter_label_tmpl = 'Iterations: {}'
iter_label = Label(value=iter_label_tmpl.format(''))
iter_label.layout.width = '300px'

# --- Figure ----------------------------------------------------------------
# NOTE(review): animation_duration is presumably in milliseconds, matching
# the one-second sleep between iterations in start_animation() -- confirm.
fig = plt.figure(title='K-Means Clustering', animation_duration=1000)
fig.layout.width = '1000px'
fig.layout.height = '800px'

# Ordinal color scale using the CATEGORY10 palette; the scatters below are
# colored by integer cluster index.
plt.scales(scales={'color': OrdinalColorScale(colors=CATEGORY10)})

# Axis labels for the two features; the color axis is hidden.
axes_options = {'x': {'label': 'X1'}, 
                'y': {'label': 'X2'}, 
                'color': {'visible': False}}

# scatter of 2D features (created empty; filled in by start_animation)
points_scat = plt.scatter([], [], color=[], stroke='black', axes_options=axes_options)

# scatter of centroids (larger markers with a thicker outline than the points)
centroid_scat = plt.scatter([], [], color=[], stroke_width=3, stroke='black',
                            default_size=400, axes_options=axes_options)

# --- Buttons ---------------------------------------------------------------
# RETRY starts disabled; start_animation() enables it once a run has finished.
go_btn = Button(description='GO', button_style='success', layout=Layout(width='50px'))
retry_btn = Button(description='RETRY', button_style='warning', 
                   layout=Layout(width='70px'), disabled=True)

def _assign_clusters(points, centroids):
    """Return, for each row of ``points``, the index of its closest centroid."""
    # broadcast (n, 1, d) against (K, d) to get an (n, K) matrix of distances
    distances = np.linalg.norm(points[:, np.newaxis, :] - centroids, axis=2)
    return np.argmin(distances, axis=1)


def _update_centroids(points, clusters, centroids):
    """Return the mean of every cluster, keeping the old centroid of empty ones.

    Taking the mean of an empty selection yields NaN, which would make the
    centroid disappear and prevent the convergence test from ever succeeding,
    so a cluster without members simply keeps its previous centroid.
    """
    new_centroids = centroids.copy()
    for k in range(len(centroids)):
        members = points[clusters == k]
        if len(members):
            new_centroids[k] = members.mean(axis=0)
    return new_centroids


def start_animation(generate_points=True, max_iter=10):
    """Run and animate K-Means on the scatter plots.

    Parameters
    ----------
    generate_points : bool
        If True, draw a fresh set of blobs using the current slider values
        and store it in the global ``X``. If False, reuse the existing ``X``
        and only re-pick the initial centroids.
    max_iter : int
        Maximum number of assignment/update iterations to animate.

    Both buttons are disabled while the animation runs and are restored
    afterwards, even if an error occurs part-way through.
    """
    global X
    go_btn.disabled = True
    retry_btn.disabled = True

    try:
        K = k_slider.value

        # generate 2D features made from K blobs
        if generate_points:
            X, _ = make_blobs(n_samples=n_slider.value, centers=K,
                              cluster_std=cluster_std_slider.value)

            # plot the points on a scatter chart
            with points_scat.hold_sync():
                points_scat.x = X[:, 0]
                points_scat.y = X[:, 1]

        # Use the size of the data actually held in X rather than the slider:
        # on a retry the slider may have been moved since X was generated.
        n = X.shape[0]

        # randomly pick K data points to be centroids
        random_indices = np.random.choice(np.arange(n), size=K, replace=False)
        centroids = X[random_indices]

        for i in range(max_iter):
            iter_label.value = iter_label_tmpl.format(i + 1)

            with centroid_scat.hold_sync():
                centroid_scat.x = centroids[:, 0]
                centroid_scat.y = centroids[:, 1]
                centroid_scat.color = np.arange(K)

            # assign clusters to points based on the closest centroid
            clusters = _assign_clusters(X, centroids)

            # color code the points by their clusters
            points_scat.color = clusters

            # compute new centroids from the clusters
            new_centroids = _update_centroids(X, clusters, centroids)

            if np.array_equal(centroids, new_centroids):
                # if centroids don't change we are done
                break

            # update the centroids and pause so the step is visible
            centroids = new_centroids
            sleep(1)
    finally:
        go_btn.disabled = False
        # a retry needs existing points, so only enable it once X exists
        retry_btn.disabled = 'X' not in globals()

# GO generates new points and runs the animation; RETRY keeps the current
# points and reruns with newly picked initial centroids. The lambdas discard
# the button argument passed to the click handler.
go_btn.on_click(lambda btn: start_animation())
retry_btn.on_click(lambda btn: start_animation(generate_points=False))

# Stack the sliders, the button row and the iteration label vertically.
controls_layout = VBox([n_slider, k_slider, cluster_std_slider, 
                        HBox([go_btn, retry_btn]), 
                        iter_label],
                       layout=Layout(overflow_x='hidden'))
controls_layout.layout.margin = '60px 0px 0px 0px'

# Figure on the left, controls on the right. As the last expression of the
# cell, this is presumably what the notebook renders as the cell output.
HBox([fig, controls_layout])