# Clustering Tool Demonstration

This demo shows how you can apply KMeans to cluster data in an unsupervised fashion. It runs on two generated toy datasets: blobs and moons.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn import datasets

In [None]:
def plot_dataset(plot_handle, X, y, n_classes, centers=None):
    """Scatter-plot a labelled 2-D dataset, one marker style per class.

    Args:
        plot_handle: Object exposing ``plot(x, y, fmt)`` and ``show()``,
            e.g. ``matplotlib.pyplot``.
        X: 2-D array-like of samples; column 0 is drawn on the x axis and
            column 1 on the y axis.
        y: 1-D array-like of integer labels, one per row of ``X``.
        n_classes: Number of classes to draw; labels ``0 .. n_classes - 1``
            are plotted, any other label is ignored.
        centers: Optional iterable of points (indexable as ``c[0]``,
            ``c[1]``) drawn with the ``'k*'`` format on top of the data.
    """
    colors = ['rs', 'bs', 'gs', 'ys', 'ks']

    # Coerce to arrays so plain Python lists work with the boolean masking
    # below (``list == int`` would otherwise be a single bool, not a mask).
    X = np.asarray(X)
    y = np.asarray(y)

    for i in range(n_classes):
        mask = y == i
        # Cycle through the marker styles so more than len(colors) classes
        # re-use styles instead of raising IndexError.
        plot_handle.plot(X[mask, 0], X[mask, 1], colors[i % len(colors)])

    if centers is not None:
        for c in centers:
            plot_handle.plot(c[0], c[1], 'k*')

    plot_handle.show()

In [None]:
# Number of samples drawn for each toy dataset.
n = 1000

# Seed both generators so the generated data is identical on every run;
# without a seed each execution of the demo produces different points.
X_moons, y_moons = datasets.make_moons(n_samples=n, noise=.1, random_state=0)
X_blobs, y_blobs = datasets.make_blobs(n_samples=n, cluster_std=2.0, centers=4, random_state=0)

In [None]:
# Show the moons data, coloured by the labels it was generated with.
fig1 = plt.figure(1, figsize=(8, 5))
plot_dataset(plot_handle=plt, X=X_moons, y=y_moons, n_classes=2)

In [None]:
# Show the blob data, coloured by the labels it was generated with.
fig2 = plt.figure(2, figsize=(8, 5))
plot_dataset(plot_handle=plt, X=X_blobs, y=y_blobs, n_classes=4)

In [None]:
# Experiment with KMeans clustering.
from sklearn.cluster import KMeans

In [None]:
# Fit a KMeans model to the blob data; n_clusters=4 matches the `centers=4`
# used when the blobs were generated. Only X_blobs is passed in, so the
# generating labels y_blobs play no part in the fit.
kmeans = KMeans(n_clusters=4, tol=1e-3, n_init=20)
kmeans.fit(X_blobs)

In [None]:
# Assign every blob sample to a cluster of the fitted model.
# NOTE: these are cluster indices chosen by KMeans; they are not expected to
# line up with the numbering of the generating labels in y_blobs.
y_pred = kmeans.predict(X_blobs)

In [None]:
# Show the blob data again, this time coloured by the KMeans assignment and
# with the fitted cluster centers drawn on top.
fig3 = plt.figure(3, figsize=(8, 5))
plot_dataset(
    plot_handle=plt,
    X=X_blobs,
    y=y_pred,
    n_classes=4,
    centers=kmeans.cluster_centers_,
)

In [None]:
# How about the moons? Run the same experiment, asking for two clusters.
kmeans_moons = KMeans(n_clusters=2, tol=1e-3, n_init=20).fit(X_moons)
y_moons_pred = kmeans_moons.predict(X_moons)

# Plot the moons coloured by cluster assignment, with the fitted centers on top.
fig4 = plt.figure(4, figsize=(8, 5))
plot_dataset(
    plot_handle=plt,
    X=X_moons,
    y=y_moons_pred,
    n_classes=2,
    centers=kmeans_moons.cluster_centers_,
)

Clearly, you need to understand the shape of your data before throwing KMeans (or really any clustering algorithm) at it.
The `moon` toy dataset is in general more challenging for simple unsupervised and supervised techniques.