# Simple Clustering Visualization with Scikit-learn and Bokeh

In [1]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn import cluster, datasets
from sklearn.neighbors import kneighbors_graph
from sklearn.preprocessing import StandardScaler

In [2]:
from bokeh.plotting import figure, output_notebook, show
from bokeh.palettes import Spectral6
from bokeh.models import ColumnDataSource

In [4]:
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(0)

In [5]:
# Configure Bokeh to render its output inline in the notebook.
output_notebook()

In [6]:
# Generate the dataset. The sample count is large enough to show how the
# algorithm scales, yet small enough to keep running times short.
n_samples = 1500
# make_circles returns a (samples, labels) pair, unpacked later as `X, y`.
noisy_circles = datasets.make_circles(n_samples=n_samples, factor=.5, noise=.05)

# Name of the clustering algorithm; also used as the plot title.
clustering_algorithm = 'MiniBatchKMeans'

In [7]:
# Empty data source; nothing in the visible cells reads from it.
source = ColumnDataSource(data=dict())

# Current Bokeh releases size a figure with `width`/`height` (the older
# `plot_width`/`plot_height` keywords were removed), and the title font size is
# a property of the figure's Title model rather than a `figure()` keyword.
plot = figure(width=400, height=400, title=clustering_algorithm)
plot.title.text_font_size = '10pt'

In [8]:
# Unpack the generated samples and their labels.
X, y = noisy_circles
# Standardize the features so parameter selection is easier.
X = StandardScaler().fit_transform(X)

# Fit a two-cluster MiniBatchKMeans model, then assign every sample a cluster.
two_means = cluster.MiniBatchKMeans(n_clusters=2)
y_pred = two_means.fit(X).predict(X)

# Map each predicted cluster index to a color from the palette.
colors = [Spectral6[label] for label in y_pred]

plot.circle(X[:, 0], X[:, 1], fill_color=colors, line_color=None)

<bokeh.models.renderers.GlyphRenderer at 0x107cc29b0>

In [9]:
# Display the figure that the earlier cells built and drew the glyphs on.
show(plot)