# Clustering

In [6]:
import numpy as np
import bokeh
import bokeh.io
import bokeh.layouts
import bokeh.models
import bokeh.palettes
import bokeh.plotting
import sklearn.datasets

from sklearn.cluster import KMeans, DBSCAN

In [7]:
# plotting code

# Configure Bokeh so that bokeh.io.show() renders figures inline in the notebook's cell outputs.
bokeh.io.output_notebook()

def _cluster_palette(n_colors):
    """Return ``n_colors`` colours taken from Bokeh's Set3 palettes.

    Set3 is only defined for a limited range of sizes, so smaller requests
    take a prefix of the smallest palette and larger requests cycle through
    the largest one. Sizes that Set3 supports directly are returned unchanged.
    """
    set3 = bokeh.palettes.Set3
    smallest, largest = min(set3), max(set3)
    if n_colors <= largest:
        return list(set3[max(n_colors, smallest)][:n_colors])
    base = set3[largest]
    return [base[i % len(base)] for i in range(n_colors)]


def plot_clusters(X, model):
    """Plot a fitted clustering model's regions with the samples drawn on top.

    A regular grid covering the bounding box of ``X`` (padded by 10 % of the
    span on every side) is labelled with ``model.predict`` and drawn as
    coloured squares; the samples themselves are drawn over it in black.

    Parameters
    ----------
    X : array-like of shape (n_samples, 2)
        Samples to display; column 0 is the horizontal axis and column 1 the
        vertical axis.
    model : fitted estimator
        Must provide ``predict`` returning non-negative integer labels
        (for example ``KMeans``). Estimators without ``predict``, such as
        ``DBSCAN``, raise ``AttributeError``.

    Returns
    -------
    None
        The figure is displayed with ``bokeh.io.show``.
    """
    X = np.asarray(X)
    margin = 0.1       # padding around the data, as a fraction of each axis span
    grid_steps = 120   # grid points per axis (1 % of the span per step)

    fig = bokeh.plotting.figure(
        tools="pan,wheel_zoom,box_zoom,reset",
        width=800,
        height=600
    )

    # linspace (rather than arange with a float step) gives a fixed number of
    # points and also copes with a feature whose values are all identical.
    axes = []
    for column in (X[:, 0], X[:, 1]):
        low, high = column.min(), column.max()
        pad = margin * (high - low)
        axes.append(np.linspace(low - pad, high + pad, grid_steps, endpoint=False))
    grid_x1, grid_x2 = np.meshgrid(axes[0], axes[1], indexing="ij")
    grid = np.column_stack([grid_x1.ravel(), grid_x2.ravel()])

    # One vectorised predict call for the whole grid instead of one per point.
    grid_labels = np.asarray(model.predict(grid))
    data_labels = np.asarray(model.predict(X))

    # Size the palette from every label that will be drawn, so that a cluster
    # appearing only on the grid cannot index past the end of the palette.
    n_labels = int(max(grid_labels.max(), data_labels.max())) + 1
    palette = _cluster_palette(n_labels)
    grid_colors = [palette[label] for label in grid_labels]

    # scatter(marker=...) replaces the deprecated square()/circle(size=...) glyph methods.
    fig.scatter(grid[:, 0], grid[:, 1], marker="square", color=grid_colors, size=10)
    fig.scatter(X[:, 0], X[:, 1], marker="circle", color='black', size=10)
    fig.add_layout(bokeh.models.LinearAxis(), 'right')
    fig.add_layout(bokeh.models.LinearAxis(), 'above')
    bokeh.io.show(fig)


In [8]:
# Load iris as pandas objects, then keep only the two sepal measurements
# as a plain (n_samples, 2) array so the clusters can be drawn in a plane.
X_iris, y_iris = sklearn.datasets.load_iris(as_frame=True, return_X_y=True)
X = X_iris.loc[:, ['sepal length (cm)', 'sepal width (cm)']].to_numpy()

In [9]:
# KMeans starts from randomly chosen centroids; fixing random_state makes the
# clustering (and the arbitrary cluster numbering printed below) reproducible.
model = KMeans(n_clusters=7, n_init='auto', random_state=0)
model.fit(X)
print(model.predict(X[0:3]))

[1 1 1]


In [10]:
# Draw the regions assigned by the model fitted above, with the iris samples overlaid.
plot_clusters(X, model)

In [11]:
# DBSCAN only labels the samples it was fitted on; it has no predict() for new points.
model = DBSCAN()
model.fit(X)
try:
    print(model.predict(X[0:3]))
except AttributeError as err:
    # The error is the point of this cell: show it, but catch it so that
    # "Restart & Run All" continues past this demonstration.
    print(f"{type(err).__name__}: {err}")

AttributeError: 'DBSCAN' object has no attribute 'predict'

In [12]:
# Without predict(), cluster and read back the training labels in one step via fit_predict().
model = DBSCAN()
print(model.fit_predict(X)[:3])

[0 0 0]


[scikit-learn user guide: Clustering](https://scikit-learn.org/stable/modules/clustering.html)

[Kanezaki, "Unsupervised Image Segmentation by Backpropagation" (ICASSP 2018)](https://kanezaki.github.io/pytorch-unsupervised-segmentation/ICASSP2018_kanezaki.pdf)