In [17]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_blobs
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN

%matplotlib notebook

In [18]:
# Synthetic data set: 500 two-feature samples generated around 5 centres,
# with a fixed random_state so the notebook is reproducible.
X, y = make_blobs(
    n_samples=500,
    n_features=2,
    centers=5,
    cluster_std=0.6,
    random_state=0,
)

# Lookup table with one RGBA row per distinct label, evenly spaced over the
# Spectral colormap; indexing it with a label array gives per-point colours.
colors = plt.cm.Spectral(np.linspace(0, 1, len(set(y))))

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(X[:, 0], X[:, 1], c=colors[y], s=20)

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x25d025e5760>

In [19]:
# Agglomerative clustering into 5 groups using complete linkage.
clf = AgglomerativeClustering(linkage='complete', n_clusters=5)
predicted = clf.fit_predict(X)

# Scatter of the samples coloured by their assigned cluster label.
fig, ax = plt.subplots(figsize=(8, 6))
ax.set_title("Complete clustering")
ax.scatter(X[:, 0], X[:, 1], c=colors[predicted], s=20)

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x25d0261fa90>

In [20]:
# Agglomerative clustering into 5 groups using single linkage.
clf = AgglomerativeClustering(linkage='single', n_clusters=5)
predicted = clf.fit_predict(X)

# Scatter of the samples coloured by their assigned cluster label.
fig, ax = plt.subplots(figsize=(8, 6))
ax.set_title("Single clustering")
ax.scatter(X[:, 0], X[:, 1], c=colors[predicted], s=20)

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x25d0175ab20>

In [21]:
# DBSCAN with the estimator's default parameters.
db = DBSCAN()
db.fit(X)
predicted = db.labels_

# The label -1 marks noise points, so it is left out of the cluster count.
is_noise = predicted == -1
n_noise_ = int(np.count_nonzero(is_noise))
n_clusters_ = len(set(predicted[~is_noise]))

print("Estimated number of clusters: %d" % n_clusters_)
print("Estimated number of noise points: %d" % n_noise_)

Estimated number of clusters: 5
Estimated number of noise points: 17


In [22]:
# DBSCAN plot
#
# Draws every DBSCAN cluster in its own colour, with core samples as larger
# markers and non-core samples as smaller ones; noise (label -1) is black.

unique_labels = set(predicted)

# Boolean mask that is True for the samples DBSCAN reported as core samples.
core_samples_mask = np.zeros_like(predicted, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True

# Keep the palette under a cell-specific name: rebinding the notebook-wide
# `colors` lookup array to a list here would make the earlier
# `colors[predicted]` cells raise TypeError when they are re-run.
dbscan_palette = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
noise_color = (0.0, 0.0, 0.0, 1.0)

plt.figure(figsize=(8, 6))
# Iterate labels in sorted order so the label -> colour pairing does not
# depend on set iteration order.
for k, col in zip(sorted(unique_labels), dbscan_palette):
    # Black used for noise.
    col = noise_color if k == -1 else tuple(col)

    class_member_mask = predicted == k

    # Core samples of this label: larger markers.
    xy = X[class_member_mask & core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], "o", markerfacecolor=col,
             markeredgecolor=col, markersize=4)

    # Remaining (non-core) samples of this label: smaller markers.
    xy = X[class_member_mask & ~core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], "o", markerfacecolor=col,
             markeredgecolor=col, markersize=3)

plt.title(f"Estimated number of clusters: {n_clusters_}")
plt.show()

<IPython.core.display.Javascript object>