In [None]:
%load_ext nb_black

# Scikit-learn Clustering Animations

In [None]:
# Imports
import os
from itertools import cycle
from pathlib import Path

import imageio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import AffinityPropagation, MeanShift, estimate_bandwidth
from sklearn.datasets import make_blobs

In [None]:
# Plots in dark mode: apply matplotlib's "dark_background" style sheet so the
# figures created by the cells below are drawn on a dark canvas.
plt.style.use("dark_background")

# Output locations, all rooted at the directory the notebook is running in.
# They are kept as plain strings because later cells concatenate onto PATH.
PATH = os.getcwd()
MS_PATH = f"{PATH}/ms_cluster"  # frames of the Mean Shift animation
AF_PATH = f"{PATH}/af_cluster"  # frames of the Affinity Propagation animation

In [None]:
# Generate sample data: 10,000 two-dimensional points drawn around three
# blob centres. The random state is pinned so that a fresh
# "Restart & Run All" reproduces the same points (and therefore the same
# animation frames) every time.
centers = [[1, 1], [-1, -1], [1, -1]]
matrix_x, _ = make_blobs(
    n_samples=10000, centers=centers, cluster_std=0.6, random_state=0
)

## Mean Shift Clustering

In [None]:
def plot_ms_results(X, n_clusters_, labels, cluster_centers):
    """Draw a mean-shift clustering result on pyplot figure 1.

    Figure 1 is selected and cleared, then for every cluster index in
    ``range(n_clusters_)`` the member points are drawn as small dots and the
    cluster centre as a large circle in the same colour.

    Parameters
    ----------
    X : array indexed as ``X[mask, 0]`` / ``X[mask, 1]``
        Data points; only the first two columns are plotted.
    n_clusters_ : int
        Number of clusters to draw; also shown in the title.
    labels : array-like comparable with ``==`` against an int
        Cluster label per point (presumably a NumPy array aligned with ``X``;
        verify against caller).
    cluster_centers : indexable by cluster index
        ``cluster_centers[k][0]`` and ``[k][1]`` are the centre coordinates.

    Returns
    -------
    module
        The ``matplotlib.pyplot`` module itself, so callers can chain
        ``.savefig(...)`` on the current figure.
    """
    plt.figure(1)
    plt.clf()

    # Styling shared by every centre marker.
    center_style = {"markeredgecolor": "k", "markersize": 14}

    palette = cycle("bgrcmykbgrcmykbgrcmykbgrcmyk")
    for cluster_id in range(n_clusters_):
        color = next(palette)
        in_cluster = labels == cluster_id
        center = cluster_centers[cluster_id]

        # Member points as dots, then the centre as a large filled circle.
        plt.plot(X[in_cluster, 0], X[in_cluster, 1], color + ".")
        plt.plot(center[0], center[1], "o", markerfacecolor=color, **center_style)

    plt.title("Estimated number of clusters: %d" % n_clusters_)
    return plt

In [None]:
# Create folder for MS cluster plots
Path(MS_PATH).mkdir(parents=True, exist_ok=True)

# The bandwidth estimate does not depend on `itr`, so compute it once rather
# than repeating the same work on every pass of the loop.
bandwidth = estimate_bandwidth(matrix_x, quantile=0.2, n_samples=500)

# One frame per iteration cap: frame `itr` shows the clustering obtained when
# Mean Shift is limited to `max_iter=itr`.
for itr in range(0, 10):
    # Mean shift clustering
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True, max_iter=itr)
    ms.fit(matrix_x)

    cluster_centers = ms.cluster_centers_
    labels = ms.labels_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)

    # Plotting the results. Save into MS_PATH, the same directory that was
    # created above and that the GIF cell reads from, instead of a path
    # relative to whatever the working directory happens to be at save time.
    plot_ms_results(matrix_x, n_clusters_, labels, cluster_centers).savefig(
        os.path.join(MS_PATH, "ms_cluster_" + str(itr) + ".png"), dpi=600
    )

In [None]:
# Creating gif: gather every PNG frame in MS_PATH in sorted (lexicographic)
# filename order, read them, and write them out as one animated GIF.
filenames = sorted(
    os.path.join(MS_PATH, entry)
    for entry in os.listdir(MS_PATH)
    if entry.endswith(".png")
)
images = [imageio.imread(frame_path) for frame_path in filenames]
imageio.mimsave(PATH + "/ms_clustering.gif", images, duration=1)

## Affinity Propagation Clustering

In [None]:
# Load the sample data for this section from the local file "X.npy"
# (resolved relative to the current working directory).
# NOTE(review): this rebinds `matrix_x`, replacing the blobs generated for the
# Mean Shift section — re-running earlier cells afterwards will use this data.
# The origin of "X.npy" is not shown in this notebook; confirm where it comes from.
matrix_x = np.load("X.npy")

In [None]:
def plot_af_clustering_results(X, n_clusters_, labels, cluster_centers_indices):
    """Draw an affinity-propagation clustering result on a fresh figure 1.

    All open figures are closed first. For every cluster index in
    ``range(n_clusters_)`` the member points are drawn as dots, the exemplar
    as a large circle, and a line is drawn from the exemplar to each member.

    Parameters
    ----------
    X : array indexed as ``X[mask, 0]`` / ``X[mask, 1]`` and ``X[i]``
        Data points; only the first two columns are plotted.
    n_clusters_ : int
        Number of clusters to draw; also shown in the title.
    labels : array-like comparable with ``==`` against an int
        Cluster label per point (presumably a NumPy array aligned with ``X``;
        verify against caller).
    cluster_centers_indices : indexable by cluster index
        ``cluster_centers_indices[k]`` is the row of ``X`` used as the centre
        of cluster ``k``.

    Returns
    -------
    module
        The ``matplotlib.pyplot`` module itself, so callers can chain
        ``.savefig(...)`` on the current figure.
    """
    plt.close("all")
    plt.figure(1)
    plt.clf()

    # Styling shared by every exemplar marker.
    exemplar_style = {"markeredgecolor": "k", "markersize": 14}

    palette = cycle("bgrcmykbgrcmykbgrcmykbgrcmyk")
    for cluster_id in range(n_clusters_):
        color = next(palette)
        in_cluster = labels == cluster_id
        exemplar = X[cluster_centers_indices[cluster_id]]

        plt.plot(X[in_cluster, 0], X[in_cluster, 1], color + ".")
        plt.plot(
            exemplar[0], exemplar[1], "o", markerfacecolor=color, **exemplar_style
        )

        # Connect the exemplar to each of its members with a line segment.
        for member in X[in_cluster]:
            plt.plot([exemplar[0], member[0]], [exemplar[1], member[1]], color)

    plt.title("Estimated number of clusters: %d" % n_clusters_)
    return plt

In [None]:
# Create folder for AF cluster plots
Path(AF_PATH).mkdir(parents=True, exist_ok=True)

# One frame per setting: frame `itr` shows the clustering obtained with
# `convergence_iter=itr`.
for itr in range(1, 10):
    # Affinity propagation clustering
    af = AffinityPropagation(convergence_iter=itr).fit(matrix_x)
    cluster_centers_indices = af.cluster_centers_indices_
    labels = af.labels_
    n_clusters_ = len(cluster_centers_indices)

    # Plotting the results. Save into AF_PATH, the same directory that was
    # created above and that the GIF cell reads from, instead of a path
    # relative to whatever the working directory happens to be at save time.
    plot_af_clustering_results(
        matrix_x, n_clusters_, labels, cluster_centers_indices
    ).savefig(os.path.join(AF_PATH, "af_cluster_" + str(itr) + ".png"), dpi=600)

In [None]:
# Creating gif: gather every PNG frame in AF_PATH in sorted (lexicographic)
# filename order, read them, and write them out as one animated GIF.
filenames = sorted(
    os.path.join(AF_PATH, entry)
    for entry in os.listdir(AF_PATH)
    if entry.endswith(".png")
)
images = [imageio.imread(frame_path) for frame_path in filenames]
imageio.mimsave(PATH + "/af_clustering.gif", images, duration=1)