# Clustering
Clustering is the task of grouping similar objects into sets (clusters). It is an unsupervised learning technique.

In [None]:
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import pairwise_distances_argmin
import matplotlib.pyplot as plt

### As a first step, we generate some random data points in 2D

In [None]:
# Seed NumPy's global random state before sampling; make_blobs below is
# called without an explicit random_state.
np.random.seed(0)

# Three 2D blob centres; the cluster count is derived from this list.
centers = [[1, 1], [-1, -1], [1, -1]]
n_clusters = len(centers)
batch_size = 45  # NOTE(review): not referenced by any cell visible here

X, labels_true = make_blobs(
    n_samples=3_000,
    centers=centers,
    cluster_std=0.7,
)

In [None]:
# Draw every sample on a single set of axes, with the ticks hidden.
fig, ax = plt.subplots()

sample_x, sample_y = X[:, 0], X[:, 1]
ax.plot(sample_x, sample_y, "w", markerfacecolor='k', marker=".")
ax.set(title="Data", xticks=(), yticks=());


### Now we use the K-Means clustering algorithm to find 3 cluster centers

In [None]:
# Use the cluster count derived from `centers` instead of a hard-coded 3, so
# the model stays in sync with the generated data and with the plotting loop,
# which also iterates over `n_clusters`.
k_means = KMeans(init="k-means++", n_clusters=n_clusters, n_init=10)
k_means.fit(X)

### We will use pairwise distances to assign each point of the dataset to its nearest cluster center

In [None]:
# Cluster centres from the fitted model (indexed per cluster when plotting).
k_means_cluster_centers = k_means.cluster_centers_

# For every sample in X, the index of the centre it is closest to.
k_means_labels = pairwise_distances_argmin(X, Y=k_means_cluster_centers)

In [None]:
fig, ax = plt.subplots()
colors = ["#4EACC5", "#FF9C34", "#4E9A06"]

# One pass per cluster: draw its member points in the cluster colour, then
# mark the cluster centre with a larger black-edged dot of the same colour.
for k, col in enumerate(colors[:n_clusters]):
    in_cluster = k_means_labels == k
    centroid = k_means_cluster_centers[k]
    centroid_style = dict(markerfacecolor=col, markeredgecolor="k", markersize=6)

    ax.plot(X[in_cluster, 0], X[in_cluster, 1], "w", markerfacecolor=col, marker=".")
    ax.plot(centroid[0], centroid[1], "o", **centroid_style)

ax.set(title="KMeans", xticks=(), yticks=());
