In [None]:
!pip install kmeans-pytorch

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from kmeans_pytorch import kmeans, kmeans_predict

In [None]:
# Seed NumPy's global random generator so the np.random draws made by the
# cells below repeat on a fresh run.
SEED = 123
np.random.seed(SEED)

In [None]:
# Problem size: number of points, their dimensionality, and the number of
# clusters requested from k-means further down.
data_size = 1000
dims = 2
num_clusters = 3

# Standard-normal samples scaled by 1/6, wrapped as a torch tensor.
x = torch.from_numpy(np.random.randn(data_size, dims) / 6)

In [None]:
# Inspect the generated points; the bare last expression is rendered as the cell output.
x

In [None]:
def plot_train(x, axis=(-1, 1, -1, 1)):
    """Scatter-plot 2-D points in a single gray color on a fresh figure.

    Args:
        x: Point collection indexable as ``x[:, 0]`` and ``x[:, 1]``; column 0
            is drawn on the horizontal axis and column 1 on the vertical axis.
        axis: Axis limits ``(xmin, xmax, ymin, ymax)`` forwarded to
            ``plt.axis``. Defaults to the square ``[-1, 1] x [-1, 1]``.
    """
    plt.figure(figsize=(8, 6), dpi=160)
    # A fixed color name needs no colormap: matplotlib ignores ``cmap`` (and
    # warns) when ``c`` carries no data to map, so it is not passed here.
    plt.scatter(x[:, 0], x[:, 1], c='tab:gray')
    plt.axis(axis)
    plt.tight_layout()
    plt.show()

In [None]:
# Draw the raw samples with plot_train's default axis limits.
plot_train(x=x)

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# Cluster x into num_clusters groups with Euclidean distance on the selected
# device; the call yields the cluster ids for x and the cluster centers.
cluster_ids_x, cluster_centers = kmeans(
    X=x,
    num_clusters=num_clusters,
    distance='euclidean',
    device=device,
)

In [None]:
# Print the cluster ids first and the cluster centers on the following line(s).
print(cluster_ids_x, cluster_centers, sep='\n')

In [None]:
# Five extra points drawn the same way as x (standard normal scaled by 1/6),
# wrapped as a torch tensor.
y = torch.from_numpy(np.random.randn(5, dims) / 6)

In [None]:
# Assign each point of y to one of the cluster centers computed above,
# using Euclidean distance on the selected device.
cluster_ids_y = kmeans_predict(
    y,
    cluster_centers,
    distance='euclidean',
    device=device,
)

In [None]:
# Show the cluster ids predicted for the extra points in y.
print(cluster_ids_y)

In [None]:
# plot
# Pin the color scale to the full range of cluster ids. Without explicit
# vmin/vmax each scatter call normalizes colors over only the ids it receives,
# so when the few points in y do not cover every cluster they would be drawn
# in colors that do not match the same clusters in x.
color_scale = dict(cmap='cool', vmin=0, vmax=num_clusters - 1)

fig, ax = plt.subplots(figsize=(8, 6), dpi=160)
# Training points, colored by their assigned cluster.
ax.scatter(x[:, 0], x[:, 1], c=cluster_ids_x, **color_scale)
# Extra points, drawn as white-edged X markers and colored by predicted cluster.
ax.scatter(
    y[:, 0], y[:, 1],
    c=cluster_ids_y,
    marker='X',
    edgecolors='white',
    **color_scale
)
# Cluster centers as translucent white discs with a black outline.
ax.scatter(
    cluster_centers[:, 0], cluster_centers[:, 1],
    c='white',
    alpha=0.6,
    edgecolors='black',
    linewidths=2
)
ax.axis([-1, 1, -1, 1])
fig.tight_layout()
plt.show()

In [None]:
from sklearn.datasets import make_blobs

In [None]:
# Three blobs of 300 points in total, generated with a fixed random_state.
blob_points, Y = make_blobs(n_samples=300, centers=3, random_state=0, cluster_std=0.5)

# Standardize with the mean and standard deviation taken over all entries
# (not per column), then wrap the result as a torch tensor.
X = torch.from_numpy((blob_points - blob_points.mean()) / blob_points.std())

# Axis limits used when plotting this dataset.
axis_range = [-2.5, 2.5, -2.5, 2.5]

In [None]:
# Draw the standardized blob points inside the wider axis limits.
plot_train(x=X, axis=axis_range)

In [None]:
# Cluster the blob data into three groups with Euclidean distance.
# Note: this rebinds cluster_ids_x and cluster_centers from the earlier run.
new_num_cluster = 3
cluster_ids_x, cluster_centers = kmeans(
    X=X,
    num_clusters=new_num_cluster,
    distance='euclidean',
    device=device,
)

In [None]:
# Blob points colored by assigned cluster, with the cluster centers on top.
fig, ax = plt.subplots(figsize=(8, 6), dpi=160)
ax.scatter(X[:, 0], X[:, 1], c=cluster_ids_x, cmap='cool')
# Centers: translucent white discs with a black outline.
center_style = dict(c='white', alpha=0.6, edgecolors='black', linewidths=2)
ax.scatter(cluster_centers[:, 0], cluster_centers[:, 1], **center_style)
ax.axis(axis_range)
fig.tight_layout()
plt.show()

### Another case
### K-means may not be consistent

In [None]:
size = 100
size_quar = 25
dim = 2

# One entry per quarter of the rows: (sign applied to the uniform x draw,
# x offset, y offset). The y draw is always halved before its offset is added.
quarter_specs = [
    (1, -0.1, 0.4),    # rows 0-24
    (-1, 0.1, 0.4),    # rows 25-49
    (-1, 0.1, -0.4),   # rows 50-74
    (1, -0.1, -0.4),   # rows 75-99
]

x_new = np.zeros((size, dim))
for quarter_idx, (x_sign, x_offset, y_offset) in enumerate(quarter_specs):
    first_row = quarter_idx * size_quar
    rows = slice(first_row, first_row + size_quar)
    # Draw x before y for each quarter so the random stream is consumed in a fixed order.
    x_new[rows, 0] = x_sign * np.random.random(size=size_quar) + x_offset
    x_new[rows, 1] = np.random.random(size=size_quar) / 2 + y_offset

x_new = torch.from_numpy(x_new)

In [None]:
# Draw the four-patch dataset with plot_train's default axis limits.
plot_train(x=x_new)

In [None]:
# Cluster the four-patch data into only two groups with Euclidean distance.
# Note: this rebinds cluster_ids_x and cluster_centers from the earlier runs.
new_num_cluster = 2
cluster_ids_x, cluster_centers = kmeans(
    X=x_new,
    num_clusters=new_num_cluster,
    distance='euclidean',
    device=device,
)

In [None]:
# Four-patch points colored by assigned cluster, with the cluster centers on top.
fig, ax = plt.subplots(figsize=(8, 6), dpi=160)
ax.scatter(x_new[:, 0], x_new[:, 1], c=cluster_ids_x, cmap='cool')
# Centers: translucent white discs with a black outline.
center_style = dict(c='white', alpha=0.6, edgecolors='black', linewidths=2)
ax.scatter(cluster_centers[:, 0], cluster_centers[:, 1], **center_style)
ax.axis([-1, 1, -1, 1])
fig.tight_layout()
plt.show()

### References
### https://github.com/subhadarship/kmeans_pytorch
### https://github.com/subhadarship/kmeans_pytorch/blob/master/example.ipynb