In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

# Build a synthetic 2-D dataset out of three groups of points:
# two blobs generated together plus a third blob that is linearly transformed.
n_samples = 1500

# 2x2 matrix applied to the third blob's coordinates.
transformation = [[1.2, -0.8], [-0.4, 1.7]]

# First two groups: one make_blobs call with two centers and per-blob spreads.
dataset = datasets.make_blobs(
    n_samples=n_samples,
    centers=2,
    center_box=(-7.0, 7.5),
    cluster_std=[1.4, 1.7],
    random_state=42,
)

# Third group: a single blob centered at (-4, -3); its own labels are discarded.
X_2, _ = datasets.make_blobs(
    n_samples=n_samples,
    centers=[[-4, -3]],
    cluster_std=[1.9],
    random_state=170,
)
X_2 = np.dot(X_2, transformation)

# Stack the features; labels come from make_blobs for the first two groups,
# and every point of the transformed blob is labelled 2.
X = np.concatenate((dataset[0], X_2))
y = np.concatenate((dataset[1], np.array([2] * len(X_2))))

In [2]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# First obtain the predicted clusters from the clustering method:
# fit() returns the estimator itself, so construction and fitting are chained.
kmeans = KMeans(n_clusters=3, random_state=42).fit(X)
kmeans_pred = kmeans.labels_

# Now compute the silhouette coefficient for that labelling
# (left as the last expression so the notebook displays it).
silhouette_score(X, kmeans_pred, metric='euclidean')

0.5131660482634046

In [4]:
from sklearn.mixture import GaussianMixture
# Gaussian mixture with three components; fit_predict fits the model and
# returns a component label for every sample in one call.
gm = GaussianMixture(
    n_components=3,
    random_state=42,
)
y_pred = gm.fit_predict(X)

# Silhouette coefficient of the mixture-model labelling
# (last expression, so the notebook displays it).
silhouette_score(X, y_pred, metric='euclidean')

0.3988405457243407

In [8]:
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN

# Fit four clustering algorithms on the same data and keep both the fitted
# estimator and its labels under notebook-level names.
k_means = KMeans(n_clusters=3, random_state=42).fit(X)
k_means_pred = k_means.labels_

gm = GaussianMixture(n_components=3, random_state=42)
gm_y_pred = gm.fit_predict(X)

ac = AgglomerativeClustering(n_clusters=3).fit(X)
ac_pred = ac.labels_

# NOTE(review): DBSCAN marks noise samples with the label -1, and
# silhouette_score treats every distinct label as a cluster, so any noise
# points are scored as one more cluster here — confirm this is intended.
dbscan = DBSCAN(eps=0.9, min_samples=35).fit(X)
dbscan_pred = dbscan.labels_

# Show the silhouette coefficient (rounded to 2 decimals) of each labelling,
# in the order: KMeans, GaussianMixture, AgglomerativeClustering, DBSCAN.
display(*[
    round(silhouette_score(X, cluster_labels, metric='euclidean'), 2)
    for cluster_labels in (k_means_pred, gm_y_pred, ac_pred, dbscan_pred)
])

0.51

0.4

0.48

0.45

In [12]:
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN

def best_n_clusters(make_estimator, X, candidates):
    """Pick the cluster count with the highest silhouette coefficient.

    Parameters
    ----------
    make_estimator : callable
        Called as ``make_estimator(k)``; must return an unfitted estimator
        that exposes ``fit_predict(X)`` and returns one label per sample.
    X : array-like
        Samples to cluster; the same array is passed to ``silhouette_score``.
    candidates : iterable of int
        Cluster counts to try, in iteration order.

    Returns
    -------
    tuple
        ``(best_k, best_score)``. If ``candidates`` is empty the initial
        values ``(0, -1)`` are returned.
    """
    best_k, best_score = 0, -1
    for k in candidates:
        labels = make_estimator(k).fit_predict(X)
        score = silhouette_score(X=X, labels=labels, metric='euclidean')
        # ">=" rather than ">": on an exact tie the later candidate wins.
        if score >= best_score:
            best_k, best_score = k, score
    return best_k, best_score


# Candidate numbers of clusters: 2..10 inclusive.
clusters = range(2, 11)

# Estimators are created and discarded inside the helper, so the sweep leaves
# no fitted models or label arrays behind in the notebook namespace.
k_means_c, k_means_s = best_n_clusters(
    lambda k: KMeans(n_clusters=k, random_state=42), X, clusters)
gm_c, gm_s = best_n_clusters(
    lambda k: GaussianMixture(n_components=k, random_state=42), X, clusters)
ac_c, ac_s = best_n_clusters(
    lambda k: AgglomerativeClustering(n_clusters=k), X, clusters)

# Best cluster count per algorithm: KMeans, GaussianMixture, Agglomerative.
display(k_means_c, gm_c, ac_c)

3

4

4

In [14]:
from sklearn.metrics.cluster import homogeneity_score
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

# NOTE(review): this rebinds X to its standardized version, so every cell
# executed after this one (including re-runs of earlier cells) sees scaled
# features — confirm that is intended.
X = StandardScaler().fit_transform(X)

# Refit the same four algorithms on the standardized features.
k_means = KMeans(n_clusters=3, random_state=42).fit(X)
k_means_pred = k_means.labels_

gm = GaussianMixture(n_components=3, random_state=42)
gm_y_pred = gm.fit_predict(X)

ac = AgglomerativeClustering(n_clusters=3).fit(X)
ac_pred = ac.labels_

# NOTE(review): eps=0.9 / min_samples=35 are the same values used on the
# unscaled data earlier in the notebook; the recorded homogeneity for DBSCAN
# is 0.0, which suggests they may not suit scaled features — TODO confirm.
dbscan = DBSCAN(eps=0.9, min_samples=35).fit(X)
dbscan_pred = dbscan.labels_

# Homogeneity of each labelling against the true labels y (rounded to 2
# decimals), in the order: KMeans, GaussianMixture, Agglomerative, DBSCAN.
display(*[
    round(homogeneity_score(y, predicted_labels), 2)
    for predicted_labels in (k_means_pred, gm_y_pred, ac_pred, dbscan_pred)
])

0.8

0.93

0.91

0.0