In [15]:
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score
from sklearn.preprocessing import StandardScaler
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import confusion_matrix
from sklearn.metrics import silhouette_score
from sklearn.datasets import load_digits
from sklearn.cluster import KMeans
import numpy as np

In [5]:
digits = load_digits()

X = digits.data
y = digits.target

X = StandardScaler().fit_transform(X)

In [19]:
Ks = range(2, 15)
inertias = []
silhouettes = []

for k in Ks:
    km = KMeans(n_clusters=k, n_init=20, random_state=42)
    labels = km.fit_predict(X)
    inertias.append(km.inertia_)
    result = silhouette_score(X, labels)
    print(f"K={k}, silhouette_score={result}")
    silhouettes.append(result)

K=2, silhouette_score=0.10563771488565656
K=3, silhouette_score=0.10543796242693222
K=4, silhouette_score=0.09543189191950129
K=5, silhouette_score=0.10229832879268379
K=6, silhouette_score=0.09878895847774896
K=7, silhouette_score=0.12199491492727683
K=8, silhouette_score=0.12659037328976036
K=9, silhouette_score=0.13074331026578428
K=10, silhouette_score=0.14232145865373522
K=11, silhouette_score=0.15319412999429807
K=12, silhouette_score=0.15432497021238875
K=13, silhouette_score=0.15652677762481143
K=14, silhouette_score=0.1566829738909186


In [16]:
aris = []
nmis = []
inertias = []

for i in range(10):
    km = KMeans(n_clusters=10, n_init=1, random_state=i)
    labels = km.fit_predict(X)

    inertias.append(km.inertia_)
    aris.append(adjusted_rand_score(y, labels))
    nmis.append(normalized_mutual_info_score(y, labels))

print("Inertia mean/std:", np.mean(inertias), np.std(inertias))
print("ARI mean/std:", np.mean(aris), np.std(aris))
print("NMI mean/std:", np.mean(nmis), np.std(nmis))

Inertia mean/std: 70703.54595799283 846.678675018618
ARI mean/std: 0.5306385594828958 0.052413849951397015
NMI mean/std: 0.66536840697101 0.03347102221433487


In [17]:
ari = adjusted_rand_score(y, labels)
nmi = normalized_mutual_info_score(y, labels)

print(f"ARI: {ari}\nNMI: {nmi}")

ARI: 0.5980787882264063
NMI: 0.7067180449876724


In [14]:
cm = confusion_matrix(y, labels)
row, col = linear_sum_assignment(-cm)
accuracy = cm[row, col].sum() / cm.sum()

print(f"accuracy: {accuracy}")

accuracy: 0.676126878130217
