In [3]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.mixture import GaussianMixture
from scipy.optimize import linear_sum_assignment

data = load_iris()
X = StandardScaler().fit_transform(data.data)
y = data.target

def align_labels(y_true, y_pred):
    D = max(y_pred.max(), y_true.max()) + 1
    cost = np.zeros((D, D), dtype=int)
    for i in range(len(y_pred)):
        cost[y_pred[i], y_true[i]] += 1
    r, c = linear_sum_assignment(cost.max() - cost)
    mapping = dict(zip(r, c))
    return np.array([mapping.get(label, label) for label in y_pred])

def evaluate(y_true, y_pred):
    aligned = align_labels(y_true, y_pred)
    return [
        (aligned == y_true).mean(),
        precision_score(y_true, aligned, average='macro'),
        recall_score(y_true, aligned, average='macro'),
        f1_score(y_true, aligned, average='macro')
    ]

best_name, best_metrics = None, [0, 0, 0, 0]

def update(name, y_true, y_pred):
    global best_name, best_metrics
    m = evaluate(y_true, y_pred)
    if m[0] > best_metrics[0]:
        best_name, best_metrics = name, m

km_params = {'n_clusters': [2, 3, 4, 5, 6], 'init': ['k-means++', 'random'], 'n_init': [10, 20]}
km_best = GridSearchCV(KMeans(), km_params, cv=3).fit(X, y).best_estimator_
update('KMeans', y, km_best.fit_predict(X))

gmm_params = {'n_components': [2, 3, 4, 5, 6], 'covariance_type': ['full', 'tied', 'diag', 'spherical']}
gmm_best = RandomizedSearchCV(GaussianMixture(), gmm_params, cv=3, n_iter=10).fit(X, y).best_estimator_
update('GMM', y, gmm_best.fit_predict(X))

for n in range(2, 7):
    update(f'Hierarchical_{n}', y, AgglomerativeClustering(n_clusters=n).fit_predict(X))

for eps in [0.3, 0.5, 0.7, 0.9, 1.1]:
    d_pred = DBSCAN(eps=eps).fit_predict(X)
    if len(set(d_pred)) - (-1 in d_pred) < 2:
        continue
    mask = d_pred != -1
    update(f'DBSCAN_{eps}', y[mask], d_pred[mask])

print(best_name, best_metrics)


  _warn_prf(average, modifier, msg_start, len(result))


Hierarchical_3 [0.8266666666666667, 0.8586854460093897, 0.8266666666666667, 0.8194291398836854]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
