In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.metrics import silhouette_score

In [2]:
def compare_algorithms(X, max_clusters):
    """Compare KMeans, AgglomerativeClustering and DBSCAN by silhouette score.

    KMeans and AgglomerativeClustering are fitted once for every
    ``n_clusters`` in ``2 .. max_clusters`` (inclusive). DBSCAN is fitted once
    for every ``eps`` in ``np.arange(0.1, 0.9, 0.1)`` with ``min_samples=5``.

    Parameters
    ----------
    X : array-like
        Feature matrix handed to each estimator's ``fit_predict`` and to
        ``silhouette_score``.
    max_clusters : int
        Largest ``n_clusters`` tried for KMeans and AgglomerativeClustering.

    Returns
    -------
    pandas.DataFrame
        One row per scored configuration with the columns ``'Algorithm'``,
        ``'Parameter'`` and ``'Silhouette Score'``. ``'Parameter'`` holds
        ``n_clusters`` for KMeans / AgglomerativeClustering rows and ``eps``
        for DBSCAN rows. Rows are ordered KMeans, AgglomerativeClustering,
        DBSCAN. DBSCAN runs that find fewer than two real clusters are
        omitted.
    """
    results = []
    cluster_range = range(2, max_clusters + 1)

    # Estimators parameterised by a fixed cluster count share the same sweep.
    fixed_k_models = (
        ('KMeans',
         lambda k: KMeans(n_clusters=k, random_state=0, n_init='auto')),
        ('AgglomerativeClustering',
         lambda k: AgglomerativeClustering(n_clusters=k)),
    )
    for name, make_model in fixed_k_models:
        for n_clusters in cluster_range:
            labels = make_model(n_clusters).fit_predict(X)
            results.append([name, n_clusters, silhouette_score(X, labels)])

    # DBSCAN
    eps_values = np.arange(0.1, 0.9, 0.1)

    for eps in eps_values:
        labels = DBSCAN(eps=eps, min_samples=5).fit_predict(X)

        # DBSCAN labels noise as -1, which is not a cluster. Counting it would
        # let a "one cluster + noise" result through, and its silhouette only
        # measures cluster-vs-outliers separation, so require at least two
        # real clusters before scoring.
        n_real_clusters = np.count_nonzero(np.unique(labels) != -1)
        if n_real_clusters > 1:
            # Remaining noise points are still passed to silhouette_score,
            # where they form one group under the label -1.
            results.append(['DBSCAN', eps, silhouette_score(X, labels)])

    return pd.DataFrame(results, columns=['Algorithm', 'Parameter', 'Silhouette Score'])


In [6]:
# Load the Iris dataset and standardise its feature matrix before clustering.
iris = datasets.load_iris()
scaler = StandardScaler()
scaler_data = scaler.fit_transform(X=iris.data)

# Sweep all three algorithms, trying up to 10 clusters for the fixed-k ones.
results = compare_algorithms(X=scaler_data, max_clusters=10)

# Bare last expression so the notebook renders the comparison table.
results

Unnamed: 0,Algorithm,Parameter,Silhouette Score
0,KMeans,2.0,0.58175
1,KMeans,3.0,0.459948
2,KMeans,4.0,0.386941
3,KMeans,5.0,0.345511
4,KMeans,6.0,0.343719
5,KMeans,7.0,0.329236
6,KMeans,8.0,0.335194
7,KMeans,9.0,0.352488
8,KMeans,10.0,0.348607
9,AgglomerativeClustering,2.0,0.577035


In [10]:
# Highest silhouette score over every configuration that was scored.
silhouette_column = results['Silhouette Score']
max_score = silhouette_column.max()

# Show every row that attains the maximum (ties are all kept).
results.loc[silhouette_column.eq(max_score)]

Unnamed: 0,Algorithm,Parameter,Silhouette Score
0,KMeans,2.0,0.58175
