In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.metrics import silhouette_score

In [None]:
def compare_algorithms(X, max_clusters):
  """Compare KMeans, Agglomerative and DBSCAN clusterings of X by silhouette score.

  KMeans and AgglomerativeClustering are fitted once for every cluster count
  from 2 up to ``max_clusters``. DBSCAN is fitted once for every ``eps`` value
  produced by ``np.arange(0.1, 0.9, 0.1)`` with ``min_samples=5``.

  Args:
    X: Dense array-like of samples (rows) by features (columns).
    max_clusters: Largest cluster count to try for KMeans / Agglomerative.
      Capped at ``len(X) - 1`` because the silhouette score is undefined
      beyond that.

  Returns:
    A list of ``(algorithm_name, parameter, silhouette)`` tuples. For KMeans
    and Agglomerative ``parameter`` is the number of clusters; for DBSCAN it
    is the ``eps`` value that was used. DBSCAN runs that do not yield at
    least two real clusters are omitted.

  Note:
    DBSCAN silhouettes are computed over the non-noise points only, so a run
    that discards many points as noise can look better than it really is.
  """
  # Stores the results of every algorithm
  results = []

  # silhouette_score needs 2 <= n_labels <= n_samples - 1, so asking for more
  # clusters than that would only raise.
  largest_k = min(max_clusters, len(X) - 1)
  cluster_range = range(2, largest_k + 1)

  # KMeans
  for n_clusters in cluster_range:
    kmeans = KMeans(n_clusters=n_clusters, random_state=0, n_init='auto')
    clusters = kmeans.fit_predict(X)
    silhouette_avg = silhouette_score(X, clusters)
    results.append(('KMeans', n_clusters, silhouette_avg))

  # Agglomerative
  for n_clusters in cluster_range:
    agglomerative = AgglomerativeClustering(n_clusters=n_clusters)
    clusters = agglomerative.fit_predict(X)
    silhouette_avg = silhouette_score(X, clusters)
    results.append(('Agglomerative', n_clusters, silhouette_avg))

  # DBSCAN -> eps = maximum distance between two points for them to be neighbours
  X_array = np.asarray(X)  # needed so the noise mask below can index rows
  for raw_eps in np.arange(0.1, 0.9, 0.1):
    # np.arange accumulates float error (e.g. 0.30000000000000004); keep the
    # reported parameter clean.
    eps = round(float(raw_eps), 1)
    dbscan = DBSCAN(eps=eps, min_samples=5)
    clusters = dbscan.fit_predict(X)

    # DBSCAN marks noise with the label -1. Noise is not a cluster, so it must
    # not be counted as one nor fed to the silhouette as if it were.
    assigned = clusters != -1
    n_assigned = int(assigned.sum())
    n_found = len(set(clusters[assigned]))
    if 2 <= n_found <= n_assigned - 1:
      silhouette_avg = silhouette_score(X_array[assigned], clusters[assigned])
      results.append(('DBSCAN', eps, silhouette_avg))

  return results


In [None]:
# Load the Iris dataset and standardize its features before clustering
iris = datasets.load_iris()
scaler = StandardScaler()
scaler.fit(iris.data)
scaled_data = scaler.transform(iris.data)

# Evaluate every algorithm, trying up to 10 clusters
results = compare_algorithms(X=scaled_data, max_clusters=10)

# Build a DataFrame to make the results easier to inspect
df = pd.DataFrame(data=results, columns=['Agrupador', 'Clusters', 'Score'])
df

In [None]:
# Find the clustering configuration with the highest silhouette score
score_column = df['Score']
max_score_index = score_column.idxmax()
df.loc[max_score_index]