In [51]:
# Bibliotecas para carregar os dados
from sklearn.datasets import load_iris
import numpy as np
import pandas as pd

# Gráfico 
import plotly.express as px
from sklearn.decomposition import PCA

# ML
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import optuna

# Interface
import gradio as gr

In [52]:
# 1. Load the Iris dataset and pull out the feature matrix and target array
iris = load_iris()
X, y = iris.data, iris.target

In [53]:
# 2. Standardize the features; the fitted scaler object is kept in `scaler`
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:

# 3. Baseline KMeans clustering with three clusters on the scaled features
kmeans = KMeans(n_clusters=3, random_state=42, n_init='auto').fit(X_scaled)
labels = kmeans.labels_


In [None]:
# 4. Build the DataFrame used for visualization
# Compute the 2-D PCA projection inside this cell so that it does not depend
# on a later cell having been executed first; on a fresh kernel, X_pca would
# otherwise be undefined here and the cell would fail with a NameError.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# NOTE(review): assumes px.data.iris() returns its rows in the same order as
# load_iris(), so PCA coordinates and cluster labels line up row by row — confirm.
df_plot = px.data.iris().assign(
    PCA1=X_pca[:, 0],
    PCA2=X_pca[:, 1],
    # Cast to str so Plotly treats the cluster id as a discrete category
    # instead of a continuous colour scale.
    Cluster=labels.astype(str),
)

# 5. Interactive scatter plot with Plotly
fig = px.scatter(
    df_plot,
    x='PCA1',
    y='PCA2',
    color='Cluster',
    title='Clusters no Conjunto Iris (KMeans + PCA)',
    labels={'Cluster': 'Grupo'},
    hover_data=['species']
)

fig.show()

In [None]:
# 6. Dimensionality reduction to 2D
# Fit PCA on the standardized features and keep two components; the columns
# X_pca[:, 0] and X_pca[:, 1] are the coordinates used as the scatter-plot axes.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

In [None]:
# 7. Objective function for Optuna
def objective(trial):
    """Score one candidate cluster count for the Optuna study.

    Args:
        trial: Optuna trial used to sample the integer parameter
            ``n_clusters`` between 2 and 10.

    Returns:
        The silhouette score of a KMeans clustering of ``X_scaled`` fitted
        with the sampled number of clusters.
    """
    k = trial.suggest_int('n_clusters', 2, 10)
    assignments = KMeans(
        n_clusters=k, random_state=42, n_init='auto'
    ).fit_predict(X_scaled)
    return silhouette_score(X_scaled, assignments)

In [None]:
# 8. Run the optimization
# Seed the sampler explicitly: without a seed the sequence of suggested
# n_clusters values differs on every kernel restart, so the trial log
# (and potentially which values get explored) is not reproducible.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=30)

[I 2025-06-24 22:04:05,076] A new study created in memory with name: no-name-a8c512e8-8e05-4f2c-9964-dfc2fba4e1fc
[I 2025-06-24 22:04:05,101] Trial 0 finished with value: 0.5817500491982808 and parameters: {'n_clusters': 2}. Best is trial 0 with value: 0.5817500491982808.
[I 2025-06-24 22:04:05,120] Trial 1 finished with value: 0.4798814508199817 and parameters: {'n_clusters': 3}. Best is trial 0 with value: 0.5817500491982808.
[I 2025-06-24 22:04:05,144] Trial 2 finished with value: 0.3339432304006177 and parameters: {'n_clusters': 6}. Best is trial 0 with value: 0.5817500491982808.
[I 2025-06-24 22:04:05,155] Trial 3 finished with value: 0.32439914389835456 and parameters: {'n_clusters': 9}. Best is trial 0 with value: 0.5817500491982808.
[I 2025-06-24 22:04:05,168] Trial 4 finished with value: 0.3339432304006177 and parameters: {'n_clusters': 6}. Best is trial 0 with value: 0.5817500491982808.
[I 2025-06-24 22:04:05,183] Trial 5 finished with value: 0.3339432304006177 and parameters

In [None]:
# 9. Best number of clusters found by the study
best_n_clusters = study.best_trial.params['n_clusters']
print(f"Melhor número de clusters: {best_n_clusters}")

Melhor número de clusters: 2


In [None]:
# 10. Refit KMeans with the best cluster count and keep its assignments
best_kmeans = KMeans(
    n_clusters=best_n_clusters, random_state=42, n_init='auto'
).fit(X_scaled)
final_labels = best_kmeans.labels_

In [None]:
# 11. Evaluate the final clustering with the silhouette score
sil_score = silhouette_score(X=X_scaled, labels=final_labels)
print(f"Silhouette Score final: {sil_score:.4f}")

Silhouette Score final: 0.5818


### Interface

In [62]:
def clusterizar(n_clusters):
    """Cluster the scaled Iris features and report the silhouette score.

    Used as the Gradio callback for the cluster-count slider.

    Args:
        n_clusters: Requested number of clusters. May arrive as a float
            (Gradio documents slider values as floats), so it is coerced
            to ``int`` before being handed to KMeans.

    Returns:
        A display string with the silhouette score formatted to four
        decimal places.
    """
    # KMeans validates n_clusters as an integer; a float such as 3.0 coming
    # from the UI would otherwise be rejected.
    n_clusters = int(n_clusters)
    model = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
    model.fit(X_scaled)  # X_scaled is defined in an earlier cell
    score = silhouette_score(X_scaled, model.labels_)
    return f"Silhouette Score: {score:.4f}"

In [63]:
# Gradio UI: a single slider selects the cluster count, and the text output
# shows whatever string `clusterizar` returns for that value.
iface = gr.Interface(
    title="KMeans com Iris Dataset",
    description="Teste o agrupamento KMeans com diferentes números de clusters",
    fn=clusterizar,
    inputs=gr.Slider(label="Número de Clusters", minimum=2, maximum=10, step=1),
    outputs="text",
)

# Start the local web server for the interface.
iface.launch()

* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.


