In [1]:
import pandas as pd
import numpy as np

from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

from sklearn.cluster import KMeans, AgglomerativeClustering

from sklearn.metrics import silhouette_score, davies_bouldin_score


# Load the Iris dataset and keep only its feature matrix as a labelled
# DataFrame; the target values are not used anywhere in this cell.
data = load_iris()
X = pd.DataFrame(data=data.data, columns=data.feature_names)

# Write a CSV copy of the features (header row, no index column) to the
# current working directory.
X.to_csv("iris.csv", index=False)

# Preview: first five rows followed by the (n_rows, n_columns) shape.
print(X.head(), X.shape, sep="\n")


# Standardise each feature column with StandardScaler's defaults (centre on
# the mean, scale to unit variance) so the features share a common scale
# before clustering. The fitted scaler is kept in `scaler` for later reuse.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)



# Shared settings, named once so that both algorithms are always compared at
# the same number of clusters and the K-Means initialisation is reproducible.
N_CLUSTERS = 3
RANDOM_STATE = 42
KMEANS_N_INIT = 10

# Candidate clustering algorithms, keyed by the label shown in the report.
models = {
    "K-Means": KMeans(
        n_clusters=N_CLUSTERS,
        random_state=RANDOM_STATE,
        n_init=KMEANS_N_INIT,
    ),
    "Hierarchical": AgglomerativeClustering(n_clusters=N_CLUSTERS),
}

# Maps model label -> [silhouette score, Davies-Bouldin score]. The order of
# the two values must match the metric labels supplied when `results_df` is
# built from this dict.
results = {}

for name, model in models.items():
    # Fit on the standardised features and obtain one cluster label per row.
    labels = model.fit_predict(X_scaled)

    # Both metrics are internal (they need no ground-truth classes).
    # Silhouette: higher is better. Davies-Bouldin: lower is better.
    sil_score = silhouette_score(X_scaled, labels)
    db_score = davies_bouldin_score(X_scaled, labels)

    results[name] = [sil_score, db_score]


# Build the comparison table directly in its final orientation: one row per
# model (the dict keys) and one column per metric (the positions inside each
# value list).
results_df = pd.DataFrame.from_dict(
    results,
    orient="index",
    columns=["Silhouette Score", "Davies-Bouldin Score"],
)

# Heading (surrounded by blank lines) followed by the table.
print("\nClustering Model Comparison:\n", results_df, sep="\n")


   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2
(150, 4)

Clustering Model Comparison:

              Silhouette Score  Davies-Bouldin Score
K-Means               0.459948              0.833595
Hierarchical          0.446689              0.803467
