# 🌸 Iris Dataset Segmentation Example

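In this notebook, we demonstrate **clustering and segmentation** on the classic Iris dataset. We'll:

1. Standardize the features and cluster the flowers using **K-Means**
2. Profile each resulting segment by its average feature values
3. Interpret the groups by comparing them with the true species (Adjusted Rand Index) and visualizing them in PCA space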


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import adjusted_rand_score

In [None]:
# Load the Iris data and assemble one table holding features plus true labels
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(
    true_species=iris.target,
    true_species_name=pd.Categorical.from_codes(iris.target, iris.target_names),
)

# Standardize the feature columns so every measurement contributes on the
# same scale to the distance computations used by clustering
feature_scaler = StandardScaler()
X_scaled = feature_scaler.fit_transform(df[iris.feature_names])

In [None]:
# Apply KMeans clustering.
# n_init is pinned explicitly: scikit-learn's default changed from 10 to 'auto'
# (a single run for k-means++ init) in version 1.4, with a FutureWarning in
# 1.2-1.3. Relying on the default makes the labels -- and the ARI below --
# depend on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
df['cluster'] = kmeans.fit_predict(X_scaled)

# Compare with true labels. The Adjusted Rand Index is invariant to label
# permutation, so the arbitrary cluster ids do not need to match species codes.
ari = adjusted_rand_score(df['true_species'], df['cluster'])
print(f'Adjusted Rand Index (vs. true species): {ari:.3f}')

In [None]:
# Per-cluster profile: average of every feature within each segment
features_by_cluster = df.groupby('cluster')[iris.feature_names]
cluster_profile = features_by_cluster.mean()
display(cluster_profile)

In [None]:
# Project the standardized features onto their first two principal components
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
df['PC1'], df['PC2'] = X_pca[:, 0], X_pca[:, 1]

# Colour encodes the K-Means cluster; marker shape encodes the true species
fig, ax = plt.subplots(figsize=(6, 5))
sns.scatterplot(
    data=df,
    x='PC1',
    y='PC2',
    hue='cluster',
    style='true_species_name',
    palette='tab10',
    s=60,
    ax=ax,
)
ax.set_title('K-Means Clusters vs. True Species in PCA Space')
ax.grid(True)
# Anchor the legend outside the axes (to the right) so it never hides points
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()
plt.show()