# Iris Clustering (Unsupervised Learning)

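This notebook demonstrates K-Means clustering on the Iris dataset for the intermediate-level unsupervised learning task: the features are standardized, the number of clusters is chosen with the elbow method and silhouette analysis, and the resulting clusters are compared against the actual species labels.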

In [None]:
# Import Required Libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the Iris Dataset
# The path is relative to the notebook's working directory.
iris_df = pd.read_csv('../Data Set For Tasks/Data Set For Task/1) iris.csv')
print(iris_df.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
iris_df.info()

In [None]:
# Preprocessing
# Build the feature matrix by removing the non-feature columns.
# 'species' must exist (a missing column still raises KeyError, as before).
# 'cluster' is written onto iris_df by the K-Means cell further down, so it is
# dropped here as well; errors='ignore' makes that a no-op on a fresh kernel.
# Without this, re-running this cell after clustering would leak the cluster
# labels into the features.
X = iris_df.drop(columns='species').drop(columns='cluster', errors='ignore')

# Standardize each feature before clustering so that no single feature
# dominates the distance computations because of its scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# Elbow Method to find optimal k
# Fit K-Means for k = 1..10 and record each fit's inertia (sum of squared
# distances of samples to their closest cluster center). The "elbow" where the
# curve stops dropping sharply suggests a reasonable number of clusters.
inertia = []
k_range = range(1, 11)
for k in k_range:
    # n_init is pinned explicitly: its default changed from 10 to 'auto' in
    # scikit-learn 1.4 (FutureWarning in 1.2-1.3), so relying on the default
    # gives version-dependent results.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(X_scaled)
    inertia.append(kmeans.inertia_)

plt.plot(k_range, inertia, marker='o')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method')
plt.show()

In [None]:
# Silhouette Score for different k
# The silhouette score is only defined for at least two clusters, so k starts
# at 2. The range is named once so the loop and the plot's x-axis cannot drift
# apart.
silhouette_k_range = range(2, 11)
silhouette_scores = []
for k in silhouette_k_range:
    # n_init is pinned explicitly: its default changed from 10 to 'auto' in
    # scikit-learn 1.4 (FutureWarning in 1.2-1.3), so relying on the default
    # gives version-dependent results.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    labels = kmeans.fit_predict(X_scaled)
    score = silhouette_score(X_scaled, labels)
    silhouette_scores.append(score)

plt.plot(silhouette_k_range, silhouette_scores, marker='o')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Silhouette Score')
plt.title('Silhouette Analysis')
plt.show()

In [None]:
# Apply K-Means with k=3
# n_init is pinned explicitly: its default changed from 10 to 'auto' in
# scikit-learn 1.4 (FutureWarning in 1.2-1.3), so relying on the default gives
# version-dependent cluster assignments.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
clusters = kmeans.fit_predict(X_scaled)

# Store the assignment on the original frame; the cross-tabulation cell reads
# iris_df['cluster']. Re-running this cell simply overwrites the column.
iris_df['cluster'] = clusters
print(iris_df.head())

In [None]:
# Visualize Clusters
# Project the scaled features onto their first two principal components so the
# cluster assignments can be drawn in a 2-D scatter plot.
# (PCA is imported with the other libraries in the import cell at the top.)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

plt.figure(figsize=(8, 6))
sns.scatterplot(x=X_pca[:, 0], y=X_pca[:, 1], hue=clusters, palette='viridis', s=100)
# Label the axes so the figure is readable on its own.
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('Clusters visualized with PCA')
plt.show()

In [None]:
# Compare Clusters with Actual Species
# Same PCA projection as the cluster plot, but colored by the true species
# label, so the two figures can be compared side by side.
plt.figure(figsize=(8, 6))
sns.scatterplot(x=X_pca[:, 0], y=X_pca[:, 1], hue=iris_df['species'], palette='viridis', s=100)
# Label the axes so the figure is readable on its own.
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('Actual Species visualized with PCA')
plt.show()

# Cross-tabulation
# Rows are the actual species, columns are the K-Means cluster ids. The ids are
# arbitrary labels, so a good clustering shows each species concentrated in a
# single column rather than on any particular diagonal.
print(pd.crosstab(iris_df['species'], iris_df['cluster']))