In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, davies_bouldin_score
from scipy.cluster.hierarchy import dendrogram, linkage

# Load Iris dataset
iris = load_iris()
iris_data = pd.DataFrame(data=iris.data, columns=iris.feature_names)
# 'species' holds the integer class codes taken from iris.target.
iris_data['species'] = iris.target

# EDA for Iris Dataset
sns.pairplot(iris_data, hue='species')
# pairplot draws a grid of axes, so plt.title() would only label the last
# subplot. Title the whole figure instead; y > 1 keeps it above the top row.
plt.suptitle('Iris Dataset Pair Plot', y=1.02)
plt.show()

# Load Mall Customers dataset
mall_data = pd.read_csv('Mall_Customers.csv')
# A bare expression is rendered only when it is the last line of a cell, so the
# preview has to be printed explicitly here.
print(mall_data.head())

# EDA for Mall Customers Dataset
# One box per feature: passing the continuous income column as `x` would create
# a separate box for every distinct income value and produce an unreadable plot.
plt.figure(figsize=(10, 5))
sns.boxplot(data=mall_data[['Annual Income (k$)', 'Spending Score (1-100)']])
plt.title('Boxplot of Annual Income vs Spending Score')
plt.show()

# Check correlations
# Restrict to numeric columns before correlating: DataFrame.corr() raises on
# non-numeric data in pandas >= 2.0.
# NOTE(review): presumably the CSV has at least one text column (e.g. gender) -
# confirm against the file.
correlation_matrix = mall_data.select_dtypes(include='number').corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix for Mall Customers Dataset')
plt.show()


In [None]:
# K-Means clustering on both datasets: elbow scan, final fit, silhouette score.

RANDOM_STATE = 42              # seed shared by every KMeans fit in this cell
K_CANDIDATES = range(1, 11)    # cluster counts scanned by the elbow method
# Pin the number of centroid initialisations: the scikit-learn default changed
# between releases, so leaving it implicit makes results version-dependent.
N_INIT = 10
IRIS_K = 3                     # chosen from the Iris elbow plot
MALL_K = 5                     # chosen from the Mall Customers elbow plot


def _elbow_wcss(features, k_values=K_CANDIDATES):
    """Return the K-Means inertia (WCSS) of `features` for each k in `k_values`.

    Parameters
    ----------
    features : array-like accepted by ``KMeans.fit``
    k_values : iterable of int
        Cluster counts to try, in order.

    Returns
    -------
    list of float
        One inertia value per entry of `k_values`, in the same order.
    """
    return [
        KMeans(n_clusters=k, n_init=N_INIT, random_state=RANDOM_STATE)
        .fit(features)
        .inertia_
        for k in k_values
    ]


def _plot_elbow(k_values, wcss_values, title):
    """Plot WCSS against the number of clusters and show the figure."""
    plt.figure(figsize=(8, 4))
    plt.plot(k_values, wcss_values)
    plt.title(title)
    plt.xlabel('Number of Clusters')
    plt.ylabel('WCSS')
    plt.show()


# K-Means on Iris Dataset
# Drop every non-feature column. This cell adds 'kmeans_cluster' to iris_data
# further down, so dropping only 'species' would feed the previous run's labels
# back in as a feature when the cell is re-executed.
X_iris = iris_data.drop(columns=['species', 'kmeans_cluster'], errors='ignore')

# Elbow Method
wcss = _elbow_wcss(X_iris)
_plot_elbow(K_CANDIDATES, wcss, 'Elbow Method for Optimal K - Iris Dataset')

# Choose optimal K (e.g., K=3 based on elbow)
kmeans_iris = KMeans(n_clusters=IRIS_K, n_init=N_INIT, random_state=RANDOM_STATE)
iris_data['kmeans_cluster'] = kmeans_iris.fit_predict(X_iris)

# Silhouette Score
silhouette_iris = silhouette_score(X_iris, iris_data['kmeans_cluster'])
print(f'Silhouette Score for K-Means (Iris): {silhouette_iris}')

# K-Means on Mall Customers Dataset
# Explicit column selection, so this part is already safe to re-run.
X_mall = mall_data[['Annual Income (k$)', 'Spending Score (1-100)']]

wcss_mall = _elbow_wcss(X_mall)
_plot_elbow(K_CANDIDATES, wcss_mall, 'Elbow Method for Optimal K - Mall Customers Dataset')

kmeans_mall = KMeans(n_clusters=MALL_K, n_init=N_INIT, random_state=RANDOM_STATE)
mall_data['kmeans_cluster'] = kmeans_mall.fit_predict(X_mall)

# Silhouette Score
silhouette_mall = silhouette_score(X_mall, mall_data['kmeans_cluster'])
print(f'Silhouette Score for K-Means (Mall Customers): {silhouette_mall}')


In [None]:
# Hierarchical clustering: Ward-linkage dendrograms for both feature matrices.

# Iris: leaves are labelled with the integer species codes stored in iris_data.
linked_iris = linkage(X_iris, method='ward')
fig_iris, ax_iris = plt.subplots(figsize=(10, 5))
dendrogram(
    linked_iris,
    labels=iris_data['species'].values,
    leaf_rotation=90,
    ax=ax_iris,
)
ax_iris.set_title('Dendrogram for Iris Dataset')
plt.show()

# Mall Customers: default leaf labels (observation indices).
linked_mall = linkage(X_mall, method='ward')
fig_mall, ax_mall = plt.subplots(figsize=(10, 5))
dendrogram(linked_mall, ax=ax_mall)
ax_mall.set_title('Dendrogram for Mall Customers Dataset')
plt.show()


In [None]:
# Visualization of Clusters
# Each scatter plot colours the points by the K-Means label stored in the
# 'kmeans_cluster' column of the corresponding frame.

# K-Means Clustering Visualization for Iris (sepal length vs. sepal width)
fig_iris_km, ax_iris_km = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=iris_data,
    x='sepal length (cm)',
    y='sepal width (cm)',
    hue='kmeans_cluster',
    palette='deep',
    ax=ax_iris_km,
)
ax_iris_km.set_title('K-Means Clustering on Iris Dataset')
plt.show()

# K-Means Clustering Visualization for Mall Customers (income vs. spending score)
fig_mall_km, ax_mall_km = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=mall_data,
    x='Annual Income (k$)',
    y='Spending Score (1-100)',
    hue='kmeans_cluster',
    palette='deep',
    ax=ax_mall_km,
)
ax_mall_km.set_title('K-Means Clustering on Mall Customers Dataset')
plt.show()


In [None]:
# Analyze clusters for Iris
# Average only the non-label columns: 'species' holds integer class codes, so
# its per-cluster mean is not a meaningful quantity.
iris_cluster_means = (
    iris_data
    .drop(columns='species')
    .groupby('kmeans_cluster')
    .mean()
)
print("Cluster means for Iris Dataset:")
print(iris_cluster_means)

# Count how the species codes are distributed over the clusters; this is the
# proper way to compare the clustering against the known labels.
print("Cluster vs. species counts for Iris Dataset:")
print(pd.crosstab(iris_data['kmeans_cluster'], iris_data['species']))



In [None]:
# Compare performance
# Report the silhouette scores computed earlier next to the Davies-Bouldin
# index of the same K-Means labellings (silhouette: higher is better;
# Davies-Bouldin: lower is better).
iris_labels = iris_data['kmeans_cluster']
mall_labels = mall_data['kmeans_cluster']

print(f"Silhouette Score - K-Means Iris: {silhouette_iris}, K-Means Mall: {silhouette_mall}")

dbi_iris = davies_bouldin_score(X_iris, iris_labels)
dbi_mall = davies_bouldin_score(X_mall, mall_labels)
print(f'Davies-Bouldin Index - K-Means Iris: {dbi_iris}, K-Means Mall: {dbi_mall}')

