In [None]:
import os

# Limit threads to prevent resource errors.
# These variables are read by the native OpenMP / MKL / OpenBLAS / numexpr
# runtimes when they are first loaded, so they must be set BEFORE numpy,
# scipy, pandas or scikit-learn are imported; setting them afterwards has no
# effect on thread pools that are already initialised.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"

import warnings  # noqa: E402  (imports intentionally follow the env setup)

import pandas as pd  # noqa: E402
import numpy as np  # noqa: E402
import matplotlib.pyplot as plt  # noqa: E402
import seaborn as sns  # noqa: E402

from sklearn.preprocessing import StandardScaler  # noqa: E402
from sklearn.cluster import KMeans  # noqa: E402
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster  # noqa: E402
from sklearn.metrics import silhouette_score  # noqa: E402

# Silence all library warnings for the rest of the script.
warnings.filterwarnings('ignore')

# Load dataset: read the customer CSV and keep the three numeric columns
# that the clustering below operates on.
df = pd.read_csv('Mall_Customers.csv')
features = df.loc[:, ['Age', 'Annual Income (k$)', 'Spending Score (1-100)']]

# Scale data: learn the per-column statistics first, then apply them, so the
# fitted scaler stays available under its own name.
scaler = StandardScaler().fit(features)
scaled_features = scaler.transform(features)

# Elbow method to find optimal k for KMeans: fit one model per candidate k
# and record its within-cluster sum of squares (the model's inertia).
k_values = range(1, 11)  # shared by the fitting loop and the plot's x-axis
wcss = []
for k in k_values:
    # n_init is pinned explicitly: scikit-learn's default changed between
    # releases (10 -> 'auto'), and the warning about it is suppressed in this
    # script, so relying on the default would make the curve version-dependent.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(scaled_features)
    wcss.append(kmeans.inertia_)

plt.figure(figsize=(8, 5))
plt.plot(k_values, wcss, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.grid(True)
plt.show()

# Dendrogram for Hierarchical Clustering
# Build the Ward-linkage merge tree from the scaled features and draw it on a
# fresh figure.
linked = linkage(scaled_features, method='ward')

plt.figure(figsize=(10, 7))
dendrogram(linked)
# Label the axes the dendrogram was just drawn on in a single call.
plt.gca().set(title='Dendrogram', xlabel='Samples', ylabel='Distance')
plt.show()

# Apply KMeans with k=5
# n_init is pinned explicitly: scikit-learn's default changed between releases
# (10 -> 'auto'), so an explicit value keeps the assignment reproducible
# regardless of the installed version.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
df['KMeans_Cluster'] = kmeans.fit_predict(scaled_features)

# Apply Hierarchical Clustering with k=5 by cutting the linkage tree into at
# most five flat clusters.
# NOTE: fcluster numbers its clusters from 1, whereas KMeans labels start at
# 0, so the two label columns are not directly comparable by value.
df['Hierarchical_Cluster'] = fcluster(linked, t=5, criterion='maxclust')

# Visualize KMeans clusters, then Hierarchical clusters.
# Both figures plot income against spending score and differ only in the
# label column used for colouring, the palette and the title, so they are
# driven from one table of (label column, palette, title) entries.
cluster_plots = (
    ('KMeans_Cluster', 'Set2', 'K-Means Clustering'),
    ('Hierarchical_Cluster', 'Set1', 'Hierarchical Clustering'),
)
for label_column, palette_name, plot_title in cluster_plots:
    plt.figure(figsize=(8, 5))
    sns.scatterplot(
        data=df,
        x='Annual Income (k$)',
        y='Spending Score (1-100)',
        hue=label_column,
        palette=palette_name,
    )
    plt.title(plot_title)
    plt.legend(title='Cluster')
    plt.show()

# Silhouette Scores
# Score each labelling against the same scaled feature matrix so the two
# numbers are directly comparable.
kmeans_labels = df['KMeans_Cluster']
hier_labels = df['Hierarchical_Cluster']
kmeans_score = silhouette_score(scaled_features, kmeans_labels)
hier_score = silhouette_score(scaled_features, hier_labels)

# One print call, one line per method.
print(
    f"Silhouette Score - KMeans: {kmeans_score:.3f}",
    f"Silhouette Score - Hierarchical: {hier_score:.3f}",
    sep="\n",
)

# Cluster analysis
# Report the per-cluster mean of each clustering feature, once for every
# labelling method.
summary_columns = ['Age', 'Annual Income (k$)', 'Spending Score (1-100)']

kmeans_summary = df.groupby('KMeans_Cluster')[summary_columns].mean()
print("\n--- KMeans Cluster Summary ---")
print(kmeans_summary)

hierarchical_summary = df.groupby('Hierarchical_Cluster')[summary_columns].mean()
print("\n--- Hierarchical Cluster Summary ---")
print(hierarchical_summary)
