In [3]:
import os
# Set before the numeric/ML libraries below are imported; presumably the
# thread cap has to be in the environment when they first load
# (NOTE(review): confirm against the affected library's documentation).
os.environ['OMP_NUM_THREADS'] = '1'  # Fix for Windows OpenBLAS bug

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA


ModuleNotFoundError: No module named 'seaborn'

In [None]:
# Read the customer records from the CSV in the working directory
df = pd.read_csv("Mall_Customers.csv")

# Columns used as clustering inputs
feature_columns = ['Age', 'Annual Income (k$)', 'Spending Score (1-100)']
features = df[feature_columns]

# Fit the scaler on the selected columns, then apply it to the same data
scaler = StandardScaler()
scaler.fit(features)
X_scaled = scaler.transform(features)


In [None]:
# Find the best K using Silhouette Score.
# Each fitted model is kept so the winner can be reused afterwards instead of
# being refit and rescored a second time on the same data with the same seed.
silhouette_scores = []
fitted_models = []
K_range = range(2, 11)

for k in K_range:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init='auto')
    labels = kmeans.fit_predict(X_scaled)
    score = silhouette_score(X_scaled, labels)
    silhouette_scores.append(score)
    fitted_models.append(kmeans)

# Choose optimal K: the highest silhouette score wins. list.index() returns
# the first match, so a tie resolves to the smallest K.
best_index = silhouette_scores.index(max(silhouette_scores))
optimal_k = K_range[best_index]
print(f"Best K: {optimal_k}")

# Take the final model from the sweep and add its cluster labels to the frame.
# labels_ holds the assignments produced by the fit performed in the loop.
kmeans_final = fitted_models[best_index]
df['Cluster'] = kmeans_final.labels_

# Reduce dimensions for plotting (2 principal components of the scaled data)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
df['PCA1'] = X_pca[:, 0]
df['PCA2'] = X_pca[:, 1]

# Plot clusters on an explicit Axes rather than pyplot's implicit current figure
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df, x='PCA1', y='PCA2', hue='Cluster', palette='Set2', s=70, ax=ax)
ax.set_title(f'Customer Segments (K={optimal_k}) - PCA View')
ax.grid(True)
plt.show()

# Final silhouette score: already computed for the winning K during the sweep
final_score = silhouette_scores[best_index]
print(f"Silhouette Score: {final_score:.3f}")
