In [None]:
import io

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from google.colab import files
from sklearn.cluster import MiniBatchKMeans  # MiniBatchKMeans instead of KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

# Customer segmentation: upload a CSV, cluster its numeric columns with
# MiniBatchKMeans for several values of k, pick the k with the highest
# silhouette score, and report the cluster centers in original units.

# Upload file. In Colab, files.upload() returns a dict keyed by the uploaded
# file's name with the raw file bytes as values.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded.")

# Load dataset from the uploaded bytes rather than from a hard-coded path:
# when a file with the same name already exists, Colab saves the new upload
# under a de-duplicated name ("... (3).csv"), so reading the fixed path would
# silently load a stale copy from an earlier upload.
dataset_name = "9. Customer Segmentation in E-commerce.csv"
if dataset_name not in uploaded:
    # Fall back to the first uploaded file if it was named differently.
    dataset_name = next(iter(uploaded))
df = pd.read_csv(io.BytesIO(uploaded[dataset_name]))

# Keep only numeric columns (any integer/float width, not just 64-bit)
df = df.select_dtypes(include="number")

# Drop rows with missing values
df = df.dropna()
if df.empty:
    raise ValueError("No numeric rows left after dropping missing values.")

# Remember the feature names now: the 'Cluster' label column added further
# down must not be treated as a feature when labelling the centroids.
feature_columns = list(df.columns)

# Standardize data
scaler = StandardScaler()
scaled = scaler.fit_transform(df)

# Elbow method + Silhouette Scores
inertia = []
silhouette_scores = []
k_range = range(2, 6)  # Reduced k range for faster results

for k in k_range:
    kmeans = MiniBatchKMeans(n_clusters=k, random_state=42, batch_size=100)  # Use MiniBatchKMeans
    labels = kmeans.fit_predict(scaled)
    inertia.append(kmeans.inertia_)
    silhouette_scores.append(silhouette_score(scaled, labels))

# Plot Elbow Method
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.plot(k_range, inertia, '-o')
plt.title('Elbow Method')
plt.xlabel('k')
plt.ylabel('Inertia')

# Plot Silhouette Scores (higher is better)
plt.subplot(1, 2, 2)
plt.plot(k_range, silhouette_scores, '-o', color='green')
plt.title('Silhouette Scores')
plt.xlabel('k')
plt.ylabel('Score')
plt.tight_layout()
plt.show()

# ✅ Use best k based on silhouette (or manually choose).
# index() returns the first maximum, so ties resolve to the smallest k.
best_score = max(silhouette_scores)
best_k = k_range[silhouette_scores.index(best_score)]
print(f"🔍 Best k based on silhouette score: {best_k}")

# Fit KMeans with best k. Same parameters and random_state as in the sweep,
# so this is intended to reproduce the model that produced best_score.
kmeans = MiniBatchKMeans(n_clusters=best_k, random_state=42, batch_size=100)  # Use MiniBatchKMeans
df['Cluster'] = kmeans.fit_predict(scaled)

# Optional: Plot just the cluster centers instead of pairplot.
# The centers have one value per feature, so they are labelled with the
# feature names captured before 'Cluster' was added; using df.columns here
# would pass one name too many and raise a shape-mismatch ValueError.
centroids = pd.DataFrame(
    scaler.inverse_transform(kmeans.cluster_centers_),
    columns=feature_columns,
)
print("📊 Cluster Centers:\n", centroids)

# Optional: show silhouette score
print(f"✅ Silhouette Score for k={best_k}: {best_score:.4f}")


Saving 9. Customer Segmentation in E-commerce.csv to 9. Customer Segmentation in E-commerce (3).csv
