<a href="https://colab.research.google.com/github/Mehaboob999/notebook-rough/blob/main/03_customer_segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler


In [None]:
# Read the cleaned sales CSV into a DataFrame.
sales_csv_path = '/content/cleaned_sales.csv'
df = pd.read_csv(sales_csv_path)

# Last expression of the cell: shows the first rows when run in a notebook.
df.head()


In [None]:
# Collapse the sales rows to one row per customer: summed revenue and the
# number of distinct order numbers.
per_customer = df.groupby('customer_name').agg(
    total_revenue=('revenue', 'sum'),
    num_orders=('order_number', 'nunique'),
)

# Average order value = total revenue / number of distinct orders.
# NOTE(review): nunique() ignores missing values, so a customer whose
# order_number entries are all missing would get num_orders == 0 and a
# non-finite average here -- confirm the cleaned data rules this out.
per_customer['avg_order_value'] = per_customer['total_revenue'].div(
    per_customer['num_orders']
)

# Move customer_name from the index back into a regular column.
customer_data = per_customer.reset_index()

customer_data.head()

In [None]:
# Columns used as clustering inputs.
features = ['total_revenue', 'num_orders', 'avg_order_value']
feature_frame = customer_data[features]

# Standardize every feature (remove the mean, scale to unit variance) so the
# columns are on a comparable scale before clustering.
scaler = StandardScaler()
scaled_features = scaler.fit(feature_frame).transform(feature_frame)


In [None]:
# Elbow method: fit K-Means for k = 1..10 and record each model's inertia
# (the within-cluster sum of squared distances).
# KMeans.fit raises ValueError when n_clusters exceeds the number of samples,
# so the sweep is capped at the number of rows in scaled_features.
max_k = min(10, len(scaled_features))
k_values = range(1, max_k + 1)

sse = []
for k in k_values:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    kmeans.fit(scaled_features)
    sse.append(kmeans.inertia_)

# Plot Elbow Curve (x-axis reuses k_values so it always matches len(sse))
plt.figure(figsize=(8, 4))
plt.plot(k_values, sse, marker='o')
plt.xlabel('Number of Clusters')
plt.ylabel('SSE (Inertia)')
plt.title('Elbow Method for Optimal Clusters')
plt.grid(True)
plt.show()


In [None]:
# Final model with 4 clusters (presumably read off the elbow plot -- confirm).
# A fixed random_state keeps the centroid initialization repeatable.
kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)

# Fit on the scaled features and store each customer's cluster label.
cluster_labels = kmeans.fit_predict(scaled_features)
customer_data['cluster'] = cluster_labels


In [None]:
# Pairwise scatter plots of the customer table, colored by cluster label,
# with a KDE curve per cluster on the diagonal.
sns.pairplot(data=customer_data, hue='cluster', diag_kind='kde')

# y slightly above 1 places the title above the top row of panels.
plt.suptitle('Customer Clusters', y=1.02)
plt.show()


In [None]:
import os

# Where the per-customer cluster assignments are written.
# NOTE(review): '../data' is resolved against the current working directory --
# confirm this is the intended location.
output_dir = '../data'
output_path = f'{output_dir}/customer_clusters.csv'

# exist_ok=True: do nothing if the directory is already there.
os.makedirs(output_dir, exist_ok=True)

# index=False: write only the columns, not the DataFrame's row index.
customer_data.to_csv(output_path, index=False)
print("✅ customer_clusters.csv saved!")