# 🧠 Clustering: K-Means and DBSCAN for Customer Segmentation

## 📚 Step 1: Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score

## 📥 Step 2: Create Synthetic Customer Data

In [None]:
# Synthetic stand-in for customer data (e.g., Annual Income vs Spending Score):
# 300 points drawn around 4 blob centres. The generating blob labels returned
# by make_blobs are discarded because clustering is unsupervised.
X, _ = make_blobs(
    n_samples=300,
    centers=4,
    cluster_std=0.60,
    random_state=0,
)

# Standardise the features so both contribute equally to the distance
# computations used by K-Means and DBSCAN. The fitted scaler is kept so the
# same transformation can be re-applied later.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

## 🔎 Step 3: K-Means Clustering

### 🔢 Concept:
- Partition data into K clusters.
- Minimize intra-cluster variance.

Objective function:
$$
\min_{C_1, \dots, C_k} \sum_{i=1}^{k} \sum_{x \in C_i} \lVert x - \mu_i \rVert^2
$$

where $\mu_i$ is the centroid (mean) of cluster $C_i$.

In [None]:
# Find the optimal K using the elbow method.
#
# For each candidate K we fit K-Means and record its inertia (the sum of
# squared distances of samples to their closest cluster centre). Inertia
# generally keeps falling as K grows, so the K to pick is the "elbow" where
# the curve stops dropping sharply.
K_range = range(1, 10)
inertia = []
for k in K_range:
    # n_init is passed explicitly: scikit-learn changed its default from 10 to
    # 'auto' (and warns about the upcoming change in 1.2/1.3), so leaving it
    # implicit makes the curve depend on the installed version.
    # A dedicated name is used for the throw-away model so this loop does not
    # leave a K=9 model bound to `kmeans`, the name of the final model.
    elbow_model = KMeans(n_clusters=k, n_init=10, random_state=0)
    elbow_model.fit(X_scaled)
    inertia.append(elbow_model.inertia_)

plt.plot(K_range, inertia, 'bo-')
plt.xlabel('Number of Clusters K')
plt.ylabel('Inertia')
plt.title('Elbow Method for Optimal K')
plt.show()

### 🛠 Train KMeans with Optimal K

In [None]:
# Fit the final K-Means model with K=4, the number of blob centres the
# synthetic data was generated with (the elbow plot should bend at the same K).
# n_init is passed explicitly because scikit-learn changed its default from 10
# to 'auto'; pinning it keeps the clustering reproducible across versions and
# avoids the FutureWarning emitted by scikit-learn 1.2/1.3.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=0)
kmeans.fit(X_scaled)

# One integer cluster id per row of X_scaled.
labels_kmeans = kmeans.labels_

### 📊 Visualize KMeans Clusters

In [None]:
# Scatter the standardised points coloured by their K-Means cluster id, then
# overlay the fitted cluster centres as large red "X" markers.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(X_scaled[:, 0], X_scaled[:, 1], c=labels_kmeans, cmap='viridis')

centroids = kmeans.cluster_centers_
ax.scatter(centroids[:, 0], centroids[:, 1], s=200, c='red', marker='X')

ax.set_title('Customer Segmentation with K-Means')
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
plt.show()

## 📚 Step 4: DBSCAN Clustering

### 🔢 Concept:
- Density-based clustering.
- Groups points that are closely packed together.
- Good for arbitrarily shaped clusters and outlier detection.

In [None]:
# Fit DBSCAN on the standardised features.
# eps is the neighbourhood radius and min_samples the number of neighbours a
# point needs to count as a core point; both are in scaled-feature units.
dbscan = DBSCAN(eps=0.5, min_samples=5).fit(X_scaled)

# One cluster id per row of X_scaled; DBSCAN marks noise points with -1.
labels_dbscan = dbscan.labels_

### 📊 Visualize DBSCAN Clusters

In [None]:
# Plot the DBSCAN result.
# DBSCAN labels noise points -1. Passing that label straight to the colormap
# would draw outliers as if they were just another cluster, so they are split
# out and drawn as black crosses with a legend entry instead.
is_noise = labels_dbscan == -1

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(
    X_scaled[~is_noise, 0],
    X_scaled[~is_noise, 1],
    c=labels_dbscan[~is_noise],
    cmap='plasma',
)
# Only add the noise layer (and its legend) when DBSCAN actually found outliers.
if is_noise.any():
    ax.scatter(
        X_scaled[is_noise, 0],
        X_scaled[is_noise, 1],
        c='black',
        marker='x',
        label='Noise (label -1)',
    )
    ax.legend()

ax.set_title('Customer Segmentation with DBSCAN')
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
plt.show()

## ✅ Step 5: Summary
- **K-Means** partitions customers into "k" groups minimizing variance.
- **DBSCAN** identifies dense regions and detects outliers.
- Both methods are useful for customer segmentation in marketing and personalization strategies.

# End of Notebook