# Customer Segmentation

In [None]:

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import davies_bouldin_score
from sklearn.preprocessing import StandardScaler

# Customer segmentation pipeline: build per-customer features from the
# transaction table, cluster them with KMeans, print the Davies-Bouldin index
# and plot the clusters on the first two principal components.

# Load datasets (paths are relative to the current working directory)
customers = pd.read_csv('Customers.csv')
transactions = pd.read_csv('Transactions.csv')

# Aggregate transaction data: one row per CustomerID holding the summed
# TotalValue and the summed Quantity.
customer_transactions = transactions.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum'
}).reset_index()

# Merge with customer profile. The left join keeps customers that have no
# transactions; their aggregates come back as NaN and are set to 0.
# Only the two aggregate columns are filled: a frame-wide fillna(0) would also
# write a numeric 0 into any other profile column that has gaps (e.g. Region),
# turning it into a spurious category.
customer_profile = pd.merge(customers, customer_transactions, on='CustomerID', how='left')
aggregate_columns = ['TotalValue', 'Quantity']
customer_profile[aggregate_columns] = customer_profile[aggregate_columns].fillna(0)

# One-hot encode Region; drop_first removes one level so the dummy columns
# are not perfectly collinear.
customer_profile_encoded = pd.get_dummies(customer_profile[['Region']], drop_first=True)

# Standardize features so the aggregate columns and the Region dummies are on
# a comparable scale before distance-based clustering.
features = pd.concat([customer_profile[aggregate_columns], customer_profile_encoded], axis=1)
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# KMeans clustering (fixed random_state keeps the assignment reproducible)
kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)
customer_profile['Cluster'] = kmeans.fit_predict(features_scaled)

# Davies-Bouldin Index (lower values indicate better-separated clusters)
db_index = davies_bouldin_score(features_scaled, customer_profile['Cluster'])
print(f"Davies-Bouldin Index: {db_index}")

# Visualize clusters: project the scaled features onto two principal
# components purely for plotting; the clustering itself used all features.
pca = PCA(n_components=2)
pca_features = pca.fit_transform(features_scaled)
plt.figure(figsize=(8, 6))
sns.scatterplot(x=pca_features[:, 0], y=pca_features[:, 1],
                hue=customer_profile['Cluster'], palette='Set2', s=50)
plt.title("Customer Segmentation")
plt.xlabel("PCA Component 1")
plt.ylabel("PCA Component 2")
plt.legend(title="Cluster", loc="best")
plt.grid(True)
plt.show()
