# Customer Segmentation
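This notebook segments customers with K-Means clustering. Customer profile data is joined to the transaction data, and each customer is then described by three transaction aggregates (total value, total quantity, and number of transactions), which are standardized and used as the clustering features. Cluster quality is reported with the Davies-Bouldin index.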

In [None]:

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
import matplotlib.pyplot as plt
import seaborn as sns

# Read the two raw inputs: the customer table (CSV) and the transaction table (Excel).
customers = pd.read_csv('/mnt/data/Customers.csv')
transactions = pd.read_excel('/mnt/data/Transactions.xlsx')

# Attach the customer record to every transaction. The default inner join keeps
# only rows whose CustomerID appears in both tables.
customer_transactions = pd.merge(transactions, customers, on="CustomerID")

# Collapse to one row per customer:
#   TotalValue       - sum of TotalValue over the customer's transactions
#   Quantity         - sum of Quantity over the customer's transactions
#   TransactionCount - number of non-null TransactionID values
customer_features = (
    customer_transactions
    .groupby("CustomerID")
    .agg(
        TotalValue=("TotalValue", "sum"),
        Quantity=("Quantity", "sum"),
        TransactionCount=("TransactionID", "count"),
    )
    .reset_index()
)


In [None]:

# Preprocessing and scaling
# Select the model inputs by name instead of by position (`iloc[:, 1:]`). This cell
# appends a `Cluster` column to `customer_features` below, so a positional slice
# would feed the previous labels back in as a feature if the cell were re-run.
feature_columns = ["TotalValue", "Quantity", "TransactionCount"]
scaler = StandardScaler()
# Standardize each feature so that no single column dominates the Euclidean
# distances KMeans relies on.
scaled_features = scaler.fit_transform(customer_features[feature_columns])

# Perform clustering with KMeans
# random_state is fixed so the segmentation is reproducible across runs;
# n_init=10 runs 10 initializations and keeps the best one.
kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)
clusters = kmeans.fit_predict(scaled_features)
# Store each customer's cluster label next to its features.
customer_features['Cluster'] = clusters

# Calculate Davies-Bouldin Index
# Computed on the same scaled features the model was fit on; lower values
# indicate better-separated clusters.
db_index = davies_bouldin_score(scaled_features, clusters)
print(f"Davies-Bouldin Index: {db_index}")


In [None]:

# Visualize clusters
# Pairwise scatter plots of the customer features, coloured by cluster label,
# with kernel-density estimates on the diagonal.
pair_grid = sns.pairplot(customer_features, hue="Cluster", palette="Set2", diag_kind="kde")
# pairplot draws a grid of subplots, so `plt.title` would only label whichever
# subplot happens to be current. Set the title on the figure instead; y > 1
# lifts it clear of the top row of panels.
pair_grid.figure.suptitle("Customer Segments", y=1.02)
plt.show()


In [None]:

# Persist the per-customer feature table (including the Cluster column, once the
# clustering cell has run) as CSV; the DataFrame's row index is not written.
customer_features.to_csv(
    path_or_buf='/mnt/data/Bhavadharani_Haribabu_Clustering.csv',
    index=False,
)


In [None]:

# Preview the first five rows of the customer feature table
customer_features.head(n=5)
