# In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
import matplotlib.pyplot as plt
import seaborn as sns

# Load datasets
customers = pd.read_csv("Customers.csv")
transactions = pd.read_csv("Transactions.csv")

# Merge datasets to include transaction information with customer profiles.
# NOTE: this is an inner join on CustomerID, so customers that have no row in
# Transactions.csv are not part of the clustering or of the saved output.
customer_data = transactions.merge(customers, on="CustomerID")

# Aggregate data to one row per CustomerID
customer_summary = customer_data.groupby("CustomerID").agg({
    "TotalValue": "sum",  # Total spending
    "Quantity": "sum",    # Total quantity purchased
    "Region": "first"      # Region of the customer
}).reset_index()

# One-hot encode the Region column. dtype=float keeps the feature matrix
# homogeneous (newer pandas versions would otherwise emit bool columns next to
# the float columns produced by the scaler below).
customer_summary = pd.get_dummies(
    customer_summary, columns=["Region"], prefix="Region", dtype=float
)

# Standardize the numerical columns; the Region indicator columns stay as 0/1.
scaler = StandardScaler()
numerical_features = ["TotalValue", "Quantity"]
customer_summary[numerical_features] = scaler.fit_transform(customer_summary[numerical_features])

# Build the feature matrix once so that fitting and scoring are guaranteed to
# use exactly the same columns (everything except the identifier).
feature_columns = [col for col in customer_summary.columns if col != "CustomerID"]
features = customer_summary[feature_columns]

# Choose the number of clusters
n_clusters = 4

# Apply K-Means clustering.
# n_init is pinned explicitly: its default differs between scikit-learn
# releases, which would otherwise make the assignments version-dependent even
# with a fixed random_state.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
customer_summary["Cluster"] = kmeans.fit_predict(features)

# Calculate Davies-Bouldin Index on the same features used for fitting
db_index = davies_bouldin_score(features, customer_summary["Cluster"])
print(f"Davies-Bouldin Index: {db_index}")

# Visualize the clusters in the (standardized) spending / quantity plane
plt.figure(figsize=(10, 6))
sns.scatterplot(
    data=customer_summary,
    x="TotalValue",
    y="Quantity",
    hue="Cluster",
    palette="viridis",
    s=100
)
plt.title("Customer Segments Based on Clustering")
plt.xlabel("Total Spending (Standardized)")
plt.ylabel("Total Quantity Purchased (Standardized)")
plt.legend(title="Cluster")
plt.show()

# Save cluster assignments to a CSV (one row per clustered customer)
customer_summary[["CustomerID", "Cluster"]].to_csv("FirstName_LastName_Clustering.csv", index=False)

print("Clustering completed. Results saved to 'FirstName_LastName_Clustering.csv'.")
