In [2]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import davies_bouldin_score
import matplotlib.pyplot as plt
import seaborn as sns

# --- Customer segmentation: load -> aggregate -> scale -> cluster -> report ---

# Load the three input tables. The paths are relative, so they are resolved
# against the current working directory; pd.read_csv raises FileNotFoundError
# when the script is started from a directory that does not contain them.
customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")
transactions = pd.read_csv("Transactions.csv")

# Left-join customer and product attributes onto every transaction row.
# For the product join the left-hand columns keep their original names and
# only colliding product columns are suffixed: with pandas' default suffixes a
# column present in both tables (other than the join key) would be renamed to
# *_x / *_y on both sides, and the aggregation below would then fail with a
# KeyError if that column is one of the aggregated ones.
merged_data = transactions.merge(customers, on="CustomerID", how="left").merge(
    products, on="ProductID", how="left", suffixes=("", "_product")
)

# One row per customer: mean price, total quantity and total value.
# fillna(0) replaces aggregates that came out as NaN (e.g. a mean over rows
# whose Price is missing after the left join) so that the scaler and KMeans
# receive finite values.
customer_features = merged_data.groupby('CustomerID').agg({
    'Price': 'mean',
    'Quantity': 'sum',
    'TotalValue': 'sum'
}).fillna(0)

# Standardize the features so that no single column dominates the Euclidean
# distances used by k-means.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_features)

# n_init is set explicitly: scikit-learn changed its default from 10 to
# 'auto', so leaving it out makes the resulting clusters (and the score below)
# depend on the installed version and triggers a FutureWarning on some
# releases. random_state keeps the run reproducible.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
kmeans.fit(scaled_features)

# The 'Cluster' column is attached only after scaling/fitting, so the labels
# never leak into the feature matrix.
customer_features['Cluster'] = kmeans.labels_
davies_bouldin = davies_bouldin_score(scaled_features, kmeans.labels_)

print(f"Davies-Bouldin Index: {davies_bouldin}")

# 2-D view of the clusters. Columns 0 and 1 of scaled_features follow the
# aggregation order above, i.e. standardized mean Price and standardized total
# Quantity; the third feature (TotalValue) is not shown.
plt.figure(figsize=(10, 6))
sns.scatterplot(
    x=scaled_features[:, 0],
    y=scaled_features[:, 1],
    hue=kmeans.labels_,
    palette="viridis",
    legend="full"
)
plt.title("Customer Segmentation")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

# Persist the per-customer features together with their cluster label;
# index=True keeps CustomerID (the groupby key) as the first column.
customer_features.to_csv("Customer_Segmentation.csv", index=True)
print("Customer segmentation results saved to Customer_Segmentation.csv")

FileNotFoundError: [Errno 2] No such file or directory: 'Customers.csv'