In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Build one row per customer: the profile columns from customers_df plus the
# summed TotalValue of that customer's transactions.
# The join is an inner join (pd.merge's default, spelled out here), so
# customers without any transaction are excluded from the segmentation.
total_spend = transactions_df.groupby('CustomerID', as_index=False)['TotalValue'].sum()
customer_data = pd.merge(customers_df, total_spend, on='CustomerID', how='inner')

# One-hot encode Region; drop_first=True omits the first category's dummy
# column, which is implied by the remaining ones.
customer_data_encoded = pd.get_dummies(customer_data, columns=['Region'], drop_first=True)

# Identifier, name and raw signup-date columns are removed before scaling;
# every remaining column is standardized and used as a clustering feature.
# NOTE(review): presumably that leaves only TotalValue and the Region
# dummies -- confirm customers_df has no other columns.
scaler = StandardScaler()
customer_data_scaled = scaler.fit_transform(
    customer_data_encoded.drop(['CustomerID', 'CustomerName', 'SignupDate'], axis=1)
)

# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' in 1.4 (with a FutureWarning in 1.2/1.3), so leaving it implicit makes
# the cluster labels depend on the installed version even with a fixed seed.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
customer_data['Cluster'] = kmeans.fit_predict(customer_data_scaled)

# Davies-Bouldin index of the resulting partition (lower means better-separated
# clusters), computed on the same scaled features the model was fitted on.
db_index = davies_bouldin_score(customer_data_scaled, customer_data['Cluster'])
print(f"Davies-Bouldin Index: {db_index:.3f}")

# Parse the signup dates and express each as whole days elapsed since the
# earliest signup in the data set.
# NOTE: DaysSinceSignup is derived here, after clustering, so it is only a
# plotting axis -- SignupDate was dropped from the feature matrix above.
signup_dates = pd.to_datetime(customer_data['SignupDate'])
customer_data['SignupDate'] = signup_dates
customer_data['DaysSinceSignup'] = (signup_dates - signup_dates.min()).dt.days

# Marker styling shared by every point of the scatter plot.
marker_style = {
    'palette': 'viridis',
    's': 100,
    'alpha': 0.7,
}

plt.figure(figsize=(12, 8))
sns.scatterplot(
    data=customer_data,
    x='TotalValue',
    y='DaysSinceSignup',
    hue='Cluster',
    **marker_style,
)

# Title carries the clustering-quality score so the figure is self-describing.
plot_title = f"Customer Segments\nDavies-Bouldin Index: {db_index:.3f}"
plt.title(plot_title, fontsize=16, fontweight='bold')

# Axis labels and tick label sizes.
plt.xlabel('Total Value of Transactions', fontsize=12)
plt.ylabel('Days Since Signup', fontsize=12)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)

# Legend and a faint grid.
plt.legend(title='Cluster', fontsize=10)
plt.grid(alpha=0.3)

plt.tight_layout()
plt.show()