In [None]:
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score, silhouette_score, calinski_harabasz_score
import matplotlib.pyplot as plt

# Customer segmentation: cluster customers on their total spend and total
# quantity purchased, report cluster-quality metrics, plot the clusters and
# save the labelled table. `transactions` and `scaler` come from earlier in
# the file.

# Preparing data for clustering: one row per CustomerID with summed features.
segmentation_data = transactions.groupby('CustomerID').agg({'TotalValue': 'sum', 'Quantity': 'sum'}).reset_index()
# NOTE(review): fit_transform re-fits the shared `scaler` object, replacing
# whatever it was previously fitted on -- confirm nothing later in the file
# depends on the earlier fit.
scaled_segmentation_data = scaler.fit_transform(segmentation_data[['TotalValue', 'Quantity']])

# KMeans clustering
# n_init is pinned explicitly: its default differs between scikit-learn
# releases (10 restarts in older versions, 'auto' in newer ones), so relying
# on the default yields different clusters for the same random_state and
# emits a FutureWarning on the releases in between.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
clusters = kmeans.fit_predict(scaled_segmentation_data)
segmentation_data['Cluster'] = clusters

# Evaluate clustering on the same scaled features the model was fitted on.
# Davies-Bouldin: lower is better; Silhouette and Calinski-Harabasz: higher
# is better.
db_index = davies_bouldin_score(scaled_segmentation_data, clusters)
silhouette_avg = silhouette_score(scaled_segmentation_data, clusters)
calinski_harabasz = calinski_harabasz_score(scaled_segmentation_data, clusters)

# Print metrics
print(f"Davies-Bouldin Index: {db_index}")
print(f"Silhouette Score: {silhouette_avg}")
print(f"Calinski-Harabasz Index: {calinski_harabasz}")

# Cluster sizes (value_counts orders clusters from largest to smallest)
cluster_sizes = segmentation_data['Cluster'].value_counts()
print("Cluster Sizes:")
print(cluster_sizes)

# Visualizing clusters on the original (unscaled) axes so the values stay
# interpretable; colour encodes the cluster label.
cluster_scatter = plt.scatter(
    segmentation_data['TotalValue'],
    segmentation_data['Quantity'],
    c=segmentation_data['Cluster'],
    cmap='viridis',
)
# Without a legend the colours cannot be mapped back to cluster ids.
plt.legend(*cluster_scatter.legend_elements(), title="Cluster")
plt.title("Customer Segmentation")
plt.xlabel("Total Value")
plt.ylabel("Quantity")
plt.show()

# Save clustering results (aggregated features plus the Cluster column)
segmentation_data.to_csv("Clustering_Results.csv", index=False)
