In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score

In [None]:
# Read the three input tables (customers, products, transactions) from CSV
# files resolved relative to the current working directory.
customers, products, transactions = map(
    pd.read_csv,
    ("Customers.csv", "Products.csv", "Transactions.csv"),
)

In [None]:
def clustering(transactions_df=None, n_clusters=4, random_state=42, n_init=10,
               output_path='Mohit_Chaudhary_Clustering.csv'):
    """Segment customers with K-Means on per-customer purchase aggregates.

    Steps: aggregate the transactions per ``CustomerID`` (sum of
    ``TotalValue``, sum of ``Quantity``, number of distinct ``ProductID``),
    standardize the three features, fit K-Means, print the Davies-Bouldin
    index, show a scatter plot of the first two standardized features
    coloured by cluster, and write the per-customer table (including the
    ``Cluster`` column) to CSV.

    Parameters
    ----------
    transactions_df : pandas.DataFrame, optional
        Transaction rows providing the columns ``CustomerID``,
        ``TotalValue``, ``Quantity`` and ``ProductID``. When omitted, the
        notebook-level ``transactions`` frame is used.
    n_clusters : int, default 4
        Number of clusters to fit. Must be at least 2 and strictly smaller
        than the number of distinct customers, otherwise the Davies-Bouldin
        index is undefined.
    random_state : int, default 42
        Seed passed to ``KMeans`` so the segmentation is reproducible.
    n_init : int, default 10
        Number of K-Means initialisations. Passed explicitly because
        scikit-learn changed the default of this argument (10 -> 'auto' in
        version 1.4), which would otherwise make the result depend on the
        installed version.
    output_path : str, default 'Mohit_Chaudhary_Clustering.csv'
        Destination of the CSV holding the per-customer features and labels.

    Returns
    -------
    None
        Results are reported through the printed score, the displayed figure
        and the CSV file.

    Raises
    ------
    ValueError
        If ``n_clusters`` is not in ``[2, number_of_customers - 1]``.
    """
    source = transactions if transactions_df is None else transactions_df

    # Feature engineering: one row per customer. The dict order fixes the
    # column order of the aggregated frame, and therefore of the scaled matrix.
    feature_cols = ['TotalValue', 'Quantity', 'ProductID']
    customer_data = source.groupby('CustomerID').agg({
        'TotalValue': 'sum',
        'Quantity': 'sum',
        'ProductID': 'nunique'
    }).reset_index()

    # Fail early with a clear message instead of an error from deep inside
    # scikit-learn (KMeans / davies_bouldin_score raise ValueError as well).
    n_customers = len(customer_data)
    if not 2 <= n_clusters < n_customers:
        raise ValueError(
            f"n_clusters must be between 2 and {n_customers - 1} "
            f"for {n_customers} customers, got {n_clusters}"
        )

    # Scaling features so that no single feature dominates the distances.
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(customer_data[feature_cols])

    # K-Means Clustering
    kmeans = KMeans(n_clusters=n_clusters, n_init=n_init, random_state=random_state)
    customer_data['Cluster'] = kmeans.fit_predict(scaled_data)

    # Calculate DB Index (lower means more compact, better separated clusters).
    db_index = davies_bouldin_score(scaled_data, customer_data['Cluster'])
    print(f"Davies-Bouldin Index: {db_index}")

    # Visualization: first two standardized features, coloured by cluster.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.scatterplot(
        x=scaled_data[:, 0],
        y=scaled_data[:, 1],
        hue=customer_data['Cluster'],
        palette='viridis',
        ax=ax,
    )
    ax.set(
        title="Customer Segmentation",
        xlabel=f"{feature_cols[0]} (standardized)",
        ylabel=f"{feature_cols[1]} (standardized)",
    )
    plt.show()

    # Save cluster data
    customer_data.to_csv(output_path, index=False)

# Run the segmentation pipeline: prints the Davies-Bouldin index, displays
# the cluster scatter plot, and writes the per-customer cluster CSV.
clustering()