In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, davies_bouldin_score

# Read the payment data and discard the 'MARK' column
df = pd.read_csv("Complete payment dataset.csv").drop(columns=['MARK'])

# Standardize the four raw features; the scaled values are stored next to the
# originals in columns carrying a '_T' suffix.
raw_features = ['y', 'Alpha', 't', 'p']
scaled_features = [f"{name}_T" for name in raw_features]
scaler = StandardScaler()
df[scaled_features] = scaler.fit_transform(df[raw_features])

# Fit K-Means for each candidate number of clusters and record two internal
# validity metrics per fit: the silhouette score (higher is better) and the
# Davies-Bouldin score (lower is better).
silhouette_scores = []
davies_bouldin_scores = []
cluster_range = range(2, 11)  # Both metrics are not defined for n_clusters=1

# The standardized feature matrix is identical on every iteration, so select
# it once instead of re-slicing the DataFrame for each fit and each metric.
features = df[['y_T', 'Alpha_T', 't_T', 'p_T']]

for n_clusters in cluster_range:
    # n_init is pinned explicitly: its default differs between scikit-learn
    # releases (10 in older versions, 'auto' from 1.4), so relying on it makes
    # the scores version-dependent even with a fixed random_state.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    cluster_labels = kmeans.fit_predict(features)

    # Calculate silhouette score
    sil_score = silhouette_score(features, cluster_labels)
    silhouette_scores.append(sil_score)

    # Calculate Davies-Bouldin score
    db_score = davies_bouldin_score(features, cluster_labels)
    davies_bouldin_scores.append(db_score)

    print(f"Silhouette Score for {n_clusters} clusters: {sil_score}")
    print(f"Davies-Bouldin Score for {n_clusters} clusters: {db_score}")




Silhouette Score for 2 clusters: 0.3728550264436199
Davies-Bouldin Score for 2 clusters: 0.9831585904240556
Silhouette Score for 3 clusters: 0.3288727815854123
Davies-Bouldin Score for 3 clusters: 1.0610180961787703
Silhouette Score for 4 clusters: 0.30408148072629365
Davies-Bouldin Score for 4 clusters: 1.042078324501569
Silhouette Score for 5 clusters: 0.3066147603137512
Davies-Bouldin Score for 5 clusters: 1.0117329494458727
Silhouette Score for 6 clusters: 0.25705811444843135
Davies-Bouldin Score for 6 clusters: 1.1100211867790515
Silhouette Score for 7 clusters: 0.22420131925158562
Davies-Bouldin Score for 7 clusters: 1.1750122039936437
Silhouette Score for 8 clusters: 0.23085127088340884
Davies-Bouldin Score for 8 clusters: 1.1106852098453204
Silhouette Score for 9 clusters: 0.24572102165771986
Davies-Bouldin Score for 9 clusters: 1.0439062860625858
Silhouette Score for 10 clusters: 0.22964362065655997
Davies-Bouldin Score for 10 clusters: 1.1015911371439118
