In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, davies_bouldin_score

# Evaluate K-Means clusterings of the standardized features for k = 2..10,
# recording the silhouette and Davies-Bouldin score of each clustering.

# Load the dataset and drop the 'MARK' column, which is not used below.
df = pd.read_csv("Incomplete payment dataset.csv")
df = df.drop(columns=['MARK'])

# Raw input columns, and the names under which their standardized versions
# are stored in df. Defined once so that fitting and scoring can never
# drift apart onto different column sets.
raw_columns = ['y', 'Alpha', 't', 'p']
scaled_columns = [f"{name}_T" for name in raw_columns]

# Standardize the features
scaler = StandardScaler()
df[scaled_columns] = scaler.fit_transform(df[raw_columns])

# The feature frame is identical for every candidate k, so select it once
# instead of re-slicing df three times per iteration.
features = df[scaled_columns]

# Calculate silhouette scores and Davies-Bouldin scores for different numbers of clusters
silhouette_scores = []
davies_bouldin_scores = []
cluster_range = range(2, 11)  # Both metrics are not defined for n_clusters=1

for n_clusters in cluster_range:
    # random_state is fixed so repeated runs give the same labels.
    # NOTE(review): n_init is not set, so the number of restarts follows the
    # installed scikit-learn's default -- consider pinning it explicitly if
    # results must be reproducible across library versions.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    cluster_labels = kmeans.fit_predict(features)

    # Calculate silhouette score
    sil_score = silhouette_score(features, cluster_labels)
    silhouette_scores.append(sil_score)

    # Calculate Davies-Bouldin score
    db_score = davies_bouldin_score(features, cluster_labels)
    davies_bouldin_scores.append(db_score)

    print(f"Silhouette Score for {n_clusters} clusters: {sil_score}")
    print(f"Davies-Bouldin Score for {n_clusters} clusters: {db_score}")



Silhouette Score for 2 clusters: 0.36150519730468866
Davies-Bouldin Score for 2 clusters: 1.1764192741923636
Silhouette Score for 3 clusters: 0.3646488925277868
Davies-Bouldin Score for 3 clusters: 0.966805723794539
Silhouette Score for 4 clusters: 0.3549928542235575
Davies-Bouldin Score for 4 clusters: 0.9629175069774729
Silhouette Score for 5 clusters: 0.31283262268714224
Davies-Bouldin Score for 5 clusters: 1.0038288736749417
Silhouette Score for 6 clusters: 0.3287124348335066
Davies-Bouldin Score for 6 clusters: 1.0495427242096207
Silhouette Score for 7 clusters: 0.3399865834670593
Davies-Bouldin Score for 7 clusters: 0.9880789376326341
Silhouette Score for 8 clusters: 0.356804437335951
Davies-Bouldin Score for 8 clusters: 0.9258366221743413
Silhouette Score for 9 clusters: 0.3638440496443739
Davies-Bouldin Score for 9 clusters: 0.9180737335049957
Silhouette Score for 10 clusters: 0.3551010965297117
Davies-Bouldin Score for 10 clusters: 0.9400909816263313
