In [35]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN, SpectralClustering
from sklearn.metrics import silhouette_score
import warnings
warnings.filterwarnings('ignore')
%config InlineBackend.figure_format = 'svg'
%matplotlib qt5
from sklearn.metrics import davies_bouldin_score, calinski_harabasz_score, silhouette_score


iris = datasets.load_iris()
X = iris.data


clusters_range = range(2, 10)
epsilon_values = [0.2, 0.3, 0.4, 0.5]  
min_samples = [3, 4, 5]  

kmeans_results_sil_score = []
kmeans_results_db_score = []
kmeans_results_ch_score = []




In [36]:
# K-means and Spectral Clustering
for n_clusters in clusters_range:

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans_labels = kmeans.fit_predict(X)
    sil_score = silhouette_score(X, kmeans_labels)
    db_score = davies_bouldin_score(X, kmeans_labels)
    ch_score = calinski_harabasz_score(X, kmeans_labels)

    kmeans_results_sil_score.append((n_clusters, sil_score))
    kmeans_results_db_score.append((n_clusters, db_score))
    kmeans_results_ch_score.append((n_clusters, ch_score))
print(kmeans_results_sil_score)
print(kmeans_results_db_score)
print(kmeans_results_ch_score)  
    

[(2, 0.6810461692117462), (3, 0.5528190123564095), (4, 0.49805050499728726), (5, 0.49123997967644767), (6, 0.3648340039670025), (7, 0.35429788771988485), (8, 0.3487345377219387), (9, 0.31250440839683896)]
[(2, 0.40429283717304215), (3, 0.6619715465007497), (4, 0.7803069838811083), (5, 0.8159888282066943), (6, 0.9141579726538106), (7, 0.978573014726016), (8, 0.9518866198522946), (9, 1.0323863139420475)]
[(2, 513.9245459802768), (3, 561.62775662962), (4, 530.7658081872851), (5, 495.3699060237615), (6, 473.8506068330425), (7, 449.53634155424317), (8, 438.425530468372), (9, 406.8673000948424)]


In [None]:

spectral_results_sil_score = []
spectral_results_db_score = []
spectral_results_ch_score = []

for n_clusters in clusters_range:
    spectral = SpectralClustering(
        n_clusters=n_clusters, random_state=42, affinity='nearest_neighbors')
    spectral_labels = spectral.fit_predict(X)
    sil_score = silhouette_score(X, spectral_labels)
    db_score = davies_bouldin_score(X, spectral_labels)
    ch_score = calinski_harabasz_score(X, spectral_labels)

    spectral_results_sil_score.append((n_clusters, sil_score))
    spectral_results_db_score.append((n_clusters, db_score))
    spectral_results_ch_score.append((n_clusters, ch_score))

print(spectral_results_sil_score)
print(spectral_results_db_score)
print(spectral_results_ch_score)


[(2, 0.6867350732769777), (3, 0.554160858028285), (4, 0.4944696226639279), (5, 0.36889996241754375), (6, 0.3652374850654385), (7, 0.3525371983676508), (8, 0.32254663614494744), (9, 0.3093490960578848)]
[(2, 0.3827528421006905), (3, 0.6584442783224301), (4, 0.792963381520259), (5, 0.9261524324087838), (6, 0.934577780159754), (7, 0.9704702277177838), (8, 1.026944450260528), (9, 1.0177469593710167)]
[(2, 502.82156350235897), (3, 556.879541917953), (4, 523.0982068284292), (5, 451.2219241858261), (6, 469.17746699794236), (7, 441.2962024124759), (8, 400.7565814970956), (9, 393.66440161422)]


In [None]:

spectral_results_sil_score = []
spectral_results_db_score = []
spectral_results_ch_score = []

for n_clusters in clusters_range:
    spectral = SpectralClustering(
        n_clusters=n_clusters, random_state=42, affinity='nearest_neighbors')
    spectral_labels = spectral.fit_predict(X)
    sil_score = silhouette_score(X, spectral_labels)
    db_score = davies_bouldin_score(X, spectral_labels)
    ch_score = calinski_harabasz_score(X, spectral_labels)

    spectral_results_sil_score.append((n_clusters, sil_score))
    spectral_results_db_score.append((n_clusters, db_score))
    spectral_results_ch_score.append((n_clusters, ch_score))

print(spectral_results_sil_score)
print(spectral_results_db_score)
print(spectral_results_ch_score)


[(2, 0.6867350732769777), (3, 0.554160858028285), (4, 0.4944696226639279), (5, 0.36889996241754375), (6, 0.3652374850654385), (7, 0.3525371983676508), (8, 0.32254663614494744), (9, 0.3093490960578848)]
[(2, 0.3827528421006905), (3, 0.6584442783224301), (4, 0.792963381520259), (5, 0.9261524324087838), (6, 0.934577780159754), (7, 0.9704702277177838), (8, 1.026944450260528), (9, 1.0177469593710167)]
[(2, 502.82156350235897), (3, 556.879541917953), (4, 523.0982068284292), (5, 451.2219241858261), (6, 469.17746699794236), (7, 441.2962024124759), (8, 400.7565814970956), (9, 393.66440161422)]


In [41]:

spectral_results_sil_score = []
spectral_results_db_score = []
spectral_results_ch_score = []

for n_clusters in clusters_range:
    spectral = SpectralClustering(
        n_clusters=n_clusters, random_state=42, affinity='nearest_neighbors')
    spectral_labels = spectral.fit_predict(X)
    sil_score = silhouette_score(X, spectral_labels)
    db_score = davies_bouldin_score(X, spectral_labels)
    ch_score = calinski_harabasz_score(X, spectral_labels)

    spectral_results_sil_score.append((n_clusters, sil_score))
    spectral_results_db_score.append((n_clusters, db_score))
    spectral_results_ch_score.append((n_clusters, ch_score))

print(spectral_results_sil_score)
print(spectral_results_db_score)
print(spectral_results_ch_score)


[(2, 0.6867350732769777), (3, 0.554160858028285), (4, 0.4944696226639279), (5, 0.36889996241754375), (6, 0.3652374850654385), (7, 0.3525371983676508), (8, 0.32254663614494744), (9, 0.3093490960578848)]
[(2, 0.3827528421006905), (3, 0.6584442783224301), (4, 0.792963381520259), (5, 0.9261524324087838), (6, 0.934577780159754), (7, 0.9704702277177838), (8, 1.026944450260528), (9, 1.0177469593710167)]
[(2, 502.82156350235897), (3, 556.879541917953), (4, 523.0982068284292), (5, 451.2219241858261), (6, 469.17746699794236), (7, 441.2962024124759), (8, 400.7565814970956), (9, 393.66440161422)]


In [31]:
# DBSCAN
dbscan_results_sil_score = []
dbscan_results_db_score = []
dbscan_results_ch_score=[]

for eps in epsilon_values:
    for min_pts in min_samples:
        dbscan = DBSCAN(eps=eps, min_samples=min_pts)
        dbscan_labels = dbscan.fit_predict(X)
        if len(set(dbscan_labels)) > 1:  # Valid cluster found
            sil_score = silhouette_score(X, dbscan_labels)
            db_score = davies_bouldin_score(X, dbscan_labels)
            ch_score = calinski_harabasz_score(X, dbscan_labels)

        else:
            sil_score = -1  # Invalid clustering
            db_score = -1
            ch_score = -1
        dbscan_results_sil_score.append((eps, min_pts, sil_score))
        dbscan_results_db_score.append((eps, min_pts, db_score))   
        dbscan_results_ch_score.append((eps, min_pts, ch_score))
print(dbscan_results_sil_score)
print(dbscan_results_db_score)
print(dbscan_results_ch_score)



[(0.2, 3, -0.3435295742459988), (0.2, 4, -0.3298054383391399), (0.2, 5, 0.18256428323870055), (0.3, 3, 0.031696721278817), (0.3, 4, -0.04646912371526451), (0.3, 5, -0.05206429641095596), (0.4, 3, 0.3349357629247676), (0.4, 4, 0.3252554808454046), (0.4, 5, 0.27794001394439843), (0.5, 3, 0.34673727351549816), (0.5, 4, 0.3812453990825315), (0.5, 5, 0.48603419703456857)]
[(0.2, 3, 2.5010199425842976), (0.2, 4, 3.1902133744717176), (0.2, 5, 0.6275995822540149), (0.3, 3, 3.2818811238161842), (0.3, 4, 3.6597817209180046), (0.3, 5, 2.4009421098997565), (0.4, 3, 2.9154310562407466), (0.4, 4, 2.817174817472728), (0.4, 5, 3.893826840804742), (0.5, 3, 2.9507229738798264), (0.5, 4, 1.7346688089128244), (0.5, 5, 7.222448016359593)]
[(0.2, 3, 10.62915533120602), (0.2, 4, 13.395567320624231), (0.2, 5, 18.623578246086847), (0.3, 3, 21.90619482666317), (0.3, 4, 31.217240523499086), (0.3, 5, 55.41857537141078), (0.4, 3, 123.18855497696), (0.4, 4, 122.47971273080236), (0.4, 5, 97.56349179124388), (0.5, 3,

In [32]:
# AGNES (Hierarchical clustering)
agnes_results_sil_score = []
agnes_results_db_score = []
agnes_results_ch_score = []

distance_metrics = ['euclidean', 'manhattan']
for n_clusters in clusters_range:
    for metric in distance_metrics:
        agnes = AgglomerativeClustering(
            n_clusters=n_clusters, affinity=metric, linkage='complete')
        agnes_labels = agnes.fit_predict(X)
        sil_score = silhouette_score(X, agnes_labels)
        db_score = davies_bouldin_score(X, agnes_labels)
        ch_score = calinski_harabasz_score(X, agnes_labels)

        agnes_results_sil_score.append((n_clusters, metric, sil_score))
        agnes_results_db_score.append((n_clusters, metric, db_score))
        agnes_results_ch_score.append((n_clusters, metric, ch_score))
print(agnes_results_sil_score)
print(agnes_results_db_score)
print(agnes_results_ch_score)

[(2, 'euclidean', 0.5159829508133176), (2, 'manhattan', 0.3090678928111287), (3, 'euclidean', 0.5135953221192212), (3, 'manhattan', 0.5543665902228472), (4, 'euclidean', 0.499812829417197), (4, 'manhattan', 0.48555391802542913), (5, 'euclidean', 0.34617396930269523), (5, 'manhattan', 0.47341019169880205)]
[(2, 'euclidean', 0.6567610541416429), (2, 'manhattan', 0.7729197851253998), (3, 'euclidean', 0.6333339304359726), (3, 'manhattan', 0.6532209095848828), (4, 'euclidean', 0.6783707403483058), (4, 'manhattan', 0.7902133047018486), (5, 'euclidean', 0.9106477878980828), (5, 'manhattan', 0.803968689952676)]
[(2, 'euclidean', 280.8392024579737), (2, 'manhattan', 97.14981198159457), (3, 'euclidean', 485.9050227341817), (3, 'manhattan', 552.2561616535166), (4, 'euclidean', 495.18162297307987), (4, 'manhattan', 513.2738759819965), (5, 'euclidean', 414.39248474403956), (5, 'manhattan', 462.2702488596455)]
