In [1]:
#import stuff
from sklearn.model_selection import train_test_split
from sklearn.metrics import davies_bouldin_score
from sklearn.metrics import silhouette_score
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import calinski_harabasz_score
import pandas as pd
import numpy as np

# Load the raw iris table; at this point it still contains the "species"
# label column alongside the measurement columns.
iris_raw = pd.read_csv("iris.csv")

# Retained so the name `datasets` stays available to any cell that uses it.
from sklearn import datasets

# Ground-truth labels are taken from the CSV's own "species" column, so they
# are guaranteed to line up row-for-row with the features being clustered.
# (Taking them from sklearn's bundled copy of iris is only correct when
# iris.csv holds exactly the same rows in exactly the same order.)
# sort=True assigns the integer codes in sorted order of the species names.
true_labels, _species_names = pd.factorize(iris_raw["species"], sort=True)

# Feature frame used by every clustering cell: all columns except the label.
# Building a new frame (rather than dropping in place) keeps the raw table
# intact for inspection.
df = iris_raw.drop(columns="species")

df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [2]:
# K-means: split the feature frame into three clusters, then score the
# resulting partition with all four evaluation metrics.
from sklearn.cluster import KMeans

# Build the estimator first, then fit it on the unlabeled features.
kmeans = KMeans(n_clusters=3, random_state=20, n_init="auto")
kmeans.fit(df)

# Cluster index assigned to each row of df.
kmeans_labels = kmeans.labels_

# Metrics computed from the features and the cluster assignment only.
silhouette_avg1 = silhouette_score(df, kmeans_labels)
dbi_score1 = davies_bouldin_score(df, kmeans_labels)
ch_score1 = calinski_harabasz_score(df, kmeans_labels)

# Agreement with the true species labels; note this is the *adjusted* Rand
# index even though the printed caption says "Rand Score".
rand_score1 = adjusted_rand_score(true_labels, kmeans_labels)

# Report: heading followed by one "caption value" line per metric.
print("KMean")
for _caption, _value in (
    ("Silhouette Score:", silhouette_avg1),
    ("Davies-Bouldin Index:", dbi_score1),
    ("Rand Score:", rand_score1),
    ("Calinski and Harabasz Score:", ch_score1),
):
    print(_caption, _value)


KMean
Silhouette Score: 0.5525919445499757
Davies-Bouldin Index: 0.662322864989869
Rand Score: 0.7302382722834697
Calinski and Harabasz Score: 560.3999242466401


In [3]:
# Mean shift: cluster the feature frame and score the result with all four
# evaluation metrics.
from sklearn.cluster import MeanShift

# MeanShift() is built with no arguments, so unlike the estimators that are
# given n_clusters = 3, the number of clusters here is whatever the algorithm
# finds on its own.
mean_shift = MeanShift().fit(df)

# Cluster index assigned to each row of df.
mean_shift_labels = mean_shift.labels_

# Count the distinct clusters actually produced. This is reported below so
# the scores can be read in context: metrics from a partition with a
# different number of clusters are not a like-for-like comparison with the
# 3-cluster results. The feature-based metrics also need at least two
# clusters, so this count explains a failure if only one is found.
n_clusters_mean_shift = np.unique(mean_shift_labels).size

# Metrics computed from the features and the cluster assignment only.
silhouette_avg2 = silhouette_score(df, mean_shift_labels)
dbi_score2 = davies_bouldin_score(df, mean_shift_labels)
ch_score2 = calinski_harabasz_score(df, mean_shift_labels)

# Agreement with the true species labels; note this is the *adjusted* Rand
# index even though the printed caption says "Rand Score".
rand_score2 = adjusted_rand_score(true_labels, mean_shift_labels)

print("Mean shift")
print("Clusters found:", n_clusters_mean_shift)
print("Silhouette Score:", silhouette_avg2)
print("Davies-Bouldin Index:", dbi_score2)
print("Rand Score:", rand_score2)
print("Calinski and Harabasz Score:", ch_score2)

Mean shift
Silhouette Score: 0.68548158489118
Davies-Bouldin Index: 0.3892910671661126
Rand Score: 0.5583714437541352
Calinski and Harabasz Score: 508.8821831629575


In [4]:
# Agglomerative clustering: split the feature frame into three clusters,
# then score the resulting partition with all four evaluation metrics.
from sklearn.cluster import AgglomerativeClustering

# Build the estimator first, then fit it on the unlabeled features.
agglomerative_clustering = AgglomerativeClustering(n_clusters=3)
agglomerative_clustering.fit(df)

# Cluster index assigned to each row of df.
agglomerative_labels = agglomerative_clustering.labels_

# Metrics computed from the features and the cluster assignment only.
silhouette_avg3 = silhouette_score(df, agglomerative_labels)
dbi_score3 = davies_bouldin_score(df, agglomerative_labels)
ch_score3 = calinski_harabasz_score(df, agglomerative_labels)

# Agreement with the true species labels; note this is the *adjusted* Rand
# index even though the printed caption says "Rand Score".
rand_score3 = adjusted_rand_score(true_labels, agglomerative_labels)

# Report: heading followed by one "caption value" line per metric.
print("Agglomerative clustering")
for _caption, _value in (
    ("Silhouette Score:", silhouette_avg3),
    ("Davies-Bouldin Index:", dbi_score3),
    ("Rand Score:", rand_score3),
    ("Calinski and Harabasz Score:", ch_score3),
):
    print(_caption, _value)


Agglomerative clustering
Silhouette Score: 0.5540972908150553
Davies-Bouldin Index: 0.6566044124178414
Rand Score: 0.7311985567707746
Calinski and Harabasz Score: 556.841121636393


In [5]:
# Spectral clustering: split the feature frame into three clusters, then
# score the resulting partition with all four evaluation metrics.
from sklearn.cluster import SpectralClustering

# Build the estimator first (seeded via random_state), then fit it on the
# unlabeled features.
spectral_clustering = SpectralClustering(n_clusters=3, random_state=20)
spectral_clustering.fit(df)

# Cluster index assigned to each row of df.
spectral_labels = spectral_clustering.labels_

# Metrics computed from the features and the cluster assignment only.
silhouette_avg4 = silhouette_score(df, spectral_labels)
dbi_score4 = davies_bouldin_score(df, spectral_labels)
ch_score4 = calinski_harabasz_score(df, spectral_labels)

# Agreement with the true species labels; note this is the *adjusted* Rand
# index even though the printed caption says "Rand Score".
rand_score4 = adjusted_rand_score(true_labels, spectral_labels)

# Report: heading followed by one "caption value" line per metric.
print("Spectral clustering")
for _caption, _value in (
    ("Silhouette Score:", silhouette_avg4),
    ("Davies-Bouldin Index:", dbi_score4),
    ("Rand Score:", rand_score4),
    ("Calinski and Harabasz Score:", ch_score4),
):
    print(_caption, _value)

Spectral clustering
Silhouette Score: 0.5550802152744265
Davies-Bouldin Index: 0.6542059421635233
Rand Score: 0.7455038681804481
Calinski and Harabasz Score: 554.9066856862854


In [None]:
#Silhouette score from best to worst (higher is better)
#Mean shift -> Spectral -> Agglomerative -> Kmeans

#DBI from best to worst (lower is better)
#Mean shift -> Spectral -> Agglomerative -> Kmeans

#Adjusted Rand score from best to worst (higher is better)
#Spectral -> Agglomerative -> Kmeans -> Mean shift

#Calinski and Harabasz score from best to worst (higher is better)
#Kmeans -> Agglomerative -> Spectral -> Mean shift