In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import davies_bouldin_score
from sklearn.metrics import silhouette_samples, silhouette_score
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import calinski_harabasz_score
import pandas as pd
import numpy as np

In [2]:
from sklearn.datasets import load_digits

# Load the digits dataset
digits = load_digits()

# Create a DataFrame
digits_df = pd.DataFrame(data = digits.data,columns = digits.feature_names)

# Add the target variable class to the DataFrame
digits_df['target'] = digits.target

# True Labels
true_labels = digits.target

# Print the features and target data
print(digits_df.head())

   pixel_0_0  pixel_0_1  pixel_0_2  pixel_0_3  pixel_0_4  pixel_0_5  \
0        0.0        0.0        5.0       13.0        9.0        1.0   
1        0.0        0.0        0.0       12.0       13.0        5.0   
2        0.0        0.0        0.0        4.0       15.0       12.0   
3        0.0        0.0        7.0       15.0       13.0        1.0   
4        0.0        0.0        0.0        1.0       11.0        0.0   

   pixel_0_6  pixel_0_7  pixel_1_0  pixel_1_1  ...  pixel_6_7  pixel_7_0  \
0        0.0        0.0        0.0        0.0  ...        0.0        0.0   
1        0.0        0.0        0.0        0.0  ...        0.0        0.0   
2        0.0        0.0        0.0        0.0  ...        0.0        0.0   
3        0.0        0.0        0.0        8.0  ...        0.0        0.0   
4        0.0        0.0        0.0        0.0  ...        0.0        0.0   

   pixel_7_1  pixel_7_2  pixel_7_3  pixel_7_4  pixel_7_5  pixel_7_6  \
0        0.0        6.0       13.0       10.0

In [3]:
# Using KMean and Silhoutte score
from sklearn.cluster import KMeans

# Kmean
kmeans = KMeans(n_clusters = 3, random_state = 20, n_init = "auto").fit(digits_df)

# Getting labels
kmeans_labels = kmeans.labels_

# Calculating silhouette score
silhouette_avg1 = silhouette_score(digits_df, kmeans_labels)

# Calculating DBI
dbi_score1 = davies_bouldin_score(digits_df, kmeans_labels)

# Calcualting rand score
rand_score1 = adjusted_rand_score(true_labels, kmeans_labels)

# Calculating calinksi and harabasz score
ch_score1 = calinski_harabasz_score(digits_df, kmeans_labels)

print("KMean")
print("Silhouette Score:", silhouette_avg1)
print("Davies-Bouldin Index:", dbi_score1)
print("Rand Score:", rand_score1)
print("Calinski and Harabasz Score:", ch_score1)

KMean
Silhouette Score: 0.1264364314825275
Davies-Bouldin Index: 2.445935417058955
Rand Score: 0.2216930048692321
Calinski and Harabasz Score: 221.11727444263212


In [23]:
from sklearn.cluster import MeanShift
# MeanShift clustering
mean_shift = MeanShift().fit(digits_df)

# Getting labels
mean_shift_labels = mean_shift.labels_

# Assume you have true labels for comparison (replace true_labels with your actual true labels)
true_labels = digits.target

# Calculating silhouette score
silhouette_avg2 = silhouette_score(digits_df, mean_shift_labels)

# Calculating Davies-Bouldin Index
dbi_score2 = davies_bouldin_score(digits_df, mean_shift_labels)

# Calculating Rand Score
rand_score2 = adjusted_rand_score(true_labels, mean_shift_labels)

# Calculating Calinski and Harabasz Score
ch_score2 = calinski_harabasz_score(digits_df, mean_shift_labels)

print("Mean Shift")
print("Silhouette Score:", silhouette_avg2)
print("Davies-Bouldin Index:", dbi_score2)
print("Rand Score:", rand_score2)
print("Calinski and Harabasz Score:", ch_score2)

ValueError: ignored

In [15]:
from sklearn.cluster import AgglomerativeClustering

# Agglomerative Clustering
agglomerative_clustering = AgglomerativeClustering(n_clusters=3).fit(digits_df)

# Getting labels
agglomerative_labels = agglomerative_clustering.labels_

# Assume you have true labels for comparison (replace true_labels with your actual true labels)
true_labels = digits.target

# Calculating silhouette score
silhouette_avg3 = silhouette_score(digits_df, agglomerative_labels)

# Calculating Davies-Bouldin Index
dbi_score3 = davies_bouldin_score(digits_df, agglomerative_labels)

# Calculating Rand Score
rand_score3 = adjusted_rand_score(true_labels, agglomerative_labels)

# Calculating Calinski and Harabasz Score
ch_score3 = calinski_harabasz_score(digits_df, agglomerative_labels)

print("Agglomerative Clustering")
print("Silhouette Score:", silhouette_avg3)
print("Davies-Bouldin Index:", dbi_score3)
print("Rand Score:", rand_score3)
print("Calinski and Harabasz Score:", ch_score3)

Agglomerative Clustering
Silhouette Score: 0.11077086341414898
Davies-Bouldin Index: 2.7497403469949773
Rand Score: 0.2981967298269896
Calinski and Harabasz Score: 193.63464529209537


In [None]:
from sklearn.cluster import SpectralClustering

# Spectral Clustering
spectral_clustering = SpectralClustering(n_clusters=3, random_state=20).fit(digits_df)

# Getting labels
spectral_labels = spectral_clustering.labels_

# Assume you have true labels for comparison (replace true_labels with your actual true labels)
true_labels = digits.target

# Calculating silhouette score
silhouette_avg4 = silhouette_score(digits_df, spectral_labels)

# Calculating Davies-Bouldin Index
dbi_score4 = davies_bouldin_score(digits_df, spectral_labels)

# Calculating Rand Score
rand_score4 = adjusted_rand_score(true_labels, spectral_labels)

# Calculating Calinski and Harabasz Score
ch_score4 = calinski_harabasz_score(digits_df, spectral_labels)

print("Spectral Clustering")
print("Silhouette Score:", silhouette_avg4)
print("Davies-Bouldin Index:", dbi_score4)
print("Rand Score:", rand_score4)
print("Calinski and Harabasz Score:", ch_score4)



Spectral Clustering
Silhouette Score: 0.012947828868357035
Davies-Bouldin Index: 0.8701680179286883
Rand Score: 5.189135899521916e-07
Calinski and Harabasz Score: 1.3133090439623518


[5.76925926e-06 1.75325957e-05 2.38908724e-05 4.91648292e-04]
not reaching the requested tolerance 2.6777386665344238e-05.
Use iteration 108 instead with accuracy 
0.00013471025486398939.

  _, diffusion_map = lobpcg(
[5.76925927e-06 1.75325958e-05 2.38908724e-05 4.91648292e-04]
not reaching the requested tolerance 2.6777386665344238e-05.
  _, diffusion_map = lobpcg(
