# Clustering Activity

In [1]:
# Import necessary libraries
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans, AgglomerativeClustering, SpectralClustering
from skfuzzy.cluster import cmeans
from sklearn.metrics import adjusted_rand_score, silhouette_score
import numpy as np

# Synthetic benchmark data: 1000 points drawn from 4 isotropic Gaussian blobs.
# `y_true` holds the index of the blob each point was generated from and is
# used later as ground truth for the Adjusted Rand Index.
X, y_true = make_blobs(
    n_samples=1000,
    centers=4,
    cluster_std=3.0,
    random_state=42,
)

Exercise: Evaluate four clustering algorithms (K-Means, Fuzzy C-Means, Spectral, and Agglomerative)
1. Apply each clustering algorithm to the dataset where:
- cluster_count = 4
- random_state = 42 (where applicable)
- do not specify `affinity`, `metric`, `algorithm`, `init`, or `linkage`; leave them at their defaults (where applicable)
2. Use the **Adjusted Rand Index** and the **Silhouette Score** to evaluate the clustering performance.
3. Compare the performance of the algorithms based on the aforementioned metrics and decide which algorithm is best suited for the data given.

In [2]:
# code here...

In [3]:
# Shared settings required by the exercise: number of clusters to extract and
# the seed handed to every estimator that accepts one.
cluster_count, random_state = 4, 42

In [4]:
# K-Means: 10 restarts (n_init) and up to 1000 iterations per restart,
# seeded for reproducibility.
kmeans = KMeans(
    n_clusters=cluster_count,
    random_state=random_state,
    n_init=10,
    max_iter=1000,
)
# Fit on the data and read back the hard assignment of every sample.
kmeans_labels = kmeans.fit(X).labels_

In [5]:
# Spectral Clustering with the default affinity.
# NOTE: n_neighbors=10 is scikit-learn's default and only matters for
# affinity='nearest_neighbors'; with the default 'rbf' affinity it is ignored.
spectral_clustering = SpectralClustering(
    n_clusters=cluster_count,
    random_state=random_state,
    n_neighbors=10,
)
spectral_clustering.fit(X)
spectral_labels = spectral_clustering.labels_

In [6]:
# Agglomerative (hierarchical) clustering with default linkage and metric.
# The estimator takes no random_state, so there is nothing to seed.
agglomerative_clustering = AgglomerativeClustering(n_clusters=cluster_count)
agglomerative_clustering.fit(X)
agglomerative_labels = agglomerative_clustering.labels_

In [7]:
# Element-wise mean of the three label vectors (float array, one value per sample).
# NOTE(review): each algorithm numbers its clusters independently, so the same
# id need not denote the same cluster across algorithms -- confirm this average
# is really what is wanted before treating it as a cluster labeling.
average_labels = np.asarray(
    [kmeans_labels, spectral_labels, agglomerative_labels]
).mean(axis=0)

In [8]:
fcm = cmeans(X, c=cluster_count, m=2, error=0.005, maxiter=1000)
fcm_labels = np.argmax(fcm[0], axis=1)

In [9]:
silhouette_scores = {
    'KMeans': silhouette_score(X, kmeans_labels),
    'Fuzzy C-Means': silhouette_score(X, average_labels),
    'Spectral Clustering': silhouette_score(X, spectral_labels),
    'Agglomerative Clustering': silhouette_score(X, agglomerative_labels),
}

ari_scores = {
    'KMeans': adjusted_rand_score(y_true, kmeans_labels),
    'Fuzzy C-Means': adjusted_rand_score(y_true, average_labels),
    'Spectral Clustering': adjusted_rand_score(y_true, spectral_labels),
    'Agglomerative Clustering': adjusted_rand_score(y_true, agglomerative_labels),
}



In [10]:
# Print both metric tables: a heading followed by one "name: value" line per
# algorithm, with a blank line separating the two tables.
for heading, scores in (
    ('Adjusted Rand Index:', ari_scores),
    ('\nSilhouette Score:', silhouette_scores),
):
    print(heading)
    for algorithm, score in scores.items():
        print(f'{algorithm}: {score}')

Adjusted Rand Index:
KMeans: 0.7645768468484858
Fuzzy C-Means: 0.4788789093552861
Spectral Clustering: 0.6405186110096376
Agglomerative Clustering: 0.640307343092939

Silhouette Score:
KMeans: 0.4671480417353004
Fuzzy C-Means: -0.03838142872719743
Spectral Clustering: 0.4668562547059353
Agglomerative Clustering: 0.41900657021234533
