# Clustering Activity

In [6]:
%pip install scikit-fuzzy

Collecting scikit-fuzzy
  Downloading scikit-fuzzy-0.4.2.tar.gz (993 kB)
     ---------------------------------------- 0.0/994.0 kB ? eta -:--:--
     - ------------------------------------- 30.7/994.0 kB 1.3 MB/s eta 0:00:01
     --- ----------------------------------- 92.2/994.0 kB 1.1 MB/s eta 0:00:01
     ---- ------------------------------- 122.9/994.0 kB 901.1 kB/s eta 0:00:01
     -------- ----------------------------- 225.3/994.0 kB 1.3 MB/s eta 0:00:01
     ---------- --------------------------- 266.2/994.0 kB 1.2 MB/s eta 0:00:01
     ------------- ------------------------ 348.2/994.0 kB 1.3 MB/s eta 0:00:01
     -------------- ----------------------- 368.6/994.0 kB 1.3 MB/s eta 0:00:01
     ----------------- -------------------- 450.6/994.0 kB 1.2 MB/s eta 0:00:01
     ------------------- ------------------ 522.2/994.0 kB 1.3 MB/s eta 0:00:01
     --------------------- ---------------- 573.4/994.0 kB 1.3 MB/s eta 0:00:01
     ------------------------ ------------- 634.9/994.

In [7]:
# Import necessary libraries
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans, AgglomerativeClustering, SpectralClustering
from sklearn.metrics import adjusted_rand_score
import skfuzzy.cluster as fuzz
import numpy as np
from sklearn.metrics import adjusted_rand_score, silhouette_score

# Synthetic benchmark data: 300 points scattered around 4 blob centres.
# y_true holds the generating blob of each point and serves as ground truth.
X, y_true = make_blobs(
    n_samples=300,
    centers=4,
    cluster_std=1.0,
    random_state=42,
)

Exercise: Evaluate four clustering algorithms (K-Means, Fuzzy C-Means, Spectral, and Agglomerative)
1. Apply each clustering algorithm to the dataset where:
- cluster_count = 4
- random_state = 42 (where applicable)
- do not specify `affinity`, `metric`, `algorithm`, `init`, or `linkage`; leave these at their default values wherever an algorithm accepts them
2. Use the **Adjusted Rand index** and **Silhouette Score** to evaluate the clustering performance.
3. Compare the performance of the algorithms based on the aforementioned metrics and decide which algorithm is best suited for the data given.

In [8]:
# K-Means: four clusters, fixed seed, n_init=10 restarts.
kmeans = KMeans(n_init=10, random_state=42, n_clusters=4)
# Fit once and read the labels assigned to the training points.
kmeans_labels = kmeans.fit(X).labels_
# Score against the ground truth (ARI) and on cluster geometry (silhouette).
kmeans_sil = silhouette_score(X, kmeans_labels)
kmeans_ari = adjusted_rand_score(y_true, kmeans_labels)

# Fuzzy C-Means
# skfuzzy takes the data as (features, samples), hence the transpose.
# `seed` fixes the random initial partition so that repeated runs give the
# same clustering, in line with random_state=42 used for the other algorithms.
# Only the first two return values (centres and memberships) are needed.
fcm_centers, fcm_labels, *_ = fuzz.cmeans(
    X.T, c=4, m=1.25, error=0.075, maxiter=1000, seed=42
)
# NOTE: despite its name, `fcm_labels` is the fuzzy membership matrix
# (clusters x samples), not hard labels. Taking the argmax over the cluster
# axis assigns each sample to its highest-membership cluster.
fuzzy_labels = np.argmax(fcm_labels, axis=0)
fuzzy_ari = adjusted_rand_score(y_true, fuzzy_labels)
fuzzy_sil = silhouette_score(X, fuzzy_labels)

# Spectral clustering: four clusters, fixed seed, all other settings default.
spectral_labels = (
    SpectralClustering(random_state=42, n_clusters=4)
    .fit(X)
    .labels_
)
# Score against the ground truth (ARI) and on cluster geometry (silhouette).
spectral_sil = silhouette_score(X, spectral_labels)
spectral_ari = adjusted_rand_score(y_true, spectral_labels)

# Agglomerative clustering: four clusters, all other settings default.
# No random_state is passed: this estimator is called without one here.
agglo_labels = (
    AgglomerativeClustering(n_clusters=4)
    .fit(X)
    .labels_
)
# Score against the ground truth (ARI) and on cluster geometry (silhouette).
agglo_sil = silhouette_score(X, agglo_labels)
agglo_ari = adjusted_rand_score(y_true, agglo_labels)

# Results

In [9]:
# Report the Adjusted Rand Index of every algorithm, one line each.
ari_report = [
    "Adjusted Rand Index",
    f"K-Means: {kmeans_ari}",
    f"Fuzzy C-Means: {fuzzy_ari}",
    f"Spectral: {spectral_ari}",
    f"Agglomerative: {agglo_ari}",
]
print("\n".join(ari_report))

# Same layout for the silhouette scores; the leading newlines in the header
# leave a visual gap between the two reports.
silhouette_report = [
    "\n\nSilhouette Score",
    f"K-Means: {kmeans_sil}",
    f"Fuzzy C-Means: {fuzzy_sil}",
    f"Spectral: {spectral_sil}",
    f"Agglomerative: {agglo_sil}",
]
print("\n".join(silhouette_report))

Adjusted Rand Index
K-Means: 0.9910811504997546
Fuzzy C-Means: 0.6267323784563009
Spectral: 0.9910811504997546
Agglomerative: 0.9910811504997546


Silhouette Score
K-Means: 0.7915830011443039
Fuzzy C-Means: 0.6212282155627628
Spectral: 0.7915830011443039
Agglomerative: 0.7915830011443039
