# 04 - Fuzzy C-Means Clustering and ROI Recommendation

This notebook applies fuzzy c-means clustering to PCA-reduced features
to group similar regions and recommend ROIs for detailed analysis.

1. Fuzzy c-means clustering
2. Membership degree analysis
3. Optimal cluster selection (fuzzy partition coefficient)
4. ROI recommendation based on cluster centroids

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import skfuzzy as fuzz
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

%matplotlib inline

## Generate synthetic PCA-reduced data

Simulates 60 segmented regions projected into 3 principal components
with three natural groupings.

In [None]:
# Seeded generator so the synthetic regions are identical on every run.
rng = np.random.default_rng(99)

# One row per ground-truth group centre in (PC1, PC2, PC3) space.
centers = np.array([[2, 1, 0], [-1, 3, 1], [0, -2, 2]], dtype=float)
n_per_group = 20

# Draw one Gaussian blob (sigma = 0.6) around each centre, in centre order,
# so the random stream is consumed exactly once per group.
data_list = [
    center + rng.normal(0, 0.6, size=(n_per_group, 3))
    for center in centers
]

# Stack the blobs row-wise: rows are regions, columns are components.
X = np.vstack(data_list)  # (60, 3)
print(f"Data shape: {X.shape}")

## 1. Fuzzy c-means clustering

`skfuzzy.cmeans` expects data as `(n_features, n_samples)`,
so we transpose.

In [None]:
# Number of fuzzy clusters to fit; the later cells reuse this value.
n_clusters = 3

# Fit fuzzy c-means on the transposed data (features x samples).
# Only `cntr` (cluster centres), `u` (membership matrix, clusters x samples)
# and `fpc` are used in this notebook; the remaining return values are kept
# so the full result tuple stays available for inspection.
cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(
    X.T, c=n_clusters, m=2.0, error=1e-5, maxiter=500, seed=42
)

print(f"Fuzzy partition coefficient (FPC): {fpc:.4f}")
print(f"Membership matrix shape: {u.shape}  (clusters x samples)")

# Harden the fuzzy partition: each region goes to its strongest cluster.
hard_labels = np.argmax(u, axis=0)

# minlength guarantees one count per cluster, so a cluster that wins no
# region is reported as 0 instead of silently shortening the output.
print(f"Hard label distribution: {np.bincount(hard_labels, minlength=n_clusters)}")

## 2. Membership degree analysis

In [None]:
# A region whose strongest membership is below this value is treated as
# ambiguous, i.e. not clearly assigned to any single cluster.
AMBIGUITY_THRESHOLD = 0.6

# One histogram panel per cluster. squeeze=False always returns a 2-D array
# of Axes, so the indexing below also works when n_clusters == 1 (where
# plt.subplots would otherwise hand back a single, non-indexable Axes).
fig, axes_grid = plt.subplots(
    1, n_clusters, figsize=(5 * n_clusters, 4), squeeze=False
)
axes = axes_grid.ravel()
for k in range(n_clusters):
    # Row k of `u` holds every region's membership degree in cluster k.
    axes[k].hist(u[k], bins=20, edgecolor="black")
    axes[k].set_title(f"Cluster {k} membership")
    axes[k].set_xlabel("Membership degree")
    axes[k].set_ylabel("Count")
plt.tight_layout()
plt.show()

# Identify ambiguous regions (max membership below the threshold).
max_membership = u.max(axis=0)
ambiguous = np.flatnonzero(max_membership < AMBIGUITY_THRESHOLD)
print(
    f"Ambiguous regions (max membership < {AMBIGUITY_THRESHOLD}): "
    f"{len(ambiguous)} of {X.shape[0]}"
)

## 3. Optimal cluster selection

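Sweep over candidate cluster counts and select the one with the highest FPC
(a higher FPC indicates a crisper, less overlapping partition). Note that the
sweep only reports the best count: the sections that follow keep using
`n_clusters` from section 1, so update that value and re-run if the sweep
suggests a different number of clusters.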

In [None]:
# Candidate cluster counts to evaluate.
k_range = range(2, 8)

# Refit once per candidate with the same settings as the main fit and keep
# only the fuzzy partition coefficient, the last item of the result tuple.
fpcs = [
    fuzz.cmeans(X.T, c=k, m=2.0, error=1e-5, maxiter=500, seed=42)[-1]
    for k in k_range
]

candidate_ks = list(k_range)

# FPC as a function of the number of clusters.
fig_fpc, ax_fpc = plt.subplots(figsize=(6, 4))
ax_fpc.plot(candidate_ks, fpcs, "o-")
ax_fpc.set_xlabel("Number of clusters")
ax_fpc.set_ylabel("Fuzzy Partition Coefficient")
ax_fpc.set_title("FPC vs. cluster count")
ax_fpc.grid(True, alpha=0.3)
plt.show()

# The candidate with the largest FPC wins (first one on ties).
best_pos = np.argmax(fpcs)
best_k = candidate_ks[best_pos]
print(f"Optimal cluster count: {best_k} (FPC = {fpcs[best_pos]:.4f})")

## 4. ROI recommendation

For each cluster, recommend the region closest to the centroid
as the representative ROI.

In [None]:
# Recommend one representative region per cluster: the member region that
# lies closest (Euclidean distance) to the cluster centroid.
print("Recommended ROIs (closest to cluster centroid):")
print(f"{'Cluster':<10} {'Region ID':<12} {'Distance':<12} {'Membership':<12}")
print("-" * 46)

roi_indices = []
for k, centroid in enumerate(cntr):
    # Distance from every region to this centroid.
    dists = np.linalg.norm(X - centroid, axis=1)

    # Only regions whose strongest membership is cluster k are candidates.
    # Searching over all regions could pick a region that belongs mainly to
    # another cluster, or recommend the same region for two clusters.
    candidates = np.flatnonzero(hard_labels == k)
    if candidates.size == 0:
        # Empty cluster after hardening: fall back to the global nearest
        # region so every cluster still gets a recommendation.
        candidates = np.arange(X.shape[0])

    # Plain int keeps roi_indices a list of ordinary Python integers.
    best_idx = int(candidates[np.argmin(dists[candidates])])
    roi_indices.append(best_idx)
    print(f"{k:<10} {best_idx:<12} {dists[best_idx]:<12.4f} {u[k, best_idx]:<12.4f}")

In [None]:
# Scatter of all regions in the PC1/PC2 plane, coloured by hard cluster label,
# with the recommended ROIs and the cluster centroids drawn on top.
fig, ax = plt.subplots(figsize=(7, 6))

pc1, pc2 = X[:, 0], X[:, 1]
scatter = ax.scatter(
    pc1, pc2,
    c=hard_labels, cmap="Set2", edgecolors="k", s=50, alpha=0.7,
)

# Recommended regions: large red stars above the regular points.
roi_points = X[roi_indices]
ax.scatter(
    roi_points[:, 0], roi_points[:, 1],
    c="red", marker="*", s=300, zorder=5, label="Recommended ROIs",
)

# Cluster centroids: black crosses at the same drawing level as the ROIs.
ax.scatter(
    cntr[:, 0], cntr[:, 1],
    c="black", marker="X", s=200, zorder=5, label="Centroids",
)

ax.set(
    xlabel="PC1",
    ylabel="PC2",
    title="Fuzzy c-means clusters with ROI recommendations",
)
ax.legend()
plt.tight_layout()
plt.show()