# 3. Consensus Clustering
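This notebook performs consensus clustering on percolation participation matrices to identify critical nodes across subjects: for each subject the nodes are split into two clusters, the nodes in the higher-valued cluster receive one vote, and the votes are averaged across subjects.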

## Imports

In [None]:
import os
import numpy as np
import scipy.io
import pandas as pd
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage, fcluster
from scipy.stats import zscore
from scipy.integrate import simpson
from nilearn import plotting

## Define helper functions

In [None]:
def run_consensus_clustering(metric_matrix):
    """Return, per node, the fraction of subjects that place it in the top cluster.

    Each row of ``metric_matrix`` (one row per subject, one column per node)
    is z-scored and split into two Ward clusters. The cluster with the larger
    mean z-score is treated as the critical one and each of its nodes gets
    one vote.

    Parameters
    ----------
    metric_matrix : 2-D array, shape (n_subs, n_nodes)

    Returns
    -------
    1-D array of length n_nodes: votes divided by the number of subjects.
    """
    n_subs, n_nodes = metric_matrix.shape
    vote_counts = np.zeros(n_nodes)

    for subject_idx in range(n_subs):
        standardized = zscore(metric_matrix[subject_idx, :])

        # linkage expects a 2-D observation matrix, so use a single column.
        tree = linkage(standardized.reshape(-1, 1), method='ward')
        cluster_ids = fcluster(tree, 2, criterion='maxclust')

        cluster_means = []
        for cid in np.unique(cluster_ids):
            cluster_means.append(np.mean(standardized[cluster_ids == cid]))

        # Cluster ids start at 1, hence the offset on the argmax position.
        top_cluster = 1 + np.argmax(cluster_means)
        vote_counts[cluster_ids == top_cluster] += 1

    return vote_counts / n_subs

def binary_top_15(score, top_percent=15):
    """Return a 0/1 mask flagging the highest-scoring entries of ``score``.

    The cut-off is the ``100 - top_percent`` percentile of ``score`` (the
    85th percentile by default). Entries greater than or equal to the
    cut-off are marked 1, all others 0. Because the comparison is inclusive,
    ties at the cut-off are all kept, so more than ``top_percent`` percent
    of the entries can be flagged.

    Parameters
    ----------
    score : array-like of numbers
    top_percent : float, default 15
        Size of the top band in percent; must lie in [0, 100].

    Returns
    -------
    Integer ndarray with the same shape as ``score``.
    """
    # Accept plain sequences as well as ndarrays.
    score = np.asarray(score)
    threshold = np.percentile(score, 100 - top_percent)
    return (score >= threshold).astype(int)

## Load data

In [None]:
# Folder holding the coordinate table and the per-subject .mat files.
data_dir = '/content/drive/MyDrive/PSI_broadband'
hub_metrics_path = os.path.join(data_dir, 'hub_metrics_all_subjects.mat')
coords_path = os.path.join(data_dir, 'MNI_66_coords.txt')

# Tab-separated table without a header row; only the first three columns are kept.
coords = pd.read_csv(coords_path, sep="\t", header=None).to_numpy()[:, :3]
n_nodes = len(coords)

# Every file in data_dir whose name ends with the participation-matrix suffix.
file_paths = [
    os.path.join(data_dir, entry)
    for entry in os.listdir(data_dir)
    if entry.endswith('broadband_psi_adj_participation_in_percolation.mat')
]

## Percolation-based consensus clustering

In [None]:
# consensus_votes[i] counts the subjects whose critical cluster contains row i.
consensus_votes = np.zeros(n_nodes)
# Number of files that actually cast a vote; files skipped below are excluded
# so they do not deflate the consensus score.
n_subs_used = 0

for path in file_paths:
    mat = scipy.io.loadmat(path)
    if 'node_participation_at_percolation' not in mat:
        continue
    # Transposed so that rows are the units being clustered; row indices are
    # used to index consensus_votes below, so there must be one row per node.
    x = mat['node_participation_at_percolation'].T
    if x.shape[0] != n_nodes:
        raise ValueError(
            f'{path}: expected {n_nodes} rows after transposing, got {x.shape[0]}'
        )

    # Flag columns without NaN, then locate the longest run of consecutive
    # flagged columns. Padding the 0/1 mask with zeros makes every run show
    # up as one +1 (start index) and one -1 (exclusive end index) in the diff.
    x_non_nan = ~np.isnan(np.sum(x, axis=0))
    D = np.diff(np.concatenate([[0], x_non_nan.astype(int), [0]]))
    starts = np.where(D == 1)[0]
    ends = np.where(D == -1)[0]
    if len(starts) == 0 or len(ends) == 0:
        # No NaN-free column at all: nothing to cluster for this file.
        continue
    longest = np.argmax(ends - starts)
    x_crop = x[:, starts[longest]:ends[longest]]
    x_crop = np.nan_to_num(x_crop)

    # Subtract a linear ramp rising from 1/T to 1 across the T columns, scale
    # by weights falling from 1 to 1/T, and clip negative values to zero.
    flattening = -np.linspace(1 / x_crop.shape[1], 1, x_crop.shape[1])
    weights = np.linspace(1, 1 / x_crop.shape[1], x_crop.shape[1])
    x_flat = x_crop + flattening[np.newaxis, :]
    x_weighted = x_flat * weights[np.newaxis, :]
    x_weighted = np.clip(x_weighted, 0, None)

    # Two-cluster Ward split of the rows; the cluster whose rows have the
    # larger mean area under the curve is taken as the critical one.
    Z = linkage(x_weighted, method='ward')
    labels = fcluster(Z, 2, criterion='maxclust')
    auc_vals = simpson(x_weighted, axis=1)
    cluster_ids = np.unique(labels)
    # Look the label up directly instead of assuming labels are exactly 1..k.
    crit_label = cluster_ids[
        np.argmax([np.mean(auc_vals[labels == l]) for l in cluster_ids])
    ]
    crit_nodes = np.where(labels == crit_label)[0]
    consensus_votes[crit_nodes] += 1
    n_subs_used += 1

n_subs = len(file_paths)
if n_subs_used == 0:
    raise RuntimeError(
        'No usable participation matrices were found; cannot compute a consensus score.'
    )
if n_subs_used < n_subs:
    print(
        f'Skipped {n_subs - n_subs_used} of {n_subs} files '
        '(missing variable or no NaN-free column).'
    )
# Normalise by the subjects that voted, not by the number of files listed.
consensus_score = consensus_votes / n_subs_used
crit_bin = binary_top_15(consensus_score)

## Plot consensus result

In [None]:
# Keep the consensus score only where crit_bin is 1; every other node becomes 0.
highlight_vals = crit_bin * consensus_score

# Marker plot of the per-node values at their coordinates.
plotting.plot_markers(
    highlight_vals,
    coords,
    node_cmap='Reds',
    display_mode='lzr',
    title='Top Nodes via Percolation',
)