In [1]:
pip install node2vec



In [2]:
pip install munkres



In [3]:
import numpy as np
import scipy.sparse as sp
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from node2vec import Node2Vec
import networkx as nx
from sklearn.preprocessing import normalize

In [4]:
%%writefile metrics.py



from munkres import Munkres
import numpy as np
from sklearn import metrics


def cal_clustering_acc(true_label, pred_label):
    # code from https://github.com/hyzhang98/AdaGAE
    l1 = list(set(true_label))
    numclass1 = len(l1)

    l2 = list(set(pred_label))
    numclass2 = len(l2)
    if numclass1 != numclass2:
        print('Class Not equal, Error!!!!')
        return 0

    cost = np.zeros((numclass1, numclass2), dtype=int)
    for i, c1 in enumerate(l1):
        mps = [i1 for i1, e1 in enumerate(true_label) if e1 == c1]
        for j, c2 in enumerate(l2):
            mps_d = [i1 for i1 in mps if pred_label[i1] == c2]

            cost[i][j] = len(mps_d)

    # match two clustering results by Munkres algorithm
    m = Munkres()
    cost = cost.__neg__().tolist()

    indexes = m.compute(cost)

    # get the match results
    new_predict = np.zeros(len(pred_label))
    for i, c in enumerate(l1):
        # correponding label in l2:
        c2 = l2[indexes[i][1]]

        # ai is the index with label==c2 in the pred_label list
        ai = [ind for ind, elm in enumerate(pred_label) if elm == c2]
        new_predict[ai] = c

    acc = metrics.accuracy_score(true_label, new_predict)

    return acc

Writing metrics.py


In [20]:
%%writefile lgcc.py


from sklearn.preprocessing import normalize
import numpy as np
from sklearn.cluster import KMeans
from node2vec import Node2Vec
import networkx as nx
from sklearn.neighbors import NearestNeighbors



def construct_knn_graph(X, k=15, metric="cosine", sigma=1.0):
    """
    Construct k-NN similarity graph for one view
    """
    n = X.shape[0]

    if metric == "cosine":
        X = normalize(X)
        sim = np.dot(X, X.T)
        A = np.zeros((n, n))
        for i in range(n):
            idx = np.argsort(sim[i])[-(k + 1):-1]
            A[i, idx] = sim[i, idx]
    else:
        knn = kneighbors_graph(X, k, mode='distance', include_self=False)
        distances = knn.toarray()
        A = np.exp(-(distances ** 2) / (2 * sigma ** 2))
        A[distances == 0] = 0

    # Symmetrize
    A = 0.5 * (A + A.T)
    return A


def adaptive_graph_fusion(A_list, eps=1e-8):
    """
    Adaptive consensus graph fusion
    """
    V = len(A_list)
    A_bar = sum(A_list) / V

    weights = np.zeros(V)
    for v in range(V):
        weights[v] = 1.0 / (np.linalg.norm(A_list[v] - A_bar, 'fro') + eps)

    weights = weights / np.sum(weights)

    A_consensus = np.zeros_like(A_list[0])
    for v in range(V):
        A_consensus += weights[v] * A_list[v]

    return A_consensus, weights


def node2vec_embedding(A, dim=128, p=1, q=1):
    """
    Node2Vec embedding on consensus graph
    """
    G = nx.from_numpy_array(A)

    node2vec = Node2Vec(
        G,
        dimensions=dim,
        walk_length=30,
        num_walks=200,
        p=p,
        q=q,
        workers=4,
        quiet=True
    )

    model = node2vec.fit(window=10, min_count=1)
    Z = np.array([model.wv[str(i)] for i in range(A.shape[0])])
    return Z


def LGCC(X_views, n_clusters, k=15, metric_list=None, p=1, q=1):
    """
    Lightweight Graph-based Consensus Clustering (LGCC)

    Parameters:
        X_views: list of feature matrices (n x d_v)
        n_clusters: number of clusters
    """
    if metric_list is None:
        metric_list = ["cosine"] * len(X_views)

    # 1. View-wise graph construction
    A_views = []
    for X, metric in zip(X_views, metric_list):
        A = construct_knn_graph(X, k=k, metric=metric)
        A_views.append(A)

    # 2. Adaptive consensus graph fusion
    A_consensus, weights = adaptive_graph_fusion(A_views)

    # 3. Node2Vec embedding
    Z = node2vec_embedding(A_consensus, p=p, q=q)

    # 4. K-means clustering
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    labels = kmeans.fit_predict(Z)

    return labels, weights


Overwriting lgcc.py


In [6]:
pip install scipy



In [7]:
import numpy as np
import scipy.io as scio
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score

In [24]:
%%writefile run_lgcc.py

import numpy as np
from lgcc import LGCC
import scipy.io as scio
from metrics import cal_clustering_acc
from sklearn.preprocessing import normalize
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score

def evaluate_clustering(y_true, y_pred):
    acc = cal_clustering_acc(y_true, y_pred)
    nmi = normalized_mutual_info_score(y_true, y_pred)
    ari = adjusted_rand_score(y_true, y_pred)
    return acc, nmi, ari


if __name__ == "__main__":

    # -------- Load dataset (same style as CGL) --------
    data = scio.loadmat("100Leaves.mat")

    label_key = 'y' if 'y' in data.keys() else 'Y'
    y_true = data[label_key].reshape(-1)

    X = data['X']          # shape: (1, V)
    view_num = X.shape[1]

    X_views = []
    for v in range(view_num):
        X_views.append(X[0][v])

    # -------- Parameters --------
    n_clusters = len(np.unique(y_true))
    k = 15
    p = 0.5
    q = 2

    # -------- Run LGCC --------
    labels, view_weights = LGCC(
        X_views,
        n_clusters=n_clusters,
        k=k,
        metric_list=["cosine"] * view_num,
        p=p,
        q=q
    )

    # -------- Evaluation --------
    acc, nmi, ari = evaluate_clustering(y_true, labels)

    print("LGCC Results")
    print(f"ACC: {acc:.4f}")
    print(f"NMI: {nmi:.4f}")
    print(f"ARI: {ari:.4f}")
    print("View Weights:", view_weights)


Overwriting run_lgcc.py


In [25]:
!python run_lgcc.py

LGCC Results
ACC: 0.6831
NMI: 0.8374
ARI: 0.5728
View Weights: [1.]
