In [6]:
# -*- coding: utf-8 -*-
"""
Build multilayer network from CSV (u,v,layer,...) and call IterModMax's
it_mod_max_multilevel via MATLAB Engine — NO argparse version.

Usage:
  1) Edit the user-configurable parameters section below (“用户可配置参数”)
  2) python run_iter_mod_max_multilevel.py
"""

import os
import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix
import matlab.engine

# =========================
# User-configurable parameters (edit these directly)
# =========================
CSV_PATH      = r"sync_data/test.csv"            # Input CSV with columns u,v,layer,u_label,v_label (only the first three are used)
ITER_MOD_MAX  = r"IterModMax"    # e.g. "/home/me/code/IterModMax"; may be set to None if already on the MATLAB path
GENLOUVAIN    = r"GenLouvain"    # e.g. "/home/me/code/GenLouvain"; may be set to None if already on the MATLAB path
GAMMA0        = 1.0                       # Initial resolution parameter
OMEGA0        = 1.0                       # Initial inter-layer coupling
BETA0        = 1.0   # Initial beta value passed to the multilevel routine
UNDIRECTED    = True                      # True: treat edges as undirected; False: keep edge direction
BINARIZE      = True                      # True: parallel edges count as 1; False: keep multiplicity as weight
# =========================


def _remap_nodes(edges: pd.DataFrame):
    all_nodes = pd.Index(edges['u']).append(pd.Index(edges['v'])).unique()
    mapping = {node: i for i, node in enumerate(all_nodes, start=1)}  # 1-based for MATLAB
    edges = edges.assign(
        u_idx=edges['u'].map(mapping).astype(int),
        v_idx=edges['v'].map(mapping).astype(int)
    )
    return edges, mapping

def _build_layer_mats(edges: pd.DataFrame, undirected: bool, binarize: bool):
    layers = []
    layer_ids = {}
    n = int(max(edges['u_idx'].max(), edges['v_idx'].max()))
    for lid, (layer_value, df) in enumerate(edges.groupby('layer'), start=1):
        if undirected:
            dfu = df.loc[df['u_idx'] != df['v_idx'], ['u_idx','v_idx']]
            sym = pd.concat([dfu, dfu.rename(columns={'u_idx':'v_idx','v_idx':'u_idx'})], ignore_index=True)
            rows = sym['u_idx'].to_numpy() - 1
            cols = sym['v_idx'].to_numpy() - 1
            data = np.ones(len(rows), dtype=float)
        else:
            dfu = df.loc[:, ['u_idx','v_idx']]
            rows = dfu['u_idx'].to_numpy() - 1
            cols = dfu['v_idx'].to_numpy() - 1
            data = np.ones(len(rows), dtype=float)

        A = coo_matrix((data, (rows, cols)), shape=(n, n))
        if binarize:
            # 把多重边二值化
            A = coo_matrix((np.ones_like(A.data), (A.row, A.col)), shape=A.shape).tocsr()
            A.data[:] = 1.0
            A = A.tocoo()
        layers.append(A)
        layer_ids[layer_value] = lid
    return layers, layer_ids, n

def _scipy_to_matlab_sparse(A: coo_matrix):
    """Split a SciPy sparse matrix into the pieces MATLAB's ``sparse`` needs.

    Args:
        A: Sparse matrix; it is converted to COO form first.

    Returns:
        A tuple ``(i, j, v, m, n)``: ``matlab.double`` vectors holding the
        1-based row indices, 1-based column indices and stored values,
        followed by the row and column counts as floats.
    """
    coo = A.tocoo()
    n_rows, n_cols = coo.shape

    # Shift to 1-based indexing for MATLAB.
    row_list = (coo.row + 1).astype(float).tolist()
    col_list = (coo.col + 1).astype(float).tolist()
    value_list = coo.data.astype(float).tolist()

    return (
        matlab.double(row_list),
        matlab.double(col_list),
        matlab.double(value_list),
        float(n_rows),
        float(n_cols),
    )

def _build_matlab_cell_of_sparse(eng, layers):
    """Create the cell array ``A_cell`` of sparse matrices in the MATLAB workspace.

    Each layer is shipped to MATLAB as index/value vectors and assembled there
    with ``sparse``; the resulting matrices are never returned to Python.

    Args:
        eng: A running MATLAB engine; its base workspace receives ``A_cell``.
        layers: Sequence of SciPy sparse matrices, one per layer.

    Returns:
        None. The result lives in the MATLAB workspace variable ``A_cell``.
    """
    eng.eval(f"A_cell = cell(1,{len(layers)});", nargout=0)

    try:
        for idx, A in enumerate(layers, start=1):
            # Reuse the shared converter instead of repeating the 1-based
            # triplet conversion here.
            i, j, v, m, n = _scipy_to_matlab_sparse(A)

            # MATLAB variable names must start with a letter.
            eng.workspace['tmp_i'] = i
            eng.workspace['tmp_j'] = j
            eng.workspace['tmp_v'] = v
            eng.workspace['tmp_m'] = m
            eng.workspace['tmp_n'] = n

            # Assemble the sparse matrix on the MATLAB side and store it.
            eng.eval("TMP_A = sparse(tmp_i, tmp_j, tmp_v, tmp_m, tmp_n);", nargout=0)
            eng.eval(f"A_cell{{{idx}}} = TMP_A;", nargout=0)
    finally:
        # Remove the temporaries even if a MATLAB call failed part-way.
        eng.eval("clear TMP_A tmp_i tmp_j tmp_v tmp_m tmp_n;", nargout=0)

    # A_cell deliberately stays in the MATLAB workspace for later calls.
    return None

def main():
    """Run the CSV -> multilayer network -> IterModMax pipeline end to end.

    Reads the edge list at ``CSV_PATH``, builds one sparse adjacency matrix
    per layer, hands them to MATLAB as ``A_cell`` and evaluates
    ``it_mod_max_multilevel`` there. The per-node community assignments are
    written to ``<csv stem>_multilevel_assignments.csv`` and the scalar
    results to ``<csv stem>_multilevel_summary.txt`` in the current working
    directory. The MATLAB engine is always shut down before returning.

    Raises:
        ValueError: If a required column (``u``, ``v``, ``layer``) is missing
            or no layer could be built from the CSV.
        FileNotFoundError: If ``GENLOUVAIN`` or ``ITER_MOD_MAX`` is set but
            is not an existing directory.
    """
    # === Load the CSV ===
    df = pd.read_csv(CSV_PATH)
    for col in ['u','v','layer']:
        if col not in df.columns:
            raise ValueError(f"CSV缺少必须列: {col}")

    # === Remap node labels to 1..N (MATLAB-friendly) ===
    df_idx, node_map = _remap_nodes(df)

    # === Build one sparse adjacency matrix per layer ===
    layers, layer_ids, N = _build_layer_mats(df_idx, UNDIRECTED, BINARIZE)
    L = len(layers)
    if L == 0:
        raise ValueError("未从CSV中解析到任何层（layer）。")

    print(f"Nodes: {N}  Layers: {L}")

    # Validate the toolbox directories before MATLAB is launched so that a
    # bad path cannot leave an engine process behind.
    toolboxes = (("GenLouvain", GENLOUVAIN), ("IterModMax", ITER_MOD_MAX))
    for label, path in toolboxes:
        if path and not os.path.isdir(path):
            raise FileNotFoundError(f"{label}路径不存在: {path}")

    # === Start the MATLAB engine ===
    print("Starting MATLAB engine...")
    eng = matlab.engine.start_matlab()
    try:
        # Optional: add the toolboxes to the MATLAB path (recursively).
        for label, path in toolboxes:
            if path:
                eng.addpath(eng.genpath(path), nargout=0)
                print(f"Added {label} (recursive): {path}")

        # === Create the cell array of sparse matrices inside MATLAB ===
        _build_matlab_cell_of_sparse(eng, layers)

        # === Run the algorithm inside MATLAB (A_cell never comes back) ===
        print("Running it_mod_max_multilevel ...")

        eng.workspace['gamma0_py'] = float(GAMMA0)
        eng.workspace['omega0_py'] = float(OMEGA0)
        eng.workspace['beta0_py']  = float(BETA0)

        # NOTE(review): the run recorded with this script failed inside
        # multilevel.m at `pi_map{t}` ("brace indexing is not supported"),
        # reached through `multilevel(A, Pi, gamma, omega)` in
        # it_mod_max_multilevel.m. That suggests the function expects a cell
        # array `Pi` which this call does not supply — confirm the signature
        # in it_mod_max_multilevel.m before relying on this call.
        eng.eval(
            "[gamma_out, omega_out, params_out, S_out, Q_out, conv_out] = "
            "it_mod_max_multilevel(A_cell, gamma0_py, omega0_py, beta0_py);",
            nargout=0
        )
        # Make S dense before fetching it, in case MATLAB returned it sparse.
        eng.eval("S_out = full(S_out);", nargout=0)

        # Fetch only the dense / scalar results, and convert S while the
        # engine is still alive.
        gamma = eng.workspace['gamma_out']
        omega = eng.workspace['omega_out']
        S_np = np.array(eng.workspace['S_out'])  # expected shape (N, L)
        Q = eng.workspace['Q_out']
        converged = eng.workspace['conv_out']
    finally:
        # Always release the MATLAB process, on success and on failure.
        eng.quit()

    # === Post-process the assignments ===
    if S_np.ndim < 2:
        # A 1x1 result arrives as a scalar and a vector as 1-D; make both a
        # column so the (rows, cols) unpacking below is always valid.
        S_np = S_np.reshape(-1, 1)
    nS, lS = S_np.shape
    if nS != N or lS != L:
        print(f"[警告] 返回的S形状与期望不一致：got {S_np.shape}, expected ({N},{L})")

    inv_nodes = {new: old for old, new in node_map.items()}
    inv_layer = {lid: layer_val for layer_val, lid in layer_ids.items()}

    # One record per (node, layer); unknown ids fall back to the raw index.
    rows = [
        (inv_nodes.get(i + 1, i + 1), inv_layer.get(j + 1, j + 1), int(S_np[i, j]))
        for j in range(lS)
        for i in range(nS)
    ]
    df_S = pd.DataFrame(rows, columns=['node', 'layer', 'community'])

    # === Save the outputs ===
    out_base = os.path.splitext(os.path.basename(CSV_PATH))[0]
    df_S.to_csv(f"{out_base}_multilevel_assignments.csv", index=False)
    with open(f"{out_base}_multilevel_summary.txt", "w", encoding="utf-8") as f:
        f.write(f"gamma*: {gamma}\nomega*: {omega}\nconverged: {bool(converged)}\nQ: {Q}\n")

    print("\n=== IterModMax multilevel results ===")
    print(f"gamma*: {gamma}, omega*: {omega}, converged: {bool(converged)}")
    print(f"Q (modularity): {Q}")
    print("Saved:",
          f"{out_base}_multilevel_assignments.csv",
          f"{out_base}_multilevel_summary.txt", sep="\n  - ")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Nodes: 11  Layers: 3
Starting MATLAB engine...
Added GenLouvain (recursive): GenLouvain
Added IterModMax (recursive): IterModMax
Running it_mod_max_multilevel ...
Initialisation: gamma = 1.00, omega = 1.00


此类型的变量不支持使用花括号进行索引。

出错 multilevel (第 119 行)
  jj(indx+1:indx+N(t+1)) = pi_map{t};  
                           ^^^^^^^^^
出错 it_mod_max_multilevel (第 135 行)
        [B, twom] = multilevel(A, Pi, gamma, omega);  
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^



MatlabExecutionError: 
  File /Users/peijiezhong/Desktop/research/multilayer/IterModMax/HelperFunctions/multilevel.m, line 119, in multilevel

  File /Users/peijiezhong/Desktop/research/multilayer/IterModMax/it_mod_max_multilevel.m, line 135, in it_mod_max_multilevel
此类型的变量不支持使用花括号进行索引。


NameError: name 'eng' is not defined