In [1]:
import multilayerGM as gm

  from pkg_resources import get_distribution, DistributionNotFound


In [None]:
# Small multilayerGM example: sample one partition from a uniform multiplex
# dependency tensor combined with a Dirichlet null distribution.
n_nodes=10  # first positional argument of UniformMultiplex
n_layers = 2  # second positional argument of UniformMultiplex
p=0.01  # third positional argument of UniformMultiplex (presumably the copy probability — confirm in multilayerGM docs)
theta=0.1  # forwarded to DirichletNull as `theta`
n_sets=5  # forwarded to DirichletNull as `n_sets`
dt = gm.dependency_tensors.UniformMultiplex(n_nodes, n_layers, p)
# The null distribution is defined over the trailing dimensions of the tensor shape.
null = gm.DirichletNull(layers=dt.shape[1:], theta=theta, n_sets=n_sets)
partition = gm.sample_partition(dependency_tensor=dt, null_distribution=null)

In [6]:
# Display the sampled partition; the recorded output maps (node, layer) keys to labels.
print(partition)

{(0, 0): 0, (1, 0): 0, (2, 0): 0, (3, 0): 0, (4, 0): 0, (5, 0): 0, (6, 0): 0, (7, 0): 0, (8, 0): 0, (9, 0): 0, (0, 1): 0, (1, 1): 0, (2, 1): 0, (3, 1): 0, (4, 1): 0, (5, 1): 0, (6, 1): 0, (7, 1): 0, (8, 1): 0, (9, 1): 0}


In [3]:
# -*- coding: utf-8 -*-
"""
生成多层网络并导出 u,v,layer,u_label,v_label
- N:            节点池大小（物理节点的全集为 [0, N-1]）
- coverage:     长度为 L-1 的列表，表示相邻层之间的共享比例（Jaccard：|∩|/|∪|）
- nodes_per_layer: 长度为 L 的列表，表示每一层的节点数
输出: edges.csv
"""

import math
import random
from typing import List, Dict, Set, Tuple
import csv

import numpy as np
import multilayerGM as gm  # 来自仓库


def jaccard_intersection_size(a: int, b: int, c: float) -> int:
    """
    Return the intersection size that best matches a target Jaccard index.

    For two sets of sizes ``a`` and ``b``, solving ``x / (a + b - x) = c``
    gives ``x = c * (a + b) / (1 + c)``.  The result is rounded with the
    built-in ``round`` and then clamped to ``[0, min(a, b)]``.

    Args:
        a: size of the first set.
        b: size of the second set.
        c: target Jaccard index, expected in [0, 1].

    Returns:
        The integer intersection size; 0 when either set is empty or c is 0.

    Raises:
        ValueError: if ``c`` is below 0 or above 1, or if a size is negative.
    """
    if c < 0 or 1 < c:
        raise ValueError("coverage 中的值必须在 [0,1] 之间")
    if a < 0 or b < 0:
        raise ValueError("nodes_per_layer 必须为非负整数")

    # Nothing can be shared with an empty set or a zero target.
    if a == 0 or b == 0 or c == 0:
        return 0

    ideal = (c * (a + b)) / (1.0 + c)
    rounded = int(round(ideal))
    # The overlap can never exceed the smaller of the two sets.
    return max(0, min(rounded, min(a, b)))


def build_layer_node_sets(
    N: int, coverage: List[float], nodes_per_layer: List[int], seed: int = 42
) -> List[Set[int]]:
    """
    Pick a node set for every layer out of the node pool ``[0, N-1]``.

    Layer 0 is a uniform random sample.  Each following layer reuses a random
    subset of the previous layer (its size comes from
    ``jaccard_intersection_size``) and is topped up with nodes that are not in
    the previous layer, so that the Jaccard index of adjacent layers
    approximates ``coverage[l]``.

    Args:
        N: size of the node pool.
        coverage: target Jaccard index for each pair of adjacent layers;
            must have length ``len(nodes_per_layer) - 1``.
        nodes_per_layer: number of nodes in each layer.
        seed: seed of the private ``random.Random`` instance.

    Returns:
        A list with one set of node ids per layer; set ``l`` has exactly
        ``nodes_per_layer[l]`` elements.

    Raises:
        ValueError: on a length mismatch between ``coverage`` and
            ``nodes_per_layer``, on a negative layer size, or on a layer size
            larger than ``N``.
    """
    rng = random.Random(seed)
    L = len(nodes_per_layer)
    if len(coverage) != L - 1:
        raise ValueError("coverage 的长度必须等于 len(nodes_per_layer)-1")

    if any(n < 0 for n in nodes_per_layer):
        raise ValueError("nodes_per_layer 里不能有负数")
    if any(n > N for n in nodes_per_layer):
        raise ValueError("每层的节点数不能超过节点池大小 N")

    universe = list(range(N))

    # The first layer is a plain random sample of the pool.
    S = [set(rng.sample(universe, nodes_per_layer[0]))]

    for l in range(1, L):
        a = nodes_per_layer[l - 1]
        b = nodes_per_layer[l]
        prev = S[l - 1]

        # Number of nodes to share with the previous layer.
        x = min(jaccard_intersection_size(a, b, coverage[l - 1]), len(prev))
        inter = set(rng.sample(list(prev), x))

        # Number of additional nodes needed to reach size b.
        rest = b - x

        # Prefer nodes outside the previous layer so the overlap stays at x.
        candidates = [u for u in universe if u not in prev]
        if len(candidates) >= rest:
            add = set(rng.sample(candidates, rest))
        else:
            # Not enough fresh nodes: take all of them and draw only the
            # unavoidable remainder from the previous layer, which keeps the
            # overlap as close to the target as the pool size allows.
            # (b <= N guarantees that enough previous-layer nodes remain.)
            leftover_prev = [u for u in universe if u in prev and u not in inter]
            add = set(candidates)
            add.update(rng.sample(leftover_prev, rest - len(candidates)))

        # inter and add are disjoint, so the union has exactly b elements.
        S.append(inter | add)

    return S


def sample_layer_labels(
    layer_sets: List[Set[int]],
    n_sets: int = 4,
    theta: float = 1.0,
    seed: int = 42,
) -> Dict[Tuple[int, int], int]:
    """
    Draw a community ("mesoset") label for every state node ``(node, layer)``.

    For each layer a categorical distribution over ``n_sets`` labels is drawn
    from a symmetric Dirichlet with concentration ``theta``; every node of the
    layer then gets one label sampled from that distribution.

    Args:
        layer_sets: node ids per layer; the list index is used as the layer id.
        n_sets: number of possible labels.
        theta: concentration parameter repeated ``n_sets`` times.
        seed: seed for ``np.random.default_rng``.

    Returns:
        Mapping ``(node, layer) -> label`` with labels as plain ``int``.
    """
    generator = np.random.default_rng(seed)
    concentration = [theta] * n_sets
    labels: Dict[Tuple[int, int], int] = {}

    for layer_idx, members in enumerate(layer_sets):
        # One label distribution per layer (drawn even for an empty layer).
        weights = generator.dirichlet(alpha=concentration)
        labels.update(
            ((member, layer_idx), int(generator.choice(n_sets, p=weights)))
            for member in members
        )

    return labels


def build_multilayer_network(
    partition: Dict[Tuple[int, int], int],
    mu: float = 0.1,
    k_min: int = 5,
    k_max: int = 70,
    t_k: float = -2.0,
):
    """
    Generate a multilayer network for ``partition`` with MultilayerGM's DCSBM.

    Thin wrapper around ``gm.multilayer_DCSBM_network``; every argument is
    forwarded unchanged and the generated network is returned as-is.

    NOTE(review): the original author's note states that the result has nodes
    of the form ``(u, layer)`` carrying a 'mesoset' attribute set by the
    library — confirm against the multilayerGM documentation.
    """
    dcsbm_options = {"mu": mu, "k_min": k_min, "k_max": k_max, "t_k": t_k}
    return gm.multilayer_DCSBM_network(partition, **dcsbm_options)


def export_edges_csv(
    multinet,
    out_path: str = "edges.csv",
):
    """
    Write the intra-layer edges of ``multinet`` to a CSV file.

    The file has the header ``u,v,layer,u_label,v_label``.  Each edge returned
    by ``multinet.edges()`` is a pair of nodes whose element 0 is the physical
    node id and element 1 the layer.  Edges whose endpoints lie in different
    layers are skipped.  Labels are read from the node attribute 'mesoset'
    via ``multinet.nodes[...]`` and default to -1 when it is missing.

    Args:
        multinet: graph-like object exposing ``edges()`` and ``nodes``.
        out_path: destination path; the file is overwritten.
    """
    header = ["u", "v", "layer", "u_label", "v_label"]
    with open(out_path, "w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow(header)

        for (src, dst) in multinet.edges():
            layer = src[1]
            # Keep only edges that stay within a single layer.
            if layer == dst[1]:
                src_label = multinet.nodes[src].get("mesoset", -1)
                dst_label = multinet.nodes[dst].get("mesoset", -1)
                out.writerow([src[0], dst[0], layer, src_label, dst_label])


if __name__ == "__main__":
    # ======== Example parameters ========
    N = 500  # size of the node pool
    nodes_per_layer = [200, 150, 220]  # number of nodes in each layer
    coverage = [0.3, 0.5]  # target Jaccard index of adjacent layers (length must be L-1)

    # N / coverage / nodes_per_layer could also be read from the command line (e.g. argparse).

    # 1) Build the node set of every layer so that the Jaccard index of
    #    adjacent layers approximates coverage[i].
    layer_sets = build_layer_node_sets(N, coverage, nodes_per_layer, seed=123)

    # 2) Sample a mesoset label for every (node, layer); n_sets and theta
    #    control the number of labels and the Dirichlet concentration.
    partition = sample_layer_labels(
        layer_sets, n_sets=4, theta=1.0, seed=123
    )

    # 3) Generate the multilayer network with the library's DCSBM
    #    (per the original note it only creates intra-layer edges — confirm).
    multinet = build_multilayer_network(
        partition, mu=0.1, k_min=5, k_max=70, t_k=-2.0
    )

    # 4) Export the edges to CSV.
    export_edges_csv(multinet, out_path="edges.csv")

    print("Done. 已写出 edges.csv")

Done. 已写出 edges.csv


In [3]:
# -*- coding: utf-8 -*-
"""
从 edges.csv 构建 Raphtory 时序图
- CSV 列: u,v,layer,u_label,v_label
- 约定: layer 作为边的时间戳（timestamp）
"""

import pandas as pd
from raphtory import Graph

def load_csv_to_raphtory(csv_path: str) -> Graph:
    """
    Build a Raphtory ``Graph`` from an edge CSV, using the layer as timestamp.

    The CSV must contain the columns ``u, v, layer, u_label, v_label``.  For
    every row both endpoints are added as nodes at time ``layer`` with a
    ``label`` property, followed by the edge at the same time carrying
    ``u_label``, ``v_label`` and ``layer`` as properties.

    Raises:
        ValueError: if any of the required columns is missing.
    """
    frame = pd.read_csv(csv_path)

    # Fail early when the file does not have the expected schema.
    expected = {"u", "v", "layer", "u_label", "v_label"}
    absent = expected - set(frame.columns)
    if absent:
        raise ValueError(f"CSV 缺少列: {absent}")

    graph = Graph()

    for record in frame.itertuples(index=False):
        src = int(record.u)
        dst = int(record.v)
        timestamp = int(record.layer)
        src_label = int(record.u_label)
        dst_label = int(record.v_label)

        # Register both endpoints with their label at this timestamp.
        graph.add_node(timestamp, src, properties={"label": src_label})
        graph.add_node(timestamp, dst, properties={"label": dst_label})

        # The labels are duplicated on the edge to allow edge-level filtering.
        edge_props = {"u_label": src_label, "v_label": dst_label, "layer": timestamp}
        graph.add_edge(timestamp, src, dst, properties=edge_props)

    return graph


# Example: load edges.csv from the current directory.
g = load_csv_to_raphtory("edges.csv")

# Quick sanity check: print the node and edge counts of the whole graph.
print("vertices:", g.count_nodes())
print("edges:", g.count_edges())

# NOTE(review): other cells in this file use g.at(layer) as the view of a
# single layer, so the "up to" wording in the message below may not match
# what at() returns — confirm against the installed Raphtory version.
g1 = g.at(1)
print("edges up to ts=1:", g1.count_edges())

vertices: 333
edges: 3671
edges up to ts=1: 922


In [30]:
# Collect the ids of the neighbours of node 0 in the view at timestamp 0.
view = g.at(0)
neigh = view.node(id=0).neighbours.id.collect()
print(neigh)

[98, 269, 350, 322, 444, 278]


In [2]:
from collections import defaultdict

def neighbors_in_layer(g, node: int, layer: int):
    """
    Return the neighbour ids of ``node`` in the view ``g.at(layer)``.

    Args:
        g: graph exposing ``at(layer)``; the view must expose ``node(id=...)``.
        node: id of the node to look up.
        layer: value passed to ``g.at``.

    Returns:
        A list of neighbour ids, or an empty list when the view has no such
        node (``node(id=...)`` returned ``None``).
    """
    found = g.at(layer).node(id=node)
    # Identity check: `== None` would go through the node type's __eq__.
    if found is None:
        return []
    # Reuse the node that was already looked up instead of querying it again.
    return list(found.neighbours.id.collect())

def neighbor_distribution(g, node: int, layer: int, alpha: float = 0.5):
    """
    Build the probability distribution of ``node`` over itself and its neighbours.

    A mass of ``alpha`` stays on ``node``; the remaining ``1 - alpha`` is split
    evenly over the neighbours returned by ``neighbors_in_layer``.  Without
    neighbours the whole mass stays on ``node``.

    Returns:
        ``defaultdict(float)`` mapping node id to probability.
    """
    assert 0.0 <= alpha <= 1.0, "alpha 必须在 [0,1]"
    adjacent = neighbors_in_layer(g, node, layer)

    weights = defaultdict(float)
    if adjacent:
        weights[node] = alpha
        per_neighbor = (1.0 - alpha) / len(adjacent)
        for other in adjacent:
            weights[other] += per_neighbor
    else:
        # Isolated or missing node: all mass stays on the node itself.
        weights[node] = 1.0
    return weights

def wasserstein_discrete(p: dict, q: dict) -> float:
    """
    First-order Wasserstein distance under the 0-1 ground metric.

    With d(i, j) = 0 for identical points and 1 otherwise, the distance is
    W1 = 1 - sum_x min(p_x, q_x), summed over every key of either mapping;
    a key missing from one mapping counts as 0.0 there.
    """
    keys = set(p.keys()) | set(q.keys())
    shared_mass = sum(min(p.get(key, 0.0), q.get(key, 0.0)) for key in keys)
    return 1.0 - shared_mass

def layer_wasserstein_for_node(g, node: int, layer: int, alpha: float = 0.5) -> float:
    """
    Distance between the neighbour distributions of ``node`` in two layers.

    Compares the distribution built for ``layer`` with the one built for
    ``layer + 1`` using ``wasserstein_discrete``.
    """
    here = neighbor_distribution(g, node, layer, alpha=alpha)
    above = neighbor_distribution(g, node, layer + 1, alpha=alpha)
    distance = wasserstein_discrete(here, above)
    return distance

# ===== Usage example =====
# NOTE: this cell relies on a graph `g` created elsewhere; the recorded output
# of this cell is a NameError because `g` was not defined when it ran.
node = 0
layer = 0
alpha = 0.5
print("邻居(层0):", neighbors_in_layer(g, node, layer))
print("邻居(层1):", neighbors_in_layer(g, node, layer+1))
# Distance between the neighbour distributions of `node` in layer and layer+1.
w = layer_wasserstein_for_node(g, node, layer, alpha)
print(f"Wasserstein (node={node}, {layer}->{layer+1}, alpha={alpha}) = {w:.4f}")

NameError: name 'g' is not defined

In [4]:
# -*- coding: utf-8 -*-
import numpy as np
from collections import defaultdict

# ========= 1) 取邻居（v0.16.2 用法） =========
def neighbors_in_layer(g, node: int, layer: int):
    """
    Return the neighbour ids of ``node`` in the view ``g.at(layer)``.

    Best effort: any exception raised while looking up the node or collecting
    its neighbours (for example because the node is absent from the view)
    results in an empty list.
    """
    layer_view = g.at(layer)
    try:
        found = layer_view.node(id=node)
        neighbour_ids = found.neighbours.id.collect()
        return list(neighbour_ids)
    except Exception:
        # Deliberately broad: a missing node is treated as "no neighbours".
        return []

def neighbor_distribution(g, node: int, layer: int, alpha: float = 0.5):
    """
    Distribution m_x^(alpha) of ``node`` in the view of ``layer``.

    ``alpha`` of the mass stays on the node itself and the rest is shared
    equally among the neighbours reported by ``neighbors_in_layer``.  When that
    list is empty, the full mass is placed on the node.

    Returns:
        ``defaultdict(float)`` mapping node id to probability.
    """
    assert 0.0 <= alpha <= 1.0
    adjacent = neighbors_in_layer(g, node, layer)
    degree = len(adjacent)

    mass = defaultdict(float)
    if degree == 0:
        mass[node] = 1.0
    else:
        mass[node] = alpha
        portion = (1.0 - alpha) / degree
        for neighbour in adjacent:
            mass[neighbour] += portion
    return mass

# ========= 3) 0-1 离散代价下的 W1 =========
def wasserstein_discrete(p: dict, q: dict) -> float:
    """
    W1 distance for the ground metric d(i, j) = 0 if i == j else 1.

    Computed as 1 - sum_x min(p_x, q_x) over the union of the keys of ``p``
    and ``q``; an absent key contributes the value 0.0.
    """
    union_keys = set(p.keys()) | set(q.keys())
    return 1.0 - sum(
        min(p.get(point, 0.0), q.get(point, 0.0)) for point in union_keys
    )

# ========= 4) 节点在相邻两层的 W1 =========
def layer_wasserstein_for_node(g, node: int, layer: int, alpha: float = 0.5) -> float:
    """Return the W1 distance between the neighbour distributions of ``node``
    in ``layer`` and in ``layer + 1``."""
    return wasserstein_discrete(
        neighbor_distribution(g, node, layer, alpha=alpha),
        neighbor_distribution(g, node, layer + 1, alpha=alpha),
    )

# ========= 5) 计算所有层对(l,l+1)的层间耦合强度矩阵 =========
def compute_layer_coupling(g, layers, node_ids=None, alpha: float = 0.5, agg: str = "mean"):
    """
    Compute the L×L inter-layer coupling strength matrix.

    Only consecutive entries of the sorted ``layers`` are coupled; all other
    entries stay 0.  For consecutive layers at positions i and i+1:
    ``S[i, i+1] = S[i+1, i] = 1 - aggregate(W1_v)``, where ``W1_v`` is the
    ``wasserstein_discrete`` distance between the ``neighbor_distribution`` of
    node v in the two layers.  The second layer is taken from ``layers``
    itself, so non-contiguous layer ids (e.g. [0, 2, 5]) are compared
    correctly rather than against ``layer + 1``.

    Args:
        g: graph exposing ``at(layer)``.
        layers: iterable of layer ids; it is sorted before use.
        node_ids: nodes to aggregate over.  When ``None``, the sorted union of
            the node ids of all layer views is used.
        alpha: mass kept on the node itself in ``neighbor_distribution``.
        agg: "median" uses the median; any other value uses the mean.

    Returns:
        Tuple ``(S, node_ids)``; the strength is 0.0 when ``node_ids`` is empty.
    """
    layers = sorted(layers)
    L = len(layers)

    # Physical node set: union over all layers when not supplied.
    if node_ids is None:
        all_nodes = set()
        for l in layers:
            try:
                ids = list(g.at(l).nodes.id.collect())
            except Exception:
                # Best effort: a layer whose view cannot be read adds no nodes.
                ids = []
            all_nodes |= set(ids)
        node_ids = sorted(all_nodes)

    reducer = np.median if agg == "median" else np.mean
    S = np.zeros((L, L), dtype=float)

    for i in range(L - 1):
        lower = layers[i]
        upper = layers[i + 1]
        dists = [
            wasserstein_discrete(
                neighbor_distribution(g, v, lower, alpha=alpha),
                neighbor_distribution(g, v, upper, alpha=alpha),
            )
            for v in node_ids
        ]
        strength = 1.0 - float(reducer(dists)) if len(dists) else 0.0
        # Symmetric fill.
        S[i, i + 1] = strength
        S[i + 1, i] = strength

    return S, node_ids

# ========= 6) 构造超邻接矩阵 B =========
def build_supra_adjacency(S: np.ndarray, node_ids, layers):
    """
    Assemble the supra-adjacency matrix B holding only inter-layer couplings.

    Rows and columns are ordered in layer blocks, with ``node_ids`` order inside
    each block: index(layer position i, node position k) = i * N + k.  For each
    pair of consecutive layer positions (i, i+1) with non-zero ``S[i, i+1]``,
    every node is linked to its copy in the next layer with that weight, in
    both directions.  The diagonal (intra-layer) blocks stay zero.

    Returns:
        Dense array of shape ``(L*N, L*N)`` with L = number of layers and
        N = number of node ids.
    """
    n_layers = len(sorted(layers))
    n_nodes = len(node_ids)
    size = n_layers * n_nodes
    B = np.zeros((size, size), dtype=float)

    for block in range(n_layers - 1):
        weight = S[block, block + 1]
        if weight != 0.0:
            # Diagonals of the off-diagonal blocks (block, block+1) and back.
            lower = np.arange(block * n_nodes, (block + 1) * n_nodes)
            upper = lower + n_nodes
            B[lower, upper] = weight
            B[upper, lower] = weight
    return B

# ========= 7) 一键计算并导出 =========
def export_coupling_and_B(g, L: int = None, layers=None, alpha: float = 0.5, agg: str = "mean"):
    """
    Compute the layer coupling matrix S and the supra-adjacency matrix B.

    Despite its name this function does not write any file; it only returns
    the computed objects.

    Layer selection:
    - ``layers`` given: used after sorting, so the returned list matches the
      row/column order of S and B.
    - only ``L`` given: ``layers = range(L)``.
    - neither given: ``range(max_ts + 1)``, where ``max_ts`` is taken from
      ``g.max_timestamp()`` when that attribute exists and the call succeeds,
      otherwise 0.  This assumes layers start at 0 and are contiguous; pass
      ``layers`` explicitly if they are not.

    Returns:
        Tuple ``(S, B, node_ids, layers_used)``.
    """
    if layers is not None:
        # compute_layer_coupling / build_supra_adjacency index by sorted layers.
        layers = sorted(layers)
    elif L is not None:
        layers = list(range(L))
    else:
        max_ts = 0
        # NOTE(review): whether the graph object offers max_timestamp() depends
        # on the Raphtory version — confirm; otherwise this falls back to [0].
        if hasattr(g, "max_timestamp"):
            try:
                max_ts = int(g.max_timestamp())
            except Exception:
                # Best effort: keep the default of a single layer 0.
                pass
        layers = list(range(max_ts + 1))

    S, node_ids = compute_layer_coupling(g, layers, node_ids=None, alpha=alpha, agg=agg)
    B = build_supra_adjacency(S, node_ids, layers)

    return S, B, node_ids, layers

# ================== Example call ==================
alpha = 0.5
L = 3                         # use 3 for layers 0,1,2; alternatively pass layers=[0,1,2]
S, B, node_ids, layers = export_coupling_and_B(g, L=L, alpha=alpha)
print("layers:", layers)
print("num nodes:", len(node_ids))
print("S shape:", S.shape, "B shape:", B.shape)

layers: [0, 1, 2]
num nodes: 333
S shape: (3, 3) B shape: (999, 999)


In [9]:
# NOTE: the recorded output of this cell is a ModuleNotFoundError for
# 'matlab.engine', i.e. the MATLAB Engine package was not importable here.
import matlab.engine

# 1. Start the MATLAB Engine (do this only once!)
eng = matlab.engine.start_matlab()

# 2. Add the GenLouvain directory to the MATLAB path
GENLOUVAIN_DIR = "/path/to/GenLouvain"  # TODO: replace with your own path
eng.addpath(eng.genpath(GENLOUVAIN_DIR), nargout=0)

ModuleNotFoundError: No module named 'matlab.engine'; 'matlab' is not a package