# 一、核心思想
<b>角度一：随机游走模型</b><br>
假设一个随机游走者在网路中随机移动，在社区内部，游走者倾向于停留更长时间（社区内连接紧密，困住了游走者）；而在社区之间，游走者转换概率较低。<br>
<b>角度二：编码问题</b><br>
对于一系列节点地址，想用最短的编码描述游走路径。那么对于频率高的路径（社区内游走），要用短编码；而对于频率低的路径（社区间游走），要用长编码。这里的编码长短可以从熵的角度来看。
# 二、数学原理
1. 使用二级编码来描述节点，第一级标识社区模块，第二级标识社区内的节点。
2. 目标函数为：$L(M)=qH(Q)+\sum_{i=1}^{m}p^{i}H(P^{i})$，其中 $L_{1}=qH(Q)$ 表示社区模块编码的信息熵，也表示游走者在社区间转换的概率， $L_{2}=\sum_{i=1}^{m}p^{i}H(P^{i})$ 表示社区内节点编码的信息熵，也表示游走者在社区内游走的概率。最终要最小化$L(M)$。
3. 注意，虽然最小化$L(M)$看起来要求第一项和第二项都小，但由于$L_1$表示社区间转换的概率（熵），$L_2$表示社区内游走的概率（熵），而两者是互斥的，因此算法实际上在寻找两者的权衡。举个例子：假设整个大图就是一个社区，那么$L_1=0$，但$L_2$很大。如果将大图切分的社区越多，那么$L_1$就很大，而在社区内游走的概率$L_2$就很小。
# 三、算法流程
类似Louvain，只是将$Modularity$换成了$L(M)$。
1. 初始化，每个节点单独为一个社区。
2. 尝试将每个节点移动到相邻社区，计算$ΔL$变化，若$ΔL<0$则接受该移动。
3. 聚合社区，将地②步的每个社区聚合为一个节点，内部的连接用一个自连接表示，外部的多条连接也合并到一起。
4. 重复②③。

In [1]:
import numpy as np
import queue

In [14]:
EPS = 1e-10

def L1(graph, community, community_label):
    '''
    对于整个大图，计算社区间转移的熵
    graph: List[list] 大图邻接矩阵
    community: List[list] 社区矩阵
    community_label: List[int] 用一个节点标识社区
    '''
    external_weights = np.sum((graph*(~community))[community_label, :], axis=1) + EPS
    weights_sum = np.sum(external_weights)
    return -np.sum(external_weights * np.log(external_weights/weights_sum))

def L2(graph, degrees, c):
    '''
    对子社区，计算节点内游走的熵
    graph: List[list] 大图邻接矩阵
    degrees: List[int] 全部节点的度
    c: list 子社区的节点
    '''

    internal_weights = degrees[c].copy() + EPS
    internal_weights = np.append(internal_weights, np.sum(graph[c,:][:,c])/2+EPS)
    weights_sum = np.sum(internal_weights)
    return -np.sum(internal_weights * np.log(internal_weights/weights_sum))

def L(graph, community, dense_community, m=None):
    '''
    计算当前社区划分方式的L(M)
    graph: List[list] 大图邻接矩阵
    community: List[list] 社区矩阵
    dense_community: List[list] 精密各社区的节点
    m: 大图的度数
    '''
    if m is None:
        m = np.sum(graph)
    degrees = np.sum(graph, axis=1)
    community_label = np.zeros(len(graph), dtype=bool)
    l2 = 0
    for c in dense_community:
        community_label[c[0]] = True
        l2 += L2(graph, degrees, c)
    l1 = L1(graph, community, community_label)
    return (l1 + l2) / m

In [15]:
def step1_nodeMove(graph):
    ''' 根据邻接矩阵初步形成社区，返回community(n*n) '''
    n = len(graph)
    degrees = np.sum(graph, axis=1)
    community = np.eye(n, dtype=bool)
    community_label = np.ones(n, dtype=bool)
    q = queue.Queue()
    for x in np.random.choice(n, size=n, replace=False):
        q.put(x)
    visited = set(range(n)) # 用来标识那些在队列中的节点
    while not q.empty():
        node = q.get()
        visited.discard(node)
        L1_old = L1(graph, community, community_label) # 计算更新Node社区之前的L1
        community_exclude_node = community[node].copy()
        L2_old_node = L2(graph, degrees, community_exclude_node)
        community_exclude_node[node] = False
        L2_new_node = L2(graph, degrees, community_exclude_node)
        for c in np.where(~community[node])[0]:
            community_include_node = community[c].copy()
            L2_old_c = L2(graph, degrees, community_include_node)
            L2_old = L2_old_node + L2_old_c # 计算更新Node之前社区的L2
            community_include_node[node] = True
            L2_new_c = L2(graph, degrees, community_include_node)
            L2_new = L2_new_node + L2_new_c # 计算更新Node之后社区的L2

            community_new = community.copy()
            community_label_new = community_label.copy()
            community_new[node, community_exclude_node] = False
            community_new[community_exclude_node, node] = False
            community_new[node, community_include_node] = True
            community_new[community_include_node, node] = True
            if community_label_new[node]:
                indices = np.where(community_exclude_node)[0]
                if indices.size > 0:
                    community_label_new[indices[0]] = True
                community_label_new[node] = False
            L1_new = L1(graph, community_new, community_label_new) # 计算更新Node之后社区的L1

            if (L1_new + L2_new) < (L1_old + L2_old):
                for k in np.where(community[node])[0]:
                    if k != node and k not in visited:
                        q.put(k)
                community = community_new
                community_label = community_label_new
                L1_old = L1_new
                L2_old_node = L2_new_c
                L2_new_node = L2_old_c
                community_exclude_node = community_include_node
                community_include_node[node] = False

    return community

def dense_community(community, mapping=None):
    '''
    根据community(n*n)矩阵，返回列表形式的更紧密的community(List[list])
    community: List[list] 阶段一产生的社区矩阵
    mapping: List 用于阶段二每个子社区，第一阶段的community内是0开始的索引，而mapping内存储其对应真实节点的标签
    '''
    visited = np.zeros(len(community), dtype=bool)
    res = []
    for i in range(len(community)):
        if not visited[i]:
            k = np.where(community[i])[0]
            res.append(mapping[k] if mapping is not None else k)
            visited[k] = True
    return res

def step2_communityToGraph(graph, dense_community):
    ''' n*n的graph压缩为k*k的graph '''
    k = len(dense_community)
    new_graph = np.zeros((k, k)) # 压缩后的k*k的新邻接矩阵
    node_to_community = np.zeros(len(graph)) # List 原node -> 压缩后新node的映射表
    for i, c_fir in enumerate(dense_community): # 只计算上三角矩阵（不包括对角线）
        for j in range(i+1, k):
            c_sec = dense_community[j]
            row_idx, col_idx = np.meshgrid(c_fir, c_sec)
            new_graph[i][i] = np.sum(graph[row_idx, col_idx])
        node_to_community[c_fir] = i
    new_graph += new_graph.T
    return new_graph, node_to_community

def Infomap(graph, iters = 10, threshold=0.01):
    '''Info算法
    graph: 原始大图邻接矩阵
    iters: 最大迭代次数
    threshold: 最小L(M)提升的阈值，不然终止迭代
    '''
    res = []
    m = np.sum(graph)
    for _ in range(iters):
        n = len(graph)
        lm = L(graph, np.ones((n, n), dtype=bool), [np.arange(n)], m)
        community = step1_nodeMove(graph)
        dense_community_ = dense_community(community)
        new_lm = L(graph, community, dense_community_, m)
        graph, node_to_community = step2_communityToGraph(graph, dense_community_)
        if lm - new_lm < threshold:
            break
        res.append(node_to_community)
    return res

In [51]:
graph = np.array([
    [0, 1, 1, 0, 1],
    [1, 0, 0, 1, 0],
    [1, 0, 0, 0, 1],
    [0, 1, 0, 0, 1],
    [1, 0, 1, 1, 0]
])
community = np.array([
    [1, 1, 0, 0],
    [1, 1, 0, 0],
    [0, 0, 1, 1],
    [0, 0, 1, 1],
], dtype=bool)
Infomap(graph)

[array([0., 1., 1., 0., 2.]), array([0., 1., 2.])]