# 一、核心思想
Louvain算法只考虑了Modularity的提升，而没有考虑社区合并质量的好坏。举个例子，<br>
<img src="pic/Leiden_1.png" width=300px height=400px><br>
(a)中的社区为<b>弱链接</b>，即通过节点0才关联起来的，其他节点之间链接不紧密；(b)中的社区为<b>断连</b>，社区不连通但归为了一个社区。为了解决该问题，Leiden算法在Louvain算法的基础上<b>增加了细化阶段，该步在每个社区内重新合并，并检查合并的节点连通性</b>。
# 二、算法流程
1. 局部节点移动：不再对每个节点检查所有节点。先用队列储存随机顺序的节点，每次弹出队首节点，并判断该节点是否移动到新社区，若是则将该节点的原邻居节点加入队列。直到队列为空。
2. 细化：将社区内的节点初始化单独作为一个社区，对社区内的每个节点v，考虑与其他社区C合并，计算连接度是否满足$E(v,C-v) \geq \gamma \cdot \|v\| \cdot (\|C\| - \|v\|)$，其中$E(v,C-v)$：节点v与社区C中除自己以外的所有其他节点之间的边数，$\|v\|$：节点v的总边权(度数)，$\|C\|$：社区C内所有节点的度数。然后以连接度为概率，随机将节点v与满足条件的社区C合并。可用公式：<img src="pic/Leiden_2.png" width=150px height=150px>计算。
3. 同Louvain进行社区压缩。

In [7]:
import numpy as np
import queue

In [2]:
def Q(graph, c, m=None, gamma=1.0):
    '''
    Compute the unnormalised quality (modularity contribution) of one community.

    graph: square adjacency matrix of the whole graph (converted with np.asarray)
    c: members of the community, either node indices or a boolean mask
    m: total weight of the whole graph, i.e. np.sum(graph); computed when omitted
    gamma: resolution parameter scaling the expected-weight penalty
    Returns internal_weight - gamma * degree_sum ** 2 / m.
    '''
    graph = np.asarray(graph)
    if m is None:
        m = np.sum(graph)
    # np.ix_ selects the full |c| x |c| sub-matrix. Plain graph[c, c] pairs the
    # two index lists element-wise and would only pick diagonal entries.
    internal_weight = np.sum(graph[np.ix_(c, c)])
    # Sum of the columns of the members, i.e. the total degree of the community.
    degree_sum = np.sum(graph[:, c])
    return internal_weight - gamma * degree_sum ** 2 / m

def Modularity(graph, community, m=None):
    '''
    Compute the modularity of a complete partition of the graph.

    graph: adjacency matrix of the whole graph
    community: iterable of communities, each given as the nodes it contains
    m: total weight of the graph, np.sum(graph); computed when omitted
    Returns the sum of the per-community Q values divided by m.
    '''
    total_weight = np.sum(graph) if m is None else m
    # Each community contributes independently; Q is called with its default gamma.
    contributions = (Q(graph, members, total_weight) for members in community)
    return sum(contributions, 0) / total_weight

In [39]:
def setp1_nodeMove(graph, gamma=1.0):
    '''
    Local node moving phase: greedily move single nodes between communities.

    Every node starts in its own community. Nodes are visited in a random
    order taken from a queue; each visited node is moved to the community
    that gives the largest strictly positive gain in total Q. After a move,
    the neighbours of the node that are outside its new community and not
    already waiting in the queue are queued again.

    graph: square adjacency matrix (converted with np.asarray)
    gamma: resolution parameter forwarded to Q
    Returns an n x n boolean matrix whose row i is the membership mask of the
    community containing node i.
    '''
    graph = np.asarray(graph)
    m = np.sum(graph)
    n = len(graph)
    community = np.eye(n, dtype=bool)  # row i = mask of the community of node i
    q = queue.Queue()  # nodes still to be visited
    for x in np.random.choice(n, size=n, replace=False):
        q.put(int(x))
    queued = set(range(n))  # nodes currently waiting in the queue
    while not q.empty():
        node = q.get()
        queued.discard(node)

        # Quality of the current community with and without the node.
        old_with = community[node].copy()
        old_without = old_with.copy()
        old_without[node] = False
        q_old_with = Q(graph, old_with, m, gamma)
        q_old_without = Q(graph, old_without, m, gamma)

        best_gain = 0.0
        best_target = None
        evaluated = np.zeros(n, dtype=bool)  # nodes whose community was scored
        for c in np.where(~old_with)[0]:
            if evaluated[c]:
                continue  # same community as an earlier candidate
            target = community[c].copy()
            evaluated |= target
            target_with = target.copy()
            target_with[node] = True
            q_before = q_old_with + Q(graph, target, m, gamma)
            q_after = q_old_without + Q(graph, target_with, m, gamma)
            gain = q_after - q_before
            if gain > best_gain:
                best_gain = gain
                best_target = target_with

        if best_target is None:
            continue  # no move improves the quality

        # Detach the node from its old community in both directions ...
        community[node, :] = False
        community[:, node] = False
        # ... and attach it to the new one (best_target already contains node).
        community[node, best_target] = True
        community[best_target, node] = True

        # Neighbours left outside the new community may now prefer to move.
        for k in np.where((graph[node] != 0) & ~best_target)[0]:
            k = int(k)
            if k not in queued:
                queued.add(k)
                q.put(k)
    return community


def dense_community(community, mapping=None):
    '''
    Convert an n x n community membership matrix into a list of communities.

    community: n x n matrix whose row i marks the members of node i's community
    mapping: optional sequence of length n; position i holds the real label of
             local node i. When given, the returned communities contain these
             labels instead of the local 0-based indices.
    Returns a list of 1-D arrays, one per distinct community, in order of the
    smallest local index of each community.
    '''
    community = np.asarray(community, dtype=bool)
    if mapping is not None:
        mapping = np.asarray(mapping)
    visited = np.zeros(len(community), dtype=bool)
    res = []
    for i, row in enumerate(community):
        if visited[i]:
            continue
        members = np.where(row)[0]
        # Mark with the local indices; the mapped labels are not valid
        # positions in `visited`.
        visited[members] = True
        res.append(members if mapping is None else mapping[members])
    return res

def sparse_community(dense_community, n):
    '''
    Expand a list of communities back into an n x n membership matrix.

    dense_community: iterable of communities, each a list/array of node indices
    n: number of nodes
    Returns an n x n boolean matrix with entry (i, j) True when nodes i and j
    belong to the same community.
    '''
    community = np.zeros((n, n), dtype=bool)
    for c in dense_community:
        # np.ix_ addresses every (i, j) pair of members; community[c, c] would
        # only touch the diagonal entries (i, i).
        community[np.ix_(c, c)] = True
    return community


def step2_refinement(graph, community, gamma=0.2):
    '''
    Refine the communities found by the node moving phase.

    Each community is treated as a graph of its own: node moving is run again
    on the sub-graph induced by its members, which may split the community
    into several smaller ones.

    graph: adjacency matrix of the current graph (converted with np.asarray)
    community: list of communities, each a list/array of node indices
    gamma: resolution parameter used when computing Q inside each community
    Returns the refined list of communities, expressed in the node indices of
    `graph`.
    '''
    graph = np.asarray(graph)
    res = []
    for c in community:
        c = np.asarray(c)
        if c.size == 0:
            continue
        # Induced sub-graph; graph[c, c] would only return diagonal entries.
        sub_graph = graph[np.ix_(c, c)]
        sub_community = setp1_nodeMove(sub_graph, gamma)
        # Sub-communities come back in local 0-based indices; translate them
        # to the node indices of `graph`.
        for local_members in dense_community(sub_community):
            res.append(c[local_members])
    return res


def step3_communityToGraph(graph, community, self_loops=False):
    '''
    Compress an n x n graph into a k x k graph with one node per community.

    graph: n x n adjacency matrix; it is only read, never modified
    community: list of k communities, each a list/array of node indices
    self_loops: when True, entry (i, i) of the result holds the total weight
                inside community i. The default keeps the diagonal at zero,
                so intra-community weight is not carried over.
    Returns (new_graph, node_to_community):
        new_graph: symmetric k x k matrix; entry (i, j) is the total weight
                   between community i and community j
        node_to_community: integer array of length n mapping each original
                           node to the index of its community
    '''
    graph = np.asarray(graph)
    k = len(community)
    new_graph = np.zeros((k, k))
    node_to_community = np.zeros(len(graph), dtype=int)
    for i, c_fir in enumerate(community):
        # Fill the strict upper triangle only; it is mirrored below.
        for j in range(i + 1, k):
            c_sec = community[j]
            new_graph[i, j] = np.sum(graph[np.ix_(c_fir, c_sec)])
        node_to_community[c_fir] = i
    new_graph += new_graph.T
    if self_loops:
        # Set after mirroring so the diagonal is not doubled.
        for i, members in enumerate(community):
            new_graph[i, i] = np.sum(graph[np.ix_(members, members)])
    return new_graph, node_to_community


def Leiven(graph, gamma=0.2, iters = 10, threshold=0.01):
    '''
    Run the multi-level community detection loop (move, refine, compress).

    graph: adjacency matrix of the original graph
    gamma: resolution parameter used in the refinement phase
    iters: maximum number of levels
    threshold: minimum modularity improvement required to start another level
    Returns a list with one integer array per level; the array of a level maps
    every node of that level's input graph to its community index, which is
    the node index in the next level's graph.
    '''
    # Private copies: the caller's matrix stays untouched and the original
    # graph remains available for scoring every level.
    original = np.array(graph, dtype=float)
    graph = original.copy()
    m = np.sum(original)
    membership = np.arange(len(original))  # original node -> current community
    res = []
    modularity = -np.inf
    for _ in range(iters):
        community = setp1_nodeMove(graph)
        community = dense_community(community)
        community = step2_refinement(graph, community, gamma)
        graph, node_to_community = step3_communityToGraph(graph, community)
        node_to_community = np.asarray(node_to_community).astype(int)
        # Compose the level maps so the partition is expressed in terms of the
        # original nodes, then score it on the original graph. This keeps the
        # modularity of different levels comparable.
        membership = node_to_community[membership]
        groups = [np.where(membership == k)[0] for k in range(len(community))]
        new_modularity = Modularity(original, groups, m)
        res.append(node_to_community)
        if new_modularity - modularity < threshold:
            break
        modularity = new_modularity
    return res