# 一、核心思想
&nbsp;&nbsp;&nbsp;&nbsp; 节点的标签完全由其邻居决定（类似 Bagging 中的多数投票思想），并通过传播不断迭代更新：每次异步地将节点的标签更新为其相邻节点中出现频率最高（带权图中为权重之和最大）的标签。
# 二、算法流程
1. 初始化每个节点一个唯一标签。
2. 将所有节点随机排序，依次对每个节点统计其邻居中各标签的权重之和，选择权重之和最大的标签作为该节点的新标签。
3. 重复步骤 2，直到一轮迭代中标签发生变化的节点比例低于某个阈值，或者迭代次数达到上限。
# 三、缺点
1. 结果受到标签随机选择影响，不稳定
2. 收敛性没有严格的数学证明，不一定收敛
3. 倾向于选择大社区
# 四、应用
1. 社区聚类，由于时间复杂度低，可以用来快速了解社区
2. 半监督学习，将少数标签传播到未标签的节点

In [85]:
import numpy as np
import random
# Seed both random number generators so runs are reproducible: the demo graph
# is built with the stdlib `random` module, while lpa() draws its node order
# and its keep-old-label decisions from NumPy's global RNG (`np.random`).
random.seed(42)
np.random.seed(42)

In [185]:
def lpa(graph, iters = 10, ratio = 0.1):
    """Run one pass of block-asynchronous label propagation on a weighted graph.

    Every node starts in its own community (label == node index). In each
    iteration the nodes are visited in a random order, in blocks of roughly
    sqrt(n) nodes. All nodes of a block are updated together from the labels
    that were current when the block started, each taking the label with the
    largest total edge weight among its neighbours.

    Args:
        graph: square adjacency matrix (NumPy array or nested lists), where
            graph[i][j] is the edge weight between nodes i and j.
        iters: maximum number of iterations over all nodes.
        ratio: stop early once the fraction of nodes that changed label in an
            iteration drops below this value.

    Returns:
        1-D integer NumPy array; entry i is the community label of node i.
        Labels are always drawn from the original node indices.
    """
    graph = np.asarray(graph)
    n = len(graph)
    community = np.arange(n)
    if n == 0:
        # Nothing to propagate; also avoids a zero block size and 0/0 below.
        return community

    # Block-asynchronous updates: each block holds about sqrt(n) nodes.
    block_size = max(1, int(n ** 0.5))

    for _ in range(iters):
        order = np.random.choice(n, size=n, replace=False)
        changed = 0  # number of nodes whose label changed in this iteration

        # Step by block_size so the final, possibly shorter, block is visited too.
        for start in range(0, n, block_size):
            sub_nodes = order[start:start + block_size]
            sub_data = graph[sub_nodes, :]
            current = community[sub_nodes]

            unique_labels, inverse_indices = np.unique(community, return_inverse=True)
            # weights[r, k]: total edge weight from node sub_nodes[r] to all
            # nodes currently carrying label unique_labels[k].
            weights = np.zeros((len(sub_nodes), len(unique_labels)))
            for k in range(len(unique_labels)):
                weights[:, k] = sub_data[:, inverse_indices == k].sum(axis=1)

            # argmax yields a column index; map it back to the actual label.
            proposed = unique_labels[np.argmax(weights, axis=1)]
            # A node with no positive neighbour weight has nothing to adopt,
            # so it keeps its label instead of falling into the first column.
            proposed = np.where(weights.max(axis=1) > 0, proposed, current)

            # Keep the previous label with 20% probability.
            accept = np.random.random(len(sub_nodes)) > 0.2
            new_labels = np.where(accept, proposed, current)

            changed += int(np.sum(new_labels != current))
            community[sub_nodes] = new_labels

        if changed / n < ratio:
            break

    return community

def Modularity(graph, community):
    """Compute the modularity of a partition of a weighted graph.

    With W the sum of all entries of the adjacency matrix, the score is
    (1 / W) * sum over communities c of (internal_c - incident_c ** 2 / W),
    where internal_c sums the entries whose row and column are both in c and
    incident_c sums every entry whose column is in c.

    Args:
        graph: square adjacency matrix (NumPy array or nested lists).
        community: sequence with one community label per node.

    Returns:
        The modularity as a float; 0.0 when the graph has no edge weight.
    """
    graph = np.asarray(graph, dtype=float)
    community = np.asarray(community)

    total_weight = graph.sum()
    if total_weight == 0:
        # No edges: avoid 0/0 and report a neutral score.
        return 0.0

    unique_labels, inverse_indices = np.unique(community, return_inverse=True)
    score = 0.0
    for k in range(len(unique_labels)):
        members = inverse_indices == k
        # np.ix_ selects the full members-by-members sub-matrix; indexing with
        # graph[members, members] would pair the masks element-wise and pick
        # only the diagonal entries.
        internal = graph[np.ix_(members, members)].sum()
        incident = graph[:, members].sum()
        score += internal - incident ** 2 / total_weight
    return score / total_weight

def LPA(graph, iters = 10, lpa_iters = 10, ratio = 0.1):
    """Run lpa repeatedly and return the partition with the highest modularity.

    Args:
        graph: adjacency matrix, passed unchanged to lpa and Modularity.
        iters: number of independent lpa runs to compare.
        lpa_iters: maximum number of iterations inside each lpa run.
        ratio: early-stopping threshold forwarded to lpa.

    Returns:
        The community assignment of the best-scoring run, or None when no run
        produced a score greater than negative infinity (e.g. iters == 0).
    """
    # (score, partition) of the best run seen so far.
    best = (-np.inf, None)
    for _ in range(iters):
        candidate = lpa(graph, lpa_iters, ratio)
        score = Modularity(graph, candidate)
        if score > best[0]:
            best = (score, candidate)
    return best[1]


In [196]:
if __name__ == "__main__":
    # Demo: build a random symmetric weighted graph on m nodes (zero diagonal)
    # and print the communities found by LPA.
    m = 10
    adj_matrix = np.zeros((m, m))
    for row in range(m):
        for col in range(row + 1, m):
            # One draw per unordered pair, mirrored across the diagonal.
            weight = random.random()
            adj_matrix[row, col] = weight
            adj_matrix[col, row] = weight

    C = LPA(adj_matrix)
    print(C)

[1 0 1 1 1 1 1 1 1 1]
