# 使用 Leiden 算法进行全局主题聚类

In [None]:
import leidenalg
import igraph as ig
import datatable as dt
import pandas as pd

### （1）读取边文件

In [None]:
%%time
# Column labels applied to the edge table: two endpoints plus an edge weight.
EDGE_COLUMNS = ['source', 'target', 'weight']

# Parse the edge CSV with datatable, then hand it over to pandas.
data = dt.fread('allPapers_cociting_data.csv')
data_pd = data.to_pandas()

# Columns are relabelled by position; this presumes the file has exactly
# three columns -- TODO confirm against the CSV.
data_pd.columns = EDGE_COLUMNS

### （2）根据边文件构建网络

In [None]:
%%time
# Build the network from the edge table: the first two columns are the edge
# endpoints and the remaining column becomes an edge attribute of the same
# name.
# use_vids=False makes igraph treat the endpoint values as vertex names and
# store them in the "name" vertex attribute, which the export step reads.
# Newer python-igraph releases default to use_vids=True, which interprets the
# endpoints as 0-based vertex indices and sets no "name" attribute.
# NOTE(review): the graph is directed by default; if the edge list is meant
# to be symmetric, directed=False may be intended -- confirm.
G = ig.Graph.DataFrame(data_pd, use_vids=False)

In [None]:
# Show igraph's one-line summary of the graph as the cell output
# (a quick sanity check after construction).
G.summary()

### （3）计算分辨率剖面（resolution profile）

In [None]:
%%time
# Build the optimiser and scan CPM resolution values over the range (0, 1).
# The 'weight' edge attribute is passed so the profile is computed on the
# weighted graph, matching the clustering step, which also supplies edge
# weights; without it the profile would describe the unweighted objective.
optimiser = leidenalg.Optimiser()
profile = optimiser.resolution_profile(
    G,
    leidenalg.CPMVertexPartition,
    resolution_range=(0, 1),
    weights='weight',
)

### （4）使用Leidenalg进行聚类

In [None]:
%%time
# Cluster the graph with the Leiden algorithm using the CPM quality function.
# The edge attribute created from the edge table is named 'weight' (lower
# case); 'Weight' refers to an attribute that does not exist on the graph.
part = leidenalg.find_partition(
    G,
    leidenalg.CPMVertexPartition,
    weights='weight',
    # max_comm_size=1000,  # optional cap on community size, currently unused
    resolution_parameter=0.1,
)

# Report modularity using the same edge weights the partition was built with.
print("Modularity of this partition: ", G.modularity(part, weights='weight'))

# Attach the cluster id to every vertex, then export one (name, cluster) row
# per vertex.
G.vs['cluster'] = part.membership
vs = dt.Frame(list(zip(G.vs["name"], G.vs["cluster"])))
vs.names = ('id', 'cluster')
vs.to_csv('allPapers_globalCluster_weighted.csv')