# 整体网分析Python 代码

## 导入包和修改路径

In [None]:
import os  ##跟工作路径相关的包
import networkx as nx   ##python中network分析的基础包
import matplotlib.pyplot as plt  ##画图的包
import unicodecsv as csv  ##写入EXCEL文件的包
import numpy as np  ##数据处理包
import pandas as pd  ##数据处理包
import scipy.stats as ss  ## 统计函数的包
from networkx.algorithms import community  ##社区分析的专用函数

In [None]:
os.getcwd() # get the current working directory
os.chdir("E:\\必要文件\\课程学习\\2021年暑期班整体网")  # change the working directory (hard-coded, machine-specific path)

## 第一部分：创建和导入网络

### 无向无权网

In [None]:
# Build an undirected, unweighted graph by hand: start from an empty
# Graph, then add every edge of the explicit edge list in one call.
guu = nx.Graph()
guu.add_edges_from([
    ('A', 'K'), ('A', 'B'),
    ('B', 'K'), ('B', 'C'),
    ('C', 'F'), ('C', 'E'),
    ('F', 'E'), ('F', 'G'),
    ('D', 'E'),
    ('E', 'H'), ('E', 'I'),
    ('I', 'J'),
])

In [None]:
guu  # bare expression; a notebook only echoes it when it is the last line of a cell
nx.draw_networkx(guu)  # draw the hand-built undirected graph
plt.show()

In [None]:
# Built-in example network shipped with NetworkX
guu1 = nx.karate_club_graph()
nx.draw_networkx(guu1)
plt.show()

In [None]:
# Read external data from a CSV file (path is relative to the working directory)
data=pd.read_csv("example.csv")
data

In [None]:
# Convert the edge list into a network, using the "from" and "to" columns as edge endpoints
guu2 = nx.from_pandas_edgelist(data,"from","to")
guu2

In [None]:
nx.draw_networkx(guu2)  # draw the graph built from the CSV edge list
plt.show()

### 无向有权网

In [None]:
# Same edge list, but keep the 'weight' column as an edge attribute
guw = nx.from_pandas_edgelist(data,"from","to",edge_attr='weight')
guw.edges(data = True),guw.nodes()  # show the edges (with their attributes) and the nodes

In [None]:
# Draw the weighted graph, using each edge's 'weight' attribute as its line width.
edges = guw.edges()
weights = [attrs['weight'] for _, _, attrs in guw.edges(data=True)]  # read the edge weights
nx.draw_networkx(guw, width=weights)
plt.axis("off")
plt.show()

### 有向无权图

In [None]:
# Hand-built directed, unweighted graph: list the arcs first (grouped by
# source node), then load them into an empty DiGraph.
e = [
    ('A', 'B'), ('A', 'E'), ('A', 'N'), ('B', 'C'), ('B', 'E'),
    ('C', 'A'), ('C', 'D'), ('D', 'B'), ('D', 'E'),
    ('E', 'D'), ('E', 'C'), ('F', 'G'), ('G', 'A'), ('G', 'J'),
    ('H', 'G'), ('H', 'I'), ('I', 'G'), ('I', 'F'), ('I', 'J'),
    ('J', 'F'), ('J', 'O'), ('K', 'L'), ('K', 'M'), ('L', 'M'),
    ('N', 'L'), ('N', 'O'), ('O', 'K'), ('O', 'L'), ('O', 'J'),
]
gdu = nx.DiGraph()
gdu.add_edges_from(e)
gdu

In [None]:
nx.draw_networkx(gdu)  # draw the hand-built directed graph
plt.axis("off")  # hide the axes
plt.show()

In [None]:
# Load the edge list for the directed graph (rebinds `data`)
data=pd.read_csv("example2.csv")
data

In [None]:
# Build a directed graph from the edge list, keeping 'weight' as an edge attribute.
gdu1 = nx.from_pandas_edgelist(
    data, "from", "to", edge_attr="weight", create_using=nx.DiGraph
)
# Show the edges (with attributes) and the nodes of this directed graph.
# The nodes must come from gdu1 itself, not from the undirected graph guw.
gdu1.edges(data=True), gdu1.nodes()

In [None]:
nx.draw_networkx(gdu1)  # draw the directed graph built from the CSV edge list
plt.axis("off")  # hide the axes
plt.show()

### 显示为有向有权图

In [None]:
# Draw the directed graph, using each edge's 'weight' attribute as its line width.
edges = gdu1.edges()
weights = [attrs['weight'] for _, _, attrs in gdu1.edges(data=True)]
nx.draw_networkx(gdu1, width=weights)
plt.axis("off")
plt.show()

运用样式

上边的代码虽然简单，但生成的图形略显单调。NetworkX提供了一系列样式参数，可以用来修饰和美化图形，达到我们想要的效果。常用的参数包括：

  - `node_size`:  指定节点的尺寸大小(默认是300，单位为点的平方，与matplotlib `scatter`的`s`参数一致；上图中的点即为默认大小)
  - `node_color`:  指定节点的颜色 (NetworkX 2.x起默认是蓝色'#1f78b4'，早期版本默认是红色；可以用字符串简单标识颜色，例如'r'为红色，'b'为蓝色，'g'为绿色等，具体可查看手册)
  - `node_shape`:  节点的形状（默认是圆形，用字符串'o'标识，具体可查看手册）
  - `alpha`: 透明度 (默认是1.0，不透明，0为完全透明) 
  - `width`: 边的宽度 (默认为1.0)
  - `edge_color`: 边的颜色(默认为黑色)
  - `style`: 边的样式(默认为实线，可选： solid|dashed|dotted|dashdot)
  - `with_labels`: 节点是否带标签（默认为True）
  - `font_size`: 节点标签字体大小 (默认为12)
  - `font_color`: 节点标签字体颜色（默认为黑色）

运用布局

NetworkX在绘制网络图形方面提供了布局的功能，可以指定节点排列的形式。这些布局包括：

    circular_layout：节点在一个圆环上均匀分布
    random_layout：节点随机分布
    shell_layout：节点在同心圆上分布
    spring_layout： 用Fruchterman-Reingold算法排列节点
    spectral_layout：根据图的拉普拉斯特征向量排列节点

## 第二部分 网络演化模型

### ER随机网络

In [None]:
ER = nx.random_graphs.erdos_renyi_graph(100,0.062)  # random graph with 100 nodes, each node pair linked with probability 0.062
pos = nx.shell_layout(ER)          # define a layout; the shell layout is used here
nx.draw(ER,pos,with_labels=False,node_size = 30)  # draw the graph without labels
plt.show()

### WS小世界网络

In [None]:
WS = nx.random_graphs.watts_strogatz_graph(100,6,0.3)  # small-world network with 100 nodes, 6 nearest neighbours per node, rewiring probability 0.3
pos = nx.shell_layout(WS)          # define a layout
nx.draw(WS,pos,with_labels=False,node_size = 30)  # draw the graph
plt.axis('off')
plt.show()

### BA无标度网络

In [None]:
BA= nx.random_graphs.barabasi_albert_graph(100,3)  # BA scale-free network with n=100 nodes and m=3 edges per new node
pos = nx.spring_layout(BA)          # define a layout; the spring layout is used here
nx.draw(BA,pos,with_labels=False,node_size = 30)  # draw the graph
plt.show()

In [None]:
# Edge counts of the three generated networks, in the order ER, WS, BA.
tuple(len(g.edges()) for g in (ER, WS, BA))

## 第三部分 网络结构分析

### 网络整体结构分析

#### 密度分析

In [None]:
# Density of the three generated networks, in the order ER, WS, BA.
tuple(map(nx.density, (ER, WS, BA)))

In [None]:
nx.density(guu2)  # density of the undirected graph built from the edge list

In [None]:
nx.density(gdu1)  # density of the directed graph built from the edge list

In [None]:
nx.density(guw)  # density of the undirected weighted graph

#### 成份分析 

In [None]:
# First check whether each network is connected; a connected graph has
# exactly one component. Results are in the order ER, WS, BA.
tuple(nx.is_connected(g) for g in (ER, WS, BA))

In [None]:
# A disconnected graph has more than one component
nx.number_connected_components(ER)

In [None]:
# Note: strong components are only defined for directed networks.
# Results are in the order gdu, gdu1.
tuple(nx.is_strongly_connected(g) for g in (gdu, gdu1))

In [None]:
nx.number_strongly_connected_components(gdu)  # number of strongly connected components in gdu

In [None]:
# Find the largest strongly connected component (a set of nodes).
# strongly_connected_components replaces the recursive variant, which is
# deprecated in recent NetworkX releases; both enumerate the same
# components, and the non-recursive one is not bound by Python's
# recursion limit.
largest = max(nx.strongly_connected_components(gdu), key=len)
largest

In [None]:
len(gdu.nodes())  # total number of nodes in gdu

#### 直径和平均最短路径分析

In [None]:
nx.diameter(ER)  # NOTE(review): ER is not guaranteed to be connected, and diameter is only defined on connected graphs — this call may raise

In [None]:
# Diameters of the WS and BA networks, in that order.
tuple(map(nx.diameter, (WS, BA)))

In [None]:
# Note: average shortest path length and diameter are only defined on
# connected graphs. Results are in the order WS, BA.
tuple(map(nx.average_shortest_path_length, (WS, BA)))

In [None]:
# Split the (possibly disconnected) ER graph into its connected components
# and extract the giant component. connected_components yields components
# in no particular size order, so sort them largest-first to guarantee
# that S[0] is the giant component.
S = [
    ER.subgraph(c).copy()
    for c in sorted(nx.connected_components(ER), key=len, reverse=True)
]
S

In [None]:
nx.diameter(S[0])  # diameter of the first component subgraph in S

In [None]:
nx.average_shortest_path_length(S[0])  # average shortest path length of the first component subgraph in S

In [None]:
# Further check: same WS parameters (k=6, p=0.3) on a larger network of 1000 nodes
WS2 = nx.random_graphs.watts_strogatz_graph(1000,6,0.3) 
nx.average_shortest_path_length(WS2)

In [None]:
# Same WS parameters on 5000 nodes
WS3 = nx.random_graphs.watts_strogatz_graph(5000,6,0.3) 
nx.average_shortest_path_length(WS3)

In [None]:
# BA network with 1000 nodes and m=3
BA2 = nx.random_graphs.barabasi_albert_graph(1000,3)
nx.average_shortest_path_length(BA2)

In [None]:
# BA network with 5000 nodes and m=3
BA3 = nx.random_graphs.barabasi_albert_graph(5000,3)
nx.average_shortest_path_length(BA3)

#### 度分布分析

In [None]:
# Extract the node degrees
nx.degree(WS2)  # result is not stored; the next line captures it
wsdl = np.array(nx.degree(WS2))  # NOTE(review): presumably an (n, 2) array of (node, degree) rows — confirm

In [None]:
from collections import Counter

In [None]:
# Degree-frequency table: b[k] is the number of rows in wsdl whose degree is k.
counts = Counter(degree for _node, degree in wsdl)
b = [counts[k] for k in range(max(counts) + 1)]  # a Counter reports 0 for absent degrees
x = range(len(b))  # x axis
y = list(b)  # y axis

In [None]:
# Plot the degree frequencies on log-log axes
plt.figure(figsize=(5.8, 5.2), dpi=150)
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.loglog(x, y, '.')  # point markers only
plt.show()

In [None]:
# Degree-frequency table for BA2: b[k] is the number of nodes with degree k.
nx.degree(BA2)
badl1 = np.array(nx.degree(BA2))
counts = Counter(degree for _node, degree in badl1)
b = [counts[k] for k in range(max(counts) + 1)]  # a Counter reports 0 for absent degrees
x = range(len(b))  # x axis
y = list(b)  # y axis

In [None]:
# Plot the current x/y degree frequencies on log-log axes
plt.figure(figsize=(5.8, 5.2), dpi=150)
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.loglog(x, y, '.')  # point markers only
plt.show()

In [None]:
# Degree-frequency table for BA3: b[k] is the number of nodes with degree k.
nx.degree(BA3)
badl2 = np.array(nx.degree(BA3))
counts = Counter(degree for _node, degree in badl2)
b = [counts[k] for k in range(max(counts) + 1)]  # a Counter reports 0 for absent degrees
x = range(len(b))  # x axis
y = list(b)  # y axis

In [None]:
# Plot the current x/y degree frequencies on log-log axes
plt.figure(figsize=(5.8, 5.2), dpi=150)
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.loglog(x, y, '.')  # point markers only
plt.show()

In [None]:
erdl = np.array(nx.degree(WS2))  # NOTE(review): the name suggests an ER degree list, but it is built from WS2 — confirm intent
erdl[1,1]  # second column of the row at index 1

In [None]:
# Create a single-axes figure and draw a histogram of the second column of erdl
fig, axs = plt.subplots(1, 1,
                        figsize =(10, 7),
                        tight_layout = True)
axs.hist(erdl[:,1], bins = 8)  # 8 bins
 
# Show plot
plt.show()

In [None]:
erdl2 = np.array(nx.degree(WS3))  # degree data of WS3; the second column is plotted below

# Create a single-axes figure and draw a histogram of the second column of erdl2
fig, axs = plt.subplots(1, 1,
                        figsize =(10, 7),
                        tight_layout = True)
axs.hist(erdl2[:,1], bins = 8)  # 8 bins
 
# Show plot
plt.show()

### 网络群体结构分析

In [None]:
# Classic dataset: the karate club graph
gkc = nx.karate_club_graph()
plt.axis('off')  # hide the axes
nx.draw_networkx(gkc)
plt.show()

In [None]:
# Use the simplest algorithm (edge-betweenness based Girvan-Newman) and list the members of each community
gkcc = community.girvan_newman(gkc)
next_level_communities = next(gkcc)  # take the first partition produced by the iterator
sorted(map(sorted, next_level_communities))  # sorted member lists, for display

In [None]:
# Collect the community member lists; only the community at index 1 is kept.
node_groups = [list(next_level_communities[1])]

In [None]:
# Draw the communities: members of node_groups[0] in purple, all other nodes in green.
color_map = [
    'purple' if node in node_groups[0] else 'green'
    for node in gkc
]

nx.draw(gkc, node_color=color_map, with_labels=True)
plt.show()

In [None]:
# Modularity of the partition currently held in next_level_communities
community.modularity(gkc,next_level_communities)

In [None]:
from networkx.algorithms.community import greedy_modularity_communities

In [None]:
gkcc = list(greedy_modularity_communities(gkc))  # rebinds gkcc to the greedy-modularity communities
community.modularity(gkc,gkcc)  # modularity of that partition

In [None]:
gkcc  # display the community list

与随机网络进行对比

In [None]:
len(gkc.edges()),len(gkc)  # (number of edges, number of nodes) of the karate club graph

In [None]:
rgk = nx.gnm_random_graph(n=34, m=78,seed=1)  # random graph with 34 nodes and 78 edges; fixed seed for reproducibility

In [None]:
nx.draw(rgk)  # draw the random comparison graph
plt.show()

In [None]:
rgkc = community.girvan_newman(rgk)  # iterator over successive Girvan-Newman partitions
top_level_communities = next(rgkc)  # first partition
next_level_communities = next(rgkc)  # second partition; rebinds the name used earlier for gkc

In [None]:
community.modularity(rgk,next_level_communities)  # modularity of the partition held in next_level_communities on rgk

In [None]:
rgkc = list(greedy_modularity_communities(rgk))  # rebinds rgkc to the greedy-modularity communities
community.modularity(rgk,rgkc)  # modularity of that partition

### 网络节点中心性分析

In [None]:
# Node-level measures on the karate club graph
deg = nx.degree_centrality(gkc)  # degree centrality
clo = nx.closeness_centrality(gkc)  # closeness centrality
bet = nx.betweenness_centrality(gkc)  # betweenness centrality
tra = nx.clustering(gkc)  # clustering coefficient
egv = nx.eigenvector_centrality(gkc)  # eigenvector centrality

In [None]:
# Put all node-level measures into one DataFrame, one row per node.
# Every measure is looked up by node id, so each row is guaranteed to
# describe the node in its 'id' column instead of relying on the five
# dicts happening to share the same iteration order.
col_names = ['id','degree', 'closeness', 'betweenness', 'clustering', 'eigenvector']
node_ids = list(gkc.nodes())
data = pd.DataFrame(
    {
        'id': node_ids,
        'degree': [deg[n] for n in node_ids],
        'closeness': [clo[n] for n in node_ids],
        'betweenness': [bet[n] for n in node_ids],
        'clustering': [tra[n] for n in node_ids],
        'eigenvector': [egv[n] for n in node_ids],
    },
    columns=col_names,
)

In [None]:
data  # display the table

In [None]:
pos = nx.spring_layout(gkc)  # compute the layout once; pos is passed to the draw call below
nx.draw_networkx(gkc,pos,node_size=data['degree']*300)  # node size scaled by the 'degree' column
plt.show()

In [None]:
nx.draw_networkx(gkc,pos,node_size=data['closeness']*data['closeness']*300)  # node size scaled by the squared 'closeness' column
plt.show()

In [None]:
nx.draw_networkx(gkc,pos,node_size=data['betweenness']*300)  # node size scaled by the 'betweenness' column
plt.show()

In [None]:
nx.draw_networkx(gkc,pos,node_size=data['clustering']*300)  # node size scaled by the 'clustering' column
plt.show()

In [None]:
nx.draw_networkx(gkc,pos,node_size=data['eigenvector']*300)  # node size scaled by the 'eigenvector' column
plt.show()

## 第四部分 网络建模ERGM