In [None]:
!pip list

In [None]:
import pandas as pd
import networkx as nx

In [None]:
# Location of the edge-list file, relative to the notebook's working directory.
file_path = 'data/as-caida20071105.txt'

# The file is tab-separated and has no header row; any line starting with '#'
# is treated as a comment and dropped by the parser. Column names are supplied
# explicitly.
column_names = ['FromNodeId', 'ToNodeId', 'Relationship']
df = pd.read_csv(
    file_path,
    sep='\t',
    comment='#',
    header=None,
    names=column_names,
)

# Preview the first rows as the cell output.
df.head()

In [None]:
# Pull the three columns out of the frame as plain lists (one entry per row).
start_node, end_node, values = (
    list(df[column]) for column in ('FromNodeId', 'ToNodeId', 'Relationship')
)

# Every endpoint occurrence (sources followed by targets), duplicates included.
nodes = [*start_node, *end_node]
print(len(nodes))

# Collapse to the distinct node ids.
nodes = list(set(nodes))
print(len(nodes))

In [None]:
# One (source, target, relationship) triple per input row.
edges = tuple(zip(start_node, end_node, values))
print(type(edges))

# Sanity check: show the first six triples.
for edge in edges[:6]:
    print(edge)

# Count before and after de-duplication to see whether any triple repeats.
print(len(edges))
edges = set(edges)
print(len(edges))

In [None]:
# Build the directed graph. The third element of each triple is stored under
# the 'weight' edge attribute.
# NOTE(review): that value comes from the 'Relationship' column — confirm it is
# meant to be interpreted as a numeric weight before using weighted algorithms.
WG = nx.DiGraph()
WG.add_nodes_from(nodes)
WG.add_weighted_edges_from(edges)

# Edge weights in the graph's edge-iteration order.
w = [weight for _, _, weight in WG.edges(data='weight')]

In [None]:
# Full-graph drawing is left disabled.
# NOTE(review): presumably too slow / too cluttered for a graph of this size — confirm or delete this cell.
# nx.draw(WG, node_size=50, with_labels=False)

In [None]:
# Basic size statistics of the directed graph.
num_nodes, num_edges = WG.number_of_nodes(), WG.number_of_edges()
for label, count in (("节点数量:", num_nodes), ("边数量:", num_edges)):
    print(label, count)

In [None]:
# The diameter is only computed when the directed graph is strongly connected;
# otherwise report that it is undefined.
if not nx.is_strongly_connected(WG):
    print("图不是强连通的，因此没有直径。")
else:
    diameter = nx.diameter(WG)
    print("图的直径:", diameter)

In [None]:
import matplotlib.pyplot as plt

# Degree sequences. Nodes whose in-degree (resp. out-degree) is 0 are left out
# of the corresponding histogram.
in_degrees = [d for n, d in WG.in_degree() if d > 0]
out_degrees = [d for n, d in WG.out_degree() if d > 0]

# One panel per direction, drawn by the same loop so both panels are guaranteed
# to share bins, axis limits and scaling.
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
panels = (
    (axes[0], in_degrees, "In Degree", "blue"),
    (axes[1], out_degrees, "Out Degree", "green"),
)
for ax, degrees, label, color in panels:
    # rwidth < 1 leaves a small gap between neighbouring bars.
    ax.hist(degrees, bins=50, color=color, alpha=0.7, rwidth=0.8)
    # Restrict the x-axis to degrees 0-2000.
    ax.set_xlim(0, 2000)
    # Logarithmic y-axis.
    ax.set_yscale('log')
    ax.set_title(f"{label} Distribution")
    ax.set_xlabel(label)
    ax.set_ylabel("Frequency")

fig.tight_layout()
plt.show()

In [None]:
# Largest weakly connected component (by node count) and the subgraph it induces.
weak_components = nx.weakly_connected_components(WG)
WCC = max(weak_components, key=len)
WCC_subgraph = WG.subgraph(WCC)

wcc_node_count = WCC_subgraph.number_of_nodes()
wcc_edge_count = WCC_subgraph.number_of_edges()
print("最大WCC节点数:", wcc_node_count)
print("最大WCC边数:", wcc_edge_count)

In [None]:
# Largest strongly connected component (by node count) and the subgraph it induces.
strong_components = nx.strongly_connected_components(WG)
SCC = max(strong_components, key=len)
SCC_subgraph = WG.subgraph(SCC)

scc_node_count = SCC_subgraph.number_of_nodes()
scc_edge_count = SCC_subgraph.number_of_edges()
print("最大SCC节点数:", scc_node_count)
print("最大SCC边数:", scc_edge_count)

In [None]:
# The average clustering coefficient is evaluated on an undirected copy of the graph.
undirected_graph = WG.to_undirected()
avg_clustering_coefficient = nx.average_clustering(undirected_graph)
print("平均聚类系数:", avg_clustering_coefficient)

In [None]:
import networkx.algorithms.community as community

# Number of node ids shown per community in the preview printed below.
PREVIEW_SIZE = 10

# Detect community structure with the Girvan-Newman algorithm and keep only the
# first (coarsest) split produced by the generator.
# NOTE(review): Girvan-Newman recomputes edge betweenness after every edge
# removal, so this cell can be very slow on a large graph — confirm it is
# feasible here before relying on Restart & Run All.
communities = community.girvan_newman(WG)
top_level_communities = next(communities)

# Sort the members of each community, then sort the communities themselves.
sorted_communities = sorted(map(sorted, top_level_communities))

# Print a summary instead of every node id: the full membership lists remain
# available in `sorted_communities`, but dumping them floods the cell output.
print("社区数量:", len(sorted_communities))
print("各社区节点数:", [len(members) for members in sorted_communities])
print(
    f"社区结构 (每个社区前 {PREVIEW_SIZE} 个节点):",
    [members[:PREVIEW_SIZE] for members in sorted_communities],
)

In [None]:
!pip install python-igraph

In [None]:
import random

import igraph as ig

# Community detection and the force-directed layout below are both randomised.
# python-igraph draws its random numbers from Python's `random` module by
# default, so seeding it here makes the result reproducible across re-runs.
SEED = 42
random.seed(SEED)

# Convert to an undirected graph and hand its edge list to igraph; vertices are
# created from the endpoints that appear in the edge tuples.
G_undirected = WG.to_undirected()
g = ig.Graph.TupleList(G_undirected.edges(), directed=False)

# Detect communities with the Louvain (multilevel) algorithm.
louvain_communities = g.community_multilevel()

# Draw the community structure using the "fr" (Fruchterman-Reingold) layout.
layout = g.layout("fr")
ig.plot(louvain_communities, layout=layout, vertex_size=5, bbox=(600, 600), margin=20)