In [1]:
# 读取节点数据
import gc

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np


##----------read nodes function-------------------------------------
def read_nodes(file_path, node_type):
    """Load the nodes of one type from a comma-separated text file.

    Every non-blank line describes one node and its first comma-separated
    field is the raw node id. The id is prefixed with ``node_type`` so that
    ids coming from different node files stay distinct inside one graph.
    Any further fields on the line are ignored: plain community-detection
    algorithms do not need node features, so ``features`` is stored as ''.

    Args:
        file_path: Path of the node file to read.
        node_type: Type label, used both as the id prefix and as the stored
            ``node_type`` attribute.

    Returns:
        dict mapping each prefixed node id to
        ``{'node_type': node_type, 'features': ''}``.
    """
    node_data = {}
    with open(file_path, 'r') as file:
        for line in file:
            node_str = line.strip()
            if not node_str:
                # A blank line (typically a trailing newline at end of file)
                # would otherwise register a phantom node whose id is just
                # the bare type prefix.
                continue
            node_id = node_type + node_str.split(',')[0]
            # To keep numeric features instead, parse the remaining fields:
            # features = list(map(float, node_str.split(',')[1:]))
            node_data[node_id] = {'node_type': node_type, 'features': ''}
    return node_data


##--------------------------------------------------------------------


##------------read edge function-----------------------------------------------
def read_edges_incremental(file_path, edge_type, node_type_map):
    """Lazily yield the edges stored in a comma-separated edge file.

    Every non-blank line describes one edge; its first two comma-separated
    fields are the raw ids of the two endpoints. Each raw id is prefixed with
    the matching entry of ``node_type_map`` so that it lines up with the node
    ids produced by the node loader. Further fields on the line are ignored
    and ``features`` is stored as ''.

    Being a generator, the function reads the file one line at a time, so the
    whole edge list never has to sit in memory at once.

    Args:
        file_path: Path of the edge file to read.
        edge_type: Label stored in the ``edge_type`` attribute of every edge.
        node_type_map: Two-element sequence ``[first_type, second_type]`` with
            the id prefixes of the first and second endpoint.

    Yields:
        ``(node1, node2, {'edge_type': edge_type, 'features': ''})`` tuples.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
    """
    first_prefix, second_prefix = node_type_map[0], node_type_map[1]
    with open(file_path, 'r') as file:
        for line in file:
            edge_str = line.strip()
            if not edge_str:
                # Blank lines (e.g. a trailing newline) carry no edge; without
                # this guard they crash on the missing second field.
                continue
            edge_parts = edge_str.split(',')
            # To keep numeric features instead, parse the remaining fields:
            # features = list(map(float, edge_parts[2:]))
            yield (first_prefix + edge_parts[0],
                   second_prefix + edge_parts[1],
                   {'edge_type': edge_type, 'features': ''})


##---------------------------------------------------------------------------------------


print("----start to read node.-----------------------")
app_nodes = read_nodes('telecom-graph//node_app.txt', 'app')
package_nodes = read_nodes('telecom-graph//node_package.txt', 'package')
user_nodes = read_nodes('telecom-graph//node_user.txt', 'user')
cell_nodes = read_nodes('telecom-graph//node_cell.txt', 'cell')

G = nx.Graph()

# Insert one node table after another and report the running node count.
# The order (package, app, user, cell) is deliberate: it fixes the node
# iteration order of G, which later cells rely on implicitly.
for label, node_table in (('package', package_nodes),
                          ('app', app_nodes),
                          ('user', user_nodes),
                          ('cell', cell_nodes)):
    # add_nodes_from takes (node_id, attribute_dict) pairs, which is exactly
    # what dict.items() yields for the tables built by read_nodes.
    G.add_nodes_from(node_table.items())
    print(f"After adding {label} nodes: {len(G.nodes())} nodes")
print("-----all nodes is read already.----------------")

print("----------start to read edge.----------------------")

# (ordinal used in the progress message, edge file, edge type, endpoint types)
edge_sources = (
    ('first', 'telecom-graph//edge_user_buy_package.txt', 'buy', ['user', 'package']),
    ('second', 'telecom-graph//edge_user_live_cell.txt', 'live', ['user', 'cell']),
    ('third', 'telecom-graph//edge_user_use_app.txt', 'use', ['user', 'app']),
)
for step, (ordinal, edge_file, edge_type, endpoint_types) in enumerate(edge_sources, start=1):
    print(f"----------{step}--------------------")
    # The edge reader is a generator of (u, v, attribute_dict) tuples, so the
    # edges are streamed into the graph without building a list first.
    G.add_edges_from(read_edges_incremental(edge_file, edge_type, endpoint_types))
    # Report the running edge count to confirm that edges were added.
    print(f"After {ordinal} addition, number of edges: {G.number_of_edges()}")
    # Run a garbage-collection pass between files to release memory.
    gc.collect()
print("---------all edge is read already.------------------")

#nx.write_graphml(G, "telecom_network_1.graphml")
#print("Writing done!!")

----start to read node.-----------------------
After adding package nodes: 380 nodes
After adding app nodes: 1380 nodes
After adding user nodes: 101380 nodes
After adding cell nodes: 170380 nodes
-----all nodes is read already.----------------
----------start to read edge.----------------------
----------1--------------------
After first addition, number of edges: 1900000
----------2--------------------
After second addition, number of edges: 8800000
----------3--------------------
After third addition, number of edges: 8900000
---------all edge is read already.------------------


In [2]:
# Collect every connected component of G (each one is a set of node ids).
connected_components = list(nx.connected_components(G))

# Order the components from largest to smallest and keep the 20 biggest.
connected_components = sorted(connected_components, key=len, reverse=True)[:20]

# Position (0-based, in the size-sorted list) of the component to inspect.
component_index = 10

# Tally the node types inside that component. Node ids are built as
# "<type><raw id>", so the type is matched as a prefix of the id; a substring
# test could miscount an id whose raw part happens to contain a type name.
type_counts = {'app': 0, 'package': 0, 'cell': 0, 'user': 0}
for node in connected_components[component_index]:
    for node_type in type_counts:
        if node.startswith(node_type):
            type_counts[node_type] += 1
            break

app_count = type_counts['app']
package_count = type_counts['package']
cell_count = type_counts['cell']
user_count = type_counts['user']
print(f"App Neighbors Count:{app_count}")
print(f"Package Neighbors Count:{package_count}")
print(f"Cell Neighbors Count:{cell_count}")
print(f"User Neighbors Count:{user_count}")

App Neighbors Count:50
Package Neighbors Count:19
Cell Neighbors Count:3450
User Neighbors Count:5000


In [3]:
# Induced subgraph of G on the first kept component; the component list was
# sorted by size in descending order, so index 0 is the largest one.
subgraph = G.subgraph(connected_components[0])

In [4]:
# PageRank score of every node in the component subgraph, computed with a
# damping factor (alpha) of 0.8. The result maps node id -> score.
pagerank=nx.pagerank(subgraph,alpha=0.8)

In [6]:
# Order the node ids by descending PageRank score and keep the five best ones
# together with their scores.
nodes_by_rank = sorted(pagerank, key=pagerank.get, reverse=True)
top_5_pagerank = [(name, pagerank[name]) for name in nodes_by_rank[:5]]

# Report each of them with its score rounded to four decimals.
print("PageRank 最高的前 5 个节点及其 PageRank 值：")
for node, rank in top_5_pagerank:
    print(f"节点 {node}: PageRank = {rank:.4f}")

PageRank 最高的前 5 个节点及其 PageRank 值：
节点 package120: PageRank = 0.0046
节点 package140: PageRank = 0.0046
节点 package340: PageRank = 0.0046
节点 package300: PageRank = 0.0046
节点 package360: PageRank = 0.0046


In [10]:
# Print the PageRank score of the first app node found in the component, if
# there is one. Node ids start with their type, so the type is matched as a
# prefix of the id rather than as a substring anywhere inside it.
first_app_rank = next(
    (rank for node, rank in pagerank.items() if node.startswith('app')),
    None,
)
if first_app_rank is not None:
    print(first_app_rank)

0.00011549185018012377


In [12]:
# Print the PageRank score of the first user node found in the component, if
# there is one. Node ids start with their type, so the type is matched as a
# prefix of the id rather than as a substring anywhere inside it.
first_user_rank = next(
    (rank for node, rank in pagerank.items() if node.startswith('user')),
    None,
)
if first_user_rank is not None:
    print(first_user_rank)

0.0001015836584341875


In [13]:
# Print the PageRank score of the first cell node found in the component, if
# there is one. Node ids start with their type, so the type is matched as a
# prefix of the id rather than as a substring anywhere inside it.
first_cell_rank = next(
    (rank for node, rank in pagerank.items() if node.startswith('cell')),
    None,
)
if first_cell_rank is not None:
    print(first_cell_rank)

0.00011549185018012377


In [17]:
# Edge density of the component subgraph: the share of possible node pairs
# that are actually connected by an edge.
nx.density(subgraph)

0.012264892687287874

In [23]:
# Size of the component subgraph: how many nodes and how many edges it has.
num_nodes, num_edges = subgraph.number_of_nodes(), subgraph.number_of_edges()

In [24]:
# Display the edge count of the component subgraph (subgraph.number_of_edges()).
num_edges

445000

In [26]:
# n * ln(n) for the component's node count n; presumably a reference scale to
# set against num_edges (TODO confirm the intended comparison).
# numpy is imported once in the notebook's top import block.
num_nodes*np.log(num_nodes)

77097.41208796296

In [38]:
# List the bridges of the component subgraph, i.e. the edges whose removal
# would disconnect it. An empty list means no single edge is such a cut.
list(nx.bridges(subgraph))

[]