In [10]:
import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset
import torch


dataset_name = 'ogbn-arxiv'

dataset = PygNodePropPredDataset(name=dataset_name,
                                 transform=T.ToSparseTensor())
print('The {} dataset has {} graph'.format(dataset_name, len(dataset)))

# Extract the graph
data = dataset[0]
print(data)

# 查看节点ID范围
print(f"\n节点ID范围: 0 ~ {data.x.shape[0]-1}")
# 查看标签信息
print("\n标签信息:")
print(f"标签维度: {data.y.shape}")
print(f"标签取值范围: {torch.min(data.y).item()} ~ {torch.max(data.y).item()}")
print(f"标签类别数量: {len(torch.unique(data.y))}")


The ogbn-arxiv dataset has 1 graph
Data(num_nodes=169343, x=[169343, 128], node_year=[169343, 1], y=[169343, 1], adj_t=[169343, 169343, nnz=1166243])

节点ID范围: 0 ~ 169342

标签信息:
标签维度: torch.Size([169343, 1])
标签取值范围: 0 ~ 39
标签类别数量: 40


In [20]:
# 获取邻接矩阵
adj_t = data.adj_t

# 计算每个节点的度数
degrees = adj_t.sum(dim=1)

# 统计度数信息
print("\n度数统计信息:")
print(f"平均度数: {degrees.mean().item():.2f}")
print(f"最大度数: {degrees.max().item()}")
print(f"最小度数: {degrees.min().item()}")
print(f"度数标准差: {degrees.std().item():.2f}")

# 找出度数为0的节点(孤立节点)
isolated_nodes = torch.where(degrees == 0)[0]

print(f"\n孤立节点数量: {len(isolated_nodes)}")
if len(isolated_nodes) > 0:
    print("存在孤立节点,其ID为:")
    print(isolated_nodes)
else:
    print("不存在孤立节点")



度数统计信息:
平均度数: 6.89
最大度数: 13155.0
最小度数: 0.0
度数标准差: 67.60

孤立节点数量: 62006
存在孤立节点,其ID为:
tensor([     8,     11,     16,  ..., 169337, 169338, 169342])


In [21]:
adj_t[8]

SparseTensor(row=tensor([], dtype=torch.int64),
             col=tensor([], dtype=torch.int64),
             size=(1, 169343), nnz=0, density=0.00%)