In [1]:
import dgl
from dgl.data import DGLDataset
import torch
import numpy as np
import os
import pandas as pd
device = 'cuda'      # change to 'cuda' for GPU

  from .autonotebook import tqdm as notebook_tqdm


In [65]:
class myERDataset(DGLDataset):
    def __init__(self, days: list, length: int, read_path:str):
        """
        arg:
        days: list 需要提取的天数 如 [7, 14, 15, 16]
        length: int 是指之前模拟传播了多少次,用于定位下标
        read_path: str rawdata文件夹
        save_path: str data存在哪里
        """
        self.days = days
        self.length = length  
        self.read_path = read_path
        super().__init__(name='ER_Graph')

    def process(self):
        edges_data = pd.read_csv(self.read_path + 'edges.csv')
        
        node_features = []
        node_labels = []
        for i in range(self.length):
            label_data = pd.read_csv(self.read_path + str(i) + '/nodes' + str(1) +'.csv')
            label = label_data['state'].to_numpy()
            for j in self.days:
                nodes_data = pd.read_csv(self.read_path + str(i) + '/nodes' + str(j) +'.csv')
                node_features.append(nodes_data['state'].to_numpy()) # 某天的状态 # append 一行
                node_labels.append(label)

        # edge_features = torch.from_numpy(edges_data['weight'].to_numpy())
        edges_src = torch.from_numpy(edges_data['src'].to_numpy())
        edges_dst = torch.from_numpy(edges_data['dst'].to_numpy())
        n_nodes = node_labels[0].shape[0]
        self.graph = dgl.graph((edges_src, edges_dst), num_nodes=n_nodes)
        node_features = np.array(node_features,dtype=np.float32).reshape(-1, n_nodes).T
        node_labels = np.array(node_labels).reshape(-1, n_nodes).T
        self.graph.ndata['feat'] = torch.from_numpy(node_features)
        self.graph.ndata['label'] = torch.from_numpy(node_labels)
        # self.graph.edata['weight'] = edge_features

        # If your dataset is a node classification dataset, you will need to assign
        # masks indicating whether a node belongs to training, validation, and test set.

        n_train = int(n_nodes * 0.6)
        n_val = int(n_nodes * 0.2)
        train_mask = torch.zeros(n_nodes, dtype=torch.bool)
        val_mask = torch.zeros(n_nodes, dtype=torch.bool)
        test_mask = torch.zeros(n_nodes, dtype=torch.bool)
        train_mask[:n_train] = True
        val_mask[n_train:n_train + n_val] = True
        test_mask[n_train + n_val:] = True
        self.graph.ndata['train_mask'] = train_mask
        self.graph.ndata['val_mask'] = val_mask
        self.graph.ndata['test_mask'] = test_mask

    def __getitem__(self, idx):
        assert idx == 0, "这个数据集里只有一个图"
        return self.graph

    def __len__(self):
        return 1

In [160]:
# 数据导入
dataset = myERDataset([7], 1, './rawdata/er/')


define model

In [161]:
# 构建一个2层的GNN模型
import torch.nn as nn
import torch.nn.functional as F
class SAGE(nn.Module):
    def __init__(self, in_feats, hid_feats, out_feats):
        super().__init__()
        # 实例化SAGEConve，in_feats是输入特征的维度，out_feats是输出特征的维度，aggregator_type是聚合函数的类型
        self.conv1 = dgl.nn.SAGEConv(
            in_feats=in_feats, out_feats=hid_feats, aggregator_type='mean')
        self.conv2 = dgl.nn.SAGEConv(
            in_feats=hid_feats, out_feats=out_feats, aggregator_type='mean')

    def forward(self, graph, inputs):
        # 输入是节点的特征
        h = self.conv1(graph, inputs)
        h = F.relu(h)
        h = self.conv2(graph, h)
        return h


In [162]:
graph = dataset[0]

# 获取划分的掩码
node_features = graph.ndata['feat']
node_labels = (graph.ndata['label'] + 1)/ 2
train_mask = graph.ndata['train_mask']
valid_mask = graph.ndata['val_mask']
test_mask = graph.ndata['test_mask']
n_features = node_features.shape[1]
n_labels = int(node_labels.max().item() + 1)


In [167]:
print(node_features.shape)
print(n_features)
print(n_labels)


torch.Size([1000, 1])
1
2


In [165]:
def evaluate(model, graph, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(graph, features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)

In [166]:
model = SAGE(in_feats=n_features, hid_feats=100, out_feats=n_labels)
opt = torch.optim.Adam(model.parameters())

for epoch in range(4000):
    model.train()
    # 使用所有节点(全图)进行前向传播计算
    logits = model(graph, node_features)
    # 计算损失值
    loss = F.multilabel_soft_margin_loss(logits[train_mask], node_labels[train_mask])
    # 计算验证集的准确度
    acc = evaluate(model, graph, node_features, node_labels, valid_mask)
    # 进行反向传播计算
    opt.zero_grad()
    loss.backward()
    opt.step()
    print(loss.item())

print('Accuracy on test: {}'.format(evaluate(model, graph, node_features, node_labels, test_mask)))

0.6935011148452759
0.6934608221054077
0.6934280395507812
0.6933987736701965
0.6933727264404297
0.6933490633964539
0.6933279037475586
0.6933087110519409
0.6932912468910217
0.6932755708694458
0.6932615041732788
0.6932485103607178
0.6932365894317627
0.6932255029678345
0.693215548992157
0.6932064294815063
0.6931981444358826
0.6931906342506409
0.6931840181350708
0.6931781768798828
0.6931731104850769
0.6931688785552979
0.6931653022766113
0.693162202835083
0.6931598782539368
0.6931580901145935
0.6931564807891846
0.6931552886962891
0.6931543350219727
0.6931537389755249
0.6931533217430115
0.6931528449058533
0.693152666091919
0.6931525468826294
0.6931526064872742
0.6931526064872742
0.6931527256965637
0.6931528449058533
0.6931531429290771
0.6931532025337219
0.6931532621383667
0.6931533217430115
0.6931532621383667
0.6931533217430115
0.6931532025337219
0.6931529641151428
0.6931528449058533
0.6931525468826294
0.6931523680686951
0.6931521892547607
0.6931520104408264
0.6931517720222473
0.6931515336036