In [50]:
%matplotlib inline

In [51]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F

# 1. Loading Cora dataset

In [52]:
import dgl.data # networkx
# Load DGL's built-in Cora dataset; the summary printed below comes from this call.
dataset=dgl.data.CoraGraphDataset()

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.


In [53]:
# Number of distinct node labels the classifier has to predict.
print("Number of categories: ",dataset.num_classes)

Number of categories:  7


In [54]:
g=dataset[0]  # how many graphs are there? index 0 selects the first one
# A DGL Dataset object may contain one or multiple graphs.
# The Cora dataset used in this tutorial only consists of one single graph.

In [55]:
# Display the graph summary (node/edge counts and feature schemes).
g

Graph(num_nodes=2708, num_edges=10556,
      ndata_schemes={'train_mask': Scheme(shape=(), dtype=torch.bool), 'label': Scheme(shape=(), dtype=torch.int64), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'test_mask': Scheme(shape=(), dtype=torch.bool), 'feat': Scheme(shape=(1433,), dtype=torch.float32)}
      edata_schemes={})

In [56]:
g # graph information

Graph(num_nodes=2708, num_edges=10556,
      ndata_schemes={'train_mask': Scheme(shape=(), dtype=torch.bool), 'label': Scheme(shape=(), dtype=torch.int64), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'test_mask': Scheme(shape=(), dtype=torch.bool), 'feat': Scheme(shape=(1433,), dtype=torch.float32)}
      edata_schemes={})

In [57]:
print("Node features",g.ndata)  # node information

Node features {'train_mask': tensor([ True,  True,  True,  ..., False, False, False]), 'label': tensor([3, 4, 4,  ..., 3, 3, 3]), 'val_mask': tensor([False, False, False,  ..., False, False, False]), 'test_mask': tensor([False, False, False,  ...,  True,  True,  True]), 'feat': tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])}


In [58]:
# g.edata holds the edge features, so label the output accordingly
# (the previous label said "Node features", copied from the ndata cell).
print("Edge features",g.edata)

Node features {}


In [59]:
g.ndata['feat']  # the node feature tensor stored under the 'feat' key

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [60]:
# Move the graph to the GPU when one is available; fall back to the CPU so the
# notebook still runs on machines without CUDA instead of raising here.
g=g.to('cuda' if torch.cuda.is_available() else 'cpu')

A DGL graph can store node features and edge features in two
dictionary-like attributes called ``ndata`` and ``edata``.
In the DGL Cora dataset, the graph contains the following node features:

- ``train_mask``: A boolean tensor indicating whether the node is in the
  training set.

- ``val_mask``: A boolean tensor indicating whether the node is in the
  validation set.

- ``test_mask``: A boolean tensor indicating whether the node is in the
  test set.

- ``label``: The ground truth node category.

-  ``feat``: The node features.

# 2. Graph node classification with a GCN

In [61]:
#dgl.nn.GraphConv modules, which inherit torch.nn.Module.
from dgl.nn import GraphConv

In [62]:
class GCN(nn.Module):
    """Two-layer graph convolutional network.

    Stacks two ``GraphConv`` layers with a ReLU in between. The second layer
    is sized to ``num_classes`` and its output is returned without any
    further activation.
    """

    def __init__(self, in_features, hid_features, num_classes):
        """Create the two graph-convolution layers.

        Args:
            in_features: Input size of the first ``GraphConv`` layer.
            hid_features: Output size of the first layer and input size of
                the second.
            num_classes: Output size of the second ``GraphConv`` layer.
        """
        super().__init__()
        self.conv1 = GraphConv(in_features, hid_features)
        self.conv2 = GraphConv(hid_features, num_classes)

    def forward(self, g, in_features):
        """Run both layers on graph ``g``.

        Args:
            g: Graph handed to each ``GraphConv`` layer.
            in_features: Node feature tensor fed to the first layer. Despite
                the name it is the feature data itself, not a size.

        Returns:
            The output of the second ``GraphConv`` layer.
        """
        # GraphConv receives the graph itself, so the graph structure
        # (adjacency A) is handled inside the layer.
        hidden = F.relu(self.conv1(g, in_features))
        return self.conv2(g, hidden)

In [63]:
# Input width is the per-node feature size, the hidden width is 16, and the
# output width is the number of classes. The model is placed on whatever device
# the graph lives on, so the two cannot end up on different devices.
model=GCN(g.ndata['feat'].shape[1],16,dataset.num_classes).to(g.device)

In [64]:
# Display the layer structure of the model.
model

GCN(
  (conv1): GraphConv(in=1433, out=16, normalization=both, activation=None)
  (conv2): GraphConv(in=16, out=7, normalization=both, activation=None)
)

In [67]:
def train(g, model):
    """Placeholder for a training routine; currently a no-op.

    Args:
        g: Unused.
        model: Unused.

    Returns:
        None.
    """
    return None

In [68]:
# Adam optimizer over every parameter of the model.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Best validation accuracy seen so far and the test accuracy recorded with it.
best_val_acc, best_test_acc = 0, 0

# Pull the per-node tensors out of the graph's node-data dictionary.
features, labels = g.ndata['feat'], g.ndata['label']
train_mask, val_mask, test_mask = (
    g.ndata[mask_key] for mask_key in ('train_mask', 'val_mask', 'test_mask')
)

In [69]:
# Inspect the shape of the node feature tensor.
features.shape

torch.Size([2708, 1433])

In [70]:
# Inspect the shape of the label tensor.
labels.shape

torch.Size([2708])

In [71]:
# Display the feature tensor; the repr also shows which device it lives on.
features

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')

In [72]:
# Inspect the shape of the test mask.
test_mask.shape

torch.Size([2708])

In [73]:
# Forward pass of a single, manually unrolled training step.
# (A surrounding `for e in range(1):` loop was left commented out, so this runs once.)
logits= model(g,features)

In [74]:
# Inspect the shape of the model output.
logits.shape

torch.Size([2708, 7])

In [75]:
# Predicted class per row: index of the largest logit along dimension 1.
pred = logits.argmax(dim=1)

In [76]:
# Inspect the shape of the prediction tensor.
pred.shape

torch.Size([2708])

In [77]:
# Cross-entropy is computed only over the rows selected by train_mask.
loss = F.cross_entropy(
    input=logits[train_mask],
    target=labels[train_mask],
)

In [78]:
# Display the loss tensor.
loss

tensor(1.9461, device='cuda:0', grad_fn=<NllLossBackward0>)

In [79]:
# Fraction of train-mask nodes whose prediction equals the label.
train_acc = pred[train_mask].eq(labels[train_mask]).float().mean()

In [80]:
# Fraction of val-mask nodes whose prediction equals the label.
val_acc = pred[val_mask].eq(labels[val_mask]).float().mean()

In [81]:
# Fraction of test-mask nodes whose prediction equals the label.
test_acc = pred[test_mask].eq(labels[test_mask]).float().mean()

In [82]:
# Keep the best validation accuracy seen so far, together with the test
# accuracy measured at that same point.
if val_acc > best_val_acc:
    best_val_acc, best_test_acc = val_acc, test_acc

# Backward pass: clear old gradients, backpropagate the loss, update the weights.
optimizer.zero_grad()
loss.backward()
optimizer.step()

# The epoch number is the literal 1 because this step is run by hand, not in a loop.
print(
    'In epoch {}, loss: {:.3f}, val acc: {:.3f} (best {:.3f}), test acc: {:.3f} (best {:.3f})'.format(
        1, loss, val_acc, best_val_acc, test_acc, best_test_acc
    )
)
  

In epoch 1, loss: 1.946, val acc: 0.158 (best 0.158), test acc: 0.141 (best 0.141)


# 自定义数据集

In [83]:
import dgl
import numpy as np

In [91]:
# Build a directed graph with edges from node 0 to each of nodes 1-5.
# NOTE: this rebinds `g`, which the earlier cells used for the Cora graph.
g = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5]), num_nodes=6)
# Same edge list without num_nodes; this assignment replaces the graph built above.
g = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5]))
# num_nodes=6 can be omitted

In [92]:
# Display the summary of the newly built graph.
g

Graph(num_nodes=6, num_edges=5,
      ndata_schemes={}
      edata_schemes={})

In [93]:
# Assign a 3-dimensional node feature vector for each of the 6 nodes.
# torch.randn is not seeded here, so the values differ from run to run.
g.ndata['x'] = torch.randn(6, 3)
# Assign a 4-dimensional edge feature vector for each edge.
g.edata['a'] = torch.randn(5, 4)
# Assign a 5x4 node feature matrix for each node.  Node and edge features in DGL can be multi-dimensional.
g.ndata['y'] = torch.randn(6, 5, 4)

In [94]:
# Display the graph again; the schemes now list the features assigned above.
g

Graph(num_nodes=6, num_edges=5,
      ndata_schemes={'x': Scheme(shape=(3,), dtype=torch.float32), 'y': Scheme(shape=(5, 4), dtype=torch.float32)}
      edata_schemes={'a': Scheme(shape=(4,), dtype=torch.float32)})

In [95]:
# Graph size: node count on the first line, edge count on the second.
print(g.num_nodes(), g.num_edges(), sep="\n")
# Degrees of the center node (node 0): out-degree first, then in-degree.
# The graph is directed, so the in-degree should be 0.
print(g.out_degrees(0), g.in_degrees(0), sep="\n")

6
5
5
0


In [96]:
# 子图
sg1 = g.subgraph([0, 1, 3])
# Induce a subgraph from edge 0, edge 1 and edge 3 from the original graph.
sg2 = g.edge_subgraph([0, 1, 3])
 
 

In [98]:
# The original IDs of each node in sg1
print(sg1.ndata[dgl.NID])
# The original IDs of each edge in sg1
print(sg1.edata[dgl.EID])
# The original IDs of each node in sg2
print(sg2.ndata[dgl.NID])
# The original IDs of each edge in sg2
print(sg2.edata[dgl.EID])

tensor([0, 1, 3])
tensor([0, 2])
tensor([0, 1, 2, 4])
tensor([0, 1, 3])


In [99]:
# Save graphs
dgl.save_graphs('graph.dgl', g)
dgl.save_graphs('graphs.dgl', [g, sg1, sg2])
# Load graphs
(g,), _ = dgl.load_graphs('graph.dgl')
print(g)
(g, sg1, sg2), _ = dgl.load_graphs('graphs.dgl')
print(g)
print(sg1)
print(sg2)

Graph(num_nodes=6, num_edges=5,
      ndata_schemes={'x': Scheme(shape=(3,), dtype=torch.float32), 'y': Scheme(shape=(5, 4), dtype=torch.float32)}
      edata_schemes={'a': Scheme(shape=(4,), dtype=torch.float32)})
Graph(num_nodes=6, num_edges=5,
      ndata_schemes={'x': Scheme(shape=(3,), dtype=torch.float32), 'y': Scheme(shape=(5, 4), dtype=torch.float32)}
      edata_schemes={'a': Scheme(shape=(4,), dtype=torch.float32)})
Graph(num_nodes=3, num_edges=2,
      ndata_schemes={'x': Scheme(shape=(3,), dtype=torch.float32), 'y': Scheme(shape=(5, 4), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'a': Scheme(shape=(4,), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.int64)})
Graph(num_nodes=4, num_edges=3,
      ndata_schemes={'x': Scheme(shape=(3,), dtype=torch.float32), 'y': Scheme(shape=(5, 4), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'a': Scheme(shape=(4,), dtype=torch.float32), '_ID': Sc