In [2]:
import os
import torch
# Export the installed torch version string as an environment variable so the
# shell commands below can substitute it as ${TORCH}.
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

# Install torch-scatter and torch-sparse from the wheel index page whose URL
# embeds the installed torch version, then install PyTorch Geometric straight
# from its Git repository.
# NOTE(review): none of these installs pin a version, so a later re-run may
# resolve to different package versions -- consider pinning.
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

2.1.0+cu118
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m55.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m35.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for torch_geometric (pyproject.toml) ... [?25l[?25hdone


In [3]:
import torch_geometric
from torch_geometric.datasets import Planetoid

# Introduction

# Load the dataset

In [4]:
# Load the Cora Planetoid dataset; its files are stored under ./tutorial1.
dataset = Planetoid(
    name="Cora",
    root="tutorial1",
)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


Dataset properties

In [6]:
# Summarise the dataset with a single print call; sep="\n" puts each item on
# its own line.
print(
    dataset,
    f"Number of graphs:\t\t {len(dataset)}",
    f"Number of classes:\t\t {dataset.num_classes}",
    f"Number of node features:\t {dataset.num_node_features}",
    f"Number of edge features: \t {dataset.num_edge_features}",
    sep="\n",
)

Cora()
Number of graphs:		 1
Number of classes:		 7
Number of node features:	 1433
Number of edge features: 	 0


Dataset shapes

In [7]:
# Show the tensor shapes of the graph stored in the dataset. Indexing the
# dataset is the supported accessor; `dataset.data` exposes the internal
# collated storage and triggers a warning in recent PyTorch Geometric releases.
print(dataset[0])

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])




In [9]:
# Fetch the graph once by indexing the dataset instead of reading
# `dataset.data` repeatedly: `.data` is the dataset's internal storage and
# PyTorch Geometric warns against accessing it directly.
graph = dataset[0]

# edge_index has shape [2, number_of_edges]; each column is one edge, so the
# first columns say that node 0 is connected to nodes 633, 1862, 2582, ...
print(f"edge_index:\t\t {graph.edge_index.shape}")
print(graph.edge_index)
print("\n")

# 1-D boolean tensor with one entry per node; True marks the nodes that belong
# to the training split (val_mask / test_mask play the same role for the
# validation and test splits).
print(f"train_mask:\t\t {graph.train_mask.shape}")
print(graph.train_mask)
print("\n")

# Node feature matrix: 2708 nodes, each described by 1433 features.
print(f"x:\t\t {graph.x.shape}")
print(graph.x)
print("\n")

# Node labels: one class index per node.
print(f"y:\t\t {graph.y.shape}")
print(graph.y)

edge_index:		 torch.Size([2, 10556])
tensor([[   0,    0,    0,  ..., 2707, 2707, 2707],
        [ 633, 1862, 2582,  ...,  598, 1473, 2706]])


train_mask:		 torch.Size([2708])
tensor([ True,  True,  True,  ..., False, False, False])


x:		 torch.Size([2708, 1433])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


y:		 torch.Size([2708])
tensor([3, 4, 4,  ..., 3, 3, 3])


In [10]:
import os.path as osp

import torch.nn.functional as F
from torch_geometric.nn import SAGEConv

In [12]:
# The dataset holds a single graph; take element 0 as the working graph object
# used by the model and the train/test functions below.
data = dataset[0]

Defining the neural network

In [22]:
class Net(torch.nn.Module):
  """Single-layer GraphSAGE node classifier.

  One `SAGEConv` layer maps the node features directly to per-class scores,
  which `forward` turns into log-probabilities.

  Args:
    in_channels: Size of each input node feature vector. Defaults to
      `dataset.num_features` of the notebook-level dataset.
    out_channels: Number of output classes. Defaults to
      `dataset.num_classes` of the notebook-level dataset.
    aggr: Neighbourhood aggregation scheme handed to `SAGEConv`.
  """

  def __init__(self, in_channels=None, out_channels=None, aggr="max"):
    super().__init__()

    # Fall back to the notebook's dataset so that a bare `Net()` builds the
    # same layer as before.
    if in_channels is None:
      in_channels = dataset.num_features
    if out_channels is None:
      out_channels = dataset.num_classes

    self.conv = SAGEConv(in_channels,
                         out_channels,
                         aggr=aggr) # specifying the aggregation

  def forward(self, x=None, edge_index=None):
    """Return per-node log-probabilities over the classes.

    Args:
      x: Node feature matrix. Defaults to `data.x` of the notebook-level graph.
      edge_index: Graph connectivity. Defaults to `data.edge_index` of the
        notebook-level graph.

    Returns:
      Tensor of log-softmax scores with one row per node.
    """
    # Each argument falls back independently, so `model()` still evaluates the
    # notebook's graph while `model(x, edge_index)` works on any other graph.
    if x is None:
      x = data.x
    if edge_index is None:
      edge_index = data.edge_index

    out = self.conv(x, edge_index)
    return F.log_softmax(out, dim=1)

In [23]:
#to put model on cpu or gpu
# Run on the GPU when torch reports one is available, otherwise on the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

# Move both the model parameters and the graph tensors to the chosen device.
model = Net().to(device)
data = data.to(device)

# Adam with L2 regularisation applied through weight_decay.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

In [24]:
def train():
  """Perform one full-batch optimisation step using the training nodes only."""
  model.train()
  optimizer.zero_grad()

  # Forward pass over the whole graph, then score only the training nodes.
  log_probs = model()
  train_nodes = data.train_mask
  loss = F.nll_loss(log_probs[train_nodes], data.y[train_nodes])

  loss.backward()
  optimizer.step()

In [25]:
def test():
  """Evaluate the model on the train, validation and test splits.

  Returns:
    A list `[train_acc, val_acc, test_acc]` of accuracies, each the fraction
    of correctly classified nodes within the corresponding mask.
  """
  model.eval()
  accs = []
  # Evaluation needs no gradients; disabling autograd avoids building a graph
  # for the forward pass.
  with torch.no_grad():
    logits = model()
    # Calling `data(...)` with attribute names yields (name, value) pairs.
    for _, mask in data("train_mask", "val_mask", "test_mask"):
      pred = logits[mask].argmax(dim=1)  # predicted class = highest log-prob
      acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
      accs.append(acc)
  return accs

In [26]:
# Training schedule. The previous `range(1, 100)` stopped after 99 epochs, so
# the final progress line (epoch 100) was never reached.
NUM_EPOCHS = 100
LOG_EVERY = 10
log = "Epoch: {:03d}, Val: {:.4f}, Test: {:.4f}"  # loop-invariant, built once

best_val_acc = test_acc = 0
for epoch in range(1, NUM_EPOCHS + 1):
  train()
  _, val_acc, tmp_test_acc = test()
  # Model selection: report the test accuracy measured at the epoch with the
  # best validation accuracy seen so far.
  if val_acc > best_val_acc:
    best_val_acc = val_acc
    test_acc = tmp_test_acc

  if epoch % LOG_EVERY == 0:
    print(log.format(epoch, best_val_acc, test_acc))

Epoch: 010, Val: 0.7280, Test: 0.7190
Epoch: 020, Val: 0.7280, Test: 0.7190
Epoch: 030, Val: 0.7280, Test: 0.7190
Epoch: 040, Val: 0.7280, Test: 0.7190
Epoch: 050, Val: 0.7280, Test: 0.7190
Epoch: 060, Val: 0.7280, Test: 0.7190
Epoch: 070, Val: 0.7280, Test: 0.7190
Epoch: 080, Val: 0.7280, Test: 0.7190
Epoch: 090, Val: 0.7280, Test: 0.7190
