# Node Classification in Knowledge Graphs - Cora Dataset

## Installation of Pytorch Geometric

In [None]:
!python -c "import torch; print(torch.__version__)"

In [None]:
!python -c "import torch; print(torch.version.cuda)"

In [None]:
!pip3 install --no-index torch-scatter -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html
!pip3 install --no-index torch-sparse -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html
!pip3 install --no-index torch-cluster -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html
!pip3 install --no-index torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html
!pip3 install torch-geometric

## Import and visualize the dataset

In [1]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

In [2]:
dataset = Planetoid(root = 'data/Planetoid', name = 'Cora', transform = NormalizeFeatures())

In [3]:
print(f"NUmber of graphs: {len(dataset)}")
print(f"Number of features: {dataset.num_features}")
print(f"NUmber of classes: {dataset.num_classes}")
print("---------------------------------------")

data = dataset[0]

print(data)
print(f"Number of nodes: {data.num_nodes}")
print(f"Number of edges: {data.num_edges}")
print(f"Number of training nodes: {data.train_mask.sum()}")
print(f"Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}")
print(f"Is undirected: {data.is_undirected()}")

NUmber of graphs: 1
Number of features: 1433
NUmber of classes: 7
---------------------------------------
Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])
Number of nodes: 2708
Number of edges: 10556
Number of training nodes: 140
Training node label rate: 0.05
Is undirected: True


## Model Initialization

In [4]:
import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

In [11]:
class GCN(torch.nn.Module):
  def __init__(self, hidden_channels):

    super(GCN, self).__init__()
    torch.manual_seed(10)

    # Initialize GCN layers
    self.conv1 = GCNConv(dataset.num_features, hidden_channels)
    self.conv2 = GCNConv(hidden_channels, hidden_channels)
    self.out = Linear(hidden_channels, dataset.num_classes)

  def forward(self, x, edge_index):

    # First message passing layer
    x = self.conv1(x, edge_index)
    x = x.relu()
    x = F.dropout(x, p = 0.2, training = self.training)

    # Second message passing layer
    x = self.conv2(x, edge_index)
    x = x.relu()
    x = F.dropout(x, p = 0.2, training = self.training)

    # Output Layer
    x = F.softmax(self.out(x), dim = 1)

    return x


In [12]:
model = GCN(hidden_channels = 16)
print(model)

GCN(
  (conv1): GCNConv(1433, 16)
  (conv2): GCNConv(16, 16)
  (out): Linear(in_features=16, out_features=7, bias=True)
)


## Model Training

In [13]:
model = GCN(hidden_channels = 16)

device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
model = model.to(device)
data = data.to(device)

lr = 0.01
decay = 5e-4
optimizer = torch.optim.Adam(model.parameters(),
                             lr = lr,
                             weight_decay = decay)

CrossEntropyLoss = torch.nn.CrossEntropyLoss()

In [14]:
def train():

  model.train()
  optimizer.zero_grad()
  out = model(data.x, data.edge_index)
  loss = CrossEntropyLoss(out[data.train_mask], data.y[data.train_mask])
  loss.backward()
  optimizer.step()

  return loss

In [15]:
def test():

  model.eval()
  out = model(data.x, data.edge_index)
  pred = out.argmax(dim = 1)
  test_correct = pred[data.test_mask] == data.y[data.test_mask]
  test_acc = int(test_correct.sum()) / int(data.test_mask.sum())

  return test_acc

In [16]:
losses = []
for epoch in range(1001):
  loss = train()
  losses.append(loss)
  if epoch % 100 == 0:
    print(f"Epoch: {epoch}, Loss: {loss:.4f}")

Epoch: 0, Loss: 1.9460
Epoch: 100, Loss: 1.5360
Epoch: 200, Loss: 1.2956
Epoch: 300, Loss: 1.2558
Epoch: 400, Loss: 1.2400
Epoch: 500, Loss: 1.2296
Epoch: 600, Loss: 1.2422
Epoch: 700, Loss: 1.2405
Epoch: 800, Loss: 1.2355
Epoch: 900, Loss: 1.2274
Epoch: 1000, Loss: 1.2293


In [18]:
test_acc = test()
print(f"Test Accuracy: {test_acc:.4f}")

Test Accuracy: 0.7520
