In [1]:
! pip install torch_geometric



In [2]:
import torch
from torch.optim import Adam
import torch_geometric
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
from torch_geometric.nn import VGAE
import torch_geometric.transforms as T
from torch_geometric.utils import train_test_split_edges

In [3]:
# Fetch the PubMed graph of the Planetoid collection into ./data, attaching
# NormalizeFeatures as the dataset transform.
dataset = Planetoid(
    root='./data',
    name="PubMed",
    transform=T.NormalizeFeatures(),
)

# Work with the first (index 0) graph object stored in the dataset.
data = dataset[0]

In [4]:
# Summarise the loaded graph: the printout lists each attribute with its shape.
print(data)

Data(x=[19717, 500], edge_index=[2, 88648], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717])


In [5]:
# Drop the three node-level split masks from the graph object.
# NOTE(review): presumably they are cleared because link prediction uses an
# edge-level split instead — confirm they are not needed later.
data.train_mask = None
data.val_mask = None
data.test_mask = None

In [6]:
# The split draws its random edge permutations from PyTorch's global RNG, so
# seed it first; without this every Restart & Run All produces different
# train/val/test edge sets and the reported metrics are not reproducible.
torch.manual_seed(42)

# Replaces `data` with a graph carrying train/val/test positive edges and
# val/test negative edges instead of a single edge_index.
data = train_test_split_edges(data)



In [7]:
# Inspect the graph again now that train_test_split_edges has replaced it.
print(data)

Data(x=[19717, 500], y=[19717], val_pos_edge_index=[2, 2216], test_pos_edge_index=[2, 4432], train_pos_edge_index=[2, 75352], train_neg_adj_mask=[19717, 19717], val_neg_edge_index=[2, 2216], test_neg_edge_index=[2, 4432])


In [8]:
class VGCNEncoder(torch.nn.Module):
    """GCN encoder that emits the two tensors consumed by a variational GAE.

    A shared first GCN layer maps the inputs to ``2 * out_channels`` hidden
    features; two sibling GCN layers then map that hidden representation to
    ``out_channels`` features each (``conv_mu`` and ``conv_logstd``).

    Every layer is created with ``cached=True``.
    NOTE(review): per the GCNConv docs this reuses graph normalisation computed
    on the first call, so the encoder should always be fed the same
    ``edge_index`` — confirm against the installed PyG version.

    Args:
        in_channels: Number of input features per node.
        out_channels: Size of each of the two outputs per node.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        hidden_channels = 2 * out_channels
        self.conv1 = GCNConv(in_channels, hidden_channels, cached=True)
        self.conv_mu = GCNConv(hidden_channels, out_channels, cached=True)
        self.conv_logstd = GCNConv(hidden_channels, out_channels, cached=True)

    def forward(self, x, edge_index):
        """Return ``(conv_mu output, conv_logstd output)`` for the given graph.

        Both heads read the same ReLU-activated output of ``conv1``.
        """
        hidden = self.conv1(x, edge_index).relu()
        mu = self.conv_mu(hidden, edge_index)
        logstd = self.conv_logstd(hidden, edge_index)
        return mu, logstd

In [9]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [10]:
# Size of the latent embedding produced per node.
out_channels = 2
# Input width of the encoder, taken from the dataset's node features.
num_features = dataset.num_features

# Wrap the GCN encoder in a VGAE; no decoder is passed, so VGAE uses its default.
model = VGAE(encoder=VGCNEncoder(in_channels=num_features, out_channels=out_channels))
model = model.to(device)

In [11]:
# Echo the assembled model so the notebook displays its layer structure.
model

VGAE(
  (encoder): VGCNEncoder(
    (conv1): GCNConv(500, 4)
    (conv_mu): GCNConv(4, 2)
    (conv_logstd): GCNConv(4, 2)
  )
  (decoder): InnerProductDecoder()
)

In [12]:
# Place the two tensors the encoder reads during training on the chosen device:
# the training positive edges and the node feature matrix.
train_pos_edge_index = data.train_pos_edge_index.to(device)
x = data.x.to(device)

In [13]:
# Adam over every parameter of the model, with a learning rate of 1e-3.
optimizer = Adam(params=model.parameters(), lr=1e-3)

In [14]:
def train():
    """Perform one full-graph optimisation step and return its loss.

    The loss is the model's reconstruction loss on the training positive edges
    plus the model's KL term scaled by ``1 / data.num_nodes``.

    Uses the notebook-level ``model``, ``optimizer``, ``x``,
    ``train_pos_edge_index`` and ``data``.

    Returns:
        float: The scalar loss that was back-propagated.
    """
    model.train()
    optimizer.zero_grad()

    latent = model.encode(x, train_pos_edge_index)
    recon_term = model.recon_loss(latent, train_pos_edge_index)
    kl_weight = 1 / data.num_nodes
    total_loss = recon_term + kl_weight * model.kl_loss()

    total_loss.backward()
    optimizer.step()
    return float(total_loss)


def test(pos_edge_index, neg_edge_index):
    """Score the current model on a set of positive and negative edges.

    The model is switched to eval mode and the node embeddings are computed
    from the training positive edges with gradient tracking disabled; the
    embeddings are then handed to ``model.test`` together with the edges to
    evaluate.

    Args:
        pos_edge_index: Edges to be treated as positives by ``model.test``.
        neg_edge_index: Edges to be treated as negatives by ``model.test``.

    Returns:
        Whatever ``model.test`` returns (the training loop unpacks it as
        ``auc, ap``).
    """
    model.eval()
    with torch.no_grad():
        embeddings = model.encode(x, train_pos_edge_index)
    scores = model.test(embeddings, pos_edge_index, neg_edge_index)
    return scores

In [15]:
epochs = 350
log_every = 10  # print metrics once every this many epochs

for epoch in range(epochs):
    loss = train()
    # Scoring the test edges only feeds the printed report, so skip it on the
    # silent epochs instead of running it 350 times. The last epoch is always
    # scored so the final metrics are reported and `auc`/`ap` describe the
    # fully trained model once the loop ends.
    if epoch % log_every == 0 or epoch == epochs - 1:
        auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
        print(f'Epoch: {epoch}, Loss: {loss:.4f}, AUC: {auc:.4f}, AP: {ap:.4f}')

Epoch: 0, Loss: 1.7750, AUC: 0.7024, AP: 0.7236
Epoch: 10, Loss: 1.7588, AUC: 0.8527, AP: 0.8425
Epoch: 20, Loss: 1.7331, AUC: 0.8796, AP: 0.8615
Epoch: 30, Loss: 1.7248, AUC: 0.8859, AP: 0.8664
Epoch: 40, Loss: 1.6755, AUC: 0.8882, AP: 0.8683
Epoch: 50, Loss: 1.6917, AUC: 0.8895, AP: 0.8694
Epoch: 60, Loss: 1.6553, AUC: 0.8904, AP: 0.8702
Epoch: 70, Loss: 1.6407, AUC: 0.8910, AP: 0.8708
Epoch: 80, Loss: 1.6049, AUC: 0.8915, AP: 0.8713
Epoch: 90, Loss: 1.5836, AUC: 0.8918, AP: 0.8716
Epoch: 100, Loss: 1.5722, AUC: 0.8920, AP: 0.8719
Epoch: 110, Loss: 1.5682, AUC: 0.8923, AP: 0.8721
Epoch: 120, Loss: 1.5468, AUC: 0.8924, AP: 0.8723
Epoch: 130, Loss: 1.5331, AUC: 0.8926, AP: 0.8725
Epoch: 140, Loss: 1.5140, AUC: 0.8928, AP: 0.8728
Epoch: 150, Loss: 1.4956, AUC: 0.8930, AP: 0.8730
Epoch: 160, Loss: 1.4827, AUC: 0.8931, AP: 0.8732
Epoch: 170, Loss: 1.4727, AUC: 0.8932, AP: 0.8734
Epoch: 180, Loss: 1.4560, AUC: 0.8933, AP: 0.8736
Epoch: 190, Loss: 1.4515, AUC: 0.8934, AP: 0.8737
Epoch: 200,