In [None]:
import torch
import warnings
# Ignore every warning category from here on, presumably to keep the cell
# outputs readable. NOTE(review): this also hides deprecation notices.
warnings.filterwarnings("ignore")

In [None]:
def format_pytorch_version(version):
    """Return the part of a version string that precedes the first '+'.

    For example '1.8.0+cu101' becomes '1.8.0'. A string that contains no
    '+' is returned unchanged.
    """
    release, _, _ = version.partition('+')
    return release

# Version string of the installed PyTorch, first as reported and then with
# anything from the first '+' onward removed.
TORCH_version = torch.__version__
TORCH = format_pytorch_version(version=TORCH_version)

def format_cuda_version(version):
    """Turn a CUDA version string into the tag used in wheel index URLs.

    Args:
        version: CUDA version such as '10.1', or None. ``torch.version.cuda``
            is None on CPU-only PyTorch builds.

    Returns:
        'cu' followed by the version with the dots removed (for example
        'cu101'), or 'cpu' when ``version`` is None.
    """
    if version is None:
        # CPU-only build: there is no version string to reformat.
        return 'cpu'
    return 'cu' + version.replace('.', '')

CUDA_version = torch.version.cuda
CUDA = format_cuda_version(CUDA_version)


!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-cluster -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-geometric
!pip install ogb

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

device(type='cuda', index=0)

In [None]:
from torch_geometric.datasets import Planetoid, Reddit, PPI, CitationFull, CoraFull
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, GAE, VGAE, SGConv, ChebConv
from torch_geometric.utils import train_test_split_edges

from torch_geometric.utils import to_undirected
from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

In [None]:
class LinearEncoder(torch.nn.Module):
    """Encoder that merges three cached SGConv branches (K = 2, 3 and 4).

    Args:
        in_channels: Input feature size passed to every branch.
        out_channels: Output size of each individual branch.
        operation: 'sum' adds the branch outputs; any other value
            concatenates them along dimension 1.
    """

    def __init__(self, in_channels, out_channels, operation):
        super().__init__()
        self.operation = operation

        # Arguments shared by all branches; only K differs between them.
        shared = dict(in_channels=in_channels, out_channels=out_channels, cached=True)
        self.conv = SGConv(K=2, **shared)
        self.conv2 = SGConv(K=3, **shared)
        self.conv3 = SGConv(K=4, **shared)

    def forward(self, x, edge_index):
        """Apply every branch to ``(x, edge_index)`` and merge the outputs."""
        branch_outputs = [
            branch(x, edge_index)
            for branch in (self.conv, self.conv2, self.conv3)
        ]
        if self.operation == 'sum':
            k2_out, k3_out, k4_out = branch_outputs
            return k2_out + k3_out + k4_out
        return torch.cat(branch_outputs, 1)

In [None]:
class VariationalLinearEncoder(torch.nn.Module):
    """Variational encoder returning ``(mu, logstd)``.

    Each of the two outputs is produced by its own set of three cached
    SGConv branches (K = 2, 3 and 4).

    Args:
        in_channels: Input feature size passed to every branch.
        out_channels: Output size of each individual branch.
        operation: 'sum' adds the branch outputs; any other value
            concatenates them along dimension 1.
    """

    def __init__(self, in_channels, out_channels, operation):
        super().__init__()
        self.operation = operation

        # Branches that produce mu.
        self.conv_mu = SGConv(in_channels, out_channels, K=2, cached=True)
        self.conv_mu2 = SGConv(in_channels, out_channels, K=3, cached=True)
        self.conv_mu3 = SGConv(in_channels, out_channels, K=4, cached=True)

        # Branches that produce logstd.
        self.conv_logstd = SGConv(in_channels, out_channels, K=2, cached=True)
        self.conv_logstd2 = SGConv(in_channels, out_channels, K=3, cached=True)
        self.conv_logstd3 = SGConv(in_channels, out_channels, K=4, cached=True)

    def forward(self, x, edge_index):
        """Return the merged ``mu`` and ``logstd`` for ``(x, edge_index)``."""

        def merge(parts):
            # Same merge rule for both outputs: sum, or concat on dim 1.
            if self.operation == 'sum':
                first, second, third = parts
                return first + second + third
            return torch.cat(parts, 1)

        mu_parts = [
            branch(x, edge_index)
            for branch in (self.conv_mu, self.conv_mu2, self.conv_mu3)
        ]
        logstd_parts = [
            branch(x, edge_index)
            for branch in (self.conv_logstd, self.conv_logstd2, self.conv_logstd3)
        ]
        return merge(mu_parts), merge(logstd_parts)

In [None]:
def train():
    """Run one optimisation step on the training edges and return the loss.

    Reads the notebook globals ``model``, ``optimizer``, ``x``,
    ``train_pos_edge_index``, ``variational`` and ``data``.

    Returns:
        The loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()
    z = model.encode(x, train_pos_edge_index)
    loss = model.recon_loss(z, train_pos_edge_index)

    if variational:
        # Add the KL term, scaled by the inverse of the number of nodes.
        loss = loss + (1 / data.num_nodes) * model.kl_loss()

    # Every call builds a new graph through encode(), so the old one is not
    # retained; keeping it would only hold on to autograd buffers.
    loss.backward()

    optimizer.step()
    return float(loss)

In [None]:
def test(pos_edge_index, neg_edge_index):
    """Score the given positive and negative edges with the current model.

    Node embeddings are computed from the notebook globals ``x`` and
    ``train_pos_edge_index`` with gradients disabled, then handed to
    ``model.test`` together with the two edge sets.

    Returns:
        Whatever ``model.test`` returns for these edges.
    """
    model.eval()

    with torch.set_grad_enabled(False):
        embeddings = model.encode(x, train_pos_edge_index)

    scores = model.test(embeddings, pos_edge_index, neg_edge_index)
    return scores

In [None]:
# Experiment switches read by the cells below.
name_data = 'pubmed'    # dataset name handed to the Planetoid loader
operation = 'sum'       # branch merge rule: 'sum', anything else concatenates
variational = False     # True selects VGAE, False selects GAE
with_features = True    # False replaces node features with an identity matrix
sigmoid = False         # apply a sigmoid to the dense adjacency scores

In [None]:
NUM_RUNS = 20          # number of independent edge splits / trained models
NUM_EPOCHS = 400       # optimisation steps per run
LEARNING_RATE = 0.01   # Adam learning rate
out_channels = 16      # output size of each encoder branch

loss_list = []  # training loss of every epoch of every run
auc_list = []   # first score returned by test() after the last epoch, per run
ap_list = []    # second score returned by test() after the last epoch, per run

dataset = Planetoid(root='/tmp/'+name_data, split='random', name=name_data, transform=T.NormalizeFeatures())

for i in range(NUM_RUNS):
    # Split the edges of the first graph anew for every run.
    data = train_test_split_edges(dataset[0], val_ratio=0.05, test_ratio=0.1)

    if with_features:
        num_features = dataset.num_features
        x = data.x.to(device)
    else:
        # Featureless model: one identity-matrix row per node.
        num_features = data.x.shape[0]
        x = torch.eye(data.x.shape[0]).to(device)

    if variational:
        model = VGAE(VariationalLinearEncoder(num_features, out_channels, operation))
    else:
        model = GAE(LinearEncoder(num_features, out_channels, operation))

    model = model.to(device)

    train_pos_edge_index = data.train_pos_edge_index.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    for epoch in range(1, NUM_EPOCHS + 1):
        loss = train()
        loss_list.append(loss)

    # Evaluate once per run, after the last epoch: only these final scores
    # are printed and stored, so per-epoch evaluation would be wasted work.
    auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
    print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}, Loss: {:.4f}'.format(epoch, auc, ap, loss))
    auc_list.append(auc)
    ap_list.append(ap)

In [None]:
import numpy as np

In [None]:
# Report both metrics the same way: mean +- standard deviation over the runs,
# in the same (unscaled) units as the mean.
print(f'{name_data}:')
print(f'mean auc: {np.mean(auc_list)} +- {np.std(auc_list)}\nmean ap: {np.mean(ap_list)} +- {np.std(ap_list)}')

To build the reconstructed adjacency matrix from the learned node embeddings:

In [None]:
# Embed all nodes with the most recently trained model, without gradients.
with torch.no_grad():
    z = model.encode(x, train_pos_edge_index)

# Dense matrix of inner products between every pair of node embeddings,
# passed through a sigmoid when the `sigmoid` switch is set.
adj = z @ z.t()
if sigmoid:
    adj = torch.sigmoid(adj)

In [None]:
# Decode the held-out positive and negative test edges with sigmoid=True,
# then stack the two result tensors with the positive edges first.
pos_pred, neg_pred = (
    model.decoder(z, edges, sigmoid=True)
    for edges in (data.test_pos_edge_index, data.test_neg_edge_index)
)
pred = torch.cat((pos_pred, neg_pred), dim=0)