In [2]:
import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

# !pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
# !pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
# !pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

2.1.2


In [3]:
import torch
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv
from torch_geometric.utils import train_test_split_edges

# Tutorial 6  
Graph AutoEncoders GAE &  
Variational Graph Autoencoders VGAE    

[paper](https://arxiv.org/pdf/1611.07308.pdf)  
[code](https://github.com/rusty1s/pytorch_geometric/blob/master/examples/autoencoder.py)

## Graph AutoEncoder GAE

### Load the data

In [14]:
dataset = Planetoid("\..", "CiteSeer", transform=T.NormalizeFeatures())
dataset.data



Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327])

In [15]:
data = dataset[0]
data.train_mask = data.val_mask = data.test_mask = None
data

Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327])

In [17]:
data = train_test_split_edges(data)



In [18]:
data

Data(x=[3327, 3703], y=[3327], val_pos_edge_index=[2, 227], test_pos_edge_index=[2, 455], train_pos_edge_index=[2, 7740], train_neg_adj_mask=[3327, 3327], val_neg_edge_index=[2, 227], test_neg_edge_index=[2, 455])

### Define the Encoder

In [20]:
class GCNEncoder(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(GCNEncoder, self).__init__()
        self.conv1 = GCNConv(in_channels, 2 * out_channels, cached=True) # cached only for transductive learning
        self.conv2 = GCNConv(2 * out_channels, out_channels, cached=True) # cached only for transductive learning

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        return self.conv2(x, edge_index)
    

### Define the Autoencoder

In [21]:
from torch_geometric.nn import GAE

In [22]:
# parameters
out_channels = 2
num_features = dataset.num_features
epochs = 100

# model
model = GAE(GCNEncoder(num_features, out_channels))

# move to GPU (if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
x = data.x.to(device)
train_pos_edge_index = data.train_pos_edge_index.to(device)

# inizialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [23]:
model

GAE(
  (encoder): GCNEncoder(
    (conv1): GCNConv(3703, 4)
    (conv2): GCNConv(4, 2)
  )
  (decoder): InnerProductDecoder()
)

In [24]:
def train():
    model.train()
    optimizer.zero_grad()
    z = model.encode(x, train_pos_edge_index)
    loss = model.recon_loss(z, train_pos_edge_index)
    #if args.variational:
    #   loss = loss + (1 / data.num_nodes) * model.kl_loss()
    loss.backward()
    optimizer.step()
    return float(loss)


def test(pos_edge_index, neg_edge_index):
    model.eval()
    with torch.no_grad():
        z = model.encode(x, train_pos_edge_index)
    return model.test(z, pos_edge_index, neg_edge_index)



In [25]:
for epoch in range(1, epochs + 1):
    loss = train()

    auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
    print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))

Epoch: 001, AUC: 0.6617, AP: 0.7071
Epoch: 002, AUC: 0.6404, AP: 0.6953
Epoch: 003, AUC: 0.6273, AP: 0.6778
Epoch: 004, AUC: 0.6256, AP: 0.6785
Epoch: 005, AUC: 0.6253, AP: 0.6815
Epoch: 006, AUC: 0.6294, AP: 0.6882
Epoch: 007, AUC: 0.6318, AP: 0.6928
Epoch: 008, AUC: 0.6319, AP: 0.6934
Epoch: 009, AUC: 0.6321, AP: 0.6937
Epoch: 010, AUC: 0.6319, AP: 0.6936
Epoch: 011, AUC: 0.6329, AP: 0.6948
Epoch: 012, AUC: 0.6341, AP: 0.6966
Epoch: 013, AUC: 0.6348, AP: 0.6984
Epoch: 014, AUC: 0.6351, AP: 0.6995
Epoch: 015, AUC: 0.6355, AP: 0.7010
Epoch: 016, AUC: 0.6363, AP: 0.7032
Epoch: 017, AUC: 0.6369, AP: 0.7047
Epoch: 018, AUC: 0.6369, AP: 0.7068
Epoch: 019, AUC: 0.6365, AP: 0.7082
Epoch: 020, AUC: 0.6365, AP: 0.7097
Epoch: 021, AUC: 0.6364, AP: 0.7109
Epoch: 022, AUC: 0.6364, AP: 0.7125
Epoch: 023, AUC: 0.6365, AP: 0.7133
Epoch: 024, AUC: 0.6361, AP: 0.7138
Epoch: 025, AUC: 0.6354, AP: 0.7137
Epoch: 026, AUC: 0.6354, AP: 0.7142
Epoch: 027, AUC: 0.6351, AP: 0.7146
Epoch: 028, AUC: 0.6353, AP:

In [26]:
Z = model.encode(x, train_pos_edge_index)
Z

tensor([[ 0.1112, -0.3019],
        [-0.9847,  0.8179],
        [ 0.9190, -0.7191],
        ...,
        [-0.5016,  0.1157],
        [ 0.6691, -0.6324],
        [ 0.8664, -0.6709]], device='cuda:0', grad_fn=<AddBackward0>)

## Are the results (AUC) and (AP) easy to read and compare?

# Use Tensorboard

In [27]:
from torch.utils.tensorboard import SummaryWriter

In [39]:
# parameters
out_channels = 20
num_features = dataset.num_features
epochs = 1000

# model
model = GAE(GCNEncoder(num_features, out_channels))

# move to GPU (if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
x = data.x.to(device)
train_pos_edge_index = data.train_pos_edge_index.to(device)

# inizialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

### Import tensorboard

#### Installation: (if needed) "pip install tensorboard"

In [40]:
writer = SummaryWriter('runs/GAE1_experiment_'+'20d_1000_epochs')

In [41]:
for epoch in range(1, epochs + 1):
    loss = train()
    auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
    print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))
    
    
    writer.add_scalar('auc train',auc,epoch) # new line
    writer.add_scalar('ap train',ap,epoch)   # new line

Epoch: 001, AUC: 0.6547, AP: 0.7043
Epoch: 002, AUC: 0.6298, AP: 0.6855
Epoch: 003, AUC: 0.6278, AP: 0.6841
Epoch: 004, AUC: 0.6330, AP: 0.6928
Epoch: 005, AUC: 0.6375, AP: 0.7039
Epoch: 006, AUC: 0.6394, AP: 0.7119
Epoch: 007, AUC: 0.6399, AP: 0.7160
Epoch: 008, AUC: 0.6403, AP: 0.7174
Epoch: 009, AUC: 0.6404, AP: 0.7180
Epoch: 010, AUC: 0.6422, AP: 0.7192
Epoch: 011, AUC: 0.6565, AP: 0.7256
Epoch: 012, AUC: 0.6949, AP: 0.7422
Epoch: 013, AUC: 0.7410, AP: 0.7665
Epoch: 014, AUC: 0.7575, AP: 0.7767
Epoch: 015, AUC: 0.7601, AP: 0.7783
Epoch: 016, AUC: 0.7633, AP: 0.7805
Epoch: 017, AUC: 0.7655, AP: 0.7837
Epoch: 018, AUC: 0.7636, AP: 0.7852
Epoch: 019, AUC: 0.7618, AP: 0.7841
Epoch: 020, AUC: 0.7661, AP: 0.7867
Epoch: 021, AUC: 0.7673, AP: 0.7875
Epoch: 022, AUC: 0.7626, AP: 0.7853
Epoch: 023, AUC: 0.7593, AP: 0.7827
Epoch: 024, AUC: 0.7603, AP: 0.7837
Epoch: 025, AUC: 0.7628, AP: 0.7847
Epoch: 026, AUC: 0.7628, AP: 0.7825
Epoch: 027, AUC: 0.7617, AP: 0.7820
Epoch: 028, AUC: 0.7597, AP:

## Graph Variational AutoEncoder (GVAE)

In [42]:
from torch_geometric.nn import VGAE

In [43]:
dataset = Planetoid("\..", "CiteSeer", transform=T.NormalizeFeatures())
data = dataset[0]
data.train_mask = data.val_mask = data.test_mask = data.y = None
data = train_test_split_edges(data)


class VariationalGCNEncoder(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(VariationalGCNEncoder, self).__init__()
        self.conv1 = GCNConv(in_channels, 2 * out_channels, cached=True) # cached only for transductive learning
        self.conv_mu = GCNConv(2 * out_channels, out_channels, cached=True)
        self.conv_logstd = GCNConv(2 * out_channels, out_channels, cached=True)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        return self.conv_mu(x, edge_index), self.conv_logstd(x, edge_index)



In [50]:
out_channels = 20
num_features = dataset.num_features
epochs = 1000


model = VGAE(VariationalGCNEncoder(num_features, out_channels))  # new line

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
x = data.x.to(device)
train_pos_edge_index = data.train_pos_edge_index.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [51]:
def train():
    model.train()
    optimizer.zero_grad()
    z = model.encode(x, train_pos_edge_index)
    loss = model.recon_loss(z, train_pos_edge_index)
    
    loss = loss + (1 / data.num_nodes) * model.kl_loss()  # new line
    loss.backward()
    optimizer.step()
    return float(loss)


def test(pos_edge_index, neg_edge_index):
    model.eval()
    with torch.no_grad():
        z = model.encode(x, train_pos_edge_index)
    return model.test(z, pos_edge_index, neg_edge_index)

In [53]:
writer = SummaryWriter('runs/VGAE_experiment_'+'20d_1000_epochs')

for epoch in range(1, epochs + 1):
    loss = train()
    auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
    print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))
    
    
    writer.add_scalar('auc train',auc,epoch) # new line
    writer.add_scalar('ap train',ap,epoch)   # new line

Epoch: 001, AUC: 0.8663, AP: 0.8727
Epoch: 002, AUC: 0.8669, AP: 0.8735
Epoch: 003, AUC: 0.8663, AP: 0.8730
Epoch: 004, AUC: 0.8654, AP: 0.8720
Epoch: 005, AUC: 0.8647, AP: 0.8714
Epoch: 006, AUC: 0.8658, AP: 0.8729
Epoch: 007, AUC: 0.8663, AP: 0.8734
Epoch: 008, AUC: 0.8650, AP: 0.8720
Epoch: 009, AUC: 0.8644, AP: 0.8715
Epoch: 010, AUC: 0.8658, AP: 0.8726
Epoch: 011, AUC: 0.8665, AP: 0.8734
Epoch: 012, AUC: 0.8660, AP: 0.8730
Epoch: 013, AUC: 0.8649, AP: 0.8723
Epoch: 014, AUC: 0.8638, AP: 0.8714
Epoch: 015, AUC: 0.8645, AP: 0.8718
Epoch: 016, AUC: 0.8660, AP: 0.8732
Epoch: 017, AUC: 0.8665, AP: 0.8736
Epoch: 018, AUC: 0.8653, AP: 0.8726
Epoch: 019, AUC: 0.8631, AP: 0.8707
Epoch: 020, AUC: 0.8634, AP: 0.8711
Epoch: 021, AUC: 0.8658, AP: 0.8735
Epoch: 022, AUC: 0.8670, AP: 0.8746
Epoch: 023, AUC: 0.8653, AP: 0.8726
Epoch: 024, AUC: 0.8628, AP: 0.8708
Epoch: 025, AUC: 0.8639, AP: 0.8715
Epoch: 026, AUC: 0.8658, AP: 0.8733
Epoch: 027, AUC: 0.8661, AP: 0.8734
Epoch: 028, AUC: 0.8649, AP: