## GAE: Graph Auto-Encoder

In [1]:
import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

# !pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
# !pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
# !pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

1.12.0+cu116


  from .autonotebook import tqdm as notebook_tqdm


In [8]:
import torch
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv
from torch_geometric.utils import train_test_split_edges

# Tutorial 6  
Graph AutoEncoders GAE &  
Variational Graph Autoencoders VGAE    

[paper](https://arxiv.org/pdf/1611.07308.pdf)  
[code](https://github.com/rusty1s/pytorch_geometric/blob/master/examples/autoencoder.py)

## Graph AutoEncoder GAE

### Load the data

In [3]:
dataset = Planetoid("dataset", "CiteSeer", transform=T.NormalizeFeatures())
dataset.data

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!


Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327])

In [37]:
data = dataset[0]
data.train_mask = data.val_mask = data.test_mask = None
data

Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327])

In [38]:
data = train_test_split_edges(data)
data



Data(x=[3327, 3703], y=[3327], val_pos_edge_index=[2, 227], test_pos_edge_index=[2, 455], train_pos_edge_index=[2, 7740], train_neg_adj_mask=[3327, 3327], val_neg_edge_index=[2, 227], test_neg_edge_index=[2, 455])

### Define the Encoder

In [39]:
class GCNEncoder(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, 2 * out_channels, cached=True) # cached only for transductive learning
        self.conv2 = GCNConv(2 * out_channels, out_channels, cached=True) # cached only for transductive learning

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        return self.conv2(x, edge_index)
    

### Define the Autoencoder

In [40]:
from torch_geometric.nn import GAE

In [41]:
# parameters
out_channels = 2
num_features = dataset.num_features
epochs = 100

# model
model = GAE(GCNEncoder(num_features, out_channels))

# move to GPU (if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
x = data.x.to(device)
train_pos_edge_index = data.train_pos_edge_index.to(device)

# inizialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [42]:
def train():
    model.train()
    optimizer.zero_grad()
    z = model.encode(x, train_pos_edge_index)
    loss = model.recon_loss(z, train_pos_edge_index)
    #if args.variational:
    #   loss = loss + (1 / data.num_nodes) * model.kl_loss()
    loss.backward()
    optimizer.step()
    return float(loss)


def test(pos_edge_index, neg_edge_index):
    model.eval()
    with torch.no_grad():
        z = model.encode(x, train_pos_edge_index)
    return model.test(z, pos_edge_index, neg_edge_index)



In [43]:
for epoch in range(1, epochs + 1):
    loss = train()

    auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
    print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))

Epoch: 001, AUC: 0.6329, AP: 0.6626
Epoch: 002, AUC: 0.6373, AP: 0.6690
Epoch: 003, AUC: 0.6390, AP: 0.6719
Epoch: 004, AUC: 0.6387, AP: 0.6727
Epoch: 005, AUC: 0.6386, AP: 0.6734
Epoch: 006, AUC: 0.6382, AP: 0.6745
Epoch: 007, AUC: 0.6382, AP: 0.6753
Epoch: 008, AUC: 0.6385, AP: 0.6763
Epoch: 009, AUC: 0.6385, AP: 0.6769
Epoch: 010, AUC: 0.6383, AP: 0.6781
Epoch: 011, AUC: 0.6382, AP: 0.6793
Epoch: 012, AUC: 0.6382, AP: 0.6810
Epoch: 013, AUC: 0.6372, AP: 0.6825
Epoch: 014, AUC: 0.6362, AP: 0.6842
Epoch: 015, AUC: 0.6344, AP: 0.6855
Epoch: 016, AUC: 0.6322, AP: 0.6865
Epoch: 017, AUC: 0.6311, AP: 0.6882
Epoch: 018, AUC: 0.6299, AP: 0.6894
Epoch: 019, AUC: 0.6285, AP: 0.6900
Epoch: 020, AUC: 0.6280, AP: 0.6905
Epoch: 021, AUC: 0.6276, AP: 0.6910
Epoch: 022, AUC: 0.6272, AP: 0.6914
Epoch: 023, AUC: 0.6272, AP: 0.6918
Epoch: 024, AUC: 0.6271, AP: 0.6918
Epoch: 025, AUC: 0.6272, AP: 0.6922
Epoch: 026, AUC: 0.6274, AP: 0.6928
Epoch: 027, AUC: 0.6282, AP: 0.6932
Epoch: 028, AUC: 0.6293, AP:

In [44]:
Z = model.encode(x, train_pos_edge_index)
Z

tensor([[ 0.3792,  0.4845],
        [-0.8236, -0.9332],
        [ 0.6134,  0.7135],
        ...,
        [-0.2114, -0.2070],
        [ 0.6134,  0.7135],
        [ 0.6134,  0.7135]], device='cuda:0', grad_fn=<AddBackward0>)

In [45]:
### AUC: Area Under the Curve
### AP: Average Precision

## Are the results (AUC) and (AP) easy to read and compare?

# Use Tensorboard

In [46]:
from torch.utils.tensorboard import SummaryWriter

In [51]:
# parameters
out_channels = 4
num_features = dataset.num_features
epochs = 100

# model
model = GAE(GCNEncoder(num_features, out_channels))

# move to GPU (if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
x = data.x.to(device)
train_pos_edge_index = data.train_pos_edge_index.to(device)

# inizialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

### Import tensorboard

#### Installation: (if needed) "pip install tensorboard"

In [52]:
writer = SummaryWriter('runs/GAE1_experiment_'+'4d_100_epochs')

In [53]:
for epoch in range(1, epochs + 1):
    loss = train()
    auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
    print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))
    
    
    writer.add_scalar('auc train',auc,epoch) # new line
    writer.add_scalar('ap train',ap,epoch)   # new line

Epoch: 001, AUC: 0.6320, AP: 0.6678
Epoch: 002, AUC: 0.6365, AP: 0.6722
Epoch: 003, AUC: 0.6382, AP: 0.6741
Epoch: 004, AUC: 0.6402, AP: 0.6764
Epoch: 005, AUC: 0.6404, AP: 0.6777
Epoch: 006, AUC: 0.6413, AP: 0.6799
Epoch: 007, AUC: 0.6415, AP: 0.6824
Epoch: 008, AUC: 0.6402, AP: 0.6851
Epoch: 009, AUC: 0.6366, AP: 0.6859
Epoch: 010, AUC: 0.6331, AP: 0.6877
Epoch: 011, AUC: 0.6301, AP: 0.6890
Epoch: 012, AUC: 0.6281, AP: 0.6896
Epoch: 013, AUC: 0.6276, AP: 0.6911
Epoch: 014, AUC: 0.6278, AP: 0.6917
Epoch: 015, AUC: 0.6284, AP: 0.6924
Epoch: 016, AUC: 0.6291, AP: 0.6930
Epoch: 017, AUC: 0.6298, AP: 0.6937
Epoch: 018, AUC: 0.6307, AP: 0.6942
Epoch: 019, AUC: 0.6327, AP: 0.6953
Epoch: 020, AUC: 0.6375, AP: 0.6980
Epoch: 021, AUC: 0.6478, AP: 0.7013
Epoch: 022, AUC: 0.6603, AP: 0.7066
Epoch: 023, AUC: 0.6750, AP: 0.7124
Epoch: 024, AUC: 0.6901, AP: 0.7187
Epoch: 025, AUC: 0.7026, AP: 0.7238
Epoch: 026, AUC: 0.7097, AP: 0.7268
Epoch: 027, AUC: 0.7164, AP: 0.7298
Epoch: 028, AUC: 0.7273, AP:

In [54]:
%load_ext tensorboard
%tensorboard --logdir runs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 96762), started 0:06:23 ago. (Use '!kill 96762' to kill it.)

## Graph Variational AutoEncoder (GVAE)

In [55]:
from torch_geometric.nn import VGAE

In [58]:
dataset = Planetoid("dataset", "CiteSeer", transform=T.NormalizeFeatures())
data = dataset[0]
data.train_mask = data.val_mask = data.test_mask = data.y = None
data = train_test_split_edges(data)


class VariationalGCNEncoder(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(VariationalGCNEncoder, self).__init__()
        self.conv1 = GCNConv(in_channels, 2 * out_channels, cached=True) # cached only for transductive learning
        self.conv_mu = GCNConv(2 * out_channels, out_channels, cached=True)
        self.conv_logstd = GCNConv(2 * out_channels, out_channels, cached=True)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        return self.conv_mu(x, edge_index), self.conv_logstd(x, edge_index)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!


In [59]:
out_channels = 2
num_features = dataset.num_features
epochs = 300


model = VGAE(VariationalGCNEncoder(num_features, out_channels))  # new line

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
x = data.x.to(device)
train_pos_edge_index = data.train_pos_edge_index.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [60]:
def train():
    model.train()
    optimizer.zero_grad()
    z = model.encode(x, train_pos_edge_index)
    loss = model.recon_loss(z, train_pos_edge_index)
    
    loss = loss + (1 / data.num_nodes) * model.kl_loss()  # new line
    loss.backward()
    optimizer.step()
    return float(loss)


def test(pos_edge_index, neg_edge_index):
    model.eval()
    with torch.no_grad():
        z = model.encode(x, train_pos_edge_index)
    return model.test(z, pos_edge_index, neg_edge_index)

In [61]:
writer = SummaryWriter('runs/VGAE_experiment_'+'2d_100_epochs')

for epoch in range(1, epochs + 1):
    loss = train()
    auc, ap = test(data.test_pos_edge_index, data.test_neg_edge_index)
    print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))
    
    
    writer.add_scalar('auc train',auc,epoch) # new line
    writer.add_scalar('ap train',ap,epoch)   # new line

Epoch: 001, AUC: 0.6446, AP: 0.6615
Epoch: 002, AUC: 0.6582, AP: 0.6720
Epoch: 003, AUC: 0.6589, AP: 0.6733
Epoch: 004, AUC: 0.6593, AP: 0.6747
Epoch: 005, AUC: 0.6595, AP: 0.6752
Epoch: 006, AUC: 0.6596, AP: 0.6754
Epoch: 007, AUC: 0.6593, AP: 0.6752
Epoch: 008, AUC: 0.6590, AP: 0.6750
Epoch: 009, AUC: 0.6589, AP: 0.6748
Epoch: 010, AUC: 0.6585, AP: 0.6742
Epoch: 011, AUC: 0.6583, AP: 0.6739
Epoch: 012, AUC: 0.6579, AP: 0.6735
Epoch: 013, AUC: 0.6576, AP: 0.6733
Epoch: 014, AUC: 0.6570, AP: 0.6729
Epoch: 015, AUC: 0.6567, AP: 0.6726
Epoch: 016, AUC: 0.6561, AP: 0.6721
Epoch: 017, AUC: 0.6555, AP: 0.6717
Epoch: 018, AUC: 0.6549, AP: 0.6712
Epoch: 019, AUC: 0.6542, AP: 0.6710
Epoch: 020, AUC: 0.6534, AP: 0.6703
Epoch: 021, AUC: 0.6531, AP: 0.6704
Epoch: 022, AUC: 0.6527, AP: 0.6703
Epoch: 023, AUC: 0.6525, AP: 0.6702
Epoch: 024, AUC: 0.6527, AP: 0.6704
Epoch: 025, AUC: 0.6531, AP: 0.6709
Epoch: 026, AUC: 0.6537, AP: 0.6718
Epoch: 027, AUC: 0.6544, AP: 0.6725
Epoch: 028, AUC: 0.6551, AP:

In [62]:
%load_ext tensorboard
%tensorboard --logdir runs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 96762), started 2:23:35 ago. (Use '!kill 96762' to kill it.)