<a href="https://colab.research.google.com/github/TheoBacqueyrisse/Graph-Neural-Networks/blob/main/GNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Basic Graph Neural Network Architecture**

In [1]:
# Let us first clone the GitHub repository
%%capture
!git clone https://github.com/TheoBacqueyrisse/Graph-Neural-Networks.git

In [2]:
# Install dependencies
%%capture
!pip install -r /content/Graph-Neural-Networks/requirements.txt

In [3]:
# Import Packages
import pandas
import numpy as np
import torch
from torch_geometric.datasets import ZINC

# Visualisation
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns

# Data Loader
from torch_geometric.loader import DataLoader

# Neural Network Architecture
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GATConv, GINConv
from torch_geometric.nn import global_max_pool as gmp, global_mean_pool as gap, global_add_pool as gad
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Loss Function
from torch.nn import MSELoss, L1Loss

# Optimizer
from torch.optim import Adam, SGD, Adagrad

# See the progression of the Training
import tqdm

## GNN Module Architecture

In [4]:
EMBEDDING_SIZE = 32

class GNN(torch.nn.Module):
    def __init__(self):
      super(GNN, self).__init__()

      # Care about the design of the NN here
      self.initial_conv = GATConv(in_channels = 1, out_channels = EMBEDDING_SIZE)
      self.conv_layer1 = GATConv(in_channels = EMBEDDING_SIZE, out_channels = EMBEDDING_SIZE)
      self.conv_layer2 = GATConv(in_channels = EMBEDDING_SIZE, out_channels = EMBEDDING_SIZE)

      self.pooling = gmp
      self.out = Linear(in_features = EMBEDDING_SIZE, out_features = 1)

    def forward(self, x, edge_index, edge_attribute, batch_index):


      y = self.initial_conv(x, edge_index, edge_attribute)
      y = F.sigmoid(y)
      y = F.dropout(y, p = 0.2)

      y = self.conv_layer1(y, edge_index, edge_attribute)
      y = F.sigmoid(y)
      y = F.dropout(y, p = 0.1)

      y = self.conv_layer2(y, edge_index, edge_attribute)
      y = F.sigmoid(y)

      y = self.pooling(y, batch_index)

      out = self.out(y)

      return out, y

model = GNN()
print(model)

GNN(
  (initial_conv): GATConv(1, 32, heads=1)
  (conv_layer1): GATConv(32, 32, heads=1)
  (conv_layer2): GATConv(32, 32, heads=1)
  (out): Linear(in_features=32, out_features=1, bias=True)
)


## Configuration

In [9]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

NUM_EPOCHS = 50

loss_function = L1Loss()

optimizer = Adam(params = model.parameters(), lr = 0.005)

scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=20, min_lr=0.00001)

In [6]:
NB_GRAPHS_PER_BATCH = 64

train = ZINC('/content/Graph-Neural-Networks/data', split = 'train')
val = ZINC('/content/Graph-Neural-Networks/data', split = 'val')
test = ZINC('/content/Graph-Neural-Networks/data', split = 'test')

train_loader = DataLoader(train,
                          batch_size = NB_GRAPHS_PER_BATCH,
                          shuffle = True)

val_loader = DataLoader(val,
                        batch_size = NB_GRAPHS_PER_BATCH,
                        shuffle = False)

test_loader = DataLoader(test,
                         batch_size = NB_GRAPHS_PER_BATCH,
                         shuffle = False)

print("Number of Batches in Train Loader :", len(train_loader))
print("Number of Batches in Val Loader :", len(val_loader))
print("Number of Batches in Test Loader :", len(test_loader))

Downloading https://www.dropbox.com/s/feo9qle74kg48gy/molecules.zip?dl=1
Extracting /content/Graph-Neural-Networks/data/molecules.zip
Downloading https://raw.githubusercontent.com/graphdeeplearning/benchmarking-gnns/master/data/molecules/train.index
Downloading https://raw.githubusercontent.com/graphdeeplearning/benchmarking-gnns/master/data/molecules/val.index
Downloading https://raw.githubusercontent.com/graphdeeplearning/benchmarking-gnns/master/data/molecules/test.index
Processing...
Processing train dataset: 100%|██████████| 220011/220011 [00:22<00:00, 9581.10it/s] 
Processing val dataset: 100%|██████████| 24445/24445 [00:04<00:00, 5602.23it/s] 
Processing test dataset: 100%|██████████| 5000/5000 [00:00<00:00, 9860.06it/s] 
Done!


Number of Batches in Train Loader : 3438
Number of Batches in Val Loader : 382
Number of Batches in Test Loader : 79


In [7]:
test[0]

Data(x=[24, 1], edge_index=[2, 52], edge_attr=[52], y=[1])

## Train and Test Functions 🚀

In [10]:
def train(train_loader, val_loader):
  for epoch in range(NUM_EPOCHS+1):

    model.train()
    tot_train_loss = 0.0

    for batch in train_loader:

      # Use GPU
      batch.to(device)

      # Set Gradient values to 0
      optimizer.zero_grad()

      pred, y = model(batch.x.float(), batch.edge_index, batch.edge_attr, batch.batch)

      # Compute Loss and Gradients
      loss = loss_function(pred, batch.y.view(-1, 1).float())
      loss.backward()
      tot_train_loss += loss.item()

      optimizer.step()

    average_train_loss = tot_train_loss / len(train_loader)

    model.eval()
    with torch.no_grad():
        tot_val_loss = 0.0

        for val_batch in val_loader:
            val_batch.to(device)

            val_pred, val_y = model(val_batch.x.float(), val_batch.edge_index, val_batch.edge_attr, val_batch.batch)
            val_loss = loss_function(val_pred, val_batch.y.view(-1, 1).float())

            tot_val_loss += val_loss.item()

        average_val_loss = tot_val_loss / len(val_loader)

    scheduler.step(average_val_loss)

    # if epoch % 10 == 0:
    print(f"Epoch {epoch} -> Train Loss: {average_train_loss:.4f} - Val Loss: {average_val_loss:.4f}")


def test(test_loader):
  model.eval()
  with torch.no_grad():
      tot_test_loss = 0.0

      for test_batch in test_loader:
          test_batch.to(device)

          test_pred, test_y = model(test_batch.x.float(), test_batch.edge_index, test_batch.edge_attr, test_batch.batch)
          test_loss = loss_function(test_pred, test_batch.y.view(-1, 1).float())

          tot_test_loss += test_loss.item()

      average_test_loss = tot_test_loss / len(test_loader)

  print(f"Test Loss: {average_test_loss:.4f}")

## Model Training and Evaluation ⚡



In [11]:
train(train_loader, val_loader)



Epoch 0 -> Train Loss: 1.1324 - Val Loss: 1.0270
Epoch 1 -> Train Loss: 1.0118 - Val Loss: 1.0104
Epoch 2 -> Train Loss: 0.9879 - Val Loss: 0.9783
Epoch 3 -> Train Loss: 0.9781 - Val Loss: 0.9651
Epoch 4 -> Train Loss: 0.9705 - Val Loss: 0.9526
Epoch 5 -> Train Loss: 0.9452 - Val Loss: 0.9302
Epoch 6 -> Train Loss: 0.9235 - Val Loss: 0.9272
Epoch 7 -> Train Loss: 0.9156 - Val Loss: 0.8973
Epoch 8 -> Train Loss: 0.9152 - Val Loss: 0.8986
Epoch 9 -> Train Loss: 0.9271 - Val Loss: 0.9648
Epoch 10 -> Train Loss: 0.9186 - Val Loss: 0.9027
Epoch 11 -> Train Loss: 0.8889 - Val Loss: 0.8770
Epoch 12 -> Train Loss: 0.8858 - Val Loss: 0.8740
Epoch 13 -> Train Loss: 0.9238 - Val Loss: 0.8883
Epoch 14 -> Train Loss: 0.8814 - Val Loss: 0.8714
Epoch 15 -> Train Loss: 0.8856 - Val Loss: 0.8695
Epoch 16 -> Train Loss: 0.8771 - Val Loss: 0.8721
Epoch 17 -> Train Loss: 0.8767 - Val Loss: 0.8717
Epoch 18 -> Train Loss: 0.8806 - Val Loss: 0.9316
Epoch 19 -> Train Loss: 0.8772 - Val Loss: 0.8671
Epoch 20 -

In [12]:
test(test_loader)

Test Loss: 0.8571
