<a href="https://colab.research.google.com/github/DataScienceSolutions/GNN/blob/main/Intro_to_graph_neural_networks_(ML_Tech_Talks).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Intro to graph neural networks (ML Tech Talks)

https://www.youtube.com/watch?v=8owQBFAHw7E&t=1937s

In [1]:
!pip install numpy==1.18.5
!pip install tensorflow==2.3.0
!pip install spektral==0.6.2

import numpy as np
import tensorflow as tf
import spektral



In [2]:
# Runtime / Change runtime type / Hardware = GPU
tf.test.gpu_device_name()

'/device:GPU:0'

In [3]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 17734619567826574528, name: "/device:XLA_CPU:0"
 device_type: "XLA_CPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 5503377676249660966
 physical_device_desc: "device: XLA_CPU device", name: "/device:XLA_GPU:0"
 device_type: "XLA_GPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 13085154099502636814
 physical_device_desc: "device: XLA_GPU device", name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 14648653952
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 9936629069816933398
 physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"]

In [4]:
adj, features, labels, train_mask, val_mask, test_mask = spektral.datasets.citation.load_data(dataset_name='cora')

features = features.todense()
adj = adj.todense() + np.eye(adj.shape[0])
features = features.astype('float32')
adj = adj.astype('float32')

print(features.shape)
print(adj.shape)
print(labels.shape)

print(np.sum(train_mask))
print(np.sum(val_mask))
print(np.sum(test_mask))

Loading cora dataset
Pre-processing node features
(2708, 1433)
(2708, 2708)
(2708, 7)
140
500
1000


In [5]:
def masked_softmax_cross_entropy(logits, labels, mask):
  loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
  mask = tf.cast(mask, dtype=tf.float32)
  mask /= tf.reduce_mean(mask)
  loss *= mask
  return tf.reduce_mean(loss)

def masked_accuracy(logits, labels, mask):
  correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
  accuracy_all = tf.cast(correct_prediction, tf.float32)
  mask = tf.cast(mask, dtype=tf.float32)
  mask /= tf.reduce_mean(mask)
  accuracy_all *= mask
  return tf.reduce_mean(accuracy_all)

In [6]:
def gnn(fts, adj, transform, activation):
  seq_fts = transform(fts)
  ret_fts = tf.matmul(adj, seq_fts)
  return activation(ret_fts)

In [7]:
def train_cora(fts, adj, gnn_fn, units, epochs, lr):
  lyr_1 = tf.keras.layers.Dense(units)
  lyr_2 = tf.keras.layers.Dense(7)

  def cora_gnn(fts, adj):
    hidden = gnn_fn(fts, adj, lyr_1, tf.nn.relu)
    logits = gnn_fn(hidden, adj, lyr_2, tf.identity)
    return logits

  optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

  best_accuracy = 0.0
  for ep in range(epochs +1):
    with tf.GradientTape() as t:
      logits = cora_gnn(fts, adj)
      loss = masked_softmax_cross_entropy(logits, labels, train_mask)

    variables = t.watched_variables()
    grads = t.gradient(loss, variables)
    optimizer.apply_gradients(zip(grads, variables))

    logits = cora_gnn(fts, adj)
    val_accuracy = masked_accuracy(logits, labels, val_mask)
    test_accuracy = masked_accuracy(logits, labels, test_mask)
    
    if val_accuracy > best_accuracy:
      best_accuracy = val_accuracy
      print('Epoch', ep, '| Training loss:', loss.numpy(), '| Val accuracy:', val_accuracy.numpy(), '|Test accuracy:', test_accuracy.numpy())


In [8]:
# sum-pooling
train_cora(features, adj, gnn, 32, 200, 0.01)

Epoch 0 | Training loss: 2.0505333 | Val accuracy: 0.328 |Test accuracy: 0.31799996
Epoch 2 | Training loss: 1.6232643 | Val accuracy: 0.6279999 |Test accuracy: 0.65599996
Epoch 3 | Training loss: 1.4150956 | Val accuracy: 0.66199994 |Test accuracy: 0.6769999
Epoch 4 | Training loss: 1.2940761 | Val accuracy: 0.682 |Test accuracy: 0.69799995
Epoch 6 | Training loss: 1.1062359 | Val accuracy: 0.69 |Test accuracy: 0.7299999
Epoch 7 | Training loss: 1.0041367 | Val accuracy: 0.72199994 |Test accuracy: 0.7469999
Epoch 20 | Training loss: 0.35307795 | Val accuracy: 0.7239999 |Test accuracy: 0.75299996
Epoch 21 | Training loss: 0.32622668 | Val accuracy: 0.72599995 |Test accuracy: 0.75399995
Epoch 22 | Training loss: 0.3021875 | Val accuracy: 0.73199993 |Test accuracy: 0.74799997
Epoch 24 | Training loss: 0.2591448 | Val accuracy: 0.73599994 |Test accuracy: 0.74399996
Epoch 25 | Training loss: 0.23997287 | Val accuracy: 0.73999995 |Test accuracy: 0.74499995
Epoch 26 | Training loss: 0.222343

In [9]:
# NN w/o graph structure (identity for adjacency matrix)
train_cora(features, tf.eye(adj.shape[0]), gnn, 32, 200, 0.01)

Epoch 0 | Training loss: 1.9453545 | Val accuracy: 0.16799998 |Test accuracy: 0.15799998
Epoch 1 | Training loss: 1.9287205 | Val accuracy: 0.29799998 |Test accuracy: 0.26999998
Epoch 2 | Training loss: 1.9065403 | Val accuracy: 0.434 |Test accuracy: 0.41099992
Epoch 3 | Training loss: 1.8791271 | Val accuracy: 0.46199998 |Test accuracy: 0.44899994
Epoch 4 | Training loss: 1.8481181 | Val accuracy: 0.48999998 |Test accuracy: 0.47699994
Epoch 5 | Training loss: 1.8145783 | Val accuracy: 0.51 |Test accuracy: 0.49799994
Epoch 16 | Training loss: 1.2710665 | Val accuracy: 0.51399994 |Test accuracy: 0.49199992
Epoch 23 | Training loss: 0.8303304 | Val accuracy: 0.516 |Test accuracy: 0.50999993
Epoch 24 | Training loss: 0.769275 | Val accuracy: 0.5199999 |Test accuracy: 0.51199996
Epoch 25 | Training loss: 0.7098981 | Val accuracy: 0.52199996 |Test accuracy: 0.5109999
Epoch 27 | Training loss: 0.5975907 | Val accuracy: 0.52799994 |Test accuracy: 0.5149999
Epoch 41 | Training loss: 0.13554528

In [10]:
# mean-pooling = normalize using degree matrix
deg = tf.reduce_sum(adj, axis=-1)
train_cora(features, adj / deg, gnn, 32, 200, 0.01)

Epoch 0 | Training loss: 1.946705 | Val accuracy: 0.17199999 |Test accuracy: 0.20499998
Epoch 1 | Training loss: 1.9353514 | Val accuracy: 0.18 |Test accuracy: 0.212
Epoch 2 | Training loss: 1.922523 | Val accuracy: 0.18599999 |Test accuracy: 0.22099999
Epoch 3 | Training loss: 1.906187 | Val accuracy: 0.18800001 |Test accuracy: 0.226
Epoch 4 | Training loss: 1.8877115 | Val accuracy: 0.19599998 |Test accuracy: 0.22999999
Epoch 5 | Training loss: 1.8668983 | Val accuracy: 0.21 |Test accuracy: 0.247
Epoch 6 | Training loss: 1.845751 | Val accuracy: 0.22399998 |Test accuracy: 0.27299994
Epoch 7 | Training loss: 1.8227019 | Val accuracy: 0.26999998 |Test accuracy: 0.311
Epoch 8 | Training loss: 1.796603 | Val accuracy: 0.32999998 |Test accuracy: 0.36599997
Epoch 9 | Training loss: 1.7679597 | Val accuracy: 0.39399996 |Test accuracy: 0.42599997
Epoch 10 | Training loss: 1.7384866 | Val accuracy: 0.48799998 |Test accuracy: 0.501
Epoch 11 | Training loss: 1.7077496 | Val accuracy: 0.55599993

In [13]:
# symmetric normalization
# GCN (Kipf & Welling, ICLR 2017)
norm_deg = tf.linalg.diag(1.0 / tf.sqrt(deg))
norm_adj = tf.matmul(norm_deg, tf.matmul(adj, norm_deg))
train_cora(features, norm_adj, gnn, 32, 200, 0.01)

Epoch 0 | Training loss: 1.9460517 | Val accuracy: 0.22599998 |Test accuracy: 0.23199996
Epoch 2 | Training loss: 1.924489 | Val accuracy: 0.28799996 |Test accuracy: 0.27799997
Epoch 3 | Training loss: 1.9086853 | Val accuracy: 0.306 |Test accuracy: 0.31799996
Epoch 5 | Training loss: 1.8730012 | Val accuracy: 0.32599998 |Test accuracy: 0.34499997
Epoch 6 | Training loss: 1.852701 | Val accuracy: 0.37399998 |Test accuracy: 0.39299995
Epoch 7 | Training loss: 1.8306578 | Val accuracy: 0.42799997 |Test accuracy: 0.42999998
Epoch 8 | Training loss: 1.8070008 | Val accuracy: 0.458 |Test accuracy: 0.47700003
Epoch 9 | Training loss: 1.7813257 | Val accuracy: 0.49999994 |Test accuracy: 0.51699996
Epoch 10 | Training loss: 1.7534341 | Val accuracy: 0.532 |Test accuracy: 0.5679999
Epoch 11 | Training loss: 1.7238009 | Val accuracy: 0.58199996 |Test accuracy: 0.6179999
Epoch 12 | Training loss: 1.6926037 | Val accuracy: 0.626 |Test accuracy: 0.659
Epoch 13 | Training loss: 1.6593904 | Val accur