<a href="https://colab.research.google.com/github/AslantheAslan/GNN/blob/main/GNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!pip install numpy
#!pip install tensorflow==2.3.0 
#!pip install spektral==0.6.2

import numpy as np
import tensorflow as tf
import spektral
print(tf.__version__)
print(spektral.__version__)

Collecting spektral==0.6.2
  Downloading spektral-0.6.2-py3-none-any.whl (95 kB)
[?25l[K     |███▍                            | 10 kB 15.3 MB/s eta 0:00:01[K     |██████▉                         | 20 kB 19.5 MB/s eta 0:00:01[K     |██████████▎                     | 30 kB 22.1 MB/s eta 0:00:01[K     |█████████████▊                  | 40 kB 12.7 MB/s eta 0:00:01[K     |█████████████████▏              | 51 kB 10.1 MB/s eta 0:00:01[K     |████████████████████▋           | 61 kB 9.8 MB/s eta 0:00:01[K     |████████████████████████        | 71 kB 7.5 MB/s eta 0:00:01[K     |███████████████████████████▌    | 81 kB 8.2 MB/s eta 0:00:01[K     |███████████████████████████████ | 92 kB 7.9 MB/s eta 0:00:01[K     |████████████████████████████████| 95 kB 2.7 MB/s 
Installing collected packages: spektral
Successfully installed spektral-0.6.2
2.7.0
0.6.2


In [None]:
print("Aslan Hacı Ismail is the owner of this colab notebook")

adj, features, labels, train_mask, val_mask, test_mask = spektral.datasets.citation.load_data(dataset_name='cora')
#it works for 'cora', 'pubmed', and 'citeseer'
features = features.todense()
adj = adj.todense() + np.eye(adj.shape[0])
features = features.astype('float32')
adj = adj.astype('float32')

print(features.shape)
print(adj.shape)
print(labels.shape)

print(np.sum(train_mask))
print(np.sum(val_mask))
print(np.sum(test_mask))

Aslan Hacı Ismail is the owner of this colab notebook
Loading cora dataset
Pre-processing node features
(2708, 1433)
(2708, 2708)
(2708, 7)
140
500
1000


In [None]:
print(adj)

[[1. 0. 0. ... 0. 0. 0.]
 [0. 1. 1. ... 0. 0. 0.]
 [0. 1. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 1. 1.]
 [0. 0. 0. ... 0. 1. 1.]]


In [None]:
def masked_softmax_cross_entropy(logits, labels, mask):
  loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
  mask = tf.cast(mask, dtype = tf.float32)
  mask /= tf.reduce_mean(mask)
  loss *= mask
  return tf.reduce_mean(loss)

def masked_accuracy(logits, labels, mask):
  correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
  accuracy_all = tf.cast(correct_prediction, tf.float32)
  mask = tf.cast(mask, dtype=tf.float32)
  mask /= tf.reduce_mean(mask)
  accuracy_all *= mask
  return tf.reduce_mean(accuracy_all)

In [None]:
def gnn(fts, adj, transform, activation):
  seq_fts = transform(fts)
  ret_fts = tf.matmul(adj, seq_fts)
  return activation(ret_fts)

In [None]:
def train_cora(fts, adj, gnn_fn, units, epochs, lr):
  lyr_1 = tf.keras.layers.Dense(units)
  lyr_2 = tf.keras.layers.Dense(7)

  def cora_gnn(fts, adj):
    hidden = gnn_fn(fts, adj, lyr_1, tf.nn.relu)
    logits = gnn_fn(hidden, adj, lyr_2, tf.identity)
    return logits

  optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

  best_accuracy = 0.0
  for ep in range(epochs + 1):
    with tf.GradientTape() as t:
      logits = cora_gnn(fts, adj)
      loss = masked_softmax_cross_entropy(logits, labels, train_mask)

    variables = t.watched_variables()
    grads = t.gradient(loss, variables)
    optimizer.apply_gradients(zip(grads, variables))

    logits = cora_gnn(fts, adj)
    val_accuracy = masked_accuracy(logits, labels, val_mask)
    test_accuracy = masked_accuracy(logits, labels, test_mask)

    if val_accuracy > best_accuracy:
      best_accuracy = val_accuracy
      print('Epoch', ep, '| Training loss:', loss.numpy(), '| Val accuracy:', val_accuracy.numpy(), '| Test accuracy:', test_accuracy.numpy())



In [None]:
train_cora(features, adj, gnn, 32, 200, 0.01)
#produces the maximum val accuracy of 79.4 and test accuracy of 77.2 for 'pubmed' dataset.
#produces the maximum val accuracy of 66.8 and test accuracy of 65.7 for 'citeseer' dataset.

Epoch 0 | Training loss: 2.045733 | Val accuracy: 0.188 | Test accuracy: 0.18499999
Epoch 2 | Training loss: 1.6977042 | Val accuracy: 0.484 | Test accuracy: 0.527
Epoch 3 | Training loss: 1.4164035 | Val accuracy: 0.67 | Test accuracy: 0.703
Epoch 4 | Training loss: 1.2301356 | Val accuracy: 0.684 | Test accuracy: 0.677
Epoch 6 | Training loss: 1.0825502 | Val accuracy: 0.69 | Test accuracy: 0.682
Epoch 7 | Training loss: 0.9989937 | Val accuracy: 0.72999996 | Test accuracy: 0.729
Epoch 8 | Training loss: 0.9167713 | Val accuracy: 0.742 | Test accuracy: 0.74999994
Epoch 9 | Training loss: 0.84682935 | Val accuracy: 0.76000005 | Test accuracy: 0.766
Epoch 10 | Training loss: 0.7806585 | Val accuracy: 0.77599996 | Test accuracy: 0.7859999


In [None]:
train_cora(features, tf.eye(adj.shape[0]), gnn, 32, 200, 0.01)
#the same algorithm produces the maximum val accuracy of 69.0 and test accuracy of 70.49 for 'pubmed' dataset.
#produces the maximum val accuracy of 50.6 and test accuracy of 52.7 for 'citeseer' dataset.


Epoch 0 | Training loss: 1.9462022 | Val accuracy: 0.222 | Test accuracy: 0.183
Epoch 1 | Training loss: 1.9308687 | Val accuracy: 0.28599998 | Test accuracy: 0.3
Epoch 2 | Training loss: 1.9107335 | Val accuracy: 0.468 | Test accuracy: 0.44599998
Epoch 3 | Training loss: 1.8855045 | Val accuracy: 0.508 | Test accuracy: 0.49899998
Epoch 4 | Training loss: 1.85642 | Val accuracy: 0.53 | Test accuracy: 0.48799998
Epoch 5 | Training loss: 1.8248341 | Val accuracy: 0.536 | Test accuracy: 0.49299997
Epoch 6 | Training loss: 1.7902638 | Val accuracy: 0.552 | Test accuracy: 0.49699998


In [None]:
deg = tf.reduce_sum(adj, axis=-1)
train_cora(features, adj / deg, gnn, 32, 200, 0.01)
#the same algorithm produces the maximum val accuracy of 76.0 and test accuracy of 75.4 for 'pubmed' dataset.
#produces the maximum val accuracy of 68.4 and test accuracy of 66.5 for 'citeseer' dataset.

Epoch 0 | Training loss: 1.9459122 | Val accuracy: 0.121999994 | Test accuracy: 0.13
Epoch 4 | Training loss: 1.8753542 | Val accuracy: 0.124 | Test accuracy: 0.149
Epoch 5 | Training loss: 1.8528135 | Val accuracy: 0.14999999 | Test accuracy: 0.17499998
Epoch 6 | Training loss: 1.8286971 | Val accuracy: 0.17999999 | Test accuracy: 0.21499999
Epoch 7 | Training loss: 1.8020357 | Val accuracy: 0.20799999 | Test accuracy: 0.24199998
Epoch 8 | Training loss: 1.7736503 | Val accuracy: 0.232 | Test accuracy: 0.264
Epoch 9 | Training loss: 1.7427189 | Val accuracy: 0.254 | Test accuracy: 0.298
Epoch 10 | Training loss: 1.7098124 | Val accuracy: 0.302 | Test accuracy: 0.322
Epoch 11 | Training loss: 1.6757282 | Val accuracy: 0.35399997 | Test accuracy: 0.369
Epoch 12 | Training loss: 1.6404551 | Val accuracy: 0.40399998 | Test accuracy: 0.41599998
Epoch 13 | Training loss: 1.6035055 | Val accuracy: 0.452 | Test accuracy: 0.47599998
Epoch 14 | Training loss: 1.5647258 | Val accuracy: 0.516 | T

In [107]:
norm_deg = tf.linalg.diag(1.0 / tf.sqrt(deg))
norm_adj = tf.matmul(norm_deg, tf.matmul(adj, norm_deg))
train_cora(features, norm_adj, gnn, 32, 200, 0.01)
#the same algorithm produces the maximum val accuracy of 78.2 and test accuracy of 76.3 for 'pubmed' dataset.
#produces the maximum val accuracy of 68.2 and test accuracy of 67.7 for 'citeseer' dataset.

Epoch 0 | Training loss: 1.9459555 | Val accuracy: 0.19999999 | Test accuracy: 0.22599998
Epoch 3 | Training loss: 1.9009651 | Val accuracy: 0.20199999 | Test accuracy: 0.22799999
Epoch 4 | Training loss: 1.88024 | Val accuracy: 0.26 | Test accuracy: 0.29299998
Epoch 5 | Training loss: 1.858334 | Val accuracy: 0.334 | Test accuracy: 0.35500002
Epoch 6 | Training loss: 1.8346263 | Val accuracy: 0.38399997 | Test accuracy: 0.41400003
Epoch 7 | Training loss: 1.8088305 | Val accuracy: 0.438 | Test accuracy: 0.466
Epoch 8 | Training loss: 1.7813953 | Val accuracy: 0.50200003 | Test accuracy: 0.511
Epoch 9 | Training loss: 1.7522472 | Val accuracy: 0.53800005 | Test accuracy: 0.559
Epoch 10 | Training loss: 1.7215056 | Val accuracy: 0.58199996 | Test accuracy: 0.60499996
Epoch 11 | Training loss: 1.6888312 | Val accuracy: 0.612 | Test accuracy: 0.636
Epoch 12 | Training loss: 1.6542604 | Val accuracy: 0.636 | Test accuracy: 0.662
Epoch 13 | Training loss: 1.6179273 | Val accuracy: 0.664 | T

In [104]:
def normalize(data):
    # Written from scratch by Aslan
    def minmax2D(data):
      best_min=9999999
      best_max=-9999999
      for i in range(len(data)):
        for j in range(len(data[0])):
          temp_min = min(data[i,j],best_min)
          temp_max = max(data[i,j],best_max)
          if temp_min < best_min:
            best_min = temp_min
          if temp_max > best_max:
            best_max = temp_max
      return [best_min, best_max]

    normalized_fts = (data - minmax2D(data)[0]) / (minmax2D(data)[1] - minmax2D(data)[0])
    return normalized_fts

norm_fts = normalize(features)

In [105]:
# test the normalizer function
import numpy as np

random_data = np.random.randint(1, 100, size=(4,5))
print(random_data)
print(normalize(random_data))

[[83 85 76 27 19]
 [24 39 49 90 64]
 [25 11 25 28 26]
 [44 65 64 26 28]]
[[0.91139241 0.93670886 0.82278481 0.20253165 0.10126582]
 [0.16455696 0.35443038 0.48101266 1.         0.67088608]
 [0.17721519 0.         0.17721519 0.21518987 0.18987342]
 [0.41772152 0.6835443  0.67088608 0.18987342 0.21518987]]


In [108]:
train_cora(norm_fts, norm_adj, gnn, 32, 200, 0.01)
# after the normalization of features, test accuracy hasn't changed significantly. It is because the feature matrix only consist of 1's and 0's

Epoch 0 | Training loss: 1.955019 | Val accuracy: 0.52000004 | Test accuracy: 0.548
Epoch 1 | Training loss: 1.7771242 | Val accuracy: 0.592 | Test accuracy: 0.642
Epoch 3 | Training loss: 1.3254207 | Val accuracy: 0.602 | Test accuracy: 0.63500005
Epoch 4 | Training loss: 1.1023406 | Val accuracy: 0.644 | Test accuracy: 0.66400003
Epoch 5 | Training loss: 0.9003641 | Val accuracy: 0.69600004 | Test accuracy: 0.714
Epoch 6 | Training loss: 0.720656 | Val accuracy: 0.72999996 | Test accuracy: 0.745
Epoch 7 | Training loss: 0.5659827 | Val accuracy: 0.736 | Test accuracy: 0.7689999
Epoch 8 | Training loss: 0.4380961 | Val accuracy: 0.774 | Test accuracy: 0.7889999
Epoch 11 | Training loss: 0.19331741 | Val accuracy: 0.77599996 | Test accuracy: 0.8109998
Epoch 12 | Training loss: 0.14655073 | Val accuracy: 0.7819999 | Test accuracy: 0.80899984
Epoch 13 | Training loss: 0.11129642 | Val accuracy: 0.784 | Test accuracy: 0.80899984
Epoch 14 | Training loss: 0.08471244 | Val accuracy: 0.78800