[Intro to graph neural networks (ML Tech Talks)](https://www.youtube.com/watch?v=8owQBFAHw7E)

In [1]:
!pip install numpy  # ==1.18.5
!pip install tensorflow==2.3.0
!pip install spektral==0.6.2

import numpy as np

np.float = float  # module 'numpy' has no attribute 'float'
np.int = int  # module 'numpy' has no attribute 'int'
np.bool = bool  # module 'numpy' has no attribute 'bool'
np.object = object  # module 'numpy' has no attribute 'object'

import tensorflow as tf
import spektral

[38;2;247;104;31m[39m
[38;2;251;92;40m [39m[31m[38;2;252;88;43m [39m[38;2;253;84;46m▄[39m[38;2;253;80;50m█[39m[38;2;254;76;53m▀[39m[38;2;254;72;56m [39m[38;2;254;68;60m█[39m[38;2;254;64;64m [39m[38;2;254;61;67m█[39m[38;2;254;57;71m [39m[38;2;254;54;75m▀[39m[38;2;253;50;79m█[39m[38;2;253;47;83m▄[39m[38;2;252;44;87m [39m[38;2;251;41;91m [39m[38;2;250;38;95m [39m[32m[38;2;249;35;99m [39m[38;2;248;32;103m▄[39m[38;2;246;29;107m█[39m[38;2;245;26;111m▀[39m[38;2;243;24;116m [39m[38;2;241;22;120m█[39m[38;2;239;19;124m [39m[38;2;237;17;128m█[39m[38;2;235;15;132m [39m[38;2;233;13;137m▀[39m[38;2;230;11;141m█[39m[38;2;228;10;145m▄[39m[38;2;225;8;149m [39m[38;2;222;7;153m [39m[38;2;219;6;158m [39m[33m[38;2;216;4;162m [39m[38;2;213;3;166m▄[39m[38;2;210;3;170m█[39m[38;2;207;2;174m▀[39m[38;2;203;1;178m [39m[38;2;200;1;182m█[39m[38;2;197;1;185m [39m[38;2;193;1;189m█[39m[38;2;189;1;193m [39m[38;2;186;1;196m▀[39m[38;2;

2023-12-18 14:10:14.186593: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-18 14:10:14.188317: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-18 14:10:14.216848: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-18 14:10:14.217462: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# load the cora dataset, preprocess and give access to these pieces of information
adj, features, labels, train_mask, val_mask, test_mask = spektral.datasets.citation.load_data(dataset_name='cora')

features = features.todense()  # comes as sparse information, so casting to a denser representation
adj = adj.todense() + np.eye(adj.shape[0])  # adjacency matrix comes withou self-edges, adding identity matrix to it
features = features.astype('float32')
adj = adj.astype('float32')

print(features.shape)
print(adj.shape)
print(labels.shape)

print(np.sum(train_mask))
print(np.sum(val_mask))
print(np.sum(test_mask))

Loading cora dataset
Pre-processing node features
(2708, 1433)
(2708, 2708)
(2708, 7)
140
500
1000


In [14]:
def masked_softmax_cross_entropy(logits, labels, mask):
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    loss *= mask
    return tf.reduce_mean(loss)

def masked_accuracy(logits, labels, mask):
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy_all = tf.cast(correct_prediction, tf.float32)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    accuracy_all *= mask
    return tf.reduce_mean(accuracy_all)

In [8]:
def gnn(fts, adj, transform, activation):
    seq_fts = transform(fts)
    ret_fts = tf.matmul(adj, seq_fts)
    return activation(ret_fts)

In [5]:
def train_cora(fts, adj, gnn_fn, units, epochs, lr):
    lyr_1 = tf.keras.layers.Dense(units)  # computes the hidden layer
    lyr_2 = tf.keras.layers.Dense(7)  # computas the classification for each node -> 7 outputs for 7 classes

    def cora_gnn(fts, adj):
        hidden = gnn_fn(fts, adj, lyr_1, tf.nn.relu)  # computes the hidden features in every node
        logits = gnn_fn(hidden, adj, lyr_2, tf.identity)  # neural networks predictions
        return logits
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)  #standard optimizer

    best_accuracy = 0.0
    for ep in range(epochs + 1):
        with tf.GradientTape() as t:  # tf.GradientTape() to record the gradients
            logits = cora_gnn(fts, adj)  # compute predictions
            loss = masked_softmax_cross_entropy(logits, labels, train_mask)  # compute the loss (training set)

        variables = t.watched_variables()
        grads = t.gradient(loss, variables)
        optimizer.apply_gradients(zip(grads, variables))

        logits = cora_gnn(fts, adj)
        val_accuracy = masked_accuracy(logits, labels, val_mask)
        test_accuracy = masked_accuracy(logits, labels, test_mask)

        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            print('Epoch', ep, '| Training loss:', loss.numpy(), '| Val accuracy:',
                val_accuracy.numpy(), '| Test accuracy:', test_accuracy.numpy())


In [16]:
train_cora(features, adj, gnn, 32, 200, 0.01)  # standard parameters
# passing raw adjacency matrix which means we'll be multiplying our features with just a 0 1 matrix
# therefore we're implementing sumpooling. expected problems with the scale of the features
# as a result it might not give us the best result possible

Epoch 0 | Training loss: 1.993462 | Val accuracy: 0.116 | Test accuracy: 0.151
Epoch 1 | Training loss: 1.808962 | Val accuracy: 0.17599998 | Test accuracy: 0.171
Epoch 2 | Training loss: 2.025656 | Val accuracy: 0.328 | Test accuracy: 0.33599997
Epoch 3 | Training loss: 1.5937511 | Val accuracy: 0.616 | Test accuracy: 0.613
Epoch 8 | Training loss: 1.0398486 | Val accuracy: 0.662 | Test accuracy: 0.654
Epoch 9 | Training loss: 0.95276517 | Val accuracy: 0.688 | Test accuracy: 0.69200003
Epoch 14 | Training loss: 0.6364694 | Val accuracy: 0.696 | Test accuracy: 0.709
Epoch 18 | Training loss: 0.4498574 | Val accuracy: 0.72 | Test accuracy: 0.739
Epoch 19 | Training loss: 0.40629724 | Val accuracy: 0.74399996 | Test accuracy: 0.75399995
Epoch 25 | Training loss: 0.24033621 | Val accuracy: 0.746 | Test accuracy: 0.76800007
Epoch 26 | Training loss: 0.22193703 | Val accuracy: 0.74799997 | Test accuracy: 0.767


In [17]:
train_cora(features, tf.eye(adj.shape[0]), gnn, 32, 200, 0.01)  # replacing the adjacency matrix with the identity matrix
# it will render the operation of multiplying with the adjacency matrix not changing anything
# so we just have basically a pointwise classifier in each of our nodes
# so a standard MLP Model that's shared across the vertices

Epoch 0 | Training loss: 1.9457034 | Val accuracy: 0.18 | Test accuracy: 0.173
Epoch 1 | Training loss: 1.9319072 | Val accuracy: 0.344 | Test accuracy: 0.344
Epoch 2 | Training loss: 1.9124532 | Val accuracy: 0.41199997 | Test accuracy: 0.43899995
Epoch 3 | Training loss: 1.887785 | Val accuracy: 0.41399997 | Test accuracy: 0.45499995
Epoch 4 | Training loss: 1.8597511 | Val accuracy: 0.44599998 | Test accuracy: 0.46199998
Epoch 5 | Training loss: 1.829275 | Val accuracy: 0.44799998 | Test accuracy: 0.47799993
Epoch 7 | Training loss: 1.7591162 | Val accuracy: 0.45999998 | Test accuracy: 0.48699996
Epoch 8 | Training loss: 1.7194053 | Val accuracy: 0.466 | Test accuracy: 0.49599993
Epoch 26 | Training loss: 0.7053366 | Val accuracy: 0.468 | Test accuracy: 0.48899996
Epoch 27 | Training loss: 0.6496189 | Val accuracy: 0.474 | Test accuracy: 0.49799994
Epoch 28 | Training loss: 0.596165 | Val accuracy: 0.476 | Test accuracy: 0.50899994
Epoch 29 | Training loss: 0.54521954 | Val accuracy

In [18]:
# meanpooling
deg = tf.reduce_sum(adj, axis=-1)  # degree matrix - the degree of each node
train_cora(features, adj / deg, gnn, 32, 200, 0.01)  # adj / deg = covalent to multiplying it with the inverse of the degree matrix
# this will give us a normalized propagation rule - which should hopefully deal with any exploding signal

Epoch 0 | Training loss: 1.9453741 | Val accuracy: 0.21 | Test accuracy: 0.21599999
Epoch 1 | Training loss: 1.932291 | Val accuracy: 0.29 | Test accuracy: 0.296
Epoch 5 | Training loss: 1.857199 | Val accuracy: 0.292 | Test accuracy: 0.295
Epoch 6 | Training loss: 1.8349357 | Val accuracy: 0.30199996 | Test accuracy: 0.306
Epoch 7 | Training loss: 1.8112036 | Val accuracy: 0.31799996 | Test accuracy: 0.32299998
Epoch 8 | Training loss: 1.7855774 | Val accuracy: 0.33799997 | Test accuracy: 0.342
Epoch 9 | Training loss: 1.7578923 | Val accuracy: 0.34999996 | Test accuracy: 0.361
Epoch 10 | Training loss: 1.7279224 | Val accuracy: 0.36399996 | Test accuracy: 0.37999997
Epoch 11 | Training loss: 1.6961602 | Val accuracy: 0.38799998 | Test accuracy: 0.40999997
Epoch 12 | Training loss: 1.6629497 | Val accuracy: 0.42999998 | Test accuracy: 0.44199997
Epoch 13 | Training loss: 1.6279391 | Val accuracy: 0.472 | Test accuracy: 0.48999998
Epoch 14 | Training loss: 1.59126 | Val accuracy: 0.525

In [19]:
# specific version of the normalization that Thomas Kipf has proposed in the
# Graph Convolutional Network Model 1/sqrt(deg) and then multiply that on the both sides of the adjacency matrix
# so you can get a normalized adjacency matrix
norm_deg = tf.linalg.diag(1.0 / tf.sqrt(deg))
norm_adj = tf.matmul(norm_deg, tf.matmul(adj, norm_deg)) # equivalent to D^{-1/2} and then multiplying the adjacency matris on both sides
train_cora(features, norm_adj, gnn, 32, 200, 0.01)

Epoch 0 | Training loss: 1.9459133 | Val accuracy: 0.264 | Test accuracy: 0.267
Epoch 1 | Training loss: 1.9348077 | Val accuracy: 0.316 | Test accuracy: 0.33599997
Epoch 2 | Training loss: 1.9229347 | Val accuracy: 0.35999998 | Test accuracy: 0.36599997
Epoch 3 | Training loss: 1.9074231 | Val accuracy: 0.38599998 | Test accuracy: 0.39299995
Epoch 4 | Training loss: 1.8895105 | Val accuracy: 0.40199998 | Test accuracy: 0.41199997
Epoch 5 | Training loss: 1.8695261 | Val accuracy: 0.468 | Test accuracy: 0.47699994
Epoch 6 | Training loss: 1.8486681 | Val accuracy: 0.53 | Test accuracy: 0.54399997
Epoch 7 | Training loss: 1.8265046 | Val accuracy: 0.57 | Test accuracy: 0.596
Epoch 8 | Training loss: 1.8020225 | Val accuracy: 0.594 | Test accuracy: 0.621
Epoch 9 | Training loss: 1.7751367 | Val accuracy: 0.60599995 | Test accuracy: 0.63500005
Epoch 10 | Training loss: 1.7464532 | Val accuracy: 0.62 | Test accuracy: 0.641
Epoch 11 | Training loss: 1.7164586 | Val accuracy: 0.63 | Test acc