# GNN on CORA dataset

In [16]:
import numpy as np
import tensorflow as tf 
import spektral 

In [17]:
# Load the Cora citation dataset and pull out the node masks for each split.
cora_dataset = spektral.datasets.citation.Citation(name='cora')
test_mask = cora_dataset.mask_te
train_mask = cora_dataset.mask_tr
val_mask = cora_dataset.mask_va

# The dataset exposes its graphs as a sequence; the first one is used here.
graph = cora_dataset.graphs[0]
features = graph.x
adj = graph.a
labels = graph.y

# Previous loader call, kept for reference:
#adj, features, labels, train_mask, val_mask, test_mask = spektral.datasets.citation.load_data(dataset_name = 'cora')

# Only convert to dense if the object offers `todense` (i.e. looks sparse).
# For SciPy sparse matrices `todense()` returns an `np.matrix`, a class NumPy
# no longer recommends; `np.asarray` turns the result into a plain ndarray so
# every later cell works with ordinary arrays.
if hasattr(features, 'todense'):
    features = np.asarray(features.todense())
if hasattr(adj, 'todense'):
    adj = np.asarray(adj.todense())

# Add self-connections and cast types
adj = adj + np.eye(adj.shape[0])
features = features.astype('float32')
adj = adj.astype('float32')


# Sanity checks: array shapes, then the number of nodes selected by each mask.
print(features.shape)
print(adj.shape)
print(labels.shape)

print(np.sum(train_mask))
print(np.sum(val_mask))
print(np.sum(test_mask))

(2708, 1433)
(2708, 2708)
(2708, 7)
140
500
1000


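**Archive** — earlier, unguarded version of the dense conversion, kept for reference only (the loading cell above checks for `todense` before converting):

```python
features = features.todense()
adj = adj.todense() + np.eye(adj.shape[0])
features = features.astype('float32')
adj = adj.astype('float32')
```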

In [22]:
def masked_softmax_cross_entropy(logits, labels, mask):
    """Softmax cross-entropy restricted to the entries selected by ``mask``.

    Args:
        logits: Unnormalised class scores, forwarded as ``logits`` to
            ``tf.nn.softmax_cross_entropy_with_logits``.
        labels: Target distributions, forwarded as ``labels`` to the same op
            (presumably one-hot rows -- confirm against the caller).
        mask: Per-example selector; it is cast to float32, so boolean or 0/1
            values are both accepted.

    Returns:
        A scalar tensor: the mean of ``loss * mask / mean(mask)``. For a 0/1
        mask this equals the average loss over the selected examples only.

    Note:
        The mask is divided by its own mean, so a mask with no selected
        entries yields a division by zero and a non-finite result.
    """
    # Rescale the mask so that the plain mean below averages over selected rows only.
    weights = tf.cast(mask, dtype=tf.float32)
    weights = weights / tf.reduce_mean(weights)

    per_example = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    return tf.reduce_mean(per_example * weights)

def masked_accuracy(logits, labels, mask):
    """Classification accuracy restricted to the entries selected by ``mask``.

    Args:
        logits: Class scores; the predicted class is the argmax along axis 1.
        labels: Targets; the true class is the argmax along axis 1
            (presumably one-hot rows -- confirm against the caller).
        mask: Per-example selector; it is cast to float32, so boolean or 0/1
            values are both accepted.

    Returns:
        A scalar tensor: the mean of ``correct * mask / mean(mask)``. For a
        0/1 mask this equals the fraction of selected examples predicted
        correctly.

    Note:
        The mask is divided by its own mean, so a mask with no selected
        entries yields a division by zero and a non-finite result.
    """
    predicted = tf.argmax(logits, 1)
    expected = tf.argmax(labels, 1)
    hits = tf.cast(tf.equal(predicted, expected), tf.float32)

    # Rescale the mask so that the plain mean below averages over selected rows only.
    weights = tf.cast(mask, dtype=tf.float32)
    weights = weights / tf.reduce_mean(weights)

    return tf.reduce_mean(hits * weights)

In [23]:
def gnn(fits, adj, transform, activation):
    """Apply one graph propagation step: ``activation(adj @ transform(fits))``.

    Args:
        fits: Input node features, passed unchanged to ``transform``.
        adj: Matrix that left-multiplies the transformed features via
            ``tf.matmul`` (the callers in this notebook pass adjacency-style
            matrices).
        transform: Callable applied to ``fits`` first (e.g. a Keras layer).
        activation: Callable applied to the aggregated result.

    Returns:
        Whatever ``activation`` returns for the aggregated features.
    """
    # Transform every node's features first, then mix them across nodes with `adj`.
    return activation(tf.matmul(adj, transform(fits)))

In [26]:
def train_cora(fts, adj, gnn_fn, units, epochs, lr):
    """Train a two-layer GNN and print a line whenever validation accuracy improves.

    The function reads the notebook-level globals ``labels``, ``train_mask``,
    ``val_mask`` and ``test_mask``; they must be defined before it is called.

    Args:
        fts: Node feature matrix fed to the first layer.
        adj: Propagation matrix handed to ``gnn_fn`` for both layers.
        gnn_fn: Callable ``(features, adj, transform, activation)`` that
            implements a single propagation step.
        units: Width of the hidden ``Dense`` layer.
        epochs: Upper epoch index; the loop covers ``0..epochs`` inclusive,
            i.e. ``epochs + 1`` optimisation steps.
        lr: Learning rate for the Adam optimiser.

    Returns:
        None. Progress is reported through ``print`` only.
    """
    hidden_layer = tf.keras.layers.Dense(units)
    output_layer = tf.keras.layers.Dense(7)  # 7 output logits (hard-coded)

    def forward(node_fts, node_adj):
        # ReLU hidden step followed by a linear (identity) output step.
        hidden = gnn_fn(node_fts, node_adj, hidden_layer, tf.nn.relu)
        return gnn_fn(hidden, node_adj, output_layer, tf.identity)

    adam = tf.keras.optimizers.Adam(learning_rate=lr)
    best_val = 0.0

    for epoch in range(epochs + 1):
        # Forward pass under the tape so the layer variables get recorded.
        with tf.GradientTape() as tape:
            train_loss = masked_softmax_cross_entropy(forward(fts, adj), labels, train_mask)

        weights = tape.watched_variables()
        adam.apply_gradients(zip(tape.gradient(train_loss, weights), weights))

        # Second forward pass: metrics are measured with the freshly updated
        # weights, whereas `train_loss` still comes from before the update.
        eval_logits = forward(fts, adj)
        val_acc = masked_accuracy(eval_logits, labels, val_mask)
        test_acc = masked_accuracy(eval_logits, labels, test_mask)

        # Only report epochs that set a new validation-accuracy record.
        if not val_acc > best_val:
            continue
        best_val = val_acc
        print('Epoch', epoch, '|Training loss: ', train_loss.numpy(), '|Val accuracy: ', val_acc.numpy(), 'Test accuracy:', test_acc.numpy())

In [27]:
# Baseline run: propagate over the self-looped adjacency matrix without any normalisation.
train_cora(fts=features, adj=adj, gnn_fn=gnn, units=32, epochs=200, lr=0.01)

Epoch 0 |Training loss:  3.9985986 |Val accuracy:  0.22999999 Test accuracy: 0.236
Epoch 1 |Training loss:  4.459577 |Val accuracy:  0.35999998 Test accuracy: 0.38500002
Epoch 2 |Training loss:  2.972389 |Val accuracy:  0.54999995 Test accuracy: 0.58500004
Epoch 3 |Training loss:  1.2214642 |Val accuracy:  0.57 Test accuracy: 0.577
Epoch 4 |Training loss:  1.0482813 |Val accuracy:  0.59 Test accuracy: 0.609
Epoch 5 |Training loss:  0.75445527 |Val accuracy:  0.678 Test accuracy: 0.71000004
Epoch 8 |Training loss:  0.31895858 |Val accuracy:  0.682 Test accuracy: 0.73499995
Epoch 9 |Training loss:  0.260948 |Val accuracy:  0.684 Test accuracy: 0.74499995
Epoch 10 |Training loss:  0.22523393 |Val accuracy:  0.69600004 Test accuracy: 0.755
Epoch 11 |Training loss:  0.19419615 |Val accuracy:  0.702 Test accuracy: 0.75200003
Epoch 12 |Training loss:  0.1653759 |Val accuracy:  0.706 Test accuracy: 0.751
Epoch 13 |Training loss:  0.13913079 |Val accuracy:  0.71199995 Test accuracy: 0.75
Epoch 

In [None]:
# Replace the adjacency with an identity matrix so that no information flows between nodes:
# low accuracy because graph structure is not specified
train_cora(fts=features, adj=tf.eye(adj.shape[0]), gnn_fn=gnn, units=32, epochs=200, lr=0.01)

Epoch 0 |Training loss:  1.957834 |Val accuracy:  0.21199998 Test accuracy: 0.212
Epoch 1 |Training loss:  1.6890169 |Val accuracy:  0.3 Test accuracy: 0.271
Epoch 2 |Training loss:  1.4644334 |Val accuracy:  0.358 Test accuracy: 0.313
Epoch 3 |Training loss:  1.2283909 |Val accuracy:  0.36799997 Test accuracy: 0.342
Epoch 4 |Training loss:  0.99268365 |Val accuracy:  0.39799997 Test accuracy: 0.36900002
Epoch 5 |Training loss:  0.7763204 |Val accuracy:  0.406 Test accuracy: 0.377
Epoch 7 |Training loss:  0.44625774 |Val accuracy:  0.42199996 Test accuracy: 0.409
Epoch 8 |Training loss:  0.3332327 |Val accuracy:  0.436 Test accuracy: 0.42299995
Epoch 9 |Training loss:  0.2482627 |Val accuracy:  0.444 Test accuracy: 0.433
Epoch 10 |Training loss:  0.18484962 |Val accuracy:  0.454 Test accuracy: 0.44799998
Epoch 11 |Training loss:  0.13816255 |Val accuracy:  0.464 Test accuracy: 0.45299995
Epoch 12 |Training loss:  0.1042427 |Val accuracy:  0.47 Test accuracy: 0.46199998
Epoch 13 |Traini

In [29]:
# Per-node degree: sum of each adjacency row. Kept as a notebook-level name
# because the following cell reuses `deg`.
deg = tf.reduce_sum(adj, axis=-1)

# NOTE(review): `deg` is rank-1, so `adj / deg` broadcasts along the last axis,
# i.e. entry [i, j] is divided by deg[j] (column scaling). If a per-node mean
# over neighbours (row scaling) is what is wanted, the divisor would have to be
# a column vector instead -- confirm the intent.
train_cora(fts=features, adj=adj / deg, gnn_fn=gnn, units=32, epochs=200, lr=0.01)

Epoch 0 |Training loss:  1.9526887 |Val accuracy:  0.46199998 Test accuracy: 0.48999998
Epoch 1 |Training loss:  1.765854 |Val accuracy:  0.612 Test accuracy: 0.61
Epoch 2 |Training loss:  1.5531013 |Val accuracy:  0.648 Test accuracy: 0.652
Epoch 3 |Training loss:  1.3126489 |Val accuracy:  0.69600004 Test accuracy: 0.721
Epoch 4 |Training loss:  1.0788159 |Val accuracy:  0.742 Test accuracy: 0.76399994
Epoch 5 |Training loss:  0.8729963 |Val accuracy:  0.76000005 Test accuracy: 0.7849999
Epoch 6 |Training loss:  0.69975173 |Val accuracy:  0.77400005 Test accuracy: 0.7999998
Epoch 7 |Training loss:  0.5558219 |Val accuracy:  0.782 Test accuracy: 0.8079998
Epoch 8 |Training loss:  0.4379415 |Val accuracy:  0.79199994 Test accuracy: 0.8099998


In [30]:
# Symmetric normalisation D^{-1/2} A D^{-1/2}: scale each edge by the inverse
# square roots of both endpoint degrees. Relies on `deg` from the previous cell.
norm_deg = tf.linalg.diag(1.0 / tf.sqrt(deg))
norm_adj = tf.matmul(norm_deg, tf.matmul(adj, norm_deg))
train_cora(fts=features, adj=norm_adj, gnn_fn=gnn, units=32, epochs=200, lr=0.01)

Epoch 0 |Training loss:  1.9492472 |Val accuracy:  0.41799995 Test accuracy: 0.42699996
Epoch 1 |Training loss:  1.763634 |Val accuracy:  0.636 Test accuracy: 0.656
Epoch 2 |Training loss:  1.555357 |Val accuracy:  0.71 Test accuracy: 0.73999995
Epoch 3 |Training loss:  1.3243644 |Val accuracy:  0.746 Test accuracy: 0.77399987
Epoch 4 |Training loss:  1.1024923 |Val accuracy:  0.75799996 Test accuracy: 0.78299993
Epoch 5 |Training loss:  0.90055156 |Val accuracy:  0.778 Test accuracy: 0.79899985
Epoch 6 |Training loss:  0.7220654 |Val accuracy:  0.78999996 Test accuracy: 0.8099998
Epoch 7 |Training loss:  0.5695971 |Val accuracy:  0.79199994 Test accuracy: 0.8099998
