In [1]:
# import dependencies
import spektral
import tensorflow as tf
from spektral.layers import GraphConv
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dropout

In [2]:
# load cora data set
cora_A, cora_X, cora_L, cora_train_mask, cora_val_mask, cora_test_mask = spektral.datasets.citation.load_data(dataset_name='cora',
                                                                                                              normalize_features=True, 
                                                                                                              random_split=False)

Loading cora dataset
Pre-processing node features


cora_A - adjacency matrix

cora_X - feature matrix (doc by term)

cora_L - label

In [3]:
# define number of nodes, features and classes
num_cora_nodes = cora_A.shape[0]
num_cora_features = cora_X.shape[1]
num_cora_classes = cora_L.shape[1]

In [4]:
# print out attributes
print('shape of cora Adjacency Matrix: {} x {}'.format(num_cora_nodes, num_cora_nodes))
print('number of cora features (number of termrs): ', num_cora_features)
print('number of cora classes: ', num_cora_classes)

shape of cora Adjacency Matrix: 2708 x 2708
number of cora features (number of termrs):  1433
number of cora classes:  7


In [5]:
# check the distribution of each class for balance
cora_L.sum(axis = 0)

array([351, 217, 418, 818, 426, 298, 180])

In [6]:
# define feature and adjacency input to the first GCN layer
X_in = Input(shape = (num_cora_features, ))
A_in = Input(shape = (num_cora_nodes, ), sparse = True)
# construct 3 layers of GCN, features beuing reduced to
# 64, 32 and 7 (number of classes)
# use drop out of 0.5 to minimize overfitting
X_1 = GraphConv(64, 'relu',)([X_in, A_in])
X_1 = Dropout(0.5)(X_1)
X_2 = GraphConv(32, 'relu')([X_1, A_in])
X_2 = Dropout(0.5)(X_2)
X_3 = GraphConv(num_cora_classes, 'softmax')([X_2, A_in])
# use keras functional API to construct a GCN model
cora_model = Model(inputs = [X_in, A_in], outputs = X_3, name = 'cora_GCN_model')

In [7]:
# preprocess adjacency matrix to add self-loops and scale edge weights
cora_A = GraphConv.preprocess(cora_A).astype('f4')

In [8]:
# compile model
cora_model.compile(optimizer = 'adam',
             loss = 'categorical_crossentropy',
             weighted_metrics = ['acc', tf.keras.metrics.AUC()])
cora_model.summary()

Model: "cora_GCN_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1433)]       0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 2708)]       0                                            
__________________________________________________________________________________________________
graph_conv (GraphConv)          (None, 64)           91776       input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
dropout (Dropout)               (None, 64)           0           graph_conv[0][0]    

In [9]:
# define number of epochs
cora_epochs = 100
# Prepare data
cora_X = cora_X.toarray()

In [10]:
# prepare val data
cora_val_data = ([cora_X, cora_A], cora_L, cora_val_mask)

# Train model
cora_model.fit([cora_X, cora_A], cora_L,
               sample_weight = cora_train_mask,
               validation_data = cora_val_data,
               epochs = cora_epochs,
               batch_size = num_cora_nodes,
               shuffle = False)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x132e0f150>

In [11]:
# Evaluate model
eval_results = cora_model.evaluate([cora_X, cora_A], 
                                   cora_L,
                                   sample_weight = cora_test_mask,
                                   batch_size = num_cora_nodes)
print('Done.\n'
      'Test loss: {}\n'
      'Test accuracy: {}'.format(*eval_results))

Done.
Test loss: 0.5618841052055359
Test accuracy: 0.7459999918937683
