#### Import all the necessary packages for training the graph convolutional network (GCN).

In [1]:
from __future__ import print_function

import time

from keras.layers import Input, Dropout
from keras.models import Model
from keras.optimizers import Adam
from keras.regularizers import l2
from numpy import linalg as LA

from kegra.layers.graph import GraphConvolution
import utils

Using TensorFlow backend.


#### Define parameters

In [2]:
# Name of the dataset handed to utils.load_data.
DATASET = 'cora'
# Filter type; selects the branch taken in the filter-setup cell.
# Supported values there are 'localpool' and 'chebyshev'.
FILTER = 'localpool'  # 'chebyshev'
MAX_DEGREE = 2  # maximum polynomial degree (only read by the 'chebyshev' filter)
SYM_NORM = True  # symmetric (True) vs. left-only (False) normalization
NB_EPOCH = 200  # upper bound on the number of training epochs
PATIENCE = 10  # early stopping patience

#### Load data using function from utils

In [3]:
# Load the dataset named by DATASET.
# NOTE(review): judging by the shapes printed further down, X is the node-feature
# matrix, A the adjacency matrix and y the one-hot labels -- confirm in utils.load_data.
X, A, y = utils.load_data(dataset=DATASET)

Loading cora dataset...
Dataset has 2708 nodes, 5429 edges, 1433 features.


#### Split the dataset into training, validation, and test sets. Only the labels are split, not X or A themselves; the training nodes in X and A can be selected with `train_mask`.

In [4]:
# Split the labels into train / validation / test parts.
# NOTE(review): going by the names, idx_* hold the node indices of each split and
# train_mask flags the training nodes -- confirm in utils.get_splits.
y_train, y_val, y_test, idx_train, idx_val, idx_test, train_mask = utils.get_splits(y)

#### Let's verify the dataset we loaded

In [17]:
# Sanity-check the shapes of the loaded data and of the label splits.
print("X dim={}".format(X.shape))
print("A dim={}".format(A.shape))
print("y dim={}".format(y.shape))

print("y_train dim={}".format(y_train.shape))
print("y_val dim={}".format(y_val.shape))
print("y_test dim={}".format(y_test.shape))

# Number of training nodes: summing a boolean mask counts its True entries,
# which replaces the element-by-element `== True` loop.
tmp_sum = int(train_mask.sum())

print(tmp_sum)
print(train_mask)

X dim=(2708, 1433)
A dim=(2708, 2708)
y dim=(2708, 7)
y_train dim=(2708, 7)
y_val dim=(2708, 7)
y_test dim=(2708, 7)
80
[ True  True  True ... False False False]


#### Normalize X so that each one-hot embedded paper has $||x_i|| = 1$. 

In [18]:
# Normalize X: scale every row to unit L2 norm, in place.
# `LA` (numpy.linalg) is imported with the other dependencies at the top of the notebook.
row_norms = LA.norm(X, axis=1, keepdims=True)
# An all-zero row has norm 0; dividing by it would fill the row with NaN.
# Such rows are divided by 1 instead and therefore stay all-zero.
row_norms[row_norms == 0] = 1
X /= row_norms

#### Take a peek at the 100-th one-hot embedded paper after performing normalization

In [19]:
# Inspect entry 100 of X with the helper from utils.
# NOTE(review): from the output below it appears to print "column:value" for each
# non-zero entry of that row, followed by a sum -- confirm in utils.look_sparse_matrix.
utils.look_sparse_matrix(X, 100)

62:0.19999997317790985
99:0.19999997317790985
132:0.19999997317790985
142:0.19999997317790985
292:0.19999997317790985
402:0.19999997317790985
462:0.19999997317790985
495:0.19999997317790985
507:0.19999997317790985
575:0.19999997317790985
648:0.19999997317790985
675:0.19999997317790985
724:0.19999997317790985
733:0.19999997317790985
778:0.19999997317790985
779:0.19999997317790985
821:0.19999997317790985
1071:0.19999997317790985
1097:0.19999997317790985
1151:0.19999997317790985
1230:0.19999997317790985
1331:0.19999997317790985
1334:0.19999997317790985
1348:0.19999997317790985
1422:0.19999997317790985
tmp_sum=[[0.9999994]]==1


In [20]:
# Build the model inputs (`graph`) and the number of support matrices (`support`)
# for the filter type selected by FILTER.
if FILTER == 'localpool':
    # Local pooling filters (see 'renormalization trick' in Kipf & Welling, arXiv 2016)
    print('Using local pooling filters...')
    A_hat = utils.preprocess_adj(A, SYM_NORM)
    support = 1
    graph = [X, A_hat]
elif FILTER == 'chebyshev':
    # Chebyshev polynomial basis filters (Defferard et al., NIPS 2016)
    print('Using Chebyshev polynomial basis filters...')
    # Only the `utils` module is imported, so the helpers must be reached through
    # it; the bare names used before raised NameError on this branch.
    # NOTE(review): assumes the local utils module provides these three helpers
    # alongside preprocess_adj -- confirm.
    L = utils.normalized_laplacian(A, SYM_NORM)
    L_scaled = utils.rescale_laplacian(L)
    T_k = utils.chebyshev_polynomial(L_scaled, MAX_DEGREE)
    support = MAX_DEGREE + 1
    graph = [X]+T_k

else:
    raise Exception('Invalid filter type.')

Using local pooling filters...


In [12]:
# Define input tensors.
# One sparse placeholder is created per support matrix so the model inputs line
# up with `graph` for either filter type: a single normalized adjacency for
# 'localpool', MAX_DEGREE + 1 Chebyshev terms for 'chebyshev'. The previous
# version built a single placeholder from A_hat, which is only defined on the
# 'localpool' path. batch_shape=(None, None) leaves both dimensions unspecified.
G_in = [Input(shape=(None, None), batch_shape=(None, None), sparse=True)
        for _ in range(support)]
X_in = Input(shape=(X.shape[1],))

# Define model architecture
# NOTE: We pass arguments for graph convolutional layers as a list of tensors.
# This is somewhat hacky, more elegant options would require rewriting the Layer base class.
H_1 = Dropout(0.5)(X_in)
H_2 = GraphConvolution(16, support, activation='relu', kernel_regularizer=l2(5e-4))([H_1]+G_in)
H_2 = Dropout(0.5)(H_2)
# Output layer: one softmax unit per label column of y.
Y_out = GraphConvolution(y.shape[1], support, activation='softmax')([H_2]+G_in) #H_{l+1} = f_act(H_l, A)

# Compile model
model = Model(inputs=[X_in]+G_in, outputs=Y_out)
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.01))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


### Reminders
#### line 4: $\hat{A} = \tilde{D}^{-\frac{1}{2}} \tilde{A} \tilde{D}^{-\frac{1}{2}}$, where $\tilde{A} = A + I_N$, and $\tilde{D}_{ii} = \sum_j \tilde{A}_{ij}$.

#### Let's take a look at the model

In [13]:
# Print the layer-by-layer summary (output shapes, parameter counts, connections) of the model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 1433)         0                                            
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 1433)         0           input_2[0][0]                    
__________________________________________________________________________________________________
input_1 (InputLayer)            (None, None)         0                                            
__________________________________________________________________________________________________
graph_convolution_1 (GraphConvo (None, 16)           22944       dropout_1[0][0]                  
                                                                 input_1[0][0]                    
__________

In [21]:
# Helper variables for main training loop
wait = 0  # consecutive epochs without a validation-loss improvement
preds = None  # latest full-dataset predictions; assigned inside the training loop
# Start from +inf so the first epoch always counts as an improvement, whatever
# the scale of the loss (the previous sentinel, 99999, was an arbitrary cap).
best_val_loss = float('inf')

In [22]:
# Training loop: one full-batch fit per epoch, then evaluation on the train and
# validation splits and an early-stopping check on the validation loss.
for epoch in range(1, NB_EPOCH+1):

    # Wall-clock start of this epoch
    t = time.time()

    # Single training iteration over the whole graph as one batch
    # (we mask nodes without labels for loss calculation via sample_weight)
    model.fit(graph, y_train,
              sample_weight=train_mask,
              batch_size=A.shape[0],
              epochs=1,
              shuffle=False,
              verbose=0)

    # Predictions for the full dataset
    preds = model.predict(graph, batch_size=A.shape[0])

    # Loss / accuracy for the training split (index 0) and validation split (index 1)
    train_val_loss, train_val_acc = utils.evaluate_preds(
        preds, [y_train, y_val], [idx_train, idx_val])
    report = [
        "Epoch: {:04d}".format(epoch),
        "train_loss= {:.4f}".format(train_val_loss[0]),
        "train_acc= {:.4f}".format(train_val_acc[0]),
        "val_loss= {:.4f}".format(train_val_loss[1]),
        "val_acc= {:.4f}".format(train_val_acc[1]),
        "time= {:.4f}".format(time.time() - t),
    ]
    print(*report)

    # Early stopping: give up once the validation loss has failed to improve
    # for more than PATIENCE consecutive epochs.
    if not (train_val_loss[1] < best_val_loss):
        if wait >= PATIENCE:
            print('Epoch {}: early stopping'.format(epoch))
            break
        wait += 1
    else:
        best_val_loss = train_val_loss[1]
        wait = 0

# Testing: score the predictions from the last completed epoch on the test split
test_loss, test_acc = utils.evaluate_preds(preds, [y_test], [idx_test])
test_report = [
    "Test set results:",
    "loss= {:.4f}".format(test_loss[0]),
    "accuracy= {:.4f}".format(test_acc[0]),
]
print(*test_report)

Epoch: 0001 train_loss= 0.0846 train_acc= 1.0000 val_loss= 0.7659 val_acc= 0.7905 time= 0.0702
Epoch: 0002 train_loss= 0.0837 train_acc= 1.0000 val_loss= 0.7634 val_acc= 0.7905 time= 0.0773
Epoch: 0003 train_loss= 0.0828 train_acc= 1.0000 val_loss= 0.7599 val_acc= 0.7881 time= 0.0814
Epoch: 0004 train_loss= 0.0820 train_acc= 1.0000 val_loss= 0.7551 val_acc= 0.7881 time= 0.0775
Epoch: 0005 train_loss= 0.0810 train_acc= 1.0000 val_loss= 0.7509 val_acc= 0.7881 time= 0.0718
Epoch: 0006 train_loss= 0.0800 train_acc= 1.0000 val_loss= 0.7485 val_acc= 0.7881 time= 0.0578
Epoch: 0007 train_loss= 0.0791 train_acc= 1.0000 val_loss= 0.7464 val_acc= 0.7857 time= 0.0711
Epoch: 0008 train_loss= 0.0781 train_acc= 1.0000 val_loss= 0.7452 val_acc= 0.7881 time= 0.0700
Epoch: 0009 train_loss= 0.0774 train_acc= 1.0000 val_loss= 0.7444 val_acc= 0.7881 time= 0.0832
Epoch: 0010 train_loss= 0.0766 train_acc= 1.0000 val_loss= 0.7444 val_acc= 0.7881 time= 0.0731
Epoch: 0011 train_loss= 0.0759 train_acc= 1.0000 v