In [1]:
import os
import sys
import time
import numpy as np
import scipy.sparse as sp
from keras.layers import Input, Dropout
from keras.models import Model
from keras.optimizers import Adam
from keras.regularizers import l2

Using TensorFlow backend.


In [2]:
# Make the packages stored under ./vendor importable.
# All paths are resolved against the directory the notebook is executed from.
EXE_PATH = os.path.abspath(os.path.curdir)
# Directory that holds the vendored packages.
VENDOR_PATH = os.path.join(EXE_PATH, 'vendor')
# Directory names (inside VENDOR_PATH) to expose on the import path.
VENDOR_NAMES = ['keras-gcn']
# Absolute path of every vendored package.
VENDORS = [os.path.join(VENDOR_PATH, name) for name in VENDOR_NAMES]
# Append only the entries that are still missing, so re-running this cell does
# not keep growing sys.path with duplicates.
for vendor_path in VENDORS:
    if vendor_path not in sys.path:
        sys.path.append(vendor_path)

In [3]:
from kegra.layers.graph import GraphConvolution
from kegra.utils import load_data, preprocess_adj, get_splits, evaluate_preds
from ggcn import GaussianGraphConvolution

# Data

In [4]:
# Load features (X), adjacency (A) and labels (y) of the cora dataset.
X, A, y = load_data('data/cora/', dataset='cora')
# Row-normalise the features in place: every row is divided by its own sum.
# A row that sums to zero is divided by one instead, so it stays all-zero
# rather than being filled with NaN/inf by a division by zero.
row_sums = X.sum(1).reshape(-1, 1)
row_sums[row_sums == 0] = 1
X /= row_sums
# Adjacency preprocessing is delegated to kegra's preprocess_adj
# (NOTE(review): its exact normalisation is not visible in this notebook).
A = preprocess_adj(A)
# Label splits and their index sets; train_mask is later passed to model.fit
# as the per-sample weight.
y_train, y_val, y_test, idx_train, idx_val, idx_test, train_mask = get_splits(y)

Loading cora dataset...
Dataset has 2708 nodes, 5429 edges, 1433 features.


In [5]:
# Display the feature matrix after the row normalisation applied when it was loaded.
X

matrix([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [6]:
# Display the adjacency matrix as returned by preprocess_adj.
A

<2708x2708 sparse matrix of type '<class 'numpy.float64'>'
	with 13264 stored elements in Compressed Sparse Row format>

In [7]:
# Display the label array returned by load_data; its second dimension sizes the
# output layer of the models built later in the notebook.
y

array([[0, 0, 0, ..., 0, 1, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 1, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 1, 0]], dtype=int32)

In [8]:
# Inputs shared by the models defined in this notebook.
# G holds the single sparse Keras input through which the adjacency matrix is fed.
G = [Input(shape=(None, None), batch_shape=(None, None), sparse=True)]
# Arrays handed to fit/predict, in the same order as the model inputs [X_in] + G.
graph = [X, A]
# Forwarded as the second positional argument of GraphConvolution.
support = 1

W1110 15:29:28.199064 140340509591360 deprecation_wrapper.py:119] From /home/bitcommander/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W1110 15:29:28.206780 140340509591360 deprecation_wrapper.py:119] From /home/bitcommander/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:515: The name tf.sparse_placeholder is deprecated. Please use tf.compat.v1.sparse_placeholder instead.



In [9]:
# Dense input for the node features; its width is the number of columns of X.
num_features = X.shape[1]
X_in = Input(shape=(num_features,))

W1110 15:29:28.213454 140340509591360 deprecation_wrapper.py:119] From /home/bitcommander/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.



# GCN (baseline)

In [10]:
# Baseline architecture: dropout -> graph convolution -> dropout -> graph convolution.
# NOTE: each graph convolution layer is called on one list of tensors, the node
# features followed by the adjacency input(s) in G. A cleaner interface would
# require rewriting the Layer base class.
HIDDEN_UNITS = 16
DROPOUT_RATE = 0.5
L2_WEIGHT = 5e-4

gcn_features = Dropout(DROPOUT_RATE)(X_in)
gcn_hidden = GraphConvolution(
    HIDDEN_UNITS,
    support,
    activation='relu',
    kernel_regularizer=l2(L2_WEIGHT),
)([gcn_features] + G)
# H ends up bound to the dropped-out hidden representation feeding the output layer.
H = Dropout(DROPOUT_RATE)(gcn_hidden)
# One softmax output unit per column of y.
Y = GraphConvolution(
    y.shape[1],
    support,
    activation='softmax',
)([H] + G)

W1110 15:29:28.221709 140340509591360 deprecation_wrapper.py:119] From /home/bitcommander/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W1110 15:29:28.227010 140340509591360 deprecation.py:506] From /home/bitcommander/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
W1110 15:29:28.235213 140340509591360 deprecation_wrapper.py:119] From /home/bitcommander/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.



In [11]:
# Wire the shared inputs to the baseline output and compile the model.
model = Model(inputs=[X_in] + G, outputs=Y)
optimizer = Adam(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

W1110 15:29:28.279044 140340509591360 deprecation_wrapper.py:119] From /home/bitcommander/.local/lib/python3.7/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.



In [12]:
NB_EPOCH = 200
PATIENCE = 10  # early stopping patience

# State carried across epochs
best_val_loss = 99999
wait = 0  # consecutive epochs without a new best validation loss
preds = None

# The whole graph is processed as a single batch.
num_nodes = A.shape[0]

for epoch in range(1, NB_EPOCH + 1):
    # Start of the epoch, used for the wall-clock figure in the log line
    t = time.time()

    # One training pass; train_mask is supplied as the per-sample weight
    model.fit(graph, y_train, sample_weight=train_mask,
              batch_size=num_nodes, epochs=1, shuffle=False, verbose=0)

    # Predictions for the full dataset. After the loop `preds` holds the
    # output of the last epoch that ran, not of the best validation epoch.
    preds = model.predict(graph, batch_size=num_nodes)

    # Index 0 of each result is the training split, index 1 the validation split
    train_val_loss, train_val_acc = evaluate_preds(
        preds, [y_train, y_val], [idx_train, idx_val])
    val_loss = train_val_loss[1]

    report = [
        "Epoch: {:04d}".format(epoch),
        "train_loss= {:.4f}".format(train_val_loss[0]),
        "train_acc= {:.4f}".format(train_val_acc[0]),
        "val_loss= {:.4f}".format(val_loss),
        "val_acc= {:.4f}".format(train_val_acc[1]),
        "time= {:.4f}".format(time.time() - t),
    ]
    print(*report)

    # Early stopping: a new best validation loss resets the counter.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        wait = 0
        continue
    # The check precedes the increment, so the loop stops on the
    # (PATIENCE + 1)-th consecutive epoch without improvement.
    if wait >= PATIENCE:
        print('Epoch {}: early stopping'.format(epoch))
        break
    wait += 1

W1110 15:29:28.351851 140340509591360 deprecation.py:323] From /home/bitcommander/.local/lib/python3.7/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch: 0001 train_loss= 1.9349 train_acc= 0.2929 val_loss= 1.9348 val_acc= 0.3500 time= 1.0828
Epoch: 0002 train_loss= 1.9233 train_acc= 0.2929 val_loss= 1.9237 val_acc= 0.3500 time= 0.0226
Epoch: 0003 train_loss= 1.9112 train_acc= 0.2929 val_loss= 1.9121 val_acc= 0.3500 time= 0.0180
Epoch: 0004 train_loss= 1.8980 train_acc= 0.2929 val_loss= 1.8996 val_acc= 0.3500 time= 0.0230
Epoch: 0005 train_loss= 1.8844 train_acc= 0.2929 val_loss= 1.8870 val_acc= 0.3500 time= 0.0227
Epoch: 0006 train_loss= 1.8707 train_acc= 0.2929 val_loss= 1.8742 val_acc= 0.3500 time= 0.0236
Epoch: 0007 train_loss= 1.8567 train_acc= 0.2929 val_loss= 1.8614 val_acc= 0.3500 time= 0.0190
Epoch: 0008 train_loss= 1.8428 train_acc= 0.2929 val_loss= 1.8485 val_acc= 0.3500 time= 0.0227
Epoch: 0009 train_loss= 1.8289 train_acc= 0.2929 val_loss= 1.8360 val_acc= 0.3500 time= 0.0222
Epoch: 0010 train_loss= 1.8152 train_acc= 0.2929 val_loss= 1.8238 val_acc= 0.3500 time= 0.0250
Epoch: 0011 train_loss= 1.8020 train_acc= 0.2929 v

Epoch: 0088 train_loss= 0.9733 train_acc= 0.8357 val_loss= 1.2206 val_acc= 0.6933 time= 0.0189
Epoch: 0089 train_loss= 0.9648 train_acc= 0.8429 val_loss= 1.2138 val_acc= 0.7033 time= 0.0212
Epoch: 0090 train_loss= 0.9567 train_acc= 0.8500 val_loss= 1.2076 val_acc= 0.7100 time= 0.0226
Epoch: 0091 train_loss= 0.9492 train_acc= 0.8500 val_loss= 1.2014 val_acc= 0.7167 time= 0.0244
Epoch: 0092 train_loss= 0.9420 train_acc= 0.8500 val_loss= 1.1949 val_acc= 0.7267 time= 0.0249
Epoch: 0093 train_loss= 0.9346 train_acc= 0.8500 val_loss= 1.1885 val_acc= 0.7267 time= 0.0182
Epoch: 0094 train_loss= 0.9267 train_acc= 0.8571 val_loss= 1.1818 val_acc= 0.7333 time= 0.0228
Epoch: 0095 train_loss= 0.9186 train_acc= 0.8643 val_loss= 1.1752 val_acc= 0.7333 time= 0.0185
Epoch: 0096 train_loss= 0.9102 train_acc= 0.8643 val_loss= 1.1690 val_acc= 0.7367 time= 0.0183
Epoch: 0097 train_loss= 0.9018 train_acc= 0.8643 val_loss= 1.1632 val_acc= 0.7500 time= 0.0207
Epoch: 0098 train_loss= 0.8937 train_acc= 0.8714 v

Epoch: 0183 train_loss= 0.5100 train_acc= 0.9429 val_loss= 0.8622 val_acc= 0.7967 time= 0.0230
Epoch: 0184 train_loss= 0.5069 train_acc= 0.9429 val_loss= 0.8596 val_acc= 0.7967 time= 0.0231
Epoch: 0185 train_loss= 0.5043 train_acc= 0.9500 val_loss= 0.8560 val_acc= 0.7967 time= 0.0182
Epoch: 0186 train_loss= 0.5017 train_acc= 0.9571 val_loss= 0.8515 val_acc= 0.7933 time= 0.0184
Epoch: 0187 train_loss= 0.4996 train_acc= 0.9571 val_loss= 0.8474 val_acc= 0.7967 time= 0.0182
Epoch: 0188 train_loss= 0.4978 train_acc= 0.9571 val_loss= 0.8444 val_acc= 0.7900 time= 0.0180
Epoch: 0189 train_loss= 0.4956 train_acc= 0.9571 val_loss= 0.8421 val_acc= 0.7900 time= 0.0180
Epoch: 0190 train_loss= 0.4931 train_acc= 0.9571 val_loss= 0.8405 val_acc= 0.7967 time= 0.0178
Epoch: 0191 train_loss= 0.4903 train_acc= 0.9571 val_loss= 0.8404 val_acc= 0.8033 time= 0.0181
Epoch: 0192 train_loss= 0.4884 train_acc= 0.9571 val_loss= 0.8419 val_acc= 0.8167 time= 0.0194
Epoch: 0193 train_loss= 0.4865 train_acc= 0.9571 v

In [13]:
# Score the predictions left behind by the training loop on the test split.
test_loss, test_acc = evaluate_preds(preds, [y_test], [idx_test])
summary_lines = [
    f"loss = {test_loss[0]:.4f}",
    f"accuracy = {test_acc[0]:.4f}",
]
print("\n".join(summary_lines))

loss = 0.8801
accuracy = 0.7990


# Gaussian Graph Convolution

In [15]:
# Same layout as the baseline (dropout -> convolution -> dropout -> convolution),
# built from GaussianGraphConvolution layers.
# Each layer receives the number of rows of A as its second positional argument
# and a boolean as its third (True for the hidden layer, False for the output
# layer). NOTE(review): the meaning of that flag is defined in ggcn and is not
# visible in this notebook.
GGCN_HIDDEN_UNITS = 16
GGCN_DROPOUT_RATE = 0.5
GGCN_L2_WEIGHT = 5e-4

ggcn_features = Dropout(GGCN_DROPOUT_RATE)(X_in)
ggcn_hidden = GaussianGraphConvolution(
    GGCN_HIDDEN_UNITS,
    A.shape[0],
    True,
    activation='relu',
    kernel_regularizer=l2(GGCN_L2_WEIGHT),
)([ggcn_features] + G)
# H ends up bound to the dropped-out hidden representation feeding the output layer.
H = Dropout(GGCN_DROPOUT_RATE)(ggcn_hidden)
# One softmax output unit per column of y.
Y = GaussianGraphConvolution(
    y.shape[1],
    A.shape[0],
    False,
    activation='softmax',
)([H] + G)

In [16]:
# Rebind `model` to a model built on the Gaussian graph convolution output and
# compile it with the same loss and optimizer settings as the baseline.
model = Model(inputs=[X_in] + G, outputs=Y)
optimizer = Adam(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

In [17]:
NB_EPOCH = 200
PATIENCE = 10  # early stopping patience

# Bookkeeping for the loop below
wait = 0  # consecutive epochs without a new best validation loss
preds = None
best_val_loss = 99999

# Every fit/predict call processes all nodes in one batch.
full_batch = A.shape[0]

for epoch in range(1, NB_EPOCH + 1):
    t = time.time()  # epoch start, for the wall-clock figure in the log line

    # Single training pass, with train_mask supplied as the per-sample weight
    model.fit(
        graph,
        y_train,
        sample_weight=train_mask,
        batch_size=full_batch,
        epochs=1,
        shuffle=False,
        verbose=0,
    )

    # Predictions for the full dataset. Once the loop ends, `preds` holds the
    # output of the last epoch that ran.
    preds = model.predict(graph, batch_size=full_batch)

    # Position 0 of each result is the training split, position 1 the validation split
    train_val_loss, train_val_acc = evaluate_preds(
        preds,
        [y_train, y_val],
        [idx_train, idx_val],
    )

    print(
        "Epoch: {:04d}".format(epoch),
        "train_loss= {:.4f}".format(train_val_loss[0]),
        "train_acc= {:.4f}".format(train_val_acc[0]),
        "val_loss= {:.4f}".format(train_val_loss[1]),
        "val_acc= {:.4f}".format(train_val_acc[1]),
        "time= {:.4f}".format(time.time() - t),
    )

    # Early stopping. The patience check runs before the counter is incremented,
    # so the loop ends on the (PATIENCE + 1)-th consecutive epoch without a new
    # best validation loss.
    if train_val_loss[1] < best_val_loss:
        best_val_loss = train_val_loss[1]
        wait = 0
    elif wait >= PATIENCE:
        print('Epoch {}: early stopping'.format(epoch))
        break
    else:
        wait += 1

Epoch: 0001 train_loss= 1.9402 train_acc= 0.2857 val_loss= 1.9424 val_acc= 0.1733 time= 0.3353
Epoch: 0002 train_loss= 1.9337 train_acc= 0.4000 val_loss= 1.9376 val_acc= 0.2567 time= 0.0192
Epoch: 0003 train_loss= 1.9261 train_acc= 0.4643 val_loss= 1.9318 val_acc= 0.3333 time= 0.0184
Epoch: 0004 train_loss= 1.9175 train_acc= 0.4786 val_loss= 1.9253 val_acc= 0.3733 time= 0.0236
Epoch: 0005 train_loss= 1.9080 train_acc= 0.4786 val_loss= 1.9182 val_acc= 0.3867 time= 0.0227
Epoch: 0006 train_loss= 1.8978 train_acc= 0.4714 val_loss= 1.9106 val_acc= 0.3767 time= 0.0238
Epoch: 0007 train_loss= 1.8869 train_acc= 0.4714 val_loss= 1.9025 val_acc= 0.3767 time= 0.0185
Epoch: 0008 train_loss= 1.8755 train_acc= 0.4714 val_loss= 1.8941 val_acc= 0.3800 time= 0.0236
Epoch: 0009 train_loss= 1.8637 train_acc= 0.4857 val_loss= 1.8854 val_acc= 0.3767 time= 0.0188
Epoch: 0010 train_loss= 1.8515 train_acc= 0.4857 val_loss= 1.8765 val_acc= 0.3733 time= 0.0184
Epoch: 0011 train_loss= 1.8389 train_acc= 0.4857 v

Epoch: 0095 train_loss= 0.9048 train_acc= 0.8786 val_loss= 1.1521 val_acc= 0.7733 time= 0.0198
Epoch: 0096 train_loss= 0.8973 train_acc= 0.8786 val_loss= 1.1460 val_acc= 0.7733 time= 0.0226
Epoch: 0097 train_loss= 0.8898 train_acc= 0.8786 val_loss= 1.1396 val_acc= 0.7767 time= 0.0199
Epoch: 0098 train_loss= 0.8824 train_acc= 0.8786 val_loss= 1.1333 val_acc= 0.7767 time= 0.0229
Epoch: 0099 train_loss= 0.8751 train_acc= 0.8857 val_loss= 1.1272 val_acc= 0.7767 time= 0.0184
Epoch: 0100 train_loss= 0.8678 train_acc= 0.8857 val_loss= 1.1212 val_acc= 0.7767 time= 0.0195
Epoch: 0101 train_loss= 0.8605 train_acc= 0.8857 val_loss= 1.1155 val_acc= 0.7800 time= 0.0227
Epoch: 0102 train_loss= 0.8532 train_acc= 0.8786 val_loss= 1.1098 val_acc= 0.7800 time= 0.0189
Epoch: 0103 train_loss= 0.8461 train_acc= 0.8786 val_loss= 1.1041 val_acc= 0.7800 time= 0.0191
Epoch: 0104 train_loss= 0.8390 train_acc= 0.8786 val_loss= 1.0984 val_acc= 0.7800 time= 0.0242
Epoch: 0105 train_loss= 0.8321 train_acc= 0.8786 v

Epoch: 0183 train_loss= 0.5010 train_acc= 0.9357 val_loss= 0.8416 val_acc= 0.7800 time= 0.0231
Epoch: 0184 train_loss= 0.4987 train_acc= 0.9357 val_loss= 0.8404 val_acc= 0.7800 time= 0.0194
Epoch: 0185 train_loss= 0.4963 train_acc= 0.9357 val_loss= 0.8387 val_acc= 0.7800 time= 0.0200
Epoch: 0186 train_loss= 0.4938 train_acc= 0.9357 val_loss= 0.8366 val_acc= 0.7800 time= 0.0280
Epoch: 0187 train_loss= 0.4912 train_acc= 0.9357 val_loss= 0.8341 val_acc= 0.7833 time= 0.0193
Epoch: 0188 train_loss= 0.4886 train_acc= 0.9429 val_loss= 0.8315 val_acc= 0.7833 time= 0.0188
Epoch: 0189 train_loss= 0.4861 train_acc= 0.9429 val_loss= 0.8291 val_acc= 0.7867 time= 0.0185
Epoch: 0190 train_loss= 0.4836 train_acc= 0.9429 val_loss= 0.8264 val_acc= 0.7867 time= 0.0189
Epoch: 0191 train_loss= 0.4812 train_acc= 0.9429 val_loss= 0.8235 val_acc= 0.7867 time= 0.0246
Epoch: 0192 train_loss= 0.4788 train_acc= 0.9429 val_loss= 0.8211 val_acc= 0.7867 time= 0.0226
Epoch: 0193 train_loss= 0.4766 train_acc= 0.9429 v

In [18]:
# Score the predictions left behind by the training loop on the test split.
test_loss, test_acc = evaluate_preds(preds, [y_test], [idx_test])
metric_lines = (
    f"loss = {test_loss[0]:.4f}",
    f"accuracy = {test_acc[0]:.4f}",
)
print("\n".join(metric_lines))

loss = 0.8607
accuracy = 0.7970


# Attacks