In [22]:
import os
import sys
import time
import numpy as np
import scipy.sparse as sp
from keras.layers import Input, Dropout
from keras.models import Model
from keras.optimizers import Adam
from keras.regularizers import l2

In [2]:
# Absolute path of the directory the notebook is executed from
EXE_PATH = os.path.abspath(os.path.curdir)
# Directory that holds the vendored packages
VENDOR_PATH = os.path.join(EXE_PATH, 'vendor')
# Names of the vendor directories that must be importable
VENDORS = ['gcn', 'keras-gcn']
# Resolve every vendor name to its absolute path
VENDORS = [os.path.join(VENDOR_PATH, name) for name in VENDORS]
# Extend the module search path, skipping entries that are already present so
# that re-running this cell does not pile up duplicate entries in sys.path
sys.path += [path for path in VENDORS if path not in sys.path]

# Data

In [24]:
from kegra.utils import load_data, preprocess_adj, get_splits

In [10]:
# Load the 'cora' dataset from data/cora/ using the kegra helper
X, A, y = load_data('data/cora/', dataset='cora')
# Divide every row of X by its row sum, in place.
# NOTE(review): a row whose sum is zero would divide by zero here — confirm the
# dataset contains no such rows.
X /= X.sum(1).reshape(-1, 1)
# Replace A with the result of kegra's preprocess_adj (see that helper for the
# exact transformation applied)
A = preprocess_adj(A)
# Derive the train/val/test targets, their index sets and the training mask
# from the labels
y_train, y_val, y_test, idx_train, idx_val, idx_test, train_mask = get_splits(y)

Loading cora dataset...
Dataset has 2708 nodes, 5429 edges, 1433 features.


In [11]:
# Display the feature matrix after the row-wise division above
X

matrix([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [12]:
# Display the adjacency matrix returned by preprocess_adj
A

<2708x2708 sparse matrix of type '<class 'numpy.float64'>'
	with 13264 stored elements in Compressed Sparse Row format>

In [13]:
# Display the label array returned by load_data
y

array([[0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 1, 0, ..., 0, 0, 0],
       ...,
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 1, 0]], dtype=int32)

In [15]:
# Passed as the second positional argument to every graph convolution layer
support = 1

# Single sparse Keras input with unspecified dimensions; it is fed the
# adjacency matrix A at fit/predict time
G = [
    Input(
        shape=(None, None),
        batch_shape=(None, None),
        sparse=True,
    )
]

# Concrete arrays handed to model.fit / model.predict, ordered like the model
# inputs ([X_in] + G)
graph = [X, A]

W1110 13:22:38.579905 140016016086848 deprecation_wrapper.py:119] From /home/bitcommander/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W1110 13:22:38.587121 140016016086848 deprecation_wrapper.py:119] From /home/bitcommander/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:515: The name tf.sparse_placeholder is deprecated. Please use tf.compat.v1.sparse_placeholder instead.



In [16]:
# Dense Keras input sized by every dimension of X after the first
X_in = Input(shape=X.shape[1:])

W1110 13:22:50.261755 140016016086848 deprecation_wrapper.py:119] From /home/bitcommander/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.



# GCN (baseline)

In [25]:
from kegra.layers.graph import GraphConvolution
from kegra.utils import evaluate_preds

In [18]:
# Baseline network: dropout -> graph convolution (16 units, ReLU, L2-regularised
# kernel) -> dropout -> graph convolution (one unit per label column, softmax).
# Each graph convolution is called on a single list holding the feature tensor
# followed by the tensors in G, because the layer takes all of its inputs as
# one list.
H = Dropout(0.5)(X_in)
H = GraphConvolution(
    16,
    support,
    activation='relu',
    kernel_regularizer=l2(5e-4),
)([H, *G])
H = Dropout(0.5)(H)
Y = GraphConvolution(
    y.shape[1],
    support,
    activation='softmax',
)([H, *G])

W1110 13:23:14.492358 140016016086848 deprecation_wrapper.py:119] From /home/bitcommander/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W1110 13:23:14.505705 140016016086848 deprecation.py:506] From /home/bitcommander/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
W1110 13:23:14.515266 140016016086848 deprecation_wrapper.py:119] From /home/bitcommander/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.



In [19]:
# Wire the feature input and the graph input(s) to the softmax output Y
model = Model(inputs=[X_in, *G], outputs=Y)
# Categorical cross-entropy optimised with Adam at learning rate 0.01
model.compile(
    optimizer=Adam(lr=0.01),
    loss='categorical_crossentropy',
)

W1110 13:23:15.699908 140016016086848 deprecation_wrapper.py:119] From /home/bitcommander/.local/lib/python3.7/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.



In [26]:
NB_EPOCH = 200
PATIENCE = 10  # epochs without a new best validation loss tolerated before stopping

# Bookkeeping for the manual early-stopping logic below
best_val_loss = 99999
wait = 0
preds = None

# Run Keras one epoch at a time so the validation loss can be inspected after
# every update
for epoch in range(1, NB_EPOCH + 1):

    # Reference point for the per-epoch wall-clock time
    t = time.time()

    # One full-batch training step; train_mask is supplied as sample_weight so
    # that nodes without labels are masked out of the loss
    model.fit(
        graph,
        y_train,
        sample_weight=train_mask,
        batch_size=A.shape[0],
        epochs=1,
        shuffle=False,
        verbose=0,
    )

    # Predictions for the full dataset in a single batch
    preds = model.predict(graph, batch_size=A.shape[0])

    # Index 0 of each result is the training split, index 1 the validation split
    train_val_loss, train_val_acc = evaluate_preds(
        preds,
        [y_train, y_val],
        [idx_train, idx_val],
    )
    print(" ".join([
        "Epoch: {:04d}".format(epoch),
        "train_loss= {:.4f}".format(train_val_loss[0]),
        "train_acc= {:.4f}".format(train_val_acc[0]),
        "val_loss= {:.4f}".format(train_val_loss[1]),
        "val_acc= {:.4f}".format(train_val_acc[1]),
        "time= {:.4f}".format(time.time() - t),
    ]))

    # Early stopping: reset the counter on a new best validation loss,
    # otherwise stop once the counter has already reached PATIENCE
    if train_val_loss[1] < best_val_loss:
        best_val_loss = train_val_loss[1]
        wait = 0
    elif wait >= PATIENCE:
        print('Epoch {}: early stopping'.format(epoch))
        break
    else:
        wait += 1

Epoch: 0001 train_loss= 1.9193 train_acc= 0.2929 val_loss= 1.9219 val_acc= 0.3500 time= 0.0390
Epoch: 0002 train_loss= 1.9044 train_acc= 0.2929 val_loss= 1.9089 val_acc= 0.3500 time= 0.0194
Epoch: 0003 train_loss= 1.8894 train_acc= 0.2929 val_loss= 1.8958 val_acc= 0.3500 time= 0.0232
Epoch: 0004 train_loss= 1.8742 train_acc= 0.2929 val_loss= 1.8825 val_acc= 0.3500 time= 0.0182
Epoch: 0005 train_loss= 1.8591 train_acc= 0.2929 val_loss= 1.8693 val_acc= 0.3500 time= 0.0234
Epoch: 0006 train_loss= 1.8441 train_acc= 0.2929 val_loss= 1.8562 val_acc= 0.3500 time= 0.0200
Epoch: 0007 train_loss= 1.8294 train_acc= 0.2929 val_loss= 1.8435 val_acc= 0.3500 time= 0.0195
Epoch: 0008 train_loss= 1.8153 train_acc= 0.2929 val_loss= 1.8317 val_acc= 0.3500 time= 0.0233
Epoch: 0009 train_loss= 1.8019 train_acc= 0.2929 val_loss= 1.8205 val_acc= 0.3500 time= 0.0199
Epoch: 0010 train_loss= 1.7891 train_acc= 0.2929 val_loss= 1.8101 val_acc= 0.3500 time= 0.0215
Epoch: 0011 train_loss= 1.7770 train_acc= 0.2929 v

Epoch: 0090 train_loss= 1.0024 train_acc= 0.7929 val_loss= 1.2320 val_acc= 0.7067 time= 0.0186
Epoch: 0091 train_loss= 0.9956 train_acc= 0.7929 val_loss= 1.2257 val_acc= 0.7000 time= 0.0186
Epoch: 0092 train_loss= 0.9889 train_acc= 0.7929 val_loss= 1.2200 val_acc= 0.6967 time= 0.0177
Epoch: 0093 train_loss= 0.9825 train_acc= 0.7929 val_loss= 1.2145 val_acc= 0.6967 time= 0.0191
Epoch: 0094 train_loss= 0.9756 train_acc= 0.7929 val_loss= 1.2094 val_acc= 0.7000 time= 0.0179
Epoch: 0095 train_loss= 0.9684 train_acc= 0.7929 val_loss= 1.2042 val_acc= 0.7000 time= 0.0180
Epoch: 0096 train_loss= 0.9610 train_acc= 0.7929 val_loss= 1.1995 val_acc= 0.7067 time= 0.0180
Epoch: 0097 train_loss= 0.9535 train_acc= 0.8000 val_loss= 1.1949 val_acc= 0.7100 time= 0.0176
Epoch: 0098 train_loss= 0.9462 train_acc= 0.8071 val_loss= 1.1907 val_acc= 0.7100 time= 0.0185
Epoch: 0099 train_loss= 0.9392 train_acc= 0.8143 val_loss= 1.1863 val_acc= 0.7067 time= 0.0182
Epoch: 0100 train_loss= 0.9326 train_acc= 0.8214 v

Epoch: 0178 train_loss= 0.5709 train_acc= 0.9214 val_loss= 0.8960 val_acc= 0.8100 time= 0.0207
Epoch: 0179 train_loss= 0.5689 train_acc= 0.9214 val_loss= 0.8925 val_acc= 0.8100 time= 0.0245
Epoch: 0180 train_loss= 0.5663 train_acc= 0.9286 val_loss= 0.8899 val_acc= 0.8133 time= 0.0188
Epoch: 0181 train_loss= 0.5639 train_acc= 0.9286 val_loss= 0.8873 val_acc= 0.8167 time= 0.0202
Epoch: 0182 train_loss= 0.5613 train_acc= 0.9214 val_loss= 0.8851 val_acc= 0.8167 time= 0.0220
Epoch: 0183 train_loss= 0.5581 train_acc= 0.9214 val_loss= 0.8836 val_acc= 0.8133 time= 0.0196
Epoch: 0184 train_loss= 0.5550 train_acc= 0.9357 val_loss= 0.8828 val_acc= 0.8100 time= 0.0190
Epoch: 0185 train_loss= 0.5522 train_acc= 0.9357 val_loss= 0.8833 val_acc= 0.8133 time= 0.0197
Epoch: 0186 train_loss= 0.5503 train_acc= 0.9429 val_loss= 0.8842 val_acc= 0.8133 time= 0.0189
Epoch: 0187 train_loss= 0.5489 train_acc= 0.9429 val_loss= 0.8852 val_acc= 0.8100 time= 0.0206
Epoch: 0188 train_loss= 0.5472 train_acc= 0.9429 v

In [33]:
# Score the test split; `preds` holds the predictions from the last epoch the
# training loop executed
test_loss, test_acc = evaluate_preds(preds, [y_test], [idx_test])
print(
    f"loss = {test_loss[0]:.4f}",
    f"accuracy = {test_acc[0]:.4f}",
    sep="\n",
)

loss = 0.9183
accuracy = 0.7780


# Gaussian Graph Convolution

In [34]:
from gaussian_graph_convolution import GaussianGraphConvolution

In [35]:
# Same layer stack as the baseline, with GaussianGraphConvolution in place of
# GraphConvolution. The input tensors X_in and G are reused, and H / Y are
# rebound to the new network's tensors.
H = Dropout(0.5)(X_in)
H = GaussianGraphConvolution(
    16,
    support,
    activation='relu',
    kernel_regularizer=l2(5e-4),
)([H, *G])
H = Dropout(0.5)(H)
Y = GaussianGraphConvolution(
    y.shape[1],
    support,
    activation='softmax',
)([H, *G])

In [36]:
# Rebinds `model`: from here on the name refers to the Gaussian network, not
# the baseline built earlier
model = Model(inputs=[X_in, *G], outputs=Y)
# Categorical cross-entropy optimised with Adam at learning rate 0.01
model.compile(
    optimizer=Adam(lr=0.01),
    loss='categorical_crossentropy',
)

In [37]:
NB_EPOCH = 200
PATIENCE = 10  # epochs without a new best validation loss tolerated before stopping

# Bookkeeping for the manual early-stopping logic below
best_val_loss = 99999
wait = 0
preds = None

# Run Keras one epoch at a time so the validation loss can be inspected after
# every update
for epoch in range(1, NB_EPOCH + 1):

    # Reference point for the per-epoch wall-clock time
    t = time.time()

    # One full-batch training step; train_mask is supplied as sample_weight so
    # that nodes without labels are masked out of the loss
    model.fit(
        graph,
        y_train,
        sample_weight=train_mask,
        batch_size=A.shape[0],
        epochs=1,
        shuffle=False,
        verbose=0,
    )

    # Predictions for the full dataset in a single batch
    preds = model.predict(graph, batch_size=A.shape[0])

    # Index 0 of each result is the training split, index 1 the validation split
    train_val_loss, train_val_acc = evaluate_preds(
        preds,
        [y_train, y_val],
        [idx_train, idx_val],
    )
    print(" ".join([
        "Epoch: {:04d}".format(epoch),
        "train_loss= {:.4f}".format(train_val_loss[0]),
        "train_acc= {:.4f}".format(train_val_acc[0]),
        "val_loss= {:.4f}".format(train_val_loss[1]),
        "val_acc= {:.4f}".format(train_val_acc[1]),
        "time= {:.4f}".format(time.time() - t),
    ]))

    # Early stopping: reset the counter on a new best validation loss,
    # otherwise stop once the counter has already reached PATIENCE
    if train_val_loss[1] < best_val_loss:
        best_val_loss = train_val_loss[1]
        wait = 0
    elif wait >= PATIENCE:
        print('Epoch {}: early stopping'.format(epoch))
        break
    else:
        wait += 1

Epoch: 0001 train_loss= 1.9385 train_acc= 0.3500 val_loss= 1.9393 val_acc= 0.3633 time= 0.4442
Epoch: 0002 train_loss= 1.9303 train_acc= 0.3571 val_loss= 1.9320 val_acc= 0.3833 time= 0.0296
Epoch: 0003 train_loss= 1.9205 train_acc= 0.3500 val_loss= 1.9230 val_acc= 0.3567 time= 0.0271
Epoch: 0004 train_loss= 1.9108 train_acc= 0.3357 val_loss= 1.9144 val_acc= 0.3533 time= 0.0203
Epoch: 0005 train_loss= 1.9011 train_acc= 0.3643 val_loss= 1.9060 val_acc= 0.3600 time= 0.0187
Epoch: 0006 train_loss= 1.8910 train_acc= 0.3786 val_loss= 1.8972 val_acc= 0.3633 time= 0.0236
Epoch: 0007 train_loss= 1.8807 train_acc= 0.3857 val_loss= 1.8880 val_acc= 0.3633 time= 0.0245
Epoch: 0008 train_loss= 1.8701 train_acc= 0.3643 val_loss= 1.8785 val_acc= 0.3633 time= 0.0190
Epoch: 0009 train_loss= 1.8595 train_acc= 0.3643 val_loss= 1.8691 val_acc= 0.3633 time= 0.0194
Epoch: 0010 train_loss= 1.8489 train_acc= 0.3643 val_loss= 1.8596 val_acc= 0.3633 time= 0.0240
Epoch: 0011 train_loss= 1.8384 train_acc= 0.3571 v

Epoch: 0095 train_loss= 0.9056 train_acc= 0.9000 val_loss= 1.1636 val_acc= 0.7767 time= 0.0195
Epoch: 0096 train_loss= 0.8980 train_acc= 0.9000 val_loss= 1.1580 val_acc= 0.7733 time= 0.0185
Epoch: 0097 train_loss= 0.8899 train_acc= 0.9000 val_loss= 1.1505 val_acc= 0.7667 time= 0.0183
Epoch: 0098 train_loss= 0.8822 train_acc= 0.9000 val_loss= 1.1426 val_acc= 0.7700 time= 0.0236
Epoch: 0099 train_loss= 0.8750 train_acc= 0.9000 val_loss= 1.1350 val_acc= 0.7667 time= 0.0187
Epoch: 0100 train_loss= 0.8682 train_acc= 0.9000 val_loss= 1.1283 val_acc= 0.7800 time= 0.0197
Epoch: 0101 train_loss= 0.8615 train_acc= 0.9000 val_loss= 1.1221 val_acc= 0.7733 time= 0.0185
Epoch: 0102 train_loss= 0.8550 train_acc= 0.9000 val_loss= 1.1165 val_acc= 0.7700 time= 0.0228
Epoch: 0103 train_loss= 0.8483 train_acc= 0.9000 val_loss= 1.1113 val_acc= 0.7733 time= 0.0232
Epoch: 0104 train_loss= 0.8413 train_acc= 0.9000 val_loss= 1.1069 val_acc= 0.7767 time= 0.0182
Epoch: 0105 train_loss= 0.8343 train_acc= 0.9000 v

Epoch: 0186 train_loss= 0.5269 train_acc= 0.9429 val_loss= 0.8579 val_acc= 0.7967 time= 0.0237
Epoch: 0187 train_loss= 0.5246 train_acc= 0.9500 val_loss= 0.8539 val_acc= 0.7967 time= 0.0185
Epoch: 0188 train_loss= 0.5225 train_acc= 0.9500 val_loss= 0.8495 val_acc= 0.7967 time= 0.0193
Epoch: 0189 train_loss= 0.5208 train_acc= 0.9500 val_loss= 0.8459 val_acc= 0.7900 time= 0.0187
Epoch: 0190 train_loss= 0.5191 train_acc= 0.9500 val_loss= 0.8435 val_acc= 0.7933 time= 0.0228
Epoch: 0191 train_loss= 0.5173 train_acc= 0.9500 val_loss= 0.8416 val_acc= 0.7933 time= 0.0187
Epoch: 0192 train_loss= 0.5150 train_acc= 0.9500 val_loss= 0.8406 val_acc= 0.8000 time= 0.0231
Epoch: 0193 train_loss= 0.5120 train_acc= 0.9500 val_loss= 0.8402 val_acc= 0.8033 time= 0.0184
Epoch: 0194 train_loss= 0.5085 train_acc= 0.9500 val_loss= 0.8406 val_acc= 0.8000 time= 0.0190
Epoch: 0195 train_loss= 0.5056 train_acc= 0.9500 val_loss= 0.8412 val_acc= 0.8100 time= 0.0253
Epoch: 0196 train_loss= 0.5038 train_acc= 0.9500 v

In [38]:
# Score the test split; `preds` holds the predictions from the last epoch the
# training loop executed
test_loss, test_acc = evaluate_preds(preds, [y_test], [idx_test])
print(
    f"loss = {test_loss[0]:.4f}",
    f"accuracy = {test_acc[0]:.4f}",
    sep="\n",
)

loss = 0.9019
accuracy = 0.7860


# Attacks