#### Global & Local Modules

In [1]:
import os
from itertools import permutations 
import sys
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import scipy.sparse as sp
from tqdm import tqdm
from keras.callbacks import EarlyStopping
from keras.layers import Input, Dropout
from keras.models import Model
from keras.optimizers import Adam
from keras.regularizers import l2
from rgcn import GaussianGraphConvolution, kl_reg

Using TensorFlow backend.


In [2]:
%matplotlib inline

#### Vendor Modules

In [3]:
# Directory the notebook is executed from.
EXE_PATH = os.path.abspath(os.path.curdir)
# Directory that holds the vendored third-party sources.
VENDOR_PATH = os.path.join(EXE_PATH, 'vendor')
# Absolute path of every vendored package that must be importable.
VENDORS = [os.path.join(VENDOR_PATH, vendor) for vendor in ['keras-gcn']]
# Make the vendored packages visible to the import system.
sys.path.extend(VENDORS)
# These imports only resolve once the vendor paths are on sys.path.
from kegra.layers.graph import GraphConvolution
from kegra.utils import load_data, preprocess_adj, get_splits

# Data

In [4]:
def attack_edges(attack_ratio: float) -> 'callable':
    """
    Create an edge attack function with given attack ratio.

    Args:
        attack_ratio: the ratio of noise to clean edges

    Returns:
        a callable function for attacking a graph

    """
    def _attack_edges(edges: np.ndarray) -> np.ndarray:
        """
        Attack the given edges by adding random noise edges.

        The number of noise edges is int(n_distinct * attack_ratio), where
        n_distinct is the number of distinct input edges and attack_ratio
        is the value captured from the enclosing call.

        Args:
            edges: an (n, 2) ndarray of directed edges given as pairs of
                integer node indices; nodes are taken to be labelled
                0..edges.max()

        Returns:
            a new ndarray (same dtype as the input) holding the distinct
            input edges in first-seen order, followed by the random noise
            edges; noise edges are never self-loops and never duplicate
            an existing edge

        Raises:
            ValueError: if more noise edges are requested than there are
                free ordered node pairs to draw them from

        """
        # NOTE: this is a plain string on purpose; an f-string in this
        # position is an ordinary expression and is not kept as __doc__.
        edges = np.asarray(edges)
        # nothing to attack (and .max() is undefined) without edges
        if edges.size == 0:
            return edges.copy()
        # drop duplicate edges while keeping their first-seen order
        clean = np.array(
            list(dict.fromkeys(map(tuple, edges.tolist()))),
            dtype=np.int64
        )
        # node labels run from 0 to max inclusive, hence the +1
        num_nodes = int(clean.max()) + 1
        # encode every ordered pair (a, b) as the integer a * num_nodes + b
        all_codes = np.arange(num_nodes * num_nodes, dtype=np.int64)
        # self-loops (a, a) are never valid noise edges
        loop_codes = np.arange(num_nodes, dtype=np.int64) * (num_nodes + 1)
        # only pairs of non-negative indices have a valid code
        valid = clean[(clean >= 0).all(axis=1)]
        clean_codes = valid[:, 0] * num_nodes + valid[:, 1]
        # the pool to sample from: every pair that is neither a self-loop
        # nor an existing edge
        free_codes = np.setdiff1d(
            all_codes,
            np.concatenate([loop_codes, clean_codes])
        )
        # select random edges without replacement based on the noise ratio
        picked = np.random.choice(free_codes,
            size=int(len(clean) * attack_ratio),
            replace=False)
        # decode the sampled integers back into (a, b) pairs
        noise = np.column_stack(np.divmod(picked, num_nodes))
        # create the new ndarray of edges with the attack edges added
        return np.concatenate([clean, noise]).astype(edges.dtype)
    return _attack_edges

In [5]:
def run(attack_ratio: float) -> tuple:
    """
    Perform an experimental run with given attack ratio.

    Two models (labelled GCN and RGCN) are built on the same input
    layers, trained with early stopping on the validation loss, and
    evaluated on the test split.

    Args:
        attack_ratio: the ratio of noise to clean edges

    Returns:
        a tuple of GCN and RGCN accuracies

    """
    # load the input data, handing the edge attack to the loader
    X, A, y = load_data('data/cora/', dataset='cora', attack_edges=attack_edges(attack_ratio))
    # scale every feature row by its row sum
    X /= X.sum(1).reshape(-1, 1)
    A = preprocess_adj(A)
    # only the label sets and the masks are needed; the index lists are not
    y_train, y_val, y_test, _, _, _, train_mask, val_mask, test_mask = get_splits(y)
    # create the input layers (shared by both models)
    X_in = Input(shape=(X.shape[1], ), name='features')
    A_in = Input(shape=(None, None), batch_shape=(None, None), sparse=True, name='graph')
    # create the training loop
    def train(model, epochs=200, patience=10):
        """
        Train the given model.

        Args:
            model: the model to train
            epochs: the maximum number of training epochs
            patience: the number of patience epochs for early stopping

        Returns:
            the result of model.evaluate on the test split; callers read
            index 1 as the accuracy

        """
        # the whole graph is one batch, so the masks (passed as sample
        # weights) select which nodes contribute to each split
        model.fit([X, A], y_train,
            sample_weight=train_mask,
            batch_size=A.shape[0],
            epochs=epochs,
            shuffle=False,
            verbose=False,
            callbacks=[
                EarlyStopping(monitor='val_loss', patience=patience),
            ],
            validation_data = ([X, A], y_val, val_mask)
        )
        test = model.evaluate([X, A], y_test, 
            sample_weight=test_mask, 
            batch_size=A.shape[0],
            verbose=0
        )
        return test

    # GCN
    H = Dropout(0.5)(X_in)
    H = GraphConvolution(32, 1, 
        activation='relu', 
        kernel_regularizer=l2(5e-4)
    )([H, A_in])
    H = Dropout(0.5)(H)
    Y = GraphConvolution(y.shape[1], 1, 
        activation='softmax'
    )([H, A_in])
    gcn = Model(inputs=[X_in, A_in], outputs=Y)
    gcn.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.01), weighted_metrics=['acc'])
    gcn_acc = train(gcn)[1]

    # RGCN
    H = Dropout(0.6)(X_in)
    M1, S1 = GaussianGraphConvolution(16,
        is_first=True,
        dropout=0.6
    )([H, A_in])
    Y = GaussianGraphConvolution(y.shape[1],
        is_last=True,
        last_activation='softmax',
    )([M1, S1, A_in])
    rgcn = Model(inputs=[X_in, A_in], outputs=Y)
    # register the KL term BEFORE compiling: standalone Keras assembles the
    # total training loss inside compile(), so a loss added afterwards is
    # not part of what gets optimised
    # NOTE(review): some Keras versions only collect model-level losses that
    # were added without `inputs` -- confirm the term shows up in rgcn.losses
    rgcn.add_loss(kl_reg(M1, S1, 5e-4), [M1, S1])
    rgcn.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.01), weighted_metrics=['acc'])
    rgcn_acc = train(rgcn)[1]

    return gcn_acc, rgcn_acc

In [6]:
run(0)





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.


(0.7999990582466125, 0.7859990000724792)

In [7]:
# Noise ratios 0, 0.1, ..., 1.0; the first entry stays the integer 0.
ratios = [step / 10 if step > 0 else 0 for step in range(11)]
ratios

[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

In [8]:
# default number of repetitions per attack ratio
MC_RUNS = 10
def monte_carlo_run(attack_ratio: float, num_runs: int = None) -> np.ndarray:
    """
    Repeat the experimental run and average the resulting accuracies.

    Args:
        attack_ratio: the ratio of noise to clean edges
        num_runs: how many times to repeat the run; defaults to MC_RUNS

    Returns:
        an ndarray holding the GCN and RGCN accuracies, each averaged
        over the repeated runs

    Raises:
        ValueError: if num_runs is smaller than 1

    """
    # resolve the default at call time so later changes to MC_RUNS apply
    if num_runs is None:
        num_runs = MC_RUNS
    # a mean over zero runs is undefined
    if num_runs < 1:
        raise ValueError('num_runs must be at least 1')
    # rows are runs, columns are (GCN, RGCN); average over the rows
    return np.array([run(attack_ratio) for _ in range(num_runs)]).mean(axis=0)

In [None]:
# One averaged (GCN, RGCN) result per noise ratio, with a progress bar.
results = [monte_carlo_run(ratio) for ratio in tqdm(ratios)]

  0%|          | 0/11 [00:00<?, ?it/s]



In [None]:
# Tabulate the averaged accuracies: one row per noise ratio, one column per model.
df = pd.DataFrame(
    results,
    index=pd.Index(ratios, name='Noise Edge Ratio'),
    columns=['GCN', 'RGCN'],
)
df

In [None]:
# Create the output directory for the figures; a no-op if it already exists.
# Pure Python instead of a shell escape so the cell also runs where
# `mkdir -p` is unavailable.
os.makedirs('img', exist_ok=True)

In [None]:
# Plot accuracy against the noise ratio and save the figure in both formats.
ax = df.plot()
ax.set_ylabel('Accuracy')
for target in ('img/plot.png', 'img/plot.pdf'):
    plt.savefig(target)