# Graph Anomaly Prediction

---
On Yelp Reviews Dataset and Elliptic dataset







### Python Imports
All relevant project libraries and utilities for the notebook are installed and imported here.

In [None]:
!pip install spektral

In [None]:
!pip install --ignore-installed --upgrade tensorflow

In [None]:
import os
import numpy as np
import scipy
import pickle
import math
import tensorflow as tf
from tensorflow import keras
from numpy import polynomial
from scipy.special import comb
import scipy.sparse as ss



In [None]:
from typing import Optional
from tensorflow.keras import backend as KB
from tensorflow.experimental import Optional as OptTensor
from tensorflow.python.ops.numpy_ops import np_config

In [None]:
# from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.layers import Dropout, Input, Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

Spektral is a Python Library which is used for Graph Deep Learning based on Keras and TensorFlow. We have used this as an alternative to PyG Library.

In [None]:
import spektral as spktrl
from spektral.data import SingleLoader
from spektral.data import Dataset, Graph
from spektral.data.dataset import Dataset
from spektral.layers import MessagePassing
from spektral.layers import ops
from spektral.layers.convolutional.conv import Conv
from spektral.utils import normalized_laplacian, rescale_laplacian, laplacian, degree_matrix

In [None]:
from spektral.data.loaders import SingleLoader
from spektral.datasets.citation import Citation
from spektral.layers import ChebConv
from spektral.transforms import LayerPreprocess
from spektral.layers import GATConv

In [None]:
# Switch on NumPy-compatible behaviour for TensorFlow tensors.
# NOTE(review): presumably required because BernConv.call multiplies tensors by
# NumPy float coefficients returned by get_bern_coeff — confirm before removing.
np_config.enable_numpy_behavior()

### I. Data Importation


1. Upload datasets to Drive and mount Drive
2. Unzip the datasets  
3. Uncomment the `graph = pickle.load(...)` line for the dataset you want to test (Elliptic or Yelp) and comment out the other one
4. Give path to the dataset

In [None]:
# from google.colab import drive
# drive.mount('/content/gdrive')

In [None]:
# !unzip gdrive/My\ Drive/ellipticFraud.zip

In [None]:
# !unzip gdrive/My\ Drive/yelpFraud.zip

In [None]:
# graph=pickle.load(open("/content/ellipticFraud.dat","rb"))

In [None]:
# Load the pickled graph object used by MyDataset.read() (it reads the
# attributes x, a, y, train_mask, val_mask and test_mask).
# SECURITY: unpickling can execute arbitrary code; only load dataset files
# obtained from a trusted source.
# The context manager closes the file handle as soon as loading is done.
with open("/content/yelpFraud.dat", "rb") as graph_file:
    graph = pickle.load(graph_file)

In [None]:
# The loaded graph is wrapped in a Spektral Dataset (a container of Graph
# objects) so that it can be processed with Keras and TensorFlow.
# Each node label is either anomalous or normal.
class MyDataset(Dataset):
    """Single-graph Spektral dataset built from the module-level ``graph`` object.

    Attributes (filled in by ``read``):
        mask_tr, mask_va, mask_te: train / validation / test node masks,
            multiplied by 1 so boolean masks become integer masks.
        num_nodes: number of rows of the adjacency matrix ``graph.a``.
        num_features: number of columns of the feature matrix ``graph.x``.
    """

    def __init__(self, **kwargs):
        # Declare the attributes before delegating to the base constructor;
        # Spektral's Dataset constructor is expected to call read(), which
        # assigns their real values.
        self.mask_tr = self.mask_te = self.mask_va = None
        self.num_nodes = self.num_features = None
        super().__init__(**kwargs)

    def read(self):
        """Convert the global ``graph`` into a one-element list of ``Graph``.

        Returns:
            A list holding one ``Graph`` with the original features ``x`` and
            adjacency ``a`` and a two-column one-hot label array ``y``.
        """
        g = graph

        is_anomaly = g.y == 1
        is_normal = g.y == 0

        # Stack the two boolean indicators, cast them to integers and transpose
        # so the last axis has size 2: column 0 = normal, column 1 = anomaly
        # (shape (n_nodes, 2) assuming g.y is one-dimensional).
        y = np.transpose(np.stack((is_normal, is_anomaly)) * 1)

        self.num_nodes = g.a.shape[0]
        self.num_features = g.x.shape[1]

        # `1 * mask` turns boolean masks into 0/1 integer masks.
        self.mask_tr = 1 * g.train_mask
        self.mask_te = 1 * g.test_mask
        self.mask_va = 1 * g.val_mask

        return [Graph(x=g.x, a=g.a, y=y)]
        

In [None]:
# Instantiate the dataset and display its first (and only) graph.
dataset = MyDataset()
dataset[0]

Graph(n_nodes=46564, n_node_features=93, n_edge_features=None, n_labels=2)

In [None]:
# The dataset holds a single graph, so a SingleLoader is used.
# It converts the sparse and NumPy arrays into tensors for Keras
# (see the element_spec shown in the cell output).
loader = SingleLoader(dataset)
inputs = loader.load()
inputs

<RepeatDataset element_spec=((TensorSpec(shape=(46564, 93), dtype=tf.float32, name=None), SparseTensorSpec(TensorShape([46564, 46564]), tf.float32)), TensorSpec(shape=(46564, 2), dtype=tf.int64, name=None))>

In [None]:
#We converted the binary masks to sample weights so that we can compute the
# average loss over the nodes (following original implementation by
# Kipf & Welling)

def mask_to_weights(mask):
    """Turn a binary node mask into per-node sample weights.

    Every entry is divided by the number of non-zero entries, so the selected
    nodes share a total weight of one and unselected nodes keep weight zero.

    Args:
        mask: array-like mask supporting division (e.g. a NumPy array).

    Returns:
        The mask scaled by ``1 / count_nonzero(mask)``.
    """
    n_selected = np.count_nonzero(mask)
    return mask / n_selected

# One weight vector per split; each sums to one over the nodes of its split.
weights_tr = mask_to_weights(dataset.mask_tr)
weights_va = mask_to_weights(dataset.mask_va)
weights_te = mask_to_weights(dataset.mask_te)

### II. Bernstein Convolution
The graph filter is expressed in the Bernstein polynomial basis.

Call(): The messages are gathered from the neighbouring nodes. The number of neighbours depends on  the degree of polynomial. If degree is 'K', the messages are gathered from 'K' hop neighbours.

Preprocess():
A function preprocess is used to preprocess the input adjacency matrix using laplacian functions. 

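get_bern_coeff():
The filters are parameterized using Bernstein polynomials. This function returns the monomial coefficients of each Bernstein basis polynomial of the given degree; the basis polynomials themselves take values between 0 and 1 on the interval [0, 1].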


In [None]:
class BernConv(Conv):
    """Graph convolution layer built on Bernstein polynomial coefficients.

    The layer receives ``[x, a]`` (node features and the adjacency-derived
    matrix), propagates the features through ``a`` up to ``K`` additional
    times, projects every propagated signal with its own kernel slice, and
    combines the results using the coefficients from ``get_bern_coeff``.

    Args:
        channels: number of output channels.
        K: degree of the Bernstein polynomials (number of propagation /
            projection steps after the first one).
        activation, use_bias, kernel_initializer, bias_initializer,
        kernel_regularizer, bias_regularizer, activity_regularizer,
        kernel_constraint, bias_constraint, **kwargs:
            forwarded unchanged to the Spektral ``Conv`` base class.

    Notes:
        * ``call`` adds the un-projected first term (width = input features)
          to terms projected to ``channels``, and every kernel slice has
          ``input_dim`` rows, so the input feature width has to equal
          ``channels`` for the shapes to line up.
        * The bias weight is created in ``build`` but neither the bias nor the
          activation is applied in ``call``.
        * NOTE(review): the Bernstein basis polynomials of a given degree sum
          to 1, so adding all ``K + 1`` bases with unit weight (as ``call``
          does) cancels every term except the first one. A learnable weight
          per basis was probably intended — confirm against the reference
          model before relying on this layer.
    """

    def __init__(
        self,
        channels,
        K=1,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs
    ):
        super().__init__(
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs
        )
        self.channels = channels
        self.K = K

    def build(self, input_shape):
        """Create one ``(input_dim, channels)`` kernel per step and the bias."""
        assert len(input_shape) >= 2
        # input_shape is [x_shape, a_shape]; the feature width is the last
        # dimension of x.
        input_dim = input_shape[0][-1]

        # One learnable projection matrix per propagation step.
        self.kernel = self.add_weight(
            shape=(self.K, input_dim, self.channels),
            initializer=self.kernel_initializer,
            name="kernel",
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
        )
        if self.use_bias:
            self.bias = self.add_weight(
                shape=(self.channels,),
                initializer=self.bias_initializer,
                name="bias",
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
            )
        self.built = True

    def call(self, inputs, mask=None):
        """Apply the filter to ``inputs = [x, a]`` and return the node signal.

        ``mask`` is accepted for API compatibility and is not used.
        """
        x, a = inputs

        # First term: the features propagated once through `a`.
        propagated = ops.modal_dot(a, x)
        Bx = [propagated]
        for i in range(self.K):
            # Gather messages from one hop further, then project them with the
            # i-th kernel slice. (Previously the propagated result was stored
            # in a differently-cased, unused variable, so no hop beyond the
            # first was ever taken.)
            propagated = ops.modal_dot(a, propagated)
            propagated = KB.dot(propagated, self.kernel[i])
            Bx.append(propagated)

        bern_coeff = BernConv.get_bern_coeff(self.K)

        # Accumulate every Bernstein basis: basis k is the combination of the
        # propagated signals weighted by that basis' monomial coefficients.
        out = tf.keras.backend.zeros_like(x)
        for k in range(self.K + 1):
            coeff = bern_coeff[k]
            basis = Bx[0] * coeff[0]
            for i in range(1, self.K + 1):
                basis += Bx[i] * coeff[i]
            out += basis
        return out

    @property
    def config(self):
        """Layer-specific settings exposed for serialisation."""
        return {"channels": self.channels, "K": self.K}

    @staticmethod
    def preprocess(a):
        """Return the rescaled normalized Laplacian of adjacency matrix ``a``."""
        a = normalized_laplacian(a)
        a = rescale_laplacian(a)
        return a

    @staticmethod
    def get_bern_coeff(degree):
        """Monomial coefficients of the Bernstein basis polynomials.

        Args:
            degree: degree ``n`` of the Bernstein basis.

        Returns:
            A list of ``degree + 1`` NumPy arrays; entry ``i`` holds the
            coefficients (lowest power first) of
            ``comb(n, i) * x**i * (1 - x)**(n - i)``.
        """

        def Bernstein(de, i):
            # comb(de, i) * x**i, written as a polynomial with i leading zeros.
            coefficients = [0, ] * i + [comb(de, i)]

            first_term = polynomial.polynomial.Polynomial(coefficients)
            # (1 - x) ** (de - i)
            second_term = polynomial.polynomial.Polynomial([1, -1]) ** (de - i)
            return first_term * second_term

        return [Bernstein(degree, i).coef for i in range(degree + 1)]

### III. Train Model

Model Parameters

In [None]:
# Parameters for the network.

# --- Architecture / regularisation ---
channels = 64      # Number of channels in the first layer
K = 5              # Max degree of the Bernstein polynomials
dropout = 0.3      # Dropout rate for the features
l2_reg = 2.5e-4    # L2 regularization rate

# --- Optimisation ---
learning_rate = 5e-4   # Learning rate
epochs = 2000          # Number of training epochs
patience = 200         # Patience for early stopping

# --- Values read from the dataset ---
a_dtype = dataset[0].a.dtype        # dtype of the adjacency matrix
N = dataset.n_nodes                 # Number of nodes in the graph
F = dataset.n_node_features         # Original size of node features
n_out = dataset.n_labels            # Number of classes

In [None]:
# lambda function used to conduct operations between two keras layers. 
def custom_layer(inputs):
    """Combine stacked filter outputs into one signal using per-filter scores.

    Args:
        inputs: pair ``(a, b)`` where ``a`` is indexed as
            ``a[:, filter, :]`` (stacked filter outputs) and ``b`` is indexed
            as ``b[:, filter]`` (the score of each filter for each node).

    Returns:
        The sum over the filter axis of ``a[:, i, :] * b[:, i]``.

    The number of filters is read from ``a.shape[1]`` instead of being fixed
    at two, so the function keeps working if more filters are stacked. That
    dimension must be statically known (it is when the filters are stacked
    from a Python list).
    """
    a, b = inputs
    n_filters = a.shape[1]
    res = a[:, 0, :] * b[:, 0]
    for i in range(1, n_filters):
        res = res + a[:, i, :] * b[:, i]
    return res

Model Definition

In [None]:
# Model inputs.
x_in = Input(shape=(F,))  # F is the number of node features
a_in = Input((N,), sparse=True, dtype=a_dtype)  # N is the number of nodes; sparse adjacency input

# Two dense layers applied to the node features before the graph filters.
den1 = Dense(channels, activation="relu")(x_in)  # F -> channels, ReLU
den2 = Dense(channels)(den1)  # channels -> channels, linear

# Two separate filters (intended as one high-pass and one low-pass filter);
# their parameters are learned by the model.
# The polynomial degree must be passed as upper-case `K`: the layer's parameter
# is named `K`, so a lower-case `k` never reaches it and the degree silently
# stays at its default of 1.
# NOTE(review): no cell in view applies BernConv.preprocess to the adjacency
# matrix (e.g. through a dataset transform) — confirm whether the raw adjacency
# is really what the filters should receive.
conv1 = BernConv(channels, K=K)
conv2 = BernConv(channels, K=K)

# Alternative filters:
# conv1 = ChebConv(channels, K=K)
# conv2 = ChebConv(channels, K=K)

h1 = conv1([den2, a_in])
h2 = conv2([den2, a_in])
h_list = [h1, h2]

# Filters are stacked together along a new axis 1: (nodes, filters, channels).
h_filters = keras.backend.stack(h_list, axis=1)

print(h_filters)


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


KerasTensor(type_spec=TensorSpec(shape=(None, 2, 64), dtype=tf.float32, name=None), name='tf.stack/stack:0', description="created by layer 'tf.stack'")


In [None]:
# Node-level attention: h_filters_proj and x_proj are tanh-activated linear
# projections of the stacked filter outputs and of the node features.
h_filters_proj = Dense(channels, activation="tanh")(h_filters)
x_proj = Dense(channels, activation="tanh")(den2)
# Add a trailing axis so x_proj can be batch-multiplied with the filter axis.
x_proj = tf.keras.backend.expand_dims(x_proj, axis=-1)

# One logit per (node, filter); the softmax over axis 1 normalises the scores
# across the filters of each node.
score_logit = tf.keras.backend.batch_dot(h_filters_proj, x_proj)
softmax = tf.keras.layers.Softmax(axis=1)
score = softmax(score_logit)

# Score-weighted sum of the filter outputs.
lmbda = tf.keras.layers.Lambda(custom_layer)
res = lmbda([h_filters, score])

# Dropout uses the configured `dropout` rate (0.3 in the parameter cell); the
# rate was previously hard-coded to 0.1, contradicting both the setting and
# the original comment.
do1 = Dropout(rate=dropout)(res)
den3 = Dense(n_out, activation="softmax")(do1)

Model Building

In [None]:
from tensorflow.keras.metrics import AUC
from sklearn.metrics import average_precision_score

model = Model(inputs=[x_in, a_in], outputs=den3)
# `learning_rate` is the supported argument name; the `lr` alias is deprecated
# and rejected by newer Keras releases.
optimizer = Adam(learning_rate=learning_rate)


def pr_auc(y_true, y_pred):
    """Average precision (area under the PR curve) computed with scikit-learn.

    Wrapped in tf.py_function so the NumPy-based scikit-learn routine can be
    called from inside the Keras metric pipeline; returns a float64 tensor.
    """
    return tf.py_function(average_precision_score, (y_true, y_pred), tf.float64)


# NOTE(review): ROC_auc and pr_auc are passed through `metrics`, not
# `weighted_metrics`, so unlike accuracy they are presumably not restricted to
# the masked nodes by the sample weights — confirm this is intended.
model.compile(
    optimizer=optimizer,
    # The sample weights of each split sum to one (see mask_to_weights), so
    # summing the weighted losses yields the mean loss over that split.
    loss=CategoricalCrossentropy(reduction="sum"),
    weighted_metrics=["accuracy"],
    # metrics=[roc_auc,pr_auc]
    metrics=[AUC(name='ROC_auc'), pr_auc])  # to compute auc-roc and pr score.

model.summary()



Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 93)]         0           []                               
                                                                                                  
 dense (Dense)                  (None, 64)           6016        ['input_1[0][0]']                
                                                                                                  
 dense_1 (Dense)                (None, 64)           4160        ['dense[0][0]']                  
                                                                                                  
 input_2 (InputLayer)           [(None, 46564)]      0           []                               
                                                                                            

Model Training

In [None]:
# Loaders feed the single graph to the model; the training and validation
# loaders differ only in the sample weights that select the nodes to score.
loader_tr, loader_va = (
    SingleLoader(dataset, sample_weights=split_weights)
    for split_weights in (weights_tr, weights_va)
)

In [None]:
# Stop once the monitored quantity has not improved for `patience` epochs and
# roll the model back to its best weights.
early_stopping = EarlyStopping(patience=patience, restore_best_weights=True)

model.fit(
    loader_tr.load(),
    epochs=epochs,
    steps_per_epoch=loader_tr.steps_per_epoch,
    validation_data=loader_va.load(),
    validation_steps=loader_va.steps_per_epoch,
    callbacks=[early_stopping],
)

Epoch 1/2000




Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000
Epoch 31/2000
Epoch 32/2000
Epoch 33/2000
Epoch 34/2000
Epoch 35/2000
Epoch 36/2000
Epoch 37/2000
Epoch 38/2000
Epoch 39/2000
Epoch 40/2000
Epoch 41/2000
Epoch 42/2000
Epoch 43/2000
Epoch 44/2000
Epoch 45/2000
Epoch 46/2000
Epoch 47/2000
Epoch 48/2000
Epoch 49/2000
Epoch 50/2000
Epoch 51/2000
Epoch 52/2000
Epoch 53/2000
Epoch 54/2000
Epoch 55/2000
Epoch 56/2000
Epoch 57/2000
Epoch 58/2000
Epoch 59/2000
Epoch 60/2000
Epoch 61/2000
Epoch 62/2000
Epoch 63/2000
Epoch 64/2000
Epoch 65/2000
Epoch 66/2000
Epoch 67/2000
Epoch 68/2000
Epoch 69/2000
Epoch 70/2000
Epoch 71/2000
Epoch 72/2000
Epoch 73/2000


<keras.callbacks.History at 0x7f19b00773a0>

### IV. Model Evaluation

Accuracy and Loss in Model predictions

In [None]:
print("Evaluating model.")
loader_te = SingleLoader(dataset, sample_weights=weights_te)

# Ask Keras for a {metric name: value} mapping. The model is compiled with a
# loss plus three metrics, so formatting the positional result list into
# "loss" / "accuracy" placeholders labels whichever value happens to come
# second as accuracy; printing by name avoids that mislabelling.
eval_metrics = model.evaluate(
    loader_te.load(),
    steps=loader_te.steps_per_epoch,
    return_dict=True,
)
# Keep the list form available for any code that expects it.
eval_results = list(eval_metrics.values())

print("Done.")
for metric_name, metric_value in eval_metrics.items():
    print("Test {}: {}".format(metric_name, metric_value))

Evaluating model.
Done.
Test loss: 0.39874526858329773
Test accuracy: 0.8848407864570618
