# Anomaly Detection with Temporal-GNN
## Demo
___

In [3]:
import sys
sys.path.append('../')

from scripts.utils import (generate_val_test_lab_nodes, 
                           generate_val_test_lab_edges)
from scripts.tgnn import GCN_GRU
from torch import load as tc_load
from sklearn.metrics import roc_auc_score
import torch
from matplotlib import pyplot as plt

# True when torch reports CUDA as available; forwarded to the GCN_GRU
# constructors below through their `cuda=` argument.
CUDA = torch.cuda.is_available()

### Dataset Preparation

In [4]:
# Dataset to run the demo on; selects both the input files and the
# per-dataset settings below.
dset = "reddit"

# Per-dataset settings: dataset name -> (hist, bipartite).
_DSET_SETTINGS = {
    "reddit": (10, True),
    "webbrowsing": (0, True),
    "stackoverflow": (10, False),
    "uci": (0, False),
}

# Fail early, before touching the disk, with a clear message instead of the
# NameError an unmatched if/elif chain would produce further down.
try:
    hist, bipartite = _DSET_SETTINGS[dset]
except KeyError:
    raise ValueError(
        f"Unknown dataset {dset!r}; expected one of {sorted(_DSET_SETTINGS)}"
    ) from None

# NOTE: tc_load is torch.load, which unpickles the file contents -- only
# point these paths at trusted data.
X = tc_load(f"../data/adjs_anom_{dset}")
anomalies_edges_idx = tc_load(f"../data/anomalies_edges_idx_{dset}")
anomalies_nodes_idx = tc_load(f"../data/anomalies_nodes_idx_{dset}")

# Total number of nodes
n_nodes = X[0].shape[0]

# Time-step settings shared by every dataset: training start/end and
# validation, plus `test`, which is the last index of X.
train_start, train_end, val, test = 9, 19, 5, len(X) - 1

# Bundle handed to GCN_GRU through its `splits=` argument.
splits = (hist, train_start, train_end, val, test)

Set hyperparameters

In [5]:
# --- Layer sizes ---
nout = 1024        # GCN output size
nout_gru = 128     # GRU output size
nhid_gcn = 128     # GCN hidden size (only relevant when n_layers > 1)
nhid_edges = 64    # Hidden size of the edge anomaly score
nhid_nodes = 64    # Hidden size of the node anomaly score

# --- GCN settings ---
dropout = 0.0      # GCN dropout
n_layers = 2       # Number of GCN layers

# --- Sampling and labelling ---
ns = 1             # Proportion of negative samples for nodes
ns_edge = 1        # Proportion of negative samples for edges (GCN-GRU-edges only)
anomalies_thr = 3  # Number of anomalous edges that make a node anomalous

# --- Optimisation ---
lr = 0.001         # Learning rate
epochs = 10        # Number of epochs

In [6]:
# Values used for this demo run; these replace the sizes assigned in the
# hyperparameter cell. λ is passed as LAMBDA to the multitask ('both') model.
nhid_gcn = 1024
nout = 1024
nout_gru = 32
nhid_edges = 64
nhid_nodes = 32
λ = .3

## Node Anomaly Detection
___
Train and evaluate a node-only model

In [7]:
# Node labels for the validation and test splits only; training is
# self-supervised, so no training labels are generated.
y_val, y_test = generate_val_test_lab_nodes(
    X,
    train_end,
    val,
    test,
    anomalies_edges_idx,
    anomalies_thr=anomalies_thr,
    anomalies_nodes_idx=anomalies_nodes_idx,
    bipartite=bipartite,
)

T-GNN Training

In [None]:
# Initialize the node-only model (entities='nodes')
model = GCN_GRU(
    n=n_nodes,
    entities='nodes',
    ns=ns,
    splits=splits,
    epochs=epochs,
    bipartite=bipartite,
    nout=nout,
    nout_gru=nout_gru,
    nhid_gcn=nhid_gcn,
    nhid_nodes=nhid_nodes,
    dropout=dropout,
    n_layers=n_layers,
    lr=lr,
    cuda=CUDA,
)
# Run the self-supervised training, passing the validation labels along
model.fit(X, y_val, dset=dset, save=False)

T-GNN Test

In [None]:
# Score the nodes, then compare against the test labels with ROC AUC
y_pred = model.predict(X, save=False, ret_emb=False)
test_auc = roc_auc_score(
    y_true=y_test.numpy(),
    y_score=y_pred.cpu().numpy(),
)
print(f'Test AUC: {test_auc}')

## Edge Anomaly Detection
___
Train and evaluate an edge-only model

In [13]:
# Edge labels for the validation and test splits only; training is
# self-supervised, so no training labels are generated.
y_val, y_test = generate_val_test_lab_edges(
    X,
    train_end,
    val,
    test,
    anomalies_edges_idx,
)

T-GNN Training

In [None]:
# Initialize the edge-only model (entities='edges').
# ns_edge comes from the hyperparameter cell rather than a hard-coded 1, so
# changing it there actually reaches the model.
model = GCN_GRU(
    n=n_nodes,
    entities='edges',
    ns=ns,
    splits=splits,
    epochs=epochs,
    bipartite=bipartite,
    ns_edge=ns_edge,
    nout=nout,
    nout_gru=nout_gru,
    nhid_gcn=nhid_gcn,
    dropout=dropout,
    n_layers=n_layers,
    lr=lr,
    cuda=CUDA,
    nhid_edges=nhid_edges,
)
# Run the self-supervised training, passing the validation labels along
model.fit(X, y_val, dset=dset, save=False)

In [None]:
# Score the edges (the test labels are handed to predict on the CPU), then
# compare against the test labels with ROC AUC
y_pred = model.predict(X, y=y_test.cpu(), save=False, ret_emb=False)
test_auc = roc_auc_score(
    y_true=y_test.numpy(),
    y_score=y_pred.cpu().numpy(),
)
print(f'Test AUC: {test_auc}')

## Both
___
Train and evaluate a *multitask* model for edge and node anomaly detection

In [16]:
# Validation/test labels for each task
y_val_edges, y_test_edges = generate_val_test_lab_edges(
    X, train_end, val, test, anomalies_edges_idx,
)
y_val_nodes, y_test_nodes = generate_val_test_lab_nodes(
    X,
    train_end,
    val,
    test,
    anomalies_edges_idx,
    anomalies_thr=anomalies_thr,
    anomalies_nodes_idx=anomalies_nodes_idx,
    bipartite=bipartite,
)
# The multitask model takes labels as (edges, nodes) pairs
y_val, y_test = (y_val_edges, y_val_nodes), (y_test_edges, y_test_nodes)

In [None]:
# Initialize the multitask model (entities='both'); λ is passed as LAMBDA
model = GCN_GRU(
    n=n_nodes,
    entities='both',
    ns=ns,
    splits=splits,
    epochs=epochs,
    LAMBDA=λ,
    bipartite=bipartite,
    nout=nout,
    nout_gru=nout_gru,
    nhid_gcn=nhid_gcn,
    nhid_edges=nhid_edges,
    nhid_nodes=nhid_nodes,
    dropout=dropout,
    n_layers=n_layers,
    lr=lr,
    cuda=CUDA,
)
# Run the self-supervised training, passing the (edges, nodes) validation labels
model.fit(X, y_val, dset=dset, save=False)

In [None]:
# Predict: the multitask model returns an (edges, nodes) pair of scores,
# matching the (edges, nodes) layout of y_test.
y_pred = model.predict(X, y=y_test, save=False, ret_emb=False)
y_pred_edges, y_pred_nodes = y_pred
y_edges, y_nodes = y_test
test_auc_edges = roc_auc_score(y_edges.numpy(), y_pred_edges.cpu().numpy())
test_auc_nodes = roc_auc_score(y_nodes.numpy(), y_pred_nodes.cpu().numpy())
# `{test_auc_nodes, 3}` formatted a tuple and printed "(auc, 3)"; round the
# value to 3 decimals instead.
print(f'Test AUC edges: {test_auc_edges} Test AUC nodes: {round(test_auc_nodes, 3)}')