RTGNN vs. greedy binarized attack model

- this model should utilize the greedy binarized attack (white box poison) on a dataset to hide (potential) anomalies or poison in a datset, such as CORA. 
-   This should produce a dataset which contains hidden anomalies
- After obtaining a dataset with disguised poison (hidden anomalies) --> use RTGNN model to create a model that is unaffected/ robust to these anomalies
-   Like DOMINANT, RTGNN also makes use of an Autoencoder (encoder - decoder)?
-   

Goal: See if RTGNN is able to withstand or counteract a binarized attack on the input data

trying to figure out whether RTGNN would be useful or not, against a dataset with disguised poison (hidden anomalies)

In [1]:
import sys
import os
import numpy as np
import pygod
from pygod.utils import load_data
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
import torch_sparse
from torch_sparse import SparseTensor
from typing import List
from sklearn.metrics import roc_auc_score
# from gad_adversarial_robustness.gad.dominant import dominant
from gad_adversarial_robustness.utils.graph_utils import prepare_graph, get_n_anomaly_indexes, load_anomaly_detection_dataset
from gad_adversarial_robustness.poison.greedy import multiple_AS, poison_attack

import argparse
import scipy.sparse as sp
from scipy.sparse import coo_matrix
from torch_geometric.utils.convert import from_scipy_sparse_matrix


In [2]:
NUM_CLASSES = 2

PRELOADED_EDGE_INDEX = True
EDGE_INDEX_PT = "300_budget_greedy_edge_index.pt"

In [3]:
from gad_adversarial_robustness.gad.RTGNN.utils import noisify_with_P
from gad_adversarial_robustness.gad.RTGNN.dataset import Dataset
from gad_adversarial_robustness.gad.RTGNN.model.RTGNN import RTGNN

In [4]:
script_dir = os.path.abspath('')
dataset_caching_path = os.path.join(script_dir, '..', '..', '..', 'data')

# import dataset from pygod
clean_data: Data = load_data("inj_cora", dataset_caching_path)
poisoned_data: Data = load_data("inj_cora", dataset_caching_path)

In [5]:
print(poisoned_data)

# x = [2708, 1433] : our node feature matrix of the shape [number of nodes, number of features]
# edge_index = [2, 11060] : our graph connectivity matrix of the shape [2, number of edges]
# y = [2708] : the node ground truth labels 
# train_mask = [2708] : an optional attribute that says which node should be used for training, with a list of True or False statements
# etc... in this case, the train_mask, vel_mask and test_mask have the same size....

Data(x=[2708, 1433], edge_index=[2, 11060], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])


In [6]:
print(f'Edges are directed: {poisoned_data.is_directed()}')
print(f'Graph has isolated nodes: {poisoned_data.has_isolated_nodes()}')
print(f'Graph has loops: {poisoned_data.has_self_loops()}')

Edges are directed: True
Graph has isolated nodes: False
Graph has loops: False


In [7]:
print(poisoned_data)
print('------------')
print(f'Number of graphs: {len(poisoned_data)}')
print(f'Number of features: {poisoned_data.num_features}')
# print(f'Number of classes: {poisoned_data.num_classes}') ### does not have an attribute for the number of classes

Data(x=[2708, 1433], edge_index=[2, 11060], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
------------
Number of graphs: 6
Number of features: 1433


In [8]:
print(f'edge_index = {poisoned_data.edge_index.shape}')
print(poisoned_data.edge_index)

# the graph's connections are stored in two lists (11060 directed edges, which equate to 5530 bidirectional edges).

edge_index = torch.Size([2, 11060])
tensor([[   0,    0,    0,  ...,  869,  127, 1674],
        [ 633, 1862, 2582,  ..., 1732,  214,  438]])


In [9]:
##### -->  the method of obtaining dense_adj matrix from edge_index tensor #####
edge_weight = torch.ones(poisoned_data.edge_index.size(1))
edge_weight = edge_weight.cpu()
print(edge_weight.shape) # 11060

adj = sp.csr_matrix((edge_weight, poisoned_data.edge_index), (poisoned_data.num_nodes, poisoned_data.num_nodes))
print(adj.size) # 11054

adj = torch_sparse.SparseTensor.from_scipy(adj).coalesce().to("cpu")
# adj at this point:
#   SparseTensor(row=tensor([   0,    0,    0,  ..., 2707, 2707, 2707]), 
#       col=tensor([ 633, 1862, 2582,  ...,  598, 1473, 2706]), 
#       val=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(2708, 2708), nnz=11054, density=0.15%)

# adj matrix based on edge_index
poisoned_data.adj = adj.to_dense() # produces tensor of shape: [2708, 2708]

torch.Size([11060])
11054


In [10]:
##############################################################################################

# np_adj = poisoned_data.adj.detach().cpu().numpy()
# sparse_adj = coo_matrix(np_adj)
# edge_index = from_scipy_sparse_matrix(sparse_adj)[0]
# edge_index.shape

##############################################################################################

In [11]:
##############################################################################################
# # Convert the dense tensor to a sparse tensor
# sparse_adj = poisoned_data.adj.to_sparse()
# print("sparse adj: ",sparse_adj.shape) # shape: [2708, 2708]

# # Convert to a sparse COO tensor
# sparse_COO = torch.sparse_coo_tensor(sparse_adj._indices(), sparse_adj._values(), sparse_adj.size())
# print("sparse COO: ",sparse_COO.shape)

# # Convert the sparse COO tensor to a dense edge_index tensor
# edge_index2 = sparse_COO.to_dense()
# print("dense coo: ", edge_index2.shape)

# # Reshape the edge_index tensor to [2, num_edges]
# edge_index2 = edge_index2.view(2, -1)

# edge_index2.shape
##############################################################################################

In [12]:
print(poisoned_data.adj)
print(poisoned_data.adj.shape)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 1., 0.]])
torch.Size([2708, 2708])


In [13]:
### compute new or load in poisoned data
if PRELOADED_EDGE_INDEX is False :

    # truth, of type int list, is instantiated to the T/F labels indicating whether a node is an anomalous node
    truth: List[int] = poisoned_data.y.bool()

    print("Create poison compatible adjacency matrix...") # based on code from: https://github.com/zhuyulin-tony/BinarizedAttack/blob/main/src/Greedy.py
    triple = []
    for i in range(poisoned_data.num_nodes): # Cora has 2708 nodes
        for j in range(i + 1, poisoned_data.num_nodes):
            triple.append([i, j, poisoned_data.adj[i,j]])  #Fill with 0, then insert actual after

    # convert tripple to numpy array
    triple = np.array(triple)

    # These are the nodes we try reduce the "active subnetwork score" for (i.e. disguising anonomalous nodes)
    target_node_lst = get_n_anomaly_indexes(truth, 999) # the indexes of the anomalies

    # print(type(target_node_lst)), print(f'target node list: {target_node_lst}'), print(target_node_lst)

    print("Making model...")
    model = multiple_AS(target_lst = target_node_lst, n_node = poisoned_data.num_nodes, device = 'cpu')
    budget = 100  # The amount of edges to change


    print("Starting attack...")
    adj_adversary, _, _ = poison_attack(model, triple, budget)


    print("Converting to compatible tensor...")

    # Create Edge Index'
    edge_index = torch.tensor([[],[]])

    # Transpose it to make shape compatible
    transposed_adj_adversary = torch.transpose(adj_adversary, 0, 1)

    for i in range(len(adj_adversary)):
        if(adj_adversary[i][2] != 0):   #If edge value is not 0 (no edge)
            #Add edge to edge index, choosing first 2 elements (edges), and then the ith edge
            edge_index = torch.cat((edge_index, transposed_adj_adversary[:2, i:i+1]), -1)
            # Dataset uses edges both ways so add reverse edge as well
            edge_index = torch.cat((edge_index, torch.flip(transposed_adj_adversary[:2, i:i+1], dims=[0])), -1)


    edge_index = edge_index.type(torch.int64)
    poisoned_data.edge_index = edge_index # assign to dataset obj

else : 
    poisoned_data.edge_index = torch.load(EDGE_INDEX_PT)

In [14]:
print(poisoned_data)
print(clean_data)

Data(x=[2708, 1433], edge_index=[2, 10486], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], adj=[2708, 2708])
Data(x=[2708, 1433], edge_index=[2, 11060], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])


## RTGNN

In [15]:
# define the args for the model

parser = argparse.ArgumentParser()

######## the following are the parameters defined in the RTGNN main file ########

parser.add_argument('--seed', type=int, default=12, help='Random seed.')
parser.add_argument('--weight_decay', type=float, default=5e-4,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=128,
                    help='Number of hidden units.')
parser.add_argument('--edge_hidden', type=int, default=64,
                    help='Number of hidden units of MLP graph constructor')
parser.add_argument('--dropout', type=float, default=0.5,
                    help='Dropout rate (1 - keep probability).')
# parser.add_argument('--dataset', type=str, default="cora",
#                     choices=['cora', 'citeseer','blogcatalog'], help='dataset')
parser.add_argument('--ptb_rate', type=float, default=0.3,
                    help="noise ptb_rate")
parser.add_argument('--epochs', type=int,  default=200,
                    help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.001,
                    help='Initial learning rate.')
parser.add_argument('--alpha', type=float, default=1,
                    help='loss weight of graph reconstruction')
parser.add_argument('--tau',type=float, default=0.05,
                    help='threshold of filtering noisy edges')
parser.add_argument('--th',type=float, default=0.95,
                    help='threshold of adding pseudo labels')
parser.add_argument("--K", type=int, default=100,
                    help='number of KNN search for each node')
parser.add_argument("--n_neg", type=int, default=100,
                    help='number of negitive sampling for each node')
parser.add_argument('--noise', type=str, default='uniform', choices=['uniform', 'pair'],
                    help='type of noises')
parser.add_argument('--decay_w', type=float, default=0.1,
                    help='down-weighted factor')
parser.add_argument('--co_lambda',type=float,default=0.1,
                     help='weight for consistency regularization term')

_StoreAction(option_strings=['--co_lambda'], dest='co_lambda', nargs=None, const=None, default=0.1, type=<class 'float'>, choices=None, required=False, help='weight for consistency regularization term', metavar=None)

In [16]:
args = parser.parse_known_args()[0]
print(args)

Namespace(seed=12, weight_decay=0.0005, hidden=128, edge_hidden=64, dropout=0.5, ptb_rate=0.3, epochs=200, lr=0.001, alpha=1, tau=0.05, th=0.95, K=100, n_neg=100, noise='uniform', decay_w=0.1, co_lambda=0.1)


In [17]:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cpu')
# data = Dataset(root='./data', name=args.dataset)

### insert the data that was attacked by greedy BinarizedAttack
data = poisoned_data

In [18]:
# initialize adj matrix, node features and labels
adj = data.adj
features = data.x
labels = data.y.bool() # converts to true / false

# noise perturbation rate --> it controls the amount of noise added to the training and validation labels
ptb = args.ptb_rate 

# initalize number of classes in dataset ---> hardcoded
nclass = NUM_CLASSES + 1 
args.class_num=nclass


#### train validation test split

# Assuming data.y.shape == (2708,)
num_nodes = data.y.shape[0]

# --- create new masks with 6/2/2 split ---
    # Determine the split ratios (e.g., 0.6, 0.2, 0.2)
train_ratio = 0.6
val_ratio = 0.2
test_ratio = 0.2

    # Create a random permutation of node indices
node_indices = torch.randperm(num_nodes)

    # Calculate the split indices
train_size = int(num_nodes * train_ratio)
val_size = int(num_nodes * val_ratio)
test_size = num_nodes - train_size - val_size

    # Create new masks based on the split indices
new_train_mask = torch.zeros(num_nodes, dtype=torch.bool)
new_train_mask[node_indices[:train_size]] = True

new_val_mask = torch.zeros(num_nodes, dtype=torch.bool)
new_val_mask[node_indices[train_size:train_size+val_size]] = True

new_test_mask = torch.zeros(num_nodes, dtype=torch.bool)
new_test_mask[node_indices[train_size+val_size:]] = True

# Assign the new masks to the data object
data.train_mask = new_train_mask
data.val_mask = new_val_mask
data.test_mask = new_test_mask


# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Extract the new indices for the training, validation, and test sets
idx_train = np.where(data.train_mask == True)[0]
idx_val = np.where(data.val_mask == True)[0]
idx_test = np.where(data.test_mask == True)[0]

# from the RTGNN github ----------------------------------------------
train_labels = labels[idx_train]
val_labels = labels[idx_val]

# Concatenating training and validation labels
train_val_labels = np.concatenate([train_labels, val_labels],axis=0)

# Concatenating training and validation masks
idx = np.concatenate([idx_train, idx_val],axis=0)
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# OLD CODE ----------------------------------------------------------------
# # Extracting labels for training, validation, and test data using masks
# train_labels = labels[data.train_mask]
# val_labels = labels[data.val_mask]
# test_labels = labels[data.test_mask]

# # Concatenating training and validation labels
# train_val_labels = torch.cat([train_labels, val_labels], dim=0)

# # Concatenating training and validation masks
# idx = torch.cat([data.train_mask, data.val_mask], dim=0)

# # convert tensors to numpy arrays
# train_val_labels = train_val_labels.cpu().detach().numpy()
# idx = idx.cpu().detach().numpy()

# # convert the T/F labels to integers --> used in the following call of "noisyfy_with_P"
train_val_labels_int = train_val_labels.astype(int)
# idx_int = idx.astype(int)

In [19]:
print(idx)
print(idx.shape)
print(train_val_labels)
print(train_val_labels.shape)
print(data.val_mask.shape)
print(data.train_mask.shape)

print()

print(idx_train)
print(idx_train.shape)

[   4    5    7 ... 2689 2696 2700]
(2165,)
[False False False ... False False False]
(2165,)
torch.Size([2708])
torch.Size([2708])

[   4    5    7 ... 2705 2706 2707]
(1624,)


In [20]:
# Adding noise to the concatenated labels and getting the noise indices and clean indices
# noise_y, P, noise_idx, clean_idx = noisify_with_P(train_val_labels, data.train_mask.shape[0], nclass, ptb, 10, args.noise)
noise_y, P, noise_idx, clean_idx = noisify_with_P(train_val_labels_int, idx_train.shape[0], nclass, ptb, 10, args.noise)

args.noise_idx, args.clean_idx = noise_idx, clean_idx

Uniform noise
probability matrix P:
 [[0.7  0.15 0.15]
 [0.15 0.7  0.15]
 [0.15 0.15 0.7 ]]
count: 636


In [21]:
noise_labels = labels.to(torch.int).clone() # size = 2708

# size of noisy_y = 2165 --> which is the total amount of labels (2708) minus the test_labels (543)
# # set the noisy labels
# for i in range(noise_y.shape[0]):
    
#     noise_labels[idx[i]] = noise_y[i]

# set the noisy labels (need to convert from np to torch tensor)
noise_labels[idx] = torch.from_numpy(noise_y)

In [22]:
np.random.seed(args.seed)
torch.manual_seed(args.seed)
# torch.cuda.manual_seed(args.seed)
torch.manual_seed(args.seed)


# instatiate the model
model = RTGNN(args, device)

In [23]:
# # Convert the dense tensor to a sparse tensor
# sparse_adj = adj.to_sparse()

# # Convert to a sparse COO tensor
# sparse_COO = torch.sparse_coo_tensor(sparse_adj._indices(), sparse_adj._values(), adj.size())

# # Convert the sparse COO tensor to a dense edge_index tensor
# edge_index = sparse_COO.to_dense()

# # Reshape the edge_index tensor to [2, num_edges]
# edge_index = edge_index.view(2, -1)

# edge_index.shape

In [24]:
# adj = adj.detach().cpu().numpy()

In [25]:
# train --------------------------------------------------
# model fit : features, adj, noise_labels, real labels, 
torch.autograd.set_detect_anomaly(True)
model.fit(features, adj, noise_labels, labels, idx_train, idx_val, noise_idx, clean_idx) 

edge_index: 
 torch.Size([2, 11054])
=====Train Accuray=====
Epoch 0: #1 = 0.562808, #2= 0.531404




=====Validation Accuray=====
Epoch 0: #1 = 0.506470, #2= 0.604436
=====Train Accuray=====
Epoch 1: #1 = 0.547414, #2= 0.527094
=====Validation Accuray=====
Epoch 1: #1 = 0.506470, #2= 0.604436
=====Train Accuray=====
Epoch 2: #1 = 0.522167, #2= 0.506158
=====Validation Accuray=====
Epoch 2: #1 = 0.506470, #2= 0.604436
=====Train Accuray=====
Epoch 3: #1 = 0.538793, #2= 0.525246
=====Validation Accuray=====
Epoch 3: #1 = 0.506470, #2= 0.604436
=====Train Accuray=====
Epoch 4: #1 = 0.514778, #2= 0.524015
=====Validation Accuray=====
Epoch 4: #1 = 0.506470, #2= 0.604436
=====Train Accuray=====
Epoch 5: #1 = 0.551108, #2= 0.529557
=====Validation Accuray=====
Epoch 5: #1 = 0.506470, #2= 0.604436
=====Train Accuray=====
Epoch 6: #1 = 0.540640, #2= 0.516626
=====Validation Accuray=====
Epoch 6: #1 = 0.506470, #2= 0.604436
=====Train Accuray=====
Epoch 7: #1 = 0.540025, #2= 0.535714
=====Validation Accuray=====
Epoch 7: #1 = 0.506470, #2= 0.604436
=====Train Accuray=====
Epoch 8: #1 = 0.50554

In [26]:
# model test
model.test(idx_test) # test

Test Accuray: #1 = 0.604052, #2= 0.747698


tensor(0.6759, dtype=torch.float64)