## GNNExplainer baseline

Steps of the GNNExplainer baseline (according to the paper):
1. Identify the S most relevent edges in prediction.
2. Remove these edges
3. S in {1,2,3,4,5,GT}

(GT is 6 for Syn1, 6 for Syn4 and 12 for Syn5, according to the paper)

In [5]:
# import needed packages and files (from original code)
from gnnexplainer import explain
import torch
import numpy as np
from gcn import *
from gcn_perturbation_matrix import *
from sklearn.metrics import accuracy_score
from calculate_metrics import *
import pickle

In [6]:
with open('data/syn1.pickle','rb') as pickle_file: 
    data_syn1 = pickle.load(pickle_file)

with open('data/syn4.pickle','rb') as pickle_file:
    data_syn4 = pickle.load(pickle_file)
    
with open('data/syn5.pickle','rb') as pickle_file:
    data_syn5 = pickle.load(pickle_file)

In [7]:
# squeeze the labels (as it has a singleton dim and then make it a tensor)
labels_syn1 = np.squeeze(data_syn1['labels'])
labels_syn1 = torch.tensor(labels_syn1)

labels_syn4 = np.squeeze(data_syn4['labels'])
labels_syn4 = torch.tensor(labels_syn4)

labels_syn5 = np.squeeze(data_syn5['labels'])
labels_syn5 = torch.tensor(labels_syn5)

# same for features, but define the type of data here
features_syn1 = np.squeeze(data_syn1['feat'])
features_syn1 = torch.tensor(features_syn1, dtype=torch.float)

features_syn4 = np.squeeze(data_syn4['feat'])
features_syn4 = torch.tensor(features_syn4, dtype=torch.float)

features_syn5 = np.squeeze(data_syn5['feat'])
features_syn5 = torch.tensor(features_syn5, dtype=torch.float)

# adjacency matrix
adjacency_matrix_syn1 = torch.tensor(np.squeeze(data_syn1['adj']), dtype=torch.float)
adjacency_matrix_syn4 = torch.tensor(np.squeeze(data_syn4['adj']), dtype=torch.float)
adjacency_matrix_syn5 = torch.tensor(np.squeeze(data_syn5['adj']), dtype=torch.float)

# the indices are already a list --> but have to split the training data in training and validation data first
train_indices_full_syn1 = torch.tensor(data_syn1['train_idx'])
train_indices_full_syn4 = torch.tensor(data_syn4['train_idx'])
train_indices_full_syn5 = torch.tensor(data_syn5['train_idx'])

# split in training and validation indices
train_indices_syn1, validation_indices_syn1 = torch.utils.data.random_split(train_indices_full_syn1, [0.8, 0.2], generator=torch.Generator().manual_seed(42))
train_indices_syn4, validation_indices_syn4 = torch.utils.data.random_split(train_indices_full_syn4, [0.8, 0.2], generator=torch.Generator().manual_seed(42))
train_indices_syn5, validation_indices_syn5 = torch.utils.data.random_split(train_indices_full_syn5, [0.8, 0.2], generator=torch.Generator().manual_seed(42))

test_indices_syn1 = torch.tensor(data_syn1['test_idx'])
test_indices_syn4 = torch.tensor(data_syn4['test_idx'])
test_indices_syn5 = torch.tensor(data_syn5['test_idx'])

In [8]:
model_syn1 = torch.load('models_newinit/syn1model.pt')
model_syn4 = torch.load('models_newinit/syn4model.pt')
model_syn5 = torch.load('models_newinit/syn5model.pt')

In [9]:
model_syn1.eval()
sparse_adj_1 = get_sparse_adjacency_normalized(features_syn1.shape[0], adjacency_matrix_syn1)
outputs_syn1 = model_syn1(features_syn1, sparse_adj_1)

# print accuracy too (to check that it is the same as in the original)
_, predictions_1 = torch.max(outputs_syn1.data, 1)
print("Test accuracy of Syn1 data: ", accuracy_score(labels_syn1[test_indices_syn1], predictions_1[test_indices_syn1]))

model_syn4.eval()
sparse_adj_4 = get_sparse_adjacency_normalized(features_syn4.shape[0], adjacency_matrix_syn4)
outputs_syn4 = model_syn4(features_syn4, sparse_adj_4)

# print accuracy too (to check that it is the same as in the original)
_, predictions_4 = torch.max(outputs_syn4.data, 1)
print("Test accuracy of Syn4 data: ", accuracy_score(labels_syn4[test_indices_syn4], predictions_4[test_indices_syn4]))

model_syn5.eval()
sparse_adj_5 = get_sparse_adjacency_normalized(features_syn5.shape[0], adjacency_matrix_syn5)
outputs_syn5 = model_syn5(features_syn5, sparse_adj_5)

# print accuracy too (to check that it is the same as in the original)
_, predictions_5 = torch.max(outputs_syn5.data, 1)
print("Test accuracy of Syn5 data: ", accuracy_score(labels_syn5[test_indices_syn5], predictions_5[test_indices_syn5]))

Test accuracy of Syn1 data:  0.9928571428571429
Test accuracy of Syn4 data:  0.9142857142857143
Test accuracy of Syn5 data:  0.8623481781376519


  A_hat = torch.sparse_coo_tensor((A_hat.row, A_hat.col), A_hat.data, dtype=torch.float)


In [None]:
# Create explainer
#	explainer = explain.Explainer(
#		model=model,
#		adj=adj,
#		feat=features,
#		label=labels,
#		pred=y_pred_orig,
#		train_idx=idx_train,
#		# args=prog_args,
#		# writer=writer,
#		print_training=True,
#		# graph_mode=graph_mode,
#		# graph_idx=prog_args.graph_idx,
#	)

In [11]:


# get the gnnexplainer for a certain model
explainer = explain.Explainer(model=model_syn1, adj=sparse_adj_1, feat=features_syn1, label=labels_syn1, pred=predictions_1, train_idx=train_indices_syn1, args=None)

AttributeError: 'NoneType' object has no attribute 'num_gc_layers'

In [None]:


for index in torch.tensor([1]):# index in test_indices_syn4: 
    old_prediction = predictions_4[index.item()]
    
    # get the subgraph neighbourhood
    adjacency_matrix, vertex_mapping, labels_perturbed, features_perturbed = create_subgraph_neighbourhood2(index.item(), 4, labels_syn4, features_syn4, adjacency_matrix_syn4)
    
    new_index = vertex_mapping[index.item()]
