# Training the CF explainer and getting a CF example

In [1]:
# first import the needed packages
import torch
import numpy as np
from gcn import *
from gcn_perturbation_matrix import *
from sklearn.metrics import accuracy_score
from explainer_framework import *

  from .autonotebook import tqdm as notebook_tqdm


### Read in the data we are working with:

In [2]:
with open('data/syn1.pickle','rb') as pickle_file: 
    data_syn1 = pickle.load(pickle_file)

with open('data/syn4.pickle','rb') as pickle_file:
    data_syn4 = pickle.load(pickle_file)
    
with open('data/syn5.pickle','rb') as pickle_file:
    data_syn5 = pickle.load(pickle_file)

### Put data into tensors

In [3]:
# squeeze the labels (as it has a singleton dim and then make it a tensor)
labels_syn1 = np.squeeze(data_syn1['labels'])
labels_syn1 = torch.tensor(labels_syn1)

labels_syn4 = np.squeeze(data_syn4['labels'])
labels_syn4 = torch.tensor(labels_syn4)

labels_syn5 = np.squeeze(data_syn5['labels'])
labels_syn5 = torch.tensor(labels_syn5)

# same for features, but define the type of data here
features_syn1 = np.squeeze(data_syn1['feat'])
features_syn1 = torch.tensor(features_syn1, dtype=torch.float)

features_syn4 = np.squeeze(data_syn4['feat'])
features_syn4 = torch.tensor(features_syn4, dtype=torch.float)

features_syn5 = np.squeeze(data_syn5['feat'])
features_syn5 = torch.tensor(features_syn5, dtype=torch.float)

# adjacency matrix
adjacency_matrix_syn1 = torch.tensor(np.squeeze(data_syn1['adj']), dtype=torch.float)
adjacency_matrix_syn4 = torch.tensor(np.squeeze(data_syn4['adj']), dtype=torch.float)
adjacency_matrix_syn5 = torch.tensor(np.squeeze(data_syn5['adj']), dtype=torch.float)

# the indices are already a list --> but have to split the training data in training and validation data first
train_indices_full_syn1 = torch.tensor(data_syn1['train_idx'])
train_indices_full_syn4 = torch.tensor(data_syn4['train_idx'])
train_indices_full_syn5 = torch.tensor(data_syn5['train_idx'])

# split in training and validation indices
train_indices_syn1, validation_indices_syn1 = torch.utils.data.random_split(train_indices_full_syn1, [0.8, 0.2], generator=torch.Generator().manual_seed(42))
train_indices_syn4, validation_indices_syn4 = torch.utils.data.random_split(train_indices_full_syn4, [0.8, 0.2], generator=torch.Generator().manual_seed(42))
train_indices_syn5, validation_indices_syn5 = torch.utils.data.random_split(train_indices_full_syn5, [0.8, 0.2], generator=torch.Generator().manual_seed(42))

test_indices_syn1 = torch.tensor(data_syn1['test_idx'])
test_indices_syn4 = torch.tensor(data_syn4['test_idx'])
test_indices_syn5 = torch.tensor(data_syn5['test_idx'])

### Get the original models

In [4]:
model_syn1 = torch.load('models/syn1model.pt')
model_syn4 = torch.load('models/syn4model.pt')
model_syn5 = torch.load('models/syn5model.pt')

### First, get the original predictions for the model we're researching

In [5]:
model_syn1.eval()
sparse_adj_1 = get_sparse_adjacency_normalized(features_syn1.shape[0], adjacency_matrix_syn1)
outputs_syn1 = model_syn1(features_syn1, sparse_adj_1)

# print accuracy too (to check that it is the same as in the original)
_, predictions_1 = torch.max(outputs_syn1.data, 1)
print("Test accuracy of Syn1 data: ", accuracy_score(labels_syn1[test_indices_syn1], predictions_1[test_indices_syn1]))

model_syn4.eval()
sparse_adj_4 = get_sparse_adjacency_normalized(features_syn4.shape[0], adjacency_matrix_syn4)
outputs_syn4 = model_syn4(features_syn4, sparse_adj_4)

# print accuracy too (to check that it is the same as in the original)
_, predictions_4 = torch.max(outputs_syn4.data, 1)
print("Test accuracy of Syn4 data: ", accuracy_score(labels_syn4[test_indices_syn4], predictions_4[test_indices_syn4]))

model_syn5.eval()
sparse_adj_5 = get_sparse_adjacency_normalized(features_syn5.shape[0], adjacency_matrix_syn5)
outputs_syn5 = model_syn5(features_syn5, sparse_adj_5)

# print accuracy too (to check that it is the same as in the original)
_, predictions_5 = torch.max(outputs_syn5.data, 1)
print("Test accuracy of Syn5 data: ", accuracy_score(labels_syn5[test_indices_syn5], predictions_5[test_indices_syn5]))

Test accuracy of Syn1 data:  0.9571428571428572
Test accuracy of Syn4 data:  0.88
Test accuracy of Syn5 data:  0.7854251012145749


  A_hat = torch.sparse_coo_tensor((A_hat.row, A_hat.col), A_hat.data, dtype=torch.float)


### Get the weights and biases for the models

In [6]:
layer1_W_syn1 = model_syn1.gcn_layer_1.W.detach()
layer1_b_syn1 = model_syn1.gcn_layer_1.b.detach()
layer2_W_syn1 = model_syn1.gcn_layer_2.W.detach()
layer2_b_syn1 = model_syn1.gcn_layer_2.b.detach()
layer3_W_syn1 = model_syn1.gcn_layer_3.W.detach()
layer3_b_syn1 = model_syn1.gcn_layer_3.b.detach()
lin_weight_syn1 = model_syn1.linear_layer.weight.detach()
lin_b_syn1 = model_syn1.linear_layer.bias.detach()

layer1_W_syn4 = model_syn4.gcn_layer_1.W.detach()
layer1_b_syn4 = model_syn4.gcn_layer_1.b.detach()
layer2_W_syn4 = model_syn4.gcn_layer_2.W.detach()
layer2_b_syn4 = model_syn4.gcn_layer_2.b.detach()
layer3_W_syn4 = model_syn4.gcn_layer_3.W.detach()
layer3_b_syn4 = model_syn4.gcn_layer_3.b.detach()
lin_weight_syn4 = model_syn4.linear_layer.weight.detach()
lin_b_syn4 = model_syn4.linear_layer.bias.detach()

layer1_W_syn5 = model_syn5.gcn_layer_1.W.detach()
layer1_b_syn5 = model_syn5.gcn_layer_1.b.detach()
layer2_W_syn5 = model_syn5.gcn_layer_2.W.detach()
layer2_b_syn5 = model_syn5.gcn_layer_2.b.detach()
layer3_W_syn5 = model_syn5.gcn_layer_3.W.detach()
layer3_b_syn5 = model_syn5.gcn_layer_3.b.detach()
lin_weight_syn5 = model_syn5.linear_layer.weight.detach()
lin_b_syn5 = model_syn5.linear_layer.bias.detach()

### Set up structure to get CF-examples (new perturbed GCN for every node we get a CF-explanation for!)
As subgraphs may have different sizes--> first get subgraph, then we know how big the perturbation matrix should be.

## Train it and get the CF-examples!!

In [17]:
examples_all = []
train_loss_all = []
nr_cf = 0

for index in test_indices_syn1: # we first do these three to test whether everything works properly!!
    # get the old prediction
    old_prediction = predictions_1[index.item()]
    
    # get the subgraph neighbourhood
    adjacency_matrix, vertex_mapping, labels_perturbed, features_perturbed = create_subgraph_neighbourhood2(index.item(), 4, labels_syn1, features_syn1, adjacency_matrix_syn1)
    
    new_index = vertex_mapping[index.item()]
    
    # test whether it gets the same outcome
    sparse_adj_test = get_sparse_adjacency_normalized(features_perturbed.shape[0], adjacency_matrix)
    with torch.no_grad():
        outputs_test = model_syn1(features_perturbed, sparse_adj_test)

    # get accuracy too (to check that it is the same as in the original)
    _, predictions_test = torch.max(outputs_test.data, 1)
    
    # as a small test:
    assert predictions_test[new_index].item() == old_prediction, "wrong prediction"
        
    # make a gcn model (to use for perturbation):
    model_pert = GCNPerturbed(layer1_W_syn1, layer1_b_syn1, layer2_W_syn1, layer2_b_syn1, layer3_W_syn1, layer3_b_syn1, lin_weight_syn1, lin_b_syn1, adjacency_matrix.shape[0])
    
    # from the model hyperparams:
    alpha = 0.1
    optim = torch.optim.SGD(model_pert.parameters(), lr=alpha, nesterov=True, momentum=0.9)
    beta = 0.5
    k = 500
    
    # get the new cf example!
    examples_for_index, train_loss = get_cf_example(new_index, old_prediction, model_pert, optim, beta, k, adjacency_matrix, labels_perturbed, features_perturbed)
    
    # append to all examples!!
    examples_all.append(examples_for_index)
    train_loss_all.append(train_loss)
    
    if examples_for_index != []:
        nr_cf = nr_cf + 1

print(nr_cf)
print(len(test_indices_syn1))

122
140


In [16]:
train_loss

[-0.23569774627685547,
 -0.2413913905620575,
 -0.24997884035110474,
 -0.26172947883605957,
 -0.2771143317222595,
 -0.2968655228614807,
 -0.3221174478530884,
 -0.35453811287879944,
 -0.3966902792453766,
 -0.4525355398654938,
 -0.5284311771392822,
 -0.6347899436950684,
 -0.7891736030578613,
 0.5,
 1.0,
 1.0,
 1.0,
 1.0,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1.5,
 1

In [14]:
examples_for_index

[tensor([[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]])]

In [37]:
expl_size = []

for i in examples_all:
    if i != []:
        expl_size.append((sum(sum(i[-1] == 0))/2).item())
        print((sum(sum(i[0] == 0))/2).item())

mean(expl_size)

1.0
3.0
1.0
2.0
2.0
2.0
1.0
1.0
3.0
2.0
3.0
1.0
4.0
2.0
1.0
1.0
3.0
2.0
1.0
1.0
1.0
1.0
1.0
2.0
3.0
3.0
1.0
2.0
3.0
1.0
2.0
1.0
2.0
2.0
1.0
1.0
2.0
2.0
3.0
3.0
1.0
2.0
4.0
2.0
2.0
2.0
4.0
1.0
1.0
2.0
1.0
2.0
1.0
1.0
1.0
1.0
3.0
1.0
2.0
2.0
1.0
3.0
3.0
20.0
1.0
3.0
1.0
1.0
3.0
1.0
3.0
3.0
2.0
2.0
2.0
2.0
2.0
2.0
1.0
2.0
3.0
4.0
6.0
3.0
2.0
1.0
1.0
2.0
1.0
2.0
1.0
1.0
2.0
1.0
1.0
1.0
2.0
3.0
2.0
2.0
2.0
2.0
1.0
1.0
2.0
1.0
2.0
7.0
1.0
2.0
1.0
1.0
2.0
9.0
1.0
1.0
3.0
3.0
2.0
3.0
2.0
2.0


In [38]:
from statistics import mean
mean(expl_size)

2.1311475409836067

In [30]:
122/140

0.8714285714285714

In [39]:
labels_syn1

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [41]:
examples_all = []
train_loss_all = []
nr_cf_5 = 0

for index in test_indices_syn5: # we first do these three to test whether everything works properly!!
    # get the old prediction
    old_prediction = predictions_5[index.item()]
    
    # get the subgraph neighbourhood
    adjacency_matrix, vertex_mapping, labels_perturbed, features_perturbed = create_subgraph_neighbourhood2(index.item(), 4, labels_syn5, features_syn5, adjacency_matrix_syn5)
    
    new_index = vertex_mapping[index.item()]
    
    # test whether it gets the same outcome
    sparse_adj_test = get_sparse_adjacency_normalized(features_perturbed.shape[0], adjacency_matrix)
    with torch.no_grad():
        outputs_test = model_syn5(features_perturbed, sparse_adj_test)

    # get accuracy too (to check that it is the same as in the original)
    _, predictions_test = torch.max(outputs_test.data, 1)
    
    # as a small test:
    assert predictions_test[new_index].item() == old_prediction, "wrong prediction"
        
    # make a gcn model (to use for perturbation):
    model_pert = GCNPerturbed(layer1_W_syn5, layer1_b_syn5, layer2_W_syn5, layer2_b_syn5, layer3_W_syn5, layer3_b_syn5, lin_weight_syn5, lin_b_syn5, adjacency_matrix.shape[0])
    
    # from the model hyperparams:
    alpha = 0.1
    optim = torch.optim.SGD(model_pert.parameters(), lr=alpha)
    beta = 0.5
    k = 500
    
    # get the new cf example!
    examples_for_index, train_loss = get_cf_example(new_index, old_prediction, model_pert, optim, beta, k, adjacency_matrix, labels_perturbed, features_perturbed)
    
    # append to all examples!!
    examples_all.append(examples_for_index)
    train_loss_all.append(train_loss)
    
    if examples_for_index != []:
        nr_cf_5 = nr_cf_5 + 1

print(nr_cf_5)

175


NameError: name 'test_indices_syn' is not defined

In [42]:
expl_size = []

for i in examples_all:
    if i != []:
        expl_size.append((sum(sum(i[-1] == 0))/2).item())
        print((sum(sum(i[0] == 0))/2).item())

mean(expl_size)

1.0
2.0
2.0
2.0
2.0
1.0
1.0
1.0
2.0
1.0
2.0
1.0
1.0
2.0
1.0
2.0
1.0
2.0
2.0
2.0
2.0
2.0
2.0
2.0
2.0
2.0
2.0
1.0
2.0
1.0
2.0
1.0
2.0
2.0
2.0
2.0
2.0
1.0
2.0
1.0
1.0
1.0
1.0
2.0
2.0
1.0
2.0
2.0
1.0
1.0
1.0
1.0
2.0
3.0
1.0
1.0
1.0
1.0
2.0
1.0
1.0
1.0
1.0
2.0
1.0
2.0
2.0
2.0
2.0
1.0
1.0
2.0
2.0
1.0
2.0
2.0
1.0
2.0
2.0
1.0
1.0
1.0
2.0
2.0
2.0
3.0
1.0
1.0
1.0
2.0
1.0
2.0
1.0
1.0
3.0
1.0
1.0
2.0
2.0
2.0
2.0
2.0
2.0
2.0
1.0
2.0
1.0
1.0
2.0
2.0
1.0
2.0
2.0
3.0
1.0
1.0
1.0
1.0
1.0
1.0
3.0
2.0
1.0
1.0
2.0
1.0
1.0
2.0
1.0
1.0
2.0
1.0
2.0
1.0
2.0
1.0
2.0
2.0
1.0
1.0
2.0
2.0
2.0
1.0
2.0
1.0
1.0
1.0
2.0
2.0
1.0
2.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
2.0
1.0
1.0
2.0
2.0
1.0
1.0
2.0
1.0
2.0
1.0
1.0
2.0
1.0
1.0
1.0


1.52

In [44]:
print(nr_cf_5/ len(test_indices_syn5))

0.708502024291498
