# Mutagenecity

In [1]:
from collections import defaultdict
import pickle

import numpy as np
import torch

In [2]:
# Specify the folder containing the outputs of get_outputs.sh.
FOLDER = "../outputs/mutag/mutag-alp_0.0-1658483050"

## Data

In [3]:
with open(f"{FOLDER}/exp_dict.pkl", "rb") as file:
    exp_dict = pickle.load(file) # format: node_id: explanation_mask over the adjacency_matrix

with open(f"{FOLDER}/log.txt", "r") as file:
    log = file.readlines()

with open(f"{FOLDER}/num_dict.pkl", "rb") as file:
    num_dict = pickle.load(file) # format: node_id: #counterfactuals_found

with open(f"{FOLDER}/pred_label_dict.pkl", "rb") as file:
    pred_label_dict = pickle.load(file) # format: node_id: initial_blackbox_prediction

with open(f"{FOLDER}/pred_proba.txt", "r") as file:
    pred_proba = file.readlines()

with open(f"{FOLDER}/t_gid.pkl", "rb") as file:
    t_gid = pickle.load(file) # format: subgraph_id (same as node_id)

# ! TEMPORARY
with open("../datasets/Eval-sets/eval-set-bashapes.pkl", "rb") as file:
    eval_set = pickle.load(file)

### Constants

In [4]:
NUMBER_OF_LABELS = 2 
# ! Temporary:
# ! There should be 2 labels. But, since cfsqr is explaining label 1,
# ! there is no graph with label 0 in the set. 
# NUMBER_OF_LABELS = len(str.strip(pred_proba[0]).split())

NODES_PER_LABEL = defaultdict(int)
for node_id, label in pred_label_dict.items():
    NODES_PER_LABEL[f"label-{int(label)}"] += 1

In [5]:
print(NODES_PER_LABEL)

defaultdict(<class 'int'>, {'label-0': 32})


## Per-label Explanation size

In [6]:
per_label_explanation_size = defaultdict(list)

# iterate over the nodes
for node_id, number_of_cfs in num_dict.items():
    # find out the initial label
    label = pred_label_dict[node_id]
    # update size of corresponding label
    per_label_explanation_size[f"label-{int(label)}"].append(int(number_of_cfs))

# find mean and std
for label in range(NUMBER_OF_LABELS):
    if len(per_label_explanation_size[f"label-{int(label)}"]) == 0:
        mean, std = None, None
    else:
        mean = np.mean(per_label_explanation_size[f"label-{int(label)}"])
        std = np.std(per_label_explanation_size[f"label-{int(label)}"])
    per_label_explanation_size[f"label-{int(label)}"] = [mean, std]

In [7]:
print("Explanation size:\n")
for key, value in per_label_explanation_size.items(): # format: label: (mean, std)
    print(f"{key}: {value[0]} +- {value[1]}")

Explanation size:

label-0: 0.09375 +- 0.5219779090153146
label-1: None +- None


## Explanation size

In [8]:
mean = np.array(list(num_dict.values())).mean()
std = np.array(list(num_dict.values())).std()
explanation_size = [mean, std]

In [9]:
print(f"Explanation size: {explanation_size[0]:.2f} +- {explanation_size[1]:.2f}")

Explanation size: 0.09 +- 0.52


## Per-node fidelity

In [10]:
predictions = defaultdict(int)
for node_id, line in zip(t_gid, pred_proba):
    line = line.strip().split()
    line = [float(pred) for pred in line]
    sigmoid_proba = line[0] # assuming only one output as in case of sigmoid.
    pred = round(sigmoid_proba)
    predictions[node_id] = pred

In [11]:
labels_and_preds = defaultdict(tuple)
for node_id in t_gid:
    labels_and_preds[node_id] = (int(pred_label_dict[node_id]), predictions[node_id])

In [12]:
per_label_cf_found = defaultdict(int)
for node_id, (label, prediction) in labels_and_preds.items():
    if label != prediction:
        per_label_cf_found[f"label-{label}"] += 1

In [13]:
per_label_fidelity = dict()
for key, value in per_label_cf_found.items():
    per_label_fidelity[key] = 1 - per_label_cf_found[key]/NODES_PER_LABEL[key]

In [14]:
print(f"Fidelity:\n")
for key, value in per_label_fidelity.items():
    print(f"{key}: {value}")

Fidelity:



## Fidelity

In [15]:
cf_found = 0
for node_id, (label, prediction) in labels_and_preds.items():
    if label != prediction:
        cf_found += 1
        
fidelity = 1 - cf_found/sum(list(NODES_PER_LABEL.values()))

In [16]:
print(f"Fidelity: {fidelity}")

Fidelity: 1.0


## Rough

exp_dict
log
num_dict
pred_label_dict
pred_proba
t_gid

In [20]:
new_edge_num = len(exp_dict[201][exp_dict[201] > 0.5])

In [21]:
new_edge_num

0