# Exploring
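Exploring what the outputs of the evaluation script (`test_explained_adj.py`) and the saved explanations represent, and deriving explanation size and fidelity from them.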

## Imports

In [1]:
import os
import pickle
from collections import defaultdict

import numpy as np
import pandas as pd
import torch

## Data

### Outputs of the evaluation script

In [2]:
# * The comment above each frame is the matching variable name in
#   test_explained_adj.py. Layout notes below follow the excerpts quoted in the
#   Reference section of this notebook.

# Shared header for the per-node frames: node index followed by one column per
# evaluated adjacency (org, extracted, ours, gnnexp in the Reference excerpts).
node_method_columns = ["Node", "0", "1", "2", "3"]

# merged_correct
merged_correct = pd.read_csv("../log/syn1_top6/acc.csv", header=None, names=node_method_columns)

# merged_odd -- stacked as rows in the Reference excerpt, hence no column names
merged_odd = pd.read_csv("../log/syn1_top6/log_odd.csv", header=None)

# merged_loss
merged_loss = pd.read_csv("../log/syn1_top6/loss.csv", header=None, names=node_method_columns)

# pred_prob -- node index followed by the concatenated probability vectors
pred_prob = pd.read_csv("../log/syn1_top6/prob.csv", header=None)

In [3]:
# Build the shape report for the four log frames first, then emit it in one go.
shapes_summary = (
    "Shapes:\n"
    f"merged_correct:   {merged_correct.shape}\n"
    f"merged_odd:       {merged_odd.shape}\n"
    f"merged_loss:      {merged_loss.shape}\n"
    f"pred_prob:        {pred_prob.shape}"
)
print(shapes_summary)

Shapes:
merged_correct:   (34, 5)
merged_odd:       (4, 34)
merged_loss:      (34, 5)
pred_prob:        (34, 17)


### Explanations

In [4]:
# List the explanation directory and keep, in sorted order, only the entries
# whose name contains 'label'.
filenames = os.listdir("../explanation/syn1_top6")
explanation_filenames = [name for name in filenames if 'label' in name]
explanation_filenames.sort()

In [5]:
# Load every explanation CSV into an array, keyed by the integer node id parsed
# from its filename.
explanations = dict()
for filename in explanation_filenames:
    explanation = pd.read_csv(f"../explanation/syn1_top6/{filename}", header=None).to_numpy()
    # The target node_id starts at filename[4]. Read the whole run of decimal
    # digits rather than the fixed slice [4:7], so ids that are not exactly
    # three digits long are neither truncated nor rejected by int().
    id_end = 4
    while id_end < len(filename) and filename[id_end] in "0123456789":
        id_end += 1
    explanations[int(filename[4:id_end])] = explanation

### Labels and Predictions

In [6]:
# Column positions to extract from pred_prob. Per the Reference excerpt each row
# is [node_idx] + org_p + extracted_p + ours_p + gnnexp_p; with the 17 columns
# reported above that is four probabilities per block, so 1-4 is the original
# block and 9-12 the third ("ours") block -- presumably GEM, TODO confirm.
base = [1, 2, 3, 4]
proba_columns_original = [0, *base]
proba_columns_gem = [0, *(8 + offset for offset in base)]

In [20]:
# Slice the node-id column plus the selected probability columns for each model
# out of pred_prob, relabelling them to [node_id, 0, 1, 2, 3] in both frames.
columns = ["node_id", 0, 1, 2, 3]

# rename() returns a new frame, so neither result aliases pred_prob.
pred_proba_original = pred_prob[proba_columns_original].rename(
    columns=dict(zip(proba_columns_original, columns))
)
pred_proba_gem = pred_prob[proba_columns_gem].rename(
    columns=dict(zip(proba_columns_gem, columns))
)

In [49]:
# Predicted label per node = position of the largest value among the class
# columns (node_id is dropped before the arg-max). Results are dicts keyed by
# the integer node id.
argmax_original = pred_proba_original.drop(columns="node_id").to_numpy().argmax(axis=-1)
predictions_original = {
    int(node_id): label
    for node_id, label in zip(pred_proba_original["node_id"], argmax_original)
}

argmax_gem = pred_proba_gem.drop(columns="node_id").to_numpy().argmax(axis=-1)
predictions_gem = {
    int(node_id): label
    for node_id, label in zip(pred_proba_gem["node_id"], argmax_gem)
}

## Reference

In [5]:
# Reference only: a bare string literal, so nothing in it is executed.
# Shows how merged_loss is stacked column-wise (axis=1) from the node indices
# and the four loss lists -- presumably quoted from test_explained_adj.py
# (TODO confirm).
'''
merged_loss = np.stack([
        valid_node_idxs,
        org_losses,
        extracted_losses,
        ours_losses,
        gnnexp_losses
    ], axis=1)
'''

'\nmerged_loss = np.stack([\n        valid_node_idxs,\n        org_losses,\n        extracted_losses,\n        ours_losses,\n        gnnexp_losses\n    ], axis=1)\n'

In [6]:
# Reference only: a bare string literal, so nothing in it is executed.
# Shows how merged_correct is stacked column-wise (axis=1) from the node
# indices and the four correctness lists -- presumably quoted from
# test_explained_adj.py (TODO confirm).
'''
merged_correct = np.stack([
    valid_node_idxs,
    org_corrects,
    extracted_corrects,
    ours_corrects,
    gnnexp_corrects
    ], axis=1)
'''

'\nmerged_correct = np.stack([\n    valid_node_idxs,\n    org_corrects,\n    extracted_corrects,\n    ours_corrects,\n    gnnexp_corrects\n    ], axis=1)\n'

In [7]:
# Reference only: a bare string literal, so nothing in it is executed.
# Each pred_prob row is the node index followed by the org, extracted, ours and
# gnnexp probability vectors, concatenated in that order -- presumably quoted
# from test_explained_adj.py (TODO confirm).
'''
pred_prob += [[node_idx] + list(org_p) + list(extracted_p) + list(ours_p) + list(gnnexp_p)]
pred_prob = np.stack(pred_prob, axis=0)
'''

'\npred_prob += [[node_idx] + list(org_p) + list(extracted_p) + list(ours_p) + list(gnnexp_p)]\npred_prob = np.stack(pred_prob, axis=0)\n'

In [8]:
# Reference only: a bare string literal, so nothing in it is executed.
# merged_odd is stacked without an axis argument, so each of the four lists
# becomes one row -- presumably quoted from test_explained_adj.py
# (TODO confirm).
'''
merged_odd = np.stack([
        our_ground_truth_odd,
        gnnexp_ground_truth_odd,
        our_pred_label_odd,
        gnnexp_pred_label_odd
    ])
'''

'\nmerged_odd = np.stack([\n        our_ground_truth_odd,\n        gnnexp_ground_truth_odd,\n        our_pred_label_odd,\n        gnnexp_pred_label_odd\n    ])\n'

In [9]:
# Reference only: a bare string literal, so nothing in it is executed.
# Both quantities are log_odd(org_p[k]) - log_odd(ours_p[k]); the ground-truth
# variant indexes with node_label, the pred-label variant with org_pred.
# Presumably quoted from test_explained_adj.py (TODO confirm).
'''
our_ground_truth_odd = []
our_ground_truth_odd += [log_odd(org_p[node_label]) - log_odd(ours_p[node_label])]

our_pred_label_odd = []
our_pred_label_odd += [log_odd(org_p[org_pred]) - log_odd(ours_p[org_pred])]
'''

'\nour_ground_truth_odd = []\nour_ground_truth_odd += [log_odd(org_p[node_label]) - log_odd(ours_p[node_label])]\n\nour_pred_label_odd = []\nour_pred_label_odd += [log_odd(org_p[org_pred]) - log_odd(ours_p[org_pred])]\n'

In [10]:
# Reference only: a bare string literal, so nothing in it is executed.
# org_pred / org_p come from evaluate_adj called with org_adj; org_losses and
# org_corrects are passed in as arguments. Presumably quoted from
# test_explained_adj.py (TODO confirm).
'''
org_pred, org_p = evaluate_adj(
            node_idx_new,
            sub_feat,
            org_adj,
            sub_label,
            org_losses,
            org_corrects
        )
'''

'\norg_pred, org_p = evaluate_adj(\n            node_idx_new,\n            sub_feat,\n            org_adj,\n            sub_label,\n            org_losses,\n            org_corrects\n        )\n'

In [11]:
# Reference only: a bare string literal, so nothing in it is executed.
# ours_pred / ours_p come from the same evaluate_adj call shape, this time with
# ours_adj, ours_losses and ours_corrects. Presumably quoted from
# test_explained_adj.py (TODO confirm).
'''
ours_pred, ours_p = evaluate_adj(
            node_idx_new,
            sub_feat,
            ours_adj,
            sub_label,
            ours_losses,
            ours_corrects
        )
'''

'\nours_pred, ours_p = evaluate_adj(\n            node_idx_new,\n            sub_feat,\n            ours_adj,\n            sub_label,\n            ours_losses,\n            ours_corrects\n        )\n'

## Explanation size

In [22]:
# Per-explanation size: subtract the identity from the matrix, sum what is left
# and halve it (assumes a symmetric adjacency with a unit diagonal -- TODO
# confirm). The summary is reported as "mean +- std" over all explanations.
edge_counts = [
    np.sum(adjacency - np.eye(adjacency.shape[0])) / 2
    for adjacency in explanations.values()
]
explanation_size = f"{np.mean(edge_counts):.2f} +- {np.std(edge_counts):.2f}"

In [23]:
# Show the overall "mean +- std" explanation-size summary string.
print(f"Explanation size: {explanation_size}")

Explanation size: 5.97 +- 0.17


## Per-label explanation size

In [59]:
# Group explanation sizes by the label the original model predicts for the
# explained node, then collapse each group to a "mean +- std" string
# ("NA" when no node received that label).
per_label_explanation_size = defaultdict(list)
for node_id, explanation in explanations.items():
    label = predictions_original[node_id]
    # Dedicated local name: `explanation_size` already holds the overall
    # "mean +- std" summary string and must not be overwritten by a per-node
    # number, otherwise re-running the print cell shows the wrong value.
    edge_count = np.sum(explanation - np.eye(explanation.shape[0])) / 2
    per_label_explanation_size[f"label{label}"].append(edge_count)

for label in range(4):
    key = f"label{label}"
    # Indexing the defaultdict creates an empty list for labels never seen.
    sizes = per_label_explanation_size[key]
    if len(sizes) == 0:
        per_label_explanation_size[key] = "NA"
    else:
        per_label_explanation_size[key] = (
            f"{np.mean(sizes):.2f}"
            f" +- {np.std(sizes):.2f}"
        )

In [60]:
# Bare expression: displays the per-label "mean +- std" / "NA" mapping.
per_label_explanation_size

defaultdict(list,
            {'label1': '5.94 +- 0.23',
             'label2': '6.00 +- 0.00',
             'label3': '6.00 +- 0.00',
             'label0': 'NA'})

## Fidelity

In [8]:
# Find differences between predictions: fidelity is the share of nodes whose
# predicted label differs between the original model and GEM.
# predictions_original / predictions_gem are dicts keyed by node id, so `!=`
# on them yields a single bool (not an element-wise result) and they have no
# `.shape`; align them by node id and compare label by label instead.
shared_node_ids = sorted(predictions_original.keys() & predictions_gem.keys())
differences = sum(
    int(predictions_original[node_id] != predictions_gem[node_id])
    for node_id in shared_node_ids
)
# Guard against an empty prediction set instead of dividing by zero.
fidelity = differences / len(shared_node_ids) if shared_node_ids else float("nan")

In [10]:
# Report fidelity rounded to two decimals.
print(f"Fidelity: {fidelity:.2f}")

Fidelity: 0.06


## Per-label fidelity

In [29]:
# For every node record 1 if the GEM prediction differs from the original
# prediction and 0 otherwise, grouped by the original predicted label; each
# group is then reduced to its mean ("NA" when the label never occurs).
per_label_fidelity = defaultdict(list)

# Arg-max over the class columns only. Taking the arg-max of a whole row would
# also compare the node_id value against the probabilities and shift every
# class position by one.
labels_original = pred_proba_original.drop(columns="node_id").to_numpy().argmax(axis=-1)
labels_gem = pred_proba_gem.drop(columns="node_id").to_numpy().argmax(axis=-1)

for prediction_original, prediction_gem in zip(labels_original, labels_gem):
    changed = int(prediction_original != prediction_gem)
    per_label_fidelity[f"label{prediction_original}"].append(changed)

for label in range(4):
    key = f"label{label}"
    # Indexing the defaultdict creates an empty list for labels never seen.
    flags = per_label_fidelity[key]
    if len(flags) == 0:
        per_label_fidelity[key] = "NA"
    else:
        per_label_fidelity[key] = f"{np.mean(flags):.2f}"

In [33]:
# Print the per-label fidelity mapping ("label<k>" -> formatted mean or "NA").
print("Per label fidelity:")
print(per_label_fidelity)

Per label fidelity:
defaultdict(<class 'list'>, {'label1': '0.11', 'label2': '0.00', 'label3': '0.00', 'label0': 'NA'})
