# Baselines

In [41]:
import numpy as np
import os
import pandas as pd
import pickle
import sys

import torch

sys.path.append("../gnnexp")
from models import GCNSynthetic

In [42]:
# Dataset identifier; interpolated into the data, explanation and prog_args
# paths used throughout this notebook.
DATASET = "syn1"

## Data

In [43]:
# Original (unperturbed) dataset bundle for the selected DATASET.
data = torch.load(
    f"../data/{DATASET}/eval_as_eval.pt"
)

In [44]:
# Extracted sub-adjacency data, keyed by node id.
# The dataset folder follows DATASET like every other path in this notebook,
# so switching DATASET no longer silently keeps reading the syn1 output.
# The run directory (1657870103) is still pinned -- presumably the id of one
# specific extraction run; update it together with DATASET.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load artifacts produced by this project.
with open(f"../output/{DATASET}/1657870103/original_sub_data.pkl", "rb") as file:
    sub_data = pickle.load(file)

In [45]:
# Ground-truth label of each target node, read from its subgraph label vector
# at the node's re-indexed position ('node_idx_new').
sub_labels = {
    node: entry['sub_label'][entry['node_idx_new']]
    for node, entry in sub_data.items()
}

In [46]:
# Load every explanation matrix stored for this dataset, keyed by node id.
# Only files whose name contains 'label' are read; the node id is parsed from
# characters 4-6 of the filename (assumes a fixed-width, 3-digit id at that
# position -- TODO confirm against the naming scheme of the exporter).
# Key order follows os.listdir.
PATH = f"../explanation/{DATASET}_top6"
explanations = {
    int(fname[4:7]): pd.read_csv(f"{PATH}/{fname}", header=None).to_numpy()
    for fname in os.listdir(PATH)
    if 'label' in fname
}

In [47]:
# Sanity check on one node (519): print the shape of the original
# sub-adjacency and of its explanation matrix, one per line.
for shape in (sub_data[519]['org_adj'].shape, explanations[519].shape):
    print(shape)

torch.Size([1, 15, 15])
(15, 15)


## Model

In [48]:
# Checkpoint holding the trained weights ("model_state") and the "cg" dict.
# TODO: automate the choice between eval_as_eval and eval_as_train.
ckpt = torch.load(f"../data/{DATASET}/eval_as_eval.pt")
cg_dict = ckpt["cg"]

# Model dimensions are read from axis 2 of the stored arrays:
# class count from "pred", input feature width from "feat".
num_classes = cg_dict["pred"].shape[2]
input_dim = cg_dict["feat"].shape[2]

In [49]:
# Pickled argument object for this dataset; its hidden_dim and output_dim
# attributes are used to size the model.
with open(f"prog_args_{DATASET}.pkl", "rb") as file:
    prog_args = pickle.load(file)

In [50]:
# Arrays from the checkpoint's "cg" dict, converted to tensors:
# features and adjacency as float32, labels as int64.
feat = torch.from_numpy(cg_dict["feat"]).to(torch.float32)
adj = torch.from_numpy(cg_dict["adj"]).to(torch.float32)
label = torch.from_numpy(cg_dict["label"]).to(torch.int64)

# Rebuild the network with the dimensions derived above, then restore the
# trained weights from the checkpoint.
model = GCNSynthetic(
    nfeat=input_dim,
    nhid=prog_args.hidden_dim,
    nout=prog_args.output_dim,
    nclass=num_classes,
    dropout=0.0,
)
model.load_state_dict(ckpt["model_state"])

# Switch to evaluation mode. Kept as the last expression so the cell output
# shows the model summary returned by eval().
model.eval()

GCNSynthetic(
  (gc1): GraphConvolution (10 -> 20)
  (gc2): GraphConvolution (20 -> 20)
  (gc3): GraphConvolution (20 -> 20)
  (lin): Linear(in_features=60, out_features=4, bias=True)
)

## Predictions

In [51]:
# Re-classify every explained node on its perturbed subgraph.
# For each node: subtract the explanation matrix from the original
# sub-adjacency (presumably removing the explained edges -- TODO confirm),
# add the identity matrix, run the model on the subgraph features, and keep
# the argmax class at the node's re-indexed position.
predictions = dict()
with torch.no_grad():  # inference only: no autograd graph is needed
    for node in explanations:
        sub_adj = sub_data[node]['org_adj'] - torch.Tensor(explanations[node]).unsqueeze(0)
        sub_adj = sub_adj + torch.eye(sub_adj.size(-1))
        new_idx = sub_data[node]['node_idx_new']
        pred_proba = model(
            sub_data[node]['sub_feat'],
            sub_adj
        ).squeeze(0)
        predictions[node] = int(torch.argmax(pred_proba[new_idx]))

In [52]:
# Flatten the per-node predictions into a tensor, in dict insertion order.
# Note: this rebinds `predictions` from a dict to a tensor, so the cell is
# not meant to be re-run without re-running the prediction loop first.
predictions = torch.Tensor([*predictions.values()])

In [53]:
# Ground-truth labels, ordered to match `predictions`.
# `predictions` was filled by iterating `explanations`, so the labels are
# looked up in that same node order. Taking `sub_labels.values()` directly
# would follow the key order of `sub_data` instead, which need not agree with
# the os.listdir-driven order of `explanations` (nor even have the same
# length), so the element-wise comparisons below would pair different nodes.
labels = torch.Tensor([sub_labels[node] for node in explanations])

## Fidelity

In [64]:
# Class distribution of the ground truth vs. the perturbed-graph predictions,
# each shown as (unique values, counts).
print(f"labels: {labels.unique(return_counts=True)}")
print(f"predictions: {predictions.unique(return_counts=True)}")

labels: (tensor([1., 2., 3.]), tensor([13, 18,  7]))
predictions: (tensor([0., 1., 3.]), tensor([12, 25,  1]))


In [71]:
# Fidelity = 1 - (fraction of nodes whose prediction differs from the label).
num_mismatched = (predictions != labels).sum()
fidelity = 1 - num_mismatched / labels.shape[0]
print(f"Fidelity: {fidelity:.2f}")

Fidelity: 0.26


## Per-label fidelity

In [67]:
from collections import defaultdict

In [68]:
# Per ground-truth label, count the nodes whose prediction differs from it.
# Despite its name, this holds mismatch counts, not fidelity scores.
# Every label that occurs gets an entry (0 when all of its nodes match), so a
# perfectly predicted label is not lost when the counter is iterated later.
# The loop variables are deliberately not called `label`: that name is the
# ground-truth tensor created in the model cell and must not be overwritten.
per_label_fidelity = defaultdict(int)
for true_label, predicted in zip(labels, predictions):
    per_label_fidelity[int(true_label)] += int(true_label != predicted)

In [69]:
# Number of nodes carrying each ground-truth label: {label: count}.
# `unique(return_counts=True)` yields (values, counts); unpack and pair them.
nodes_per_label = {
    int(value): int(count)
    for value, count in zip(*labels.unique(return_counts=True))
}

In [70]:
# Report fidelity for every label that occurs in `labels`.
# Iterating `nodes_per_label` (rather than the mismatch counter) ensures a
# label with no mismatches is still printed, with fidelity 1.0, instead of
# being silently skipped. The loop variable is not named `label`, so the
# ground-truth `label` tensor from the model cell is left intact.
print("Fidelity:")
for label_id, node_count in nodes_per_label.items():
    mismatches = per_label_fidelity.get(label_id, 0)
    print(f"Label-{label_id}")
    print(1 - mismatches/node_count)
    print()

Fidelity:
Label-3
0.0

Label-2
0.0

Label-1
0.7692307692307692

