# Baselines

In [23]:
from collections import defaultdict
import os
import pickle
import sys

import pandas as pd
import torch

sys.path.append("../gnnexp")
from models import GCNSynthetic

In [2]:
# Dataset identifier; it selects the sub-data dump, the explanation folder,
# the checkpoint and the pickled program arguments used in the cells below.
DATASET = "syn1"
# Suffix of the checkpoint file name (`eval_as_{EVAL}.pt`).
EVAL = "eval"

## Data

In [4]:
# Extracted subadjacency matrices.
# One pickled dump per supported dataset; add an entry here to support another.
SUB_DATA_PATHS = {
    'syn1': "../output/syn1/1657970490/original_sub_data.pkl",
    'syn4': "../output/syn4/1657890667/original_sub_data.pkl",
}
if DATASET not in SUB_DATA_PATHS:
    # Fail loudly: otherwise `sub_data` would be undefined on a fresh kernel, or
    # would silently keep the value loaded for a previously selected dataset.
    raise ValueError(
        f"Unsupported DATASET {DATASET!r}; expected one of {sorted(SUB_DATA_PATHS)}"
    )
# NOTE: pickle can execute arbitrary code while loading; only open dumps that
# were produced by a trusted run of this project.
with open(SUB_DATA_PATHS[DATASET], "rb") as file:
    sub_data = pickle.load(file)

In [5]:
# Map every node in `sub_data` to the integer label stored for it in its own
# subgraph: `node_idx_new` is the position used to index `sub_label`.
sub_labels = {}
for node in sub_data:
    entry = sub_data[node]
    new_idx = entry['node_idx_new']
    sub_labels[node] = int(entry['sub_label'][new_idx])

In [6]:
# Read every explanation CSV in PATH whose file name contains 'label' and
# store it as a NumPy array keyed by the node id parsed from the file name.
explanations = {}
PATH = f"../explanation/{DATASET}_top6"
for filename in os.listdir(PATH):
    if 'label' in filename:
        # NOTE(review): the node id is taken from characters 4..6 of the file
        # name, i.e. a fixed-width 3-character id is assumed — confirm this
        # holds for every file in the folder.
        node_id = int(filename[4:7])
        explanation_frame = pd.read_csv(f"{PATH}/{filename}", header=None)
        explanations[node_id] = explanation_frame.to_numpy()

## Model

In [13]:
# Load the checkpoint onto the CPU. `map_location="cpu"` lets a checkpoint that
# was saved on a GPU machine be opened on a CPU-only one; the model below is
# built and evaluated on the CPU anyway.
# NOTE(review): recent PyTorch releases default `torch.load` to
# `weights_only=True`, which may reject the NumPy arrays stored under "cg" —
# confirm against the PyTorch version pinned for this project.
ckpt = torch.load(f"../data/{DATASET}/eval_as_{EVAL}.pt", map_location="cpu")
cg_dict = ckpt["cg"]
# Model dimensions are read from axis 2 of the stored feature / prediction arrays.
input_dim = cg_dict["feat"].shape[2]
num_classes = cg_dict["pred"].shape[2]
# Convert the stored NumPy arrays to tensors (float features/adjacency, long labels).
feat = torch.from_numpy(cg_dict["feat"]).float()
adj = torch.from_numpy(cg_dict["adj"]).float()
label = torch.from_numpy(cg_dict["label"]).long()

In [14]:
# Unpickle the `prog_args` object for the selected dataset; its `hidden_dim`
# and `output_dim` attributes are read when the model is constructed.
prog_args_path = f"prog_args_{DATASET}.pkl"
with open(prog_args_path, "rb") as args_file:
    prog_args = pickle.load(args_file)

In [15]:
# Rebuild the network with the dimensions taken from the checkpoint and the
# pickled program arguments, restore the trained weights, and switch to
# evaluation mode. Dropout is fixed to 0.0 at construction.
model_kwargs = dict(
    nfeat=input_dim,
    nhid=prog_args.hidden_dim,
    nout=prog_args.output_dim,
    nclass=num_classes,
    dropout=0.0,
)
model = GCNSynthetic(**model_kwargs)
model.load_state_dict(ckpt["model_state"])
# Last expression of the cell: `eval()` returns the module, so its repr is displayed.
model.eval()

GCNSynthetic(
  (gc1): GraphConvolution (10 -> 20)
  (gc2): GraphConvolution (20 -> 20)
  (gc3): GraphConvolution (20 -> 20)
  (lin): Linear(in_features=60, out_features=4, bias=True)
)

## Predictions

In [16]:
# Predicted class of each explained node once its explanation matrix has been
# subtracted from the original sub-adjacency matrix.
predictions = dict()
# Inference only: disable autograd so no computation graph is built per node.
with torch.no_grad():
    for node in explanations:
        # `unsqueeze(0)` adds a leading dimension to the explanation before it
        # is subtracted from `org_adj`; `squeeze(0)` drops it from the output.
        sub_adj = sub_data[node]['org_adj'] - torch.Tensor(explanations[node]).unsqueeze(0)
        new_idx = sub_data[node]['node_idx_new']
        pred_proba = model(
            sub_data[node]['sub_feat'],
            sub_adj
        ).squeeze(0)
        # Keep only the prediction for the explained node itself.
        predictions[node] = int(torch.argmax(pred_proba[new_idx]))

## Fidelity

In [36]:
# Fidelity as defined here: one minus the share of evaluated nodes whose
# prediction differs from the label recorded in `sub_labels`.
misclassifications = sum(
    1 for node, predicted in predictions.items() if predicted != sub_labels[node]
)
fidelity = 1 - misclassifications / len(predictions)
print(f"Fidelity: {fidelity:.2f}")

Fidelity: 0.00


## Per-label fidelity

In [27]:
# Number of evaluated nodes, per label, whose prediction differs from the label
# in `sub_labels`. Labels without any mismatch never get a key.
per_label_mismatches = defaultdict(int)
for node, predicted in predictions.items():
    # Use a dedicated name: `label` is the ground-truth tensor loaded from the
    # checkpoint and must not be overwritten by a loop variable.
    node_label = sub_labels[node]
    if predicted != node_label:
        per_label_mismatches[int(node_label)] += 1

In [34]:
# Count, per label, the nodes that actually received a prediction. Mismatches
# are counted over `predictions`, so the denominator must cover the same nodes;
# counting every node in `sub_labels` would inflate per-label fidelity whenever
# some nodes have no explanation.
labels, label_counts = torch.Tensor(
    [sub_labels[node] for node in predictions]
).unique(return_counts=True)
nodes_per_label = {
    int(key): int(val) for key, val in zip(labels, label_counts)
}

In [37]:
# Print one fidelity value per label, in ascending label order. Iterating over
# `nodes_per_label` (not over the mismatch counter) also reports labels that
# have no mismatch at all; those would otherwise be silently omitted.
print("Fidelity:")
for label_id in sorted(nodes_per_label):
    # `.get` avoids inserting new keys into the defaultdict while reading it.
    mismatches = per_label_mismatches.get(label_id, 0)
    print(f"Label-{label_id}", end=": ")
    print(f"{1 - mismatches/nodes_per_label[label_id]:.2f}")
    print()

Fidelity:
Label-3: 0.00

Label-1: 0.00

Label-2: 0.00

