In [1]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import seaborn as sns
from matplotlib import axes
import pandas as pd
import os
from os.path import join
import json

In [2]:

import sys, os
sys.path.append('/cluster/home/kamara/Explain')
from clutils.nbutils import *
os.getcwd()

'/cluster/home/kamara/Explain/notebooks'

In [3]:
def parseLogs(logfile, kw="rawlogs:", **kwargs):
    logs = {}
    dicts = []
    with open(logfile) as f:
        for i_line, line in enumerate(f):
            pos = line.find(kw)
            if pos >= 0:
                pos += len(kw)
                pos_tensor = line.find("tensor(")
                while pos_tensor >= 0:
                    comma_end = line.find(",", pos_tensor)
                    tensor_end = line.find(")", pos_tensor)
                    line_new = line[:pos_tensor] + line[pos_tensor + len("tensor("):comma_end] + line[tensor_end+1:]
                    print("LINE", line)
                    print("NEW", line_new)
                    if len(line_new) >= len(line):
                        print("No line reduction")
                        break
                    line = line_new
                    pos_tensor = line.find("tensor(")
                    
                #print(ast.literal_eval(line[pos:]))
                #line[pos:] = line[pos:].replace("'", "")
                kline = line[pos:]
                kline = kline.replace("true", "True")
                try:
                    dic = ast.literal_eval(kline)
                except:
                    # print("Encountering weird patterns in logs")
                    # print("Line number %d" % i_line)
                    # print(line)
                    line = line.replace("nan,", "-1e8,")
                    line = line.replace("NaN", "-1e8")
                    try:
                        dic = ast.literal_eval(line[pos:])
                    except:
                        print("Unable to replace NaNs")
                        print(line)
                        continue
                for k in dic.keys():
                    if k not in logs:
                        logs[k] = []
                    logs[k].append(dic[k])
                dicts.append(dic)

    df = pd.DataFrame(dicts)

    return logs, df


In [4]:
def get_info(logdir):
    infos, fidelity, initial_mask_infos, transformed_mask_infos = {}, {}, {}, {}
    for filename in os.listdir(logdir):
        if filename.endswith(".stdout") and filename.startswith("_"):
            _, infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__infos:')
            _, initial_mask_infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__initial_edge_mask_infos:')
            _, transformed_mask_infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__transformed_mask_infos:')
            _, fidelity[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__fidelity:')             
    return(infos, initial_mask_infos, transformed_mask_infos, fidelity)


def get_gnn_info(logdir):
    gnn_train, gnn_test = {}, {}
    for filename in os.listdir(logdir):
        if filename.endswith(".stdout") and filename.startswith("_"):
            _, gnn_train[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__gnn_train_scores: ')
            _, gnn_test[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__gnn_test_scores: ')
    return(gnn_train, gnn_test)


def get_param_ranges(jsonpath, dataset = ""):
    f = open(jsonpath,)
    data = json.load(f)
    ranges = data['params']
    ranges['none'] = ''
    return(ranges)   

In [5]:
def get_df_results(logs, ranges, name, metrics, selection = 'last'):
    dicts = []
    for params_set in enumerateParams(ranges):
        key = name.format(**params_set)
        # if key in logs and len(logs[key]) >= 1 and metric in logs[key]:
        any_metric = (key in logs) and (len(logs[key]) >= 1) and any([metric in logs[key] for metric in metrics])
        if any_metric:
            if selection == 'all':
                for index, row in logs[key].iterrows():
                    metrics_dict = {
                        metric: select_value(row.to_frame().T, metric, 'last') if key in logs and len(logs[key]) >= 1 and metric in logs[key] else -1
                        for metric in metrics
                    }
                    dicts.append(dictmerge(params_set, metrics_dict))
                    
            else:
                metrics_dict = {
                    metric: select_value(logs[key], metric, selection) if key in logs and len(logs[key]) >= 1 and metric in logs[key] else -1
                    for metric in metrics
                }
                dicts.append(dictmerge(params_set, metrics_dict))

    df = pd.DataFrame(dicts)
    #df = df.drop(columns=['none'])
    return df

## Node Classification - Real world

### Sparsity

In [25]:
logdir = f'/cluster/scratch/kamara/checkpoints/node_classification/real/sparsity_real_avg10expe/logs'
jsonpath = f'/cluster/home/kamara/Explain/configs/config_real_sparsity_avg10expe.json'
infos, initial_mask_infos, transformed_mask_infos, fidelity = get_info(logdir)
gnn_train, gnn_test = get_gnn_info(logdir)

print(list(fidelity.keys())[1])
ranges = get_param_ranges(jsonpath)

_explainer_name=random_sparsity=0_dataset=pubmed_true_label_as_target=True_hard_mask=True_seed=7


In [27]:
#name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_dataset={dataset}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}'
name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_dataset={dataset}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}_seed={seed}'
#name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}'


### GNN scores

In [28]:
df_gnn_train = get_df_results(gnn_train, ranges, name, metrics = ["prec", "recall", "f1_score", "acc"])
df_gnn_test = get_df_results(gnn_test, ranges, name, metrics = ["prec", "recall", "f1_score", "acc"])

df_gnn_train = df_gnn_train.groupby(by=['dataset']).mean().reset_index()
df_gnn_test = df_gnn_test.groupby(by=['dataset']).mean().reset_index()

gnn_scores = pd.merge(df_gnn_train, df_gnn_test, on=['dataset', 'sparsity', 'num_test'], suffixes=['_train', '_test'])
gnn_scores = gnn_scores.drop(columns=["sparsity", "num_test", "seed_train", "seed_test"])
gnn_scores

Unnamed: 0,dataset,prec_train,recall_train,acc_train,prec_test,recall_test,acc_test
0,actor,0.594775,0.480935,0.538197,0.260749,0.249035,0.285942
1,chameleon,0.817928,0.806324,0.80641,0.646635,0.634489,0.632237
2,citeseer,1.0,1.0,1.0,0.651307,0.652012,0.6764
3,cora,1.0,1.0,1.0,0.781336,0.817381,0.803496
4,cornell,0.92717,0.908066,0.974694,0.354803,0.338859,0.532487
5,facebook,0.931203,0.926196,0.933334,0.923301,0.917619,0.926356
6,pubmed,1.0,1.0,1.0,0.771646,0.781766,0.7785
7,texas,0.972625,0.966848,0.991954,0.296897,0.277057,0.510811
8,wisconsin,0.980174,0.955045,0.975833,0.397882,0.406152,0.535294


In [29]:
#gnn_scores.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_gnn_scores.csv', index=False)

### Explainability methods scores

In [30]:
df_infos = get_df_results(infos, ranges, name, metrics = ['time', "number_of_edges", "mask_sparsity_init", "non_zero_values_init"])
df_initial_mask_infos = get_df_results(initial_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg"])
df_tranformed_mask_infos = get_df_results(transformed_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg"])
df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 
                                                           'fidelity_gnn_acc+', 'fidelity_gnn_acc-', 'fidelity_gnn_prob+', 'fidelity_gnn_prob-'])

In [31]:
df_mask_infos = pd.merge(df_initial_mask_infos, df_tranformed_mask_infos, suffixes=['_init', '_transf'], on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "sparsity", "true_label_as_target", "hard_mask"])
scores = df_fid
scores = pd.merge(scores, df_mask_infos, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "sparsity", "true_label_as_target", "hard_mask"])
scores = pd.merge(scores, df_infos, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "sparsity", "true_label_as_target", "hard_mask"])
scores
#scores.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_sparsity_all.csv', index=False)


Unnamed: 0,none_x,explainer_name,sparsity,dataset,true_label_as_target,hard_mask,explain_graph_x,num_test,seed,data_save_dir,...,explain_graph_transf,mask_size_transf,mask_entropy_transf,max_avg_transf,none_y,explain_graph_y,time,number_of_edges,mask_sparsity_init,non_zero_values_init
0,,random,0.00,cora,True,True,False,100,0,data,...,False,13249.92,9.299591,0.428312,,False,0.0002,13264,0.001062,13249.92
1,,random,0.00,cora,True,True,False,100,1,data,...,False,13250.48,9.299754,0.490545,,False,0.0002,13264,0.001019,13250.48
2,,random,0.00,cora,True,True,False,100,2,data,...,False,13250.34,9.299545,0.498040,,False,0.0002,13264,0.001030,13250.34
3,,random,0.00,cora,True,True,False,100,3,data,...,False,13249.36,9.299762,0.489739,,False,0.0002,13264,0.001104,13249.36
4,,random,0.00,cora,True,True,False,100,4,data,...,False,13249.54,9.299681,0.470068,,False,0.0002,13264,0.001090,13249.54
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12327,,pgmexplainer,0.99,facebook,False,True,False,100,1,data,...,False,2768.48,7.475550,0.516261,,False,89.3687,364116,0.944714,20130.39
12328,,pgmexplainer,0.99,facebook,False,True,False,100,2,data,...,False,2392.10,7.094828,0.522513,,False,87.7417,364116,0.948680,18686.40
12329,,pgmexplainer,0.99,facebook,False,True,False,100,3,data,...,False,2672.38,7.429812,0.492495,,False,91.9752,364116,0.957629,15427.86
12330,,pgmexplainer,0.99,facebook,False,True,False,100,4,data,...,False,2674.88,7.356349,0.536693,,False,87.2857,364116,0.951040,17826.97


In [32]:
res = scores.groupby(by=['dataset', "true_label_as_target", "hard_mask", 'sparsity', 'explainer_name']).mean()
res = res.drop(columns=['seed'])
#res[['mask_size', "mask_entropy", "max_avg"]]
res

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,num_test,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,fidelity_gnn_acc+,fidelity_gnn_acc-,fidelity_gnn_prob+,fidelity_gnn_prob-,mask_sparsity,...,mask_size_init,mask_entropy_init,max_avg_init,mask_size_transf,mask_entropy_transf,max_avg_transf,time,number_of_edges,mask_sparsity_init,non_zero_values_init
dataset,true_label_as_target,hard_mask,sparsity,explainer_name,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
actor,False,False,0.000,basic_gnnexplainer,100.0,-1.000,-1.000,-1.000000,-1.000000,0.722,0.312,0.150069,0.088249,0.970945,...,1769.996,5.684779,0.713987,1769.996,5.684779,0.713987,3.79272,60918.0,0.970945,1769.996
actor,False,False,0.000,distance,100.0,-1.000,-1.000,-1.000000,-1.000000,0.820,0.687,0.182060,0.151183,0.000080,...,60913.117,10.958437,0.117579,60913.117,10.958437,0.117579,0.74688,60918.0,0.000080,60913.117
actor,False,False,0.000,gnnexplainer,100.0,-1.000,-1.000,-1.000000,-1.000000,0.836,0.753,0.178340,0.175748,0.970945,...,1769.996,5.007442,0.062815,1769.996,5.007442,0.062815,3.76496,60918.0,0.970945,1769.996
actor,False,False,0.000,gradcam,100.0,-1.000,-1.000,-1.000000,-1.000000,0.770,0.693,0.174446,0.152992,0.419769,...,35346.504,8.168174,0.116363,35346.504,8.168174,0.116363,0.00529,60918.0,0.419769,35346.504
actor,False,False,0.000,ig,100.0,-1.000,-1.000,-1.000000,-1.000000,0.828,0.640,0.180331,0.140814,0.030613,...,59053.126,10.742004,0.185476,59053.126,10.742004,0.185476,0.25437,60918.0,0.030613,59053.126
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wisconsin,True,True,0.997,gnnexplainer,100.0,0.666,0.458,0.545173,0.308689,-1.000,-1.000,-1.000000,-1.000000,0.997393,...,287.783,3.921350,0.066819,3.000,0.774409,0.659021,3.83499,1151.0,0.749971,287.783
wisconsin,True,True,0.997,ig,100.0,0.366,0.063,0.246022,-0.068371,-1.000,-1.000,-1.000000,-1.000000,0.997393,...,1120.830,6.850792,0.210837,3.000,1.079416,0.719721,0.13043,1151.0,0.026212,1120.830
wisconsin,True,True,0.997,occlusion,100.0,0.667,0.205,0.545069,0.080260,-1.000,-1.000,-1.000000,-1.000000,0.997393,...,287.783,5.029174,0.727468,3.000,1.086211,0.795199,0.35526,1151.0,0.749971,287.783
wisconsin,True,True,0.997,pagerank,100.0,0.290,0.077,0.171710,-0.056536,-1.000,-1.000,-1.000000,-1.000000,0.997393,...,1087.551,5.931450,0.001056,3.000,1.088098,0.758350,0.02136,1151.0,0.055125,1087.551


In [33]:
res = res.reset_index()
res.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_sparsity_avg10expe.csv', index=False)

## Topk

In [19]:
logdir = f'/cluster/scratch/kamara/checkpoints/node_classification/real/topk/topk_real_1/logs'
jsonpath = f'/cluster/home/kamara/Explain/configs/real/topk/config_real_topk_1expe.json'
infos, initial_mask_infos, transformed_mask_infos, fidelity = get_info(logdir)
gnn_train, gnn_test = get_gnn_info(logdir)

print(list(fidelity.keys())[1])
ranges = get_param_ranges(jsonpath)

_explainer_name=pagerank_dataset=wisconsin_true_label_as_target=False_hard_mask=True


In [20]:
#name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_dataset={dataset}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}'
name = '{none}_explainer_name={explainer_name}_dataset={dataset}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}'


In [21]:
df_infos = get_df_results(infos, ranges, name, metrics = ['time', "number_of_edges"])
df_initial_mask_infos = get_df_results(initial_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg"])
df_tranformed_mask_infos = get_df_results(transformed_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg", "topk"], selection="all")
df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 
                                                           'fidelity_gnn_acc+', 'fidelity_gnn_acc-', 'fidelity_gnn_prob+', 'fidelity_gnn_prob-', 'topk'], selection="all")

In [22]:
df_mask_infos = pd.merge(df_initial_mask_infos, df_tranformed_mask_infos, suffixes=['_init', '_transf'], on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask"])
scores = df_fid
scores = pd.merge(scores, df_mask_infos, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask", "topk"])
scores = pd.merge(scores, df_infos, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask"])
scores
#scores.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_sparsity_all.csv', index=False)


Unnamed: 0,none_x,explainer_name,dataset,topk_list_x,true_label_as_target,hard_mask,explain_graph_x,num_test,seed,data_save_dir,...,topk_list_transf,explain_graph_transf,mask_size_transf,mask_entropy_transf,max_avg_transf,none_y,topk_list_y,explain_graph_y,time,number_of_edges
0,,random,cora,151015202550100,True,True,False,100,0,data,...,151015202550100,False,1.00,0.000000,1.000000,,151015202550100,False,0.0002,13264
1,,random,cora,151015202550100,True,True,False,100,0,data,...,151015202550100,False,5.00,1.609438,0.999720,,151015202550100,False,0.0002,13264
2,,random,cora,151015202550100,True,True,False,100,0,data,...,151015202550100,False,10.00,2.302585,0.999468,,151015202550100,False,0.0002,13264
3,,random,cora,151015202550100,True,True,False,100,0,data,...,151015202550100,False,15.00,2.708050,0.999279,,151015202550100,False,0.0002,13264
4,,random,cora,151015202550100,True,True,False,100,0,data,...,151015202550100,False,20.00,2.995732,0.999034,,151015202550100,False,0.0002,13264
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2464,,pgmexplainer,wisconsin,151015202550100,False,False,False,100,0,data,...,151015202550100,False,15.00,2.694923,0.836447,,151015202550100,False,2.4069,1151
2465,,pgmexplainer,wisconsin,151015202550100,False,False,False,100,0,data,...,151015202550100,False,19.98,2.979302,0.810814,,151015202550100,False,2.4069,1151
2466,,pgmexplainer,wisconsin,151015202550100,False,False,False,100,0,data,...,151015202550100,False,24.82,3.193790,0.798190,,151015202550100,False,2.4069,1151
2467,,pgmexplainer,wisconsin,151015202550100,False,False,False,100,0,data,...,151015202550100,False,48.76,3.847085,0.689827,,151015202550100,False,2.4069,1151


In [23]:
res = scores.groupby(by=['dataset', "true_label_as_target", "hard_mask", 'topk', 'explainer_name']).mean()
res = res.drop(columns=['seed'])
#res[['mask_size', "mask_entropy", "max_avg"]]
res

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,num_test,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,fidelity_gnn_acc+,fidelity_gnn_acc-,fidelity_gnn_prob+,fidelity_gnn_prob-,mask_size_init,mask_entropy_init,max_avg_init,mask_size_transf,mask_entropy_transf,max_avg_transf,time,number_of_edges
dataset,true_label_as_target,hard_mask,topk,explainer_name,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
actor,False,False,1.0,basic_gnnexplainer,100.0,-1.00,-1.00,-1.000000,-1.000000,0.00,0.42,0.002503,0.113221,1208.43,5.300966,0.696889,1.00,0.000000,1.000000,3.8270,60918.0
actor,False,False,1.0,distance,100.0,-1.00,-1.00,-1.000000,-1.000000,0.07,0.39,0.016935,0.106466,60913.74,10.960461,0.110511,1.00,0.000000,1.000000,0.8971,60918.0
actor,False,False,1.0,gnnexplainer,100.0,-1.00,-1.00,-1.000000,-1.000000,0.43,0.41,0.074586,0.098053,1208.43,4.690754,0.051125,1.00,0.000000,1.000000,7.4523,60918.0
actor,False,False,1.0,gradcam,100.0,-1.00,-1.00,-1.000000,-1.000000,0.16,0.38,0.037296,0.082071,39772.57,8.478164,0.149737,1.00,0.000000,1.000000,0.0079,60918.0
actor,False,False,1.0,occlusion,100.0,-1.00,-1.00,-1.000000,-1.000000,0.38,0.39,0.062185,0.083376,1208.25,5.610678,0.479297,1.00,0.000000,1.000000,3.3738,60918.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wisconsin,True,True,100.0,occlusion,100.0,0.67,0.13,0.133390,-0.034419,-1.00,-1.00,-1.000000,-1.000000,290.10,4.690044,0.418526,78.65,3.744467,0.420155,0.6651,1151.0
wisconsin,True,True,100.0,pagerank,100.0,0.70,0.09,0.117450,-0.001679,-1.00,-1.00,-1.000000,-1.000000,1082.04,5.922828,0.001028,100.00,4.446748,0.206235,0.0262,1151.0
wisconsin,True,True,100.0,pgmexplainer,100.0,0.63,0.13,0.108370,-0.005719,-1.00,-1.00,-1.000000,-1.000000,517.66,5.756787,0.298066,95.11,4.465948,0.550145,2.5542,1151.0
wisconsin,True,True,100.0,random,100.0,0.20,0.55,-0.008910,0.088731,-1.00,-1.00,-1.000000,-1.000000,1148.96,6.853682,0.490891,100.00,4.604816,0.943692,0.0001,1151.0


In [24]:
res = res.reset_index()
res.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/topk/nc_real_topk_1expe.csv', index=False)

## Node Classification - Real world - 10 seeds

In [6]:
datasets = [
      "cora",
      "pubmed",
      "citeseer",
      "cornell",
      "texas",
      "wisconsin",
      "actor",
      "chameleon",
    "squirrel",
    "facebook"
    ]

### Sparsity

In [7]:
#name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_dataset={dataset}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}'
#name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_dataset={dataset}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}_seed={seed}'
name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}_seed={seed}'
#name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}'


In [8]:
SCORES = []
for data in datasets:

    logdir = f'/cluster/home/kamara/checkpoints/node_classification/real/sparsity_real_avg10expe_{data}/logs'
    jsonpath = f'/cluster/home/kamara/Explain/configs/config_real_sparsity_avg10expe_{data}.json'
    ranges = get_param_ranges(jsonpath)
    infos, initial_mask_infos, transformed_mask_infos, fidelity = get_info(logdir)
    
    df_infos = get_df_results(infos, ranges, name, metrics = ['dataset', 'time', "number_of_edges", "mask_sparsity_init", "non_zero_values_init"])
    print(df_infos)
    df_initial_mask_infos = get_df_results(initial_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg"])
    df_tranformed_mask_infos = get_df_results(transformed_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg"])
    df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 
                                                           'fidelity_gnn_acc+', 'fidelity_gnn_acc-', 'fidelity_gnn_prob+', 'fidelity_gnn_prob-',
                                                           'mask_sparsity', 'expl_edges'])
    
    df_mask_infos = pd.merge(df_initial_mask_infos, df_tranformed_mask_infos, suffixes=['_init', '_transf'], on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "sparsity", "true_label_as_target", "hard_mask"])
    scores = df_fid
    scores = pd.merge(scores, df_mask_infos, on=["explainer_name", "num_test", "data_save_dir", "seed", "sparsity", "true_label_as_target", "hard_mask"])
    scores = pd.merge(scores, df_infos, on=["explainer_name", "num_test", "data_save_dir", "seed", "sparsity", "true_label_as_target", "hard_mask"])
    res = scores.groupby(by=['dataset', "true_label_as_target", "hard_mask", 'sparsity', 'explainer_name']).mean()
    res = res.drop(columns=['seed'])
    print('res', res)

    SCORES.append(res)
    print(SCORES)
                                                                                                                     


     none explainer_name  sparsity dataset true_label_as_target hard_mask  \
0                 random     0.000    cora                 True      True   
1                 random     0.000    cora                 True      True   
2                 random     0.000    cora                 True      True   
3                 random     0.000    cora                 True      True   
4                 random     0.000    cora                 True      True   
...   ...            ...       ...     ...                  ...       ...   
1595        pgmexplainer     0.997    cora                False     False   
1596        pgmexplainer     0.997    cora                False     False   
1597        pgmexplainer     0.997    cora                False     False   
1598        pgmexplainer     0.997    cora                False     False   
1599        pgmexplainer     0.997    cora                False     False   

     explain_graph  num_test  seed data_save_dir     time  number_of_edges 

     none explainer_name  sparsity dataset true_label_as_target hard_mask  \
0                 random     0.000  pubmed                 True      True   
1                 random     0.000  pubmed                 True      True   
2                 random     0.000  pubmed                 True      True   
3                 random     0.000  pubmed                 True      True   
4                 random     0.000  pubmed                 True      True   
...   ...            ...       ...     ...                  ...       ...   
1595        pgmexplainer     0.997  pubmed                False     False   
1596        pgmexplainer     0.997  pubmed                False     False   
1597        pgmexplainer     0.997  pubmed                False     False   
1598        pgmexplainer     0.997  pubmed                False     False   
1599        pgmexplainer     0.997  pubmed                False     False   

     explain_graph  num_test  seed data_save_dir     time  number_of_edges 

     none explainer_name  sparsity   dataset true_label_as_target hard_mask  \
0                 random     0.000  citeseer                 True      True   
1                 random     0.000  citeseer                 True      True   
2                 random     0.000  citeseer                 True      True   
3                 random     0.000  citeseer                 True      True   
4                 random     0.000  citeseer                 True      True   
...   ...            ...       ...       ...                  ...       ...   
1593        pgmexplainer     0.997  citeseer                False     False   
1594        pgmexplainer     0.997  citeseer                False     False   
1595        pgmexplainer     0.997  citeseer                False     False   
1596        pgmexplainer     0.997  citeseer                False     False   
1597        pgmexplainer     0.997  citeseer                False     False   

     explain_graph  num_test  seed data_save_dir   

     none explainer_name  sparsity  dataset true_label_as_target hard_mask  \
0                 random     0.000  cornell                 True      True   
1                 random     0.000  cornell                 True      True   
2                 random     0.000  cornell                 True      True   
3                 random     0.000  cornell                 True      True   
4                 random     0.000  cornell                 True      True   
...   ...            ...       ...      ...                  ...       ...   
1595        pgmexplainer     0.997  cornell                False     False   
1596        pgmexplainer     0.997  cornell                False     False   
1597        pgmexplainer     0.997  cornell                False     False   
1598        pgmexplainer     0.997  cornell                False     False   
1599        pgmexplainer     0.997  cornell                False     False   

     explain_graph  num_test  seed data_save_dir    time  numbe

     none explainer_name  sparsity dataset true_label_as_target hard_mask  \
0                 random     0.000   texas                 True      True   
1                 random     0.000   texas                 True      True   
2                 random     0.000   texas                 True      True   
3                 random     0.000   texas                 True      True   
4                 random     0.000   texas                 True      True   
...   ...            ...       ...     ...                  ...       ...   
1595        pgmexplainer     0.997   texas                False     False   
1596        pgmexplainer     0.997   texas                False     False   
1597        pgmexplainer     0.997   texas                False     False   
1598        pgmexplainer     0.997   texas                False     False   
1599        pgmexplainer     0.997   texas                False     False   

     explain_graph  num_test  seed data_save_dir    time  number_of_edges  

     none explainer_name  sparsity    dataset true_label_as_target hard_mask  \
0                 random     0.000  wisconsin                 True      True   
1                 random     0.000  wisconsin                 True      True   
2                 random     0.000  wisconsin                 True      True   
3                 random     0.000  wisconsin                 True      True   
4                 random     0.000  wisconsin                 True      True   
...   ...            ...       ...        ...                  ...       ...   
1591        pgmexplainer     0.997  wisconsin                False     False   
1592        pgmexplainer     0.997  wisconsin                False     False   
1593        pgmexplainer     0.997  wisconsin                False     False   
1594        pgmexplainer     0.997  wisconsin                False     False   
1595        pgmexplainer     0.997  wisconsin                False     False   

     explain_graph  num_test  seed data

     none explainer_name  sparsity dataset true_label_as_target hard_mask  \
0                 random     0.000   actor                 True      True   
1                 random     0.000   actor                 True      True   
2                 random     0.000   actor                 True      True   
3                 random     0.000   actor                 True      True   
4                 random     0.000   actor                 True      True   
...   ...            ...       ...     ...                  ...       ...   
1595        pgmexplainer     0.997   actor                False     False   
1596        pgmexplainer     0.997   actor                False     False   
1597        pgmexplainer     0.997   actor                False     False   
1598        pgmexplainer     0.997   actor                False     False   
1599        pgmexplainer     0.997   actor                False     False   

     explain_graph  num_test  seed data_save_dir     time  number_of_edges 

     none explainer_name  sparsity    dataset true_label_as_target hard_mask  \
0                 random     0.000  chameleon                 True      True   
1                 random     0.000  chameleon                 True      True   
2                 random     0.000  chameleon                 True      True   
3                 random     0.000  chameleon                 True      True   
4                 random     0.000  chameleon                 True      True   
...   ...            ...       ...        ...                  ...       ...   
1595        pgmexplainer     0.997  chameleon                False     False   
1596        pgmexplainer     0.997  chameleon                False     False   
1597        pgmexplainer     0.997  chameleon                False     False   
1598        pgmexplainer     0.997  chameleon                False     False   
1599        pgmexplainer     0.997  chameleon                False     False   

     explain_graph  num_test  seed data

     none explainer_name  sparsity   dataset true_label_as_target hard_mask  \
0                 random     0.000  squirrel                 True      True   
1                 random     0.000  squirrel                 True      True   
2                 random     0.000  squirrel                 True      True   
3                 random     0.000  squirrel                 True      True   
4                 random     0.000  squirrel                 True      True   
...   ...            ...       ...       ...                  ...       ...   
1433        pgmexplainer     0.997  squirrel                False     False   
1434        pgmexplainer     0.997  squirrel                False     False   
1435        pgmexplainer     0.997  squirrel                False     False   
1436        pgmexplainer     0.997  squirrel                False     False   
1437        pgmexplainer     0.997  squirrel                False     False   

     explain_graph  num_test  seed data_save_dir   

     none explainer_name  sparsity   dataset true_label_as_target hard_mask  \
0                 random     0.000  facebook                 True      True   
1                 random     0.000  facebook                 True      True   
2                 random     0.000  facebook                 True      True   
3                 random     0.000  facebook                 True      True   
4                 random     0.000  facebook                 True      True   
...   ...            ...       ...       ...                  ...       ...   
1595        pgmexplainer     0.997  facebook                False     False   
1596        pgmexplainer     0.997  facebook                False     False   
1597        pgmexplainer     0.997  facebook                False     False   
1598        pgmexplainer     0.997  facebook                False     False   
1599        pgmexplainer     0.997  facebook                False     False   

     explain_graph  num_test  seed data_save_dir   

In [9]:
avg_scores = pd.concat(SCORES)
avg_scores

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,num_test,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,fidelity_gnn_acc+,fidelity_gnn_acc-,fidelity_gnn_prob+,fidelity_gnn_prob-,mask_sparsity,...,mask_size_init,mask_entropy_init,max_avg_init,mask_size_transf,mask_entropy_transf,max_avg_transf,time,number_of_edges,mask_sparsity_init,non_zero_values_init
dataset,true_label_as_target,hard_mask,sparsity,explainer_name,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
cora,False,False,0.000,basic_gnnexplainer,100.0,-1.000,-1.000,-1.000000,-1.000000,0.376,0.379,0.384231,0.413214,0.987620,...,164.210,3.292074,0.368996,164.210,3.292074,0.368996,3.63883,13264.0,0.987620,164.210
cora,False,False,0.000,distance,100.0,-1.000,-1.000,-1.000000,-1.000000,0.162,0.068,0.539109,0.311883,0.122430,...,11640.092,8.849912,0.169638,11640.092,8.849912,0.169638,0.28118,13264.0,0.122430,11640.092
cora,False,False,0.000,gnnexplainer,100.0,-1.000,-1.000,-1.000000,-1.000000,0.493,0.373,0.541627,0.601882,0.987620,...,164.208,2.920812,0.141127,164.208,2.920812,0.141127,3.62851,13264.0,0.987620,164.208
cora,False,False,0.000,gradcam,100.0,-1.000,-1.000,-1.000000,-1.000000,0.215,0.036,0.498248,0.365465,0.761656,...,3161.393,4.747347,0.135885,3161.393,4.747347,0.135885,0.00431,13264.0,0.761656,3161.393
cora,False,False,0.000,ig,100.0,-1.000,-1.000,-1.000000,-1.000000,0.337,0.031,0.556903,0.264937,0.251848,...,9923.491,7.807732,0.146000,9923.491,7.807732,0.146000,0.15615,13264.0,0.251848,9923.491
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
facebook,True,True,0.997,occlusion,100.0,0.862,0.012,0.751350,-0.059571,-1.000,-1.000,-1.000000,-1.000000,0.998051,...,7832.505,7.007714,0.889634,709.722,6.057597,0.889796,21.22537,364116.0,0.978489,7832.505
facebook,True,True,0.997,pagerank,100.0,0.786,0.044,0.685483,-0.027353,-1.000,-1.000,-1.000000,-1.000000,0.997045,...,23741.544,8.251815,0.001836,1076.078,6.233838,0.027866,2.46246,364116.0,0.934797,23741.544
facebook,True,True,0.997,pgmexplainer,100.0,0.550,0.040,0.454271,-0.032108,-1.000,-1.000,-1.000000,-1.000000,0.997471,...,18670.273,8.227726,0.371260,920.693,6.582991,0.643379,88.90563,364116.0,0.948724,18670.273
facebook,True,True,0.997,random,100.0,0.072,0.785,0.002038,0.683861,-1.000,-1.000,-1.000000,-1.000000,0.997001,...,363750.047,12.612083,0.503269,1092.000,6.995766,0.998317,0.00279,364116.0,0.001005,363750.047


In [10]:
avg_scores = avg_scores.reset_index()
avg_scores.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_sparsity_avg10expe.csv', index=False)

### GNN scores

In [11]:
GNN_SCORES = []
for data in datasets:

    logdir = f'/cluster/home/kamara/checkpoints/node_classification/real/sparsity_real_avg10expe_{data}/logs'
    jsonpath = f'/cluster/home/kamara/Explain/configs/config_real_sparsity_avg10expe_{data}.json'
    ranges = get_param_ranges(jsonpath)
    gnn_train, gnn_test = get_gnn_info(logdir)
    
    df_gnn_train = get_df_results(gnn_train, ranges, name, metrics = ["prec", "recall", "acc"])
    df_gnn_test = get_df_results(gnn_test, ranges, name, metrics = ["prec", "recall", "acc"])
    df_gnn_train['f1_score'] = 2*df_gnn_train['recall']*df_gnn_train['prec']/(df_gnn_train['recall']+df_gnn_train['prec'])
    df_gnn_test['f1_score'] = 2*df_gnn_test['recall']*df_gnn_test['prec']/(df_gnn_test['recall']+df_gnn_test['prec'])

    df_gnn_train = df_gnn_train.groupby(by=['dataset']).mean().reset_index()
    df_gnn_test = df_gnn_test.groupby(by=['dataset']).mean().reset_index()

    gnn_scores = pd.merge(df_gnn_train, df_gnn_test, on=['dataset', 'sparsity', 'num_test'], suffixes=['_train', '_test'])
    gnn_scores = gnn_scores.drop(columns=["sparsity", "num_test", "seed_train", "seed_test"])
    print('res', gnn_scores)

    GNN_SCORES.append(gnn_scores)
    print(GNN_SCORES)

res   dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0    cora         1.0           1.0        1.0             1.0   0.781341   

   recall_test  acc_test  f1_score_test  
0     0.817384    0.8035       0.798947  
[  dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0    cora         1.0           1.0        1.0             1.0   0.781341   

   recall_test  acc_test  f1_score_test  
0     0.817384    0.8035       0.798947  ]
res   dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0  pubmed         1.0           1.0        1.0             1.0   0.771646   

   recall_test  acc_test  f1_score_test  
0     0.781766    0.7785        0.77667  
[  dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0    cora         1.0           1.0        1.0             1.0   0.781341   

   recall_test  acc_test  f1_score_test  
0     0.817384    0.8035       0.798947  ,   dataset  prec_train  recall_tr

res      dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0  chameleon    0.817928      0.806324    0.80641        0.812068   0.646635   

   recall_test  acc_test  f1_score_test  
0     0.634489  0.632237       0.640485  
[  dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0    cora         1.0           1.0        1.0             1.0   0.781341   

   recall_test  acc_test  f1_score_test  
0     0.817384    0.8035       0.798947  ,   dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0  pubmed         1.0           1.0        1.0             1.0   0.771646   

   recall_test  acc_test  f1_score_test  
0     0.781766    0.7785        0.77667  ,     dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0  citeseer         1.0           1.0        1.0             1.0   0.651307   

   recall_test  acc_test  f1_score_test  
0     0.652012    0.6764       0.651656  ,    dataset  prec_train  re

In [12]:
avg_gnn_scores = pd.concat(GNN_SCORES)
avg_gnn_scores

Unnamed: 0,dataset,prec_train,recall_train,acc_train,f1_score_train,prec_test,recall_test,acc_test,f1_score_test
0,cora,1.0,1.0,1.0,1.0,0.781341,0.817384,0.8035,0.798947
0,pubmed,1.0,1.0,1.0,1.0,0.771646,0.781766,0.7785,0.77667
0,citeseer,1.0,1.0,1.0,1.0,0.651307,0.652012,0.6764,0.651656
0,cornell,0.927224,0.908135,0.974713,0.917484,0.354914,0.338861,0.532432,0.344153
0,texas,0.972625,0.966848,0.991954,0.969666,0.296897,0.277057,0.510811,0.284965
0,wisconsin,0.980174,0.955045,0.975833,0.967295,0.397882,0.406152,0.535294,0.39667
0,actor,0.594783,0.480918,0.538185,0.531375,0.260683,0.249017,0.285921,0.253762
0,chameleon,0.817928,0.806324,0.80641,0.812068,0.646635,0.634489,0.632237,0.640485
0,squirrel,0.469282,0.474848,0.475527,0.467952,0.333126,0.373366,0.376114,0.350042
0,facebook,0.931204,0.926199,0.933335,0.928694,0.923283,0.917607,0.926346,0.920435


In [13]:
#gnn_scores.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_gnn_scores.csv', index=False)

## Node Classification - Synthetic

### Sparsity

In [6]:
logdir = f'/cluster/home/kamara/checkpoints/node_classification/sparsity_mask/logs'
jsonpath = f'/cluster/home/kamara/checkpoints/node_classification/sparsity_mask/sweep.json'
infos, accuracy, fidelity = get_info(logdir)

print(list(accuracy.keys())[1])
name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_dataset={dataset}_hard_mask={hard_mask}'

ranges = get_param_ranges(jsonpath)
df_infos = get_df_results(infos, ranges, name, metrics = ['time', "number_of_edges", "mask_sparsity_init", "non_zero_values_init"])
#df_acc_top = get_df_results(accuracy_top, ranges, name, metrics = ['f1_score', 'recall', 'precision', 'ged', 'auc'])
df_acc = get_df_results(accuracy, ranges, name, metrics = ['f1_score', 'recall', 'precision'])
df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 'mask_sparsity', 'expl_edges'])

#scores = pd.concat([df_acc, df_fid], axis=1, join="inner")
#scores = pd.merge(df_acc_top, df_acc, suffixes=['_top', ''], on=["dataset", "explainer_name", "num_test_nodes", "data_save_dir", "gpu", "sparsity"])
scores = df_acc
scores = pd.merge(scores, df_fid, on=["dataset", "explainer_name", "num_test", "data_save_dir", "sparsity", "hard_mask"])
scores = pd.merge(scores, df_infos, on=["dataset", "explainer_name", "num_test", "data_save_dir", "sparsity", "hard_mask"])
scores


_explainer_name=random_sparsity=0_dataset=syn3_hard_mask=False


Unnamed: 0,none_x,explainer_name,sparsity,dataset,hard_mask,explain_graph_x,num_test,data_save_dir,f1_score,recall,...,fidelity_prob+,fidelity_prob-,mask_sparsity,expl_edges,none,explain_graph,time,number_of_edges,mask_sparsity_init,non_zero_values_init
0,,random,0.000,syn1,True,False,100,data,0.005822,1.000000,...,0.511743,-0.000995,0.000243,4109.00,,False,0.0001,4110,0.000243,4109.00
1,,random,0.000,syn1,False,False,100,data,0.005822,1.000000,...,0.229657,0.302100,0.000243,4109.00,,False,0.0001,4110,0.000243,4109.00
2,,random,0.000,syn3,True,False,100,data,0.009404,1.000000,...,0.010216,-0.000004,0.000197,5079.00,,False,0.0001,5080,0.000197,5079.00
3,,random,0.000,syn3,False,False,100,data,0.009404,1.000000,...,-0.001905,-0.001902,0.000197,5079.00,,False,0.0001,5080,0.000197,5079.00
4,,random,0.000,syn4,True,False,100,data,0.012320,1.000000,...,0.963050,0.000000,0.000517,1935.00,,False,0.0000,1936,0.000517,1935.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,,subgraphx,0.997,syn4,False,False,100,data,0.654525,0.516667,...,0.939375,0.959117,0.997417,5.00,,False,0.2794,1936,0.995610,8.50
356,,subgraphx,0.997,syn5,True,False,100,data,0.557429,0.424167,...,0.834673,0.853791,0.997144,9.74,,False,4.7674,3410,0.995595,15.02
357,,subgraphx,0.997,syn5,False,False,100,data,0.560954,0.425833,...,0.837413,0.848950,0.997144,9.74,,False,3.6892,3410,0.995601,15.00
358,,subgraphx,0.997,syn6,True,False,100,data,0.661056,0.634000,...,0.493473,0.183070,0.997829,8.57,,False,0.9027,3948,0.997822,8.60


In [7]:
res = scores.groupby(by=['dataset', 'sparsity', 'explainer_name', 'hard_mask']).mean()
res

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,num_test,f1_score,recall,precision,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,mask_sparsity,expl_edges,time,number_of_edges,mask_sparsity_init,non_zero_values_init
dataset,sparsity,explainer_name,hard_mask,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
syn1,0.000,distance,False,100.0,0.005823,1.000000,0.002920,0.28,0.64,0.102925,0.358078,0.010139,4068.33,0.0127,4110.0,0.010139,4068.33
syn1,0.000,distance,True,100.0,0.005823,1.000000,0.002920,0.76,0.00,0.511743,0.000000,0.010139,4068.33,0.0135,4110.0,0.010139,4068.33
syn1,0.000,gnnexplainer,False,100.0,0.444157,1.000000,0.328940,0.56,0.56,0.296190,0.348878,0.953431,191.40,2.6306,4110.0,0.953431,191.40
syn1,0.000,gnnexplainer,True,100.0,0.444157,1.000000,0.328940,0.76,0.00,0.511743,0.000000,0.953431,191.40,2.7273,4110.0,0.953431,191.40
syn1,0.000,ig,False,100.0,0.005099,0.873333,0.002557,0.35,0.64,0.042806,0.412742,0.002487,4099.78,0.1824,4110.0,0.002487,4099.78
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
syn6,0.997,random,True,100.0,0.002500,0.004000,0.001818,0.00,0.64,-0.000110,0.599028,0.997214,11.00,0.0000,3948.0,0.000253,3947.00
syn6,0.997,sa,False,100.0,0.000000,0.000000,0.000000,0.00,0.64,0.001296,0.599028,0.997214,11.00,0.0031,3948.0,0.000750,3945.04
syn6,0.997,sa,True,100.0,0.000000,0.000000,0.000000,0.00,0.64,0.001296,0.599028,0.997214,11.00,0.0032,3948.0,0.000750,3945.04
syn6,0.997,subgraphx,False,100.0,0.661056,0.634000,0.700500,0.54,0.20,0.493473,0.183070,0.997829,8.57,0.8231,3948.0,0.997822,8.60


In [8]:
res = res.reset_index()
res.to_csv('/cluster/home/kamara/Explain/csv/node_classification/sparsity_mask_nc.csv', index=False)

### Threshold

In [9]:
logdir = f'/cluster/home/kamara/checkpoints/threshold/logs'
jsonpath = f'/cluster/home/kamara/Explain/config_threshold.json'
infos, accuracy, fidelity = get_info(logdir)

print(list(accuracy.keys())[1])
name = '{none}_explainer_name={explainer_name}_threshold={threshold}_dataset={dataset}'

ranges = get_param_ranges(jsonpath)
df_infos = get_df_results(infos, ranges, name, metrics = ['time', "number_of_edges", "mask_sparsity_init", "non_zero_values_init"])
#df_acc_top = get_df_results(accuracy_top, ranges, name, metrics = ['f1_score', 'recall', 'precision', 'ged', 'auc'])
df_acc = get_df_results(accuracy, ranges, name, metrics = ['f1_score', 'recall', 'precision', 'auc'])
df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 'mask_sparsity', 'expl_edges'])

#scores = pd.concat([df_acc, df_fid], axis=1, join="inner")
#scores = pd.merge(df_acc_top, df_acc, suffixes=['_top', ''], on=["dataset", "explainer_name", "num_test_nodes", "data_save_dir", "gpu", "sparsity"])
scores = df_acc
scores = pd.merge(scores, df_fid, on=["dataset", "explainer_name", "num_test_nodes", "data_save_dir", "gpu", "threshold"])
scores = pd.merge(scores, df_infos, on=["dataset", "explainer_name", "num_test_nodes", "data_save_dir", "gpu", "threshold"])


_explainer_name=random_threshold=0.5_dataset=syn3


In [10]:
res = scores.groupby(by=['dataset', 'threshold', 'explainer_name']).mean()
res

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,num_test_nodes,gpu,f1_score,recall,precision,auc,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,mask_sparsity,expl_edges,time,number_of_edges,mask_sparsity_init,non_zero_values_init
dataset,threshold,explainer_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
syn1,0.5,distance,100.0,1.0,0.563576,0.413333,0.939500,0.603303,0.05,0.53,-0.123116,0.487905,0.999336,2.73,0.0162,4110.0,0.009175,4072.29
syn1,0.5,gnnexplainer,100.0,1.0,0.365325,0.650000,0.355707,0.720080,0.44,0.29,0.285291,0.151176,0.941944,238.61,2.6174,4110.0,0.937606,256.44
syn1,0.5,ig_node,100.0,1.0,0.446275,0.436667,0.459524,0.478424,0.05,0.53,0.024774,0.445705,0.520219,1971.90,0.1674,4110.0,0.050822,3901.12
syn1,0.5,occlusion,100.0,1.0,0.001630,0.240000,0.000818,0.062967,0.48,0.05,0.433345,0.009594,0.062044,3855.00,0.7179,4110.0,0.000243,4109.00
syn1,0.5,pagerank,100.0,1.0,0.880128,0.861667,0.903381,0.930774,0.53,0.05,0.487905,-0.018173,0.997202,11.50,0.0198,4110.0,0.000487,4108.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
syn6,0.9,pagerank,100.0,1.0,0.423048,0.314000,0.957333,0.656954,0.63,0.36,0.523581,0.326768,0.999103,3.54,0.0209,3948.0,0.000507,3946.00
syn6,0.9,pgmexplainer,100.0,1.0,0.380048,0.376000,0.560708,0.687330,0.57,0.23,0.488138,0.253975,0.997219,10.98,3.8762,3948.0,0.780314,867.32
syn6,0.9,random,100.0,1.0,0.005078,0.194000,0.002573,0.500879,0.07,0.63,0.060975,0.577516,0.899491,396.81,0.0000,3948.0,0.000253,3947.00
syn6,0.9,sa_node,100.0,1.0,0.180224,0.112000,0.532635,0.336103,0.01,0.64,-0.014498,0.589573,0.560998,1733.18,0.0027,3948.0,0.000750,3945.04


In [11]:
res = res.reset_index()
res.to_csv('/cluster/home/kamara/Explain/csv/threshold.csv', index=False)

### Topk

In [25]:
logdir = f'/cluster/scratch/kamara/checkpoints/node_classification/syn/topk/topk_syn_1/logs'
jsonpath = f'/cluster/home/kamara/Explain/configs/syn/topk/config_syn_topk_1expe.json'
infos, initial_mask_infos, transformed_mask_infos, fidelity = get_info(logdir)
gnn_train, gnn_test = get_gnn_info(logdir)

print(list(fidelity.keys())[1])
ranges = get_param_ranges(jsonpath)

IndexError: list index out of range

In [20]:
#name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_dataset={dataset}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}'
name = '{none}_explainer_name={explainer_name}_dataset={dataset}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}'


In [21]:
df_infos = get_df_results(infos, ranges, name, metrics = ['time', "number_of_edges"])
df_initial_mask_infos = get_df_results(initial_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg"])
df_tranformed_mask_infos = get_df_results(transformed_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg", "topk"], selection="all")
df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 
                                                           'fidelity_gnn_acc+', 'fidelity_gnn_acc-', 'fidelity_gnn_prob+', 'fidelity_gnn_prob-', 'topk'], selection="all")

In [22]:
df_mask_infos = pd.merge(df_initial_mask_infos, df_tranformed_mask_infos, suffixes=['_init', '_transf'], on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask"])
scores = df_fid
scores = pd.merge(scores, df_mask_infos, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask", "topk"])
scores = pd.merge(scores, df_infos, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask"])
scores
#scores.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_sparsity_all.csv', index=False)


Unnamed: 0,none_x,explainer_name,dataset,topk_list_x,true_label_as_target,hard_mask,explain_graph_x,num_test,seed,data_save_dir,...,topk_list_transf,explain_graph_transf,mask_size_transf,mask_entropy_transf,max_avg_transf,none_y,topk_list_y,explain_graph_y,time,number_of_edges
0,,random,cora,151015202550100,True,True,False,100,0,data,...,151015202550100,False,1.00,0.000000,1.000000,,151015202550100,False,0.0002,13264
1,,random,cora,151015202550100,True,True,False,100,0,data,...,151015202550100,False,5.00,1.609438,0.999720,,151015202550100,False,0.0002,13264
2,,random,cora,151015202550100,True,True,False,100,0,data,...,151015202550100,False,10.00,2.302585,0.999468,,151015202550100,False,0.0002,13264
3,,random,cora,151015202550100,True,True,False,100,0,data,...,151015202550100,False,15.00,2.708050,0.999279,,151015202550100,False,0.0002,13264
4,,random,cora,151015202550100,True,True,False,100,0,data,...,151015202550100,False,20.00,2.995732,0.999034,,151015202550100,False,0.0002,13264
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2464,,pgmexplainer,wisconsin,151015202550100,False,False,False,100,0,data,...,151015202550100,False,15.00,2.694923,0.836447,,151015202550100,False,2.4069,1151
2465,,pgmexplainer,wisconsin,151015202550100,False,False,False,100,0,data,...,151015202550100,False,19.98,2.979302,0.810814,,151015202550100,False,2.4069,1151
2466,,pgmexplainer,wisconsin,151015202550100,False,False,False,100,0,data,...,151015202550100,False,24.82,3.193790,0.798190,,151015202550100,False,2.4069,1151
2467,,pgmexplainer,wisconsin,151015202550100,False,False,False,100,0,data,...,151015202550100,False,48.76,3.847085,0.689827,,151015202550100,False,2.4069,1151


In [23]:
res = scores.groupby(by=['dataset', "true_label_as_target", "hard_mask", 'topk', 'explainer_name']).mean()
res = res.drop(columns=['seed'])
#res[['mask_size', "mask_entropy", "max_avg"]]
res

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,num_test,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,fidelity_gnn_acc+,fidelity_gnn_acc-,fidelity_gnn_prob+,fidelity_gnn_prob-,mask_size_init,mask_entropy_init,max_avg_init,mask_size_transf,mask_entropy_transf,max_avg_transf,time,number_of_edges
dataset,true_label_as_target,hard_mask,topk,explainer_name,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
actor,False,False,1.0,basic_gnnexplainer,100.0,-1.00,-1.00,-1.000000,-1.000000,0.00,0.42,0.002503,0.113221,1208.43,5.300966,0.696889,1.00,0.000000,1.000000,3.8270,60918.0
actor,False,False,1.0,distance,100.0,-1.00,-1.00,-1.000000,-1.000000,0.07,0.39,0.016935,0.106466,60913.74,10.960461,0.110511,1.00,0.000000,1.000000,0.8971,60918.0
actor,False,False,1.0,gnnexplainer,100.0,-1.00,-1.00,-1.000000,-1.000000,0.43,0.41,0.074586,0.098053,1208.43,4.690754,0.051125,1.00,0.000000,1.000000,7.4523,60918.0
actor,False,False,1.0,gradcam,100.0,-1.00,-1.00,-1.000000,-1.000000,0.16,0.38,0.037296,0.082071,39772.57,8.478164,0.149737,1.00,0.000000,1.000000,0.0079,60918.0
actor,False,False,1.0,occlusion,100.0,-1.00,-1.00,-1.000000,-1.000000,0.38,0.39,0.062185,0.083376,1208.25,5.610678,0.479297,1.00,0.000000,1.000000,3.3738,60918.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wisconsin,True,True,100.0,occlusion,100.0,0.67,0.13,0.133390,-0.034419,-1.00,-1.00,-1.000000,-1.000000,290.10,4.690044,0.418526,78.65,3.744467,0.420155,0.6651,1151.0
wisconsin,True,True,100.0,pagerank,100.0,0.70,0.09,0.117450,-0.001679,-1.00,-1.00,-1.000000,-1.000000,1082.04,5.922828,0.001028,100.00,4.446748,0.206235,0.0262,1151.0
wisconsin,True,True,100.0,pgmexplainer,100.0,0.63,0.13,0.108370,-0.005719,-1.00,-1.00,-1.000000,-1.000000,517.66,5.756787,0.298066,95.11,4.465948,0.550145,2.5542,1151.0
wisconsin,True,True,100.0,random,100.0,0.20,0.55,-0.008910,0.088731,-1.00,-1.00,-1.000000,-1.000000,1148.96,6.853682,0.490891,100.00,4.604816,0.943692,0.0001,1151.0


In [24]:
res = res.reset_index()
res.to_csv('/cluster/home/kamara/Explain/csv/node_classification/syn/topk/nc_syn_topk_1expe.csv', index=False)

In [14]:
logdir = f'/cluster/home/kamara/checkpoints/topk/logs'
jsonpath = f'/cluster/home/kamara/Explain/config_topk.json'
infos, accuracy, fidelity = get_info(logdir)

print(list(accuracy.keys())[1])
name = '{none}_explainer_name={explainer_name}_topk={topk}_dataset={dataset}'

ranges = get_param_ranges(jsonpath)
df_infos = get_df_results(infos, ranges, name, metrics = ['time', "number_of_edges", "mask_sparsity_init", "non_zero_values_init"])
#df_acc_top = get_df_results(accuracy_top, ranges, name, metrics = ['f1_score', 'recall', 'precision', 'ged', 'auc'])
df_acc = get_df_results(accuracy, ranges, name, metrics = ['f1_score', 'recall', 'precision', 'auc'])
df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 'mask_sparsity', 'expl_edges'])

#scores = pd.concat([df_acc, df_fid], axis=1, join="inner")
#scores = pd.merge(df_acc_top, df_acc, suffixes=['_top', ''], on=["dataset", "explainer_name", "num_test_nodes", "data_save_dir", "gpu", "sparsity"])
scores = df_acc
scores = pd.merge(scores, df_fid, on=["dataset", "explainer_name", "num_test_nodes", "data_save_dir", "gpu", "topk"])
scores = pd.merge(scores, df_infos, on=["dataset", "explainer_name", "num_test_nodes", "data_save_dir", "gpu", "topk"])


_explainer_name=random_topk=10_dataset=syn5


In [7]:
res = scores.groupby(by=['dataset', 'topk', 'explainer_name']).mean()
res

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,num_test_nodes,gpu,f1_score,recall,precision,auc,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,mask_sparsity,expl_edges,time,number_of_edges,mask_sparsity_init,non_zero_values_init
dataset,topk,explainer_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
syn1,10,distance,100.0,1.0,0.827099,0.895000,0.777849,0.838573,0.38,0.33,0.327711,0.226022,0.997567,10.00,0.0162,4110.0,0.009175,4072.29
syn1,10,gnnexplainer,100.0,1.0,0.393343,0.440000,0.360433,0.650486,0.21,0.52,0.079835,0.415991,0.997567,10.00,5.4596,4110.0,0.937606,256.44
syn1,10,ig_node,100.0,1.0,0.415758,0.381667,0.456667,0.690219,0.05,0.53,0.054472,0.435937,0.997567,10.00,0.1592,4110.0,0.050822,3901.12
syn1,10,occlusion,100.0,1.0,0.040000,0.053333,0.032000,0.512180,0.25,0.49,0.196120,0.447654,0.997567,10.00,0.5690,4110.0,0.000243,4109.00
syn1,10,pagerank,100.0,1.0,0.863636,0.791667,0.950000,0.895794,0.53,0.05,0.487905,-0.005111,0.997567,10.00,0.0200,4110.0,0.000487,4108.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
syn6,500,pagerank,100.0,1.0,0.039216,1.000000,0.020000,0.999761,0.64,0.01,0.589573,0.002842,0.873354,500.00,0.0275,3948.0,0.000507,3946.00
syn6,500,pgmexplainer,100.0,1.0,0.056839,0.978000,0.029418,0.976755,0.64,0.02,0.589573,0.008439,0.903652,380.38,5.5268,3948.0,0.780294,867.40
syn6,500,random,100.0,1.0,0.005317,0.252000,0.002687,0.505638,0.06,0.60,0.051902,0.568873,0.873354,500.00,0.0000,3948.0,0.000253,3947.00
syn6,500,sa_node,100.0,1.0,0.003187,0.112000,0.001619,0.499740,0.01,0.64,0.011466,0.589573,0.873354,500.00,0.0034,3948.0,0.000750,3945.04


In [8]:
res = res.reset_index()
res.to_csv('/cluster/home/kamara/Explain/csv/topk/syn/.csv', index=False)

### Accuracy top

In [15]:
def get_info_top(logdir):
    infos, accuracy_top, accuracy, fidelity = {}, {}, {}, {}
    for filename in os.listdir(logdir):
        if filename.endswith(".stdout") and filename.startswith("_"):
            _, infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__infos:')
            _, accuracy_top[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__accuracy_top:')
            _, accuracy[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__accuracy:')
            _, fidelity[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__fidelity:')             
    return(infos, accuracy_top, accuracy, fidelity)


In [16]:
logdir = f'/cluster/home/kamara/checkpoints/top_edges/logs'
jsonpath = f'/cluster/home/kamara/Explain/config_top_edges.json'
infos, accuracy_top, accuracy, fidelity = get_info_top(logdir)

print(list(accuracy.keys())[1])
name = '{none}_explainer_name={explainer_name}_dataset={dataset}'

ranges = get_param_ranges(jsonpath)
df_infos = get_df_results(infos, ranges, name, metrics = ['time', "number_of_edges", "mask_sparsity_init", "non_zero_values_init"])
df_acc_top = get_df_results(accuracy_top, ranges, name, metrics = ['f1_score', 'recall', 'precision', 'ged', 'auc'])
df_acc = get_df_results(accuracy, ranges, name, metrics = ['f1_score', 'recall', 'precision', 'auc'])
df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 'mask_sparsity', 'expl_edges'])

#scores = pd.concat([df_acc, df_fid], axis=1, join="inner")
scores = pd.merge(df_acc_top, df_acc, suffixes=['_top', ''], on=["dataset", "explainer_name", "num_test_nodes", "data_save_dir", "gpu"])
scores = pd.merge(scores, df_fid, on=["dataset", "explainer_name", "num_test_nodes", "data_save_dir", "gpu"])
scores = pd.merge(scores, df_infos, on=["dataset", "explainer_name", "num_test_nodes", "data_save_dir", "gpu"])


_explainer_name=random_dataset=syn3


In [17]:
res = scores.groupby(by=['dataset', 'explainer_name']).mean()
res

Unnamed: 0_level_0,Unnamed: 1_level_0,num_test_nodes,gpu,f1_score_top,recall_top,precision_top,ged,auc_top,f1_score,recall,precision,...,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,mask_sparsity,expl_edges,time,number_of_edges,mask_sparsity_init,non_zero_values_init
dataset,explainer_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
syn1,distance,200.0,1.0,0.698606,0.686667,0.713333,-1.0,-1.0,0.005823,1.0,0.00292,...,0.555,0.0,0.510232,0.0,0.010618,4066.36,0.0171,4110.0,0.010618,4066.36
syn1,gnnexplainer,200.0,1.0,0.328182,0.32,0.338,-1.0,-1.0,0.447901,1.0,0.351001,...,0.555,0.0,0.510232,0.0,0.926178,303.41,5.5336,4110.0,0.926178,303.41
syn1,ig_node,200.0,1.0,0.438333,0.438333,0.438333,-1.0,-1.0,0.007163,0.915833,0.003618,...,0.555,0.24,0.507119,0.252697,0.040309,3944.33,0.2779,4110.0,0.040309,3944.33
syn1,occlusion,200.0,1.0,0.054167,0.054167,0.054167,-1.0,-1.0,0.005822,1.0,0.00292,...,0.555,0.0,0.356158,-0.096364,0.000243,4109.0,0.9668,4110.0,0.000243,4109.0
syn1,pagerank,200.0,1.0,0.868333,0.868333,0.868333,-1.0,-1.0,0.005825,1.0,0.002921,...,0.555,0.0,0.510232,0.0,0.000487,4108.0,0.0252,4110.0,0.000487,4108.0
syn1,pgmexplainer,200.0,1.0,0.568561,0.568333,0.568833,-1.0,-1.0,0.164252,1.0,0.108257,...,0.555,0.0,0.510232,-5.1e-05,0.802185,813.02,5.5697,4110.0,0.802185,813.02
syn1,random,200.0,1.0,0.0025,0.0025,0.0025,-1.0,-1.0,0.005822,1.0,0.00292,...,0.555,0.0,0.510232,-0.000157,0.000243,4109.0,0.0001,4110.0,0.000243,4109.0
syn1,sa_node,200.0,1.0,0.018788,0.018333,0.019333,-1.0,-1.0,0.004854,0.833333,0.002434,...,0.555,0.295,0.507088,0.276523,0.000487,4108.0,0.004,4110.0,0.000487,4108.0
syn1,subgraphx,200.0,1.0,0.680818,0.6425,0.72775,-1.0,-1.0,0.774763,0.769167,0.809762,...,0.45,0.23,0.408853,0.123255,0.927438,298.23,2.2555,4110.0,0.927438,10.53
syn3,distance,200.0,1.0,0.722813,0.71125,0.736154,-1.0,-1.0,0.009405,1.0,0.004725,...,0.0,0.0,0.018805,0.0,0.008171,5038.49,0.0225,5080.0,0.008171,5038.49


In [18]:
res = res.reset_index()
res.to_csv('/cluster/home/kamara/Explain/csv/top_edges.csv', index=False)

### Gc Layers

In [50]:
expe = "gc_layers"
logdir = f'/cluster/home/kamara/checkpoints/{expe}/logs'
jsonpath = f'/cluster/home/kamara/Explain/config_{expe}.json'
infos, accuracy, fidelity = get_info(logdir)
ranges = get_param_ranges(jsonpath)

print(list(accuracy.keys())[1])
name = '{none}_explainer_name={explainer_name}_num_gc_layers={num_gc_layers}'


_explainer_name=pagerank_num_gc_layers=1


In [51]:
df_acc = get_df_results(accuracy, ranges, name, metrics = ['f1_score', 'recall', 'precision', 'ged', 'auc'])
df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-'])
scores = pd.merge(df_acc, df_fid, on=["dataset", "num_gc_layers", "explainer_name", "num_test_nodes", "data_save_dir"])
scores = scores.groupby(by=['num_gc_layers', 'explainer_name']).mean()
scores

Unnamed: 0_level_0,Unnamed: 1_level_0,num_test_nodes,f1_score,recall,precision,ged,auc,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-
num_gc_layers,explainer_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2,gnnexplainer,200.0,0.682879,0.6525,0.720833,4.065,0.998892,0.835,0.0,0.650817,0.0
2,pagerank,200.0,0.864167,0.864167,0.864167,2.435,0.999885,0.835,0.0,0.650817,0.0
3,gnnexplainer,200.0,0.329136,0.323333,0.33625,8.16,0.972214,0.61,0.015,0.582833,0.014268
3,pagerank,200.0,0.864167,0.864167,0.864167,2.43,0.999885,0.61,0.0,0.589879,-2.1e-05
4,gnnexplainer,200.0,0.055833,0.055833,0.055833,12.475,0.834369,0.75,0.29,0.571764,0.246716
4,pagerank,200.0,0.86,0.86,0.86,2.515,0.999885,0.83,0.0,0.640806,0.005695
5,gnnexplainer,200.0,0.006667,0.006667,0.006667,14.085,0.656783,0.55,0.575,0.512901,0.548674
5,pagerank,200.0,0.859167,0.859167,0.859167,2.525,0.999883,0.64,0.005,0.60821,-0.001958
6,gnnexplainer,200.0,0.003333,0.003333,0.003333,14.345,0.57559,0.61,0.645,0.545983,0.581469
6,pagerank,200.0,0.859167,0.859167,0.859167,2.525,0.999883,0.64,0.005,0.579183,-0.000794


## Graph Classification

### Sparsity

In [13]:
logdir = f'/cluster/home/kamara/checkpoints/graph_classification/sparsity_mask/logs'
jsonpath = f'/cluster/home/kamara/checkpoints/graph_classification/sparsity_mask/sweep.json'
infos, accuracy, fidelity = get_info(logdir)

print(list(accuracy.keys())[1])
name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_hard_mask={hard_mask}'

ranges = get_param_ranges(jsonpath)
df_infos = get_df_results(infos, ranges, name, metrics = ['time', "number_of_edges", "mask_sparsity_init", "non_zero_values_init"])
#df_acc_top = get_df_results(accuracy_top, ranges, name, metrics = ['f1_score', 'recall', 'precision', 'ged', 'auc'])
df_acc = get_df_results(accuracy, ranges, name, metrics = ['f1_score', 'recall', 'precision'])
df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 'mask_sparsity', 'expl_edges'])

#scores = pd.concat([df_acc, df_fid], axis=1, join="inner")
#scores = pd.merge(df_acc_top, df_acc, suffixes=['_top', ''], on=["dataset", "explainer_name", "num_test_nodes", "data_save_dir", "gpu", "sparsity"])
scores = df_acc
print(scores)
scores = pd.merge(scores, df_fid, on=["explainer_name", "num_test", "data_save_dir", "sparsity", "hard_mask"])
scores = pd.merge(scores, df_infos, on=["explainer_name", "num_test", "data_save_dir", "sparsity", "hard_mask"])
scores


_explainer_name=random_sparsity=0.7_hard_mask=True
Empty DataFrame
Columns: []
Index: []


KeyError: 'explainer_name'

In [7]:
res = scores.groupby(by=['dataset', 'sparsity', 'explainer_name', 'hard_mask']).mean()
res

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,num_test,f1_score,recall,precision,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,mask_sparsity,expl_edges,time,number_of_edges,mask_sparsity_init,non_zero_values_init
dataset,sparsity,explainer_name,hard_mask,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
syn1,0.000,distance,False,100.0,0.005823,1.000000,0.002920,0.28,0.64,0.102925,0.358078,0.010139,4068.33,0.0127,4110.0,0.010139,4068.33
syn1,0.000,distance,True,100.0,0.005823,1.000000,0.002920,0.76,0.00,0.511743,0.000000,0.010139,4068.33,0.0135,4110.0,0.010139,4068.33
syn1,0.000,gnnexplainer,False,100.0,0.444157,1.000000,0.328940,0.56,0.56,0.296190,0.348878,0.953431,191.40,2.6306,4110.0,0.953431,191.40
syn1,0.000,gnnexplainer,True,100.0,0.444157,1.000000,0.328940,0.76,0.00,0.511743,0.000000,0.953431,191.40,2.7273,4110.0,0.953431,191.40
syn1,0.000,ig,False,100.0,0.005099,0.873333,0.002557,0.35,0.64,0.042806,0.412742,0.002487,4099.78,0.1824,4110.0,0.002487,4099.78
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
syn6,0.997,random,True,100.0,0.002500,0.004000,0.001818,0.00,0.64,-0.000110,0.599028,0.997214,11.00,0.0000,3948.0,0.000253,3947.00
syn6,0.997,sa,False,100.0,0.000000,0.000000,0.000000,0.00,0.64,0.001296,0.599028,0.997214,11.00,0.0031,3948.0,0.000750,3945.04
syn6,0.997,sa,True,100.0,0.000000,0.000000,0.000000,0.00,0.64,0.001296,0.599028,0.997214,11.00,0.0032,3948.0,0.000750,3945.04
syn6,0.997,subgraphx,False,100.0,0.661056,0.634000,0.700500,0.54,0.20,0.493473,0.183070,0.997829,8.57,0.8231,3948.0,0.997822,8.60


In [8]:
res = res.reset_index()
res.to_csv('/cluster/home/kamara/Explain/csv/node_classification/sparsity_mask_nc.csv', index=False)