In [1]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import seaborn as sns
from matplotlib import axes
import pandas as pd
import os
from os.path import join
import json

In [2]:

import sys, os
sys.path.append('/cluster/home/kamara/Explain')
from clutils.nbutils import *
os.getcwd()

'/cluster/home/kamara/Explain/notebooks'

In [3]:
def parseLogs(logfile, kw="rawlogs:", **kwargs):
    logs = {}
    dicts = []
    with open(logfile) as f:
        for i_line, line in enumerate(f):
            pos = line.find(kw)
            if pos >= 0:
                pos += len(kw)
                pos_tensor = line.find("tensor(")
                while pos_tensor >= 0:
                    comma_end = line.find(",", pos_tensor)
                    tensor_end = line.find(")", pos_tensor)
                    line_new = line[:pos_tensor] + line[pos_tensor + len("tensor("):comma_end] + line[tensor_end+1:]
                    print("LINE", line)
                    print("NEW", line_new)
                    if len(line_new) >= len(line):
                        print("No line reduction")
                        break
                    line = line_new
                    pos_tensor = line.find("tensor(")
                    
                #print(ast.literal_eval(line[pos:]))
                #line[pos:] = line[pos:].replace("'", "")
                kline = line[pos:]
                kline = kline.replace("true", "True")
                try:
                    dic = ast.literal_eval(kline)
                except:
                    # print("Encountering weird patterns in logs")
                    # print("Line number %d" % i_line)
                    # print(line)
                    line = line.replace("nan,", "-1e8,")
                    line = line.replace("NaN", "-1e8")
                    try:
                        dic = ast.literal_eval(line[pos:])
                    except:
                        print("Unable to replace NaNs")
                        print(line)
                        continue
                
                for k in dic.keys():
                    if k not in logs:
                        logs[k] = []
                    logs[k].append(dic[k])
                dicts.append(dic)

    df = pd.DataFrame(dicts)

    return logs, df


In [4]:
def get_info_real(logdir):
    infos, fidelity, initial_mask_infos, transformed_mask_infos = {}, {}, {}, {}
    for filename in os.listdir(logdir):
        if filename.endswith(".stdout") and filename.startswith("_"):
            _, infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__infos:')
            _, initial_mask_infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__initial_edge_mask_infos:')
            _, transformed_mask_infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__transformed_mask_infos:')
            _, fidelity[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__fidelity:')             
    return(infos, initial_mask_infos, transformed_mask_infos, fidelity)

def get_info_syn(logdir):
    infos, fidelity, accuracy, initial_mask_infos, transformed_mask_infos = {}, {}, {}, {}, {}
    for filename in os.listdir(logdir):
        if filename.endswith(".stdout") and filename.startswith("_"):
            _, infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__infos:')
            _, initial_mask_infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__initial_edge_mask_infos:')
            _, transformed_mask_infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__transformed_mask_infos:')
            _, accuracy[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__accuracy:') 
            _, fidelity[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__fidelity:') 
    return(infos, initial_mask_infos, transformed_mask_infos, accuracy, fidelity)



def get_gnn_info(logdir):
    gnn_train, gnn_test = {}, {}
    for filename in os.listdir(logdir):
        if filename.endswith(".stdout") and filename.startswith("_"):
            _, gnn_train[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__gnn_train_scores: ')
            _, gnn_test[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__gnn_test_scores: ')
    return(gnn_train, gnn_test)


def get_param_ranges(jsonpath, dataset = ""):
    f = open(jsonpath,)
    data = json.load(f)
    ranges = data['params']
    ranges['none'] = ''
    return(ranges)   

In [5]:
def get_df_results(logs, ranges, name, metrics, selection = 'last'):
    dicts = []
    for params_set in enumerateParams(ranges):
        key = name.format(**params_set)
        # if key in logs and len(logs[key]) >= 1 and metric in logs[key]:
        any_metric = (key in logs) and (len(logs[key]) >= 1) and any([metric in logs[key] for metric in metrics])
        if any_metric:
            if selection == 'all':
                for index, row in logs[key].iterrows():
                    metrics_dict = {
                        metric: select_value(row.to_frame().T, metric, 'last') if key in logs and len(logs[key]) >= 1 and metric in logs[key] else -1
                        for metric in metrics
                    }
                    dicts.append(dictmerge(params_set, metrics_dict))
                    
            else:
                metrics_dict = {
                    metric: select_value(logs[key], metric, selection) if key in logs and len(logs[key]) >= 1 and metric in logs[key] else -1
                    for metric in metrics
                }
                dicts.append(dictmerge(params_set, metrics_dict))

    df = pd.DataFrame(dicts)
    #df = df.drop(columns=['none'])
    return df

## Node Classification - Real world

### GNN scores

In [28]:
df_gnn_train = get_df_results(gnn_train, ranges, name, metrics = ["prec", "recall", "f1_score", "acc"])
df_gnn_test = get_df_results(gnn_test, ranges, name, metrics = ["prec", "recall", "f1_score", "acc"])

df_gnn_train = df_gnn_train.groupby(by=['dataset']).mean().reset_index()
df_gnn_test = df_gnn_test.groupby(by=['dataset']).mean().reset_index()

gnn_scores = pd.merge(df_gnn_train, df_gnn_test, on=['dataset', 'sparsity', 'num_test'], suffixes=['_train', '_test'])
gnn_scores = gnn_scores.drop(columns=["sparsity", "num_test", "seed_train", "seed_test"])
gnn_scores

Unnamed: 0,dataset,prec_train,recall_train,acc_train,prec_test,recall_test,acc_test
0,actor,0.594775,0.480935,0.538197,0.260749,0.249035,0.285942
1,chameleon,0.817928,0.806324,0.80641,0.646635,0.634489,0.632237
2,citeseer,1.0,1.0,1.0,0.651307,0.652012,0.6764
3,cora,1.0,1.0,1.0,0.781336,0.817381,0.803496
4,cornell,0.92717,0.908066,0.974694,0.354803,0.338859,0.532487
5,facebook,0.931203,0.926196,0.933334,0.923301,0.917619,0.926356
6,pubmed,1.0,1.0,1.0,0.771646,0.781766,0.7785
7,texas,0.972625,0.966848,0.991954,0.296897,0.277057,0.510811
8,wisconsin,0.980174,0.955045,0.975833,0.397882,0.406152,0.535294


In [29]:
#gnn_scores.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_gnn_scores.csv', index=False)

### Explainability methods scores

In [58]:
df_infos = get_df_results(infos, ranges, name, metrics = ['time', "number_of_edges", "mask_sparsity_init", "non_zero_values_init"])
df_initial_mask_infos = get_df_results(initial_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg"])
df_tranformed_mask_infos = get_df_results(transformed_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg"])
df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 
                                                           'fidelity_gnn_acc+', 'fidelity_gnn_acc-', 'fidelity_gnn_prob+', 'fidelity_gnn_prob-'])

In [31]:
df_mask_infos = pd.merge(df_initial_mask_infos, df_tranformed_mask_infos, suffixes=['_init', '_transf'], on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "sparsity", "true_label_as_target", "hard_mask"])
scores = df_fid
scores = pd.merge(scores, df_mask_infos, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "sparsity", "true_label_as_target", "hard_mask"])
scores = pd.merge(scores, df_infos, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "sparsity", "true_label_as_target", "hard_mask"])
scores
#scores.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_sparsity_all.csv', index=False)


Unnamed: 0,none_x,explainer_name,sparsity,dataset,true_label_as_target,hard_mask,explain_graph_x,num_test,seed,data_save_dir,...,explain_graph_transf,mask_size_transf,mask_entropy_transf,max_avg_transf,none_y,explain_graph_y,time,number_of_edges,mask_sparsity_init,non_zero_values_init
0,,random,0.00,cora,True,True,False,100,0,data,...,False,13249.92,9.299591,0.428312,,False,0.0002,13264,0.001062,13249.92
1,,random,0.00,cora,True,True,False,100,1,data,...,False,13250.48,9.299754,0.490545,,False,0.0002,13264,0.001019,13250.48
2,,random,0.00,cora,True,True,False,100,2,data,...,False,13250.34,9.299545,0.498040,,False,0.0002,13264,0.001030,13250.34
3,,random,0.00,cora,True,True,False,100,3,data,...,False,13249.36,9.299762,0.489739,,False,0.0002,13264,0.001104,13249.36
4,,random,0.00,cora,True,True,False,100,4,data,...,False,13249.54,9.299681,0.470068,,False,0.0002,13264,0.001090,13249.54
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12327,,pgmexplainer,0.99,facebook,False,True,False,100,1,data,...,False,2768.48,7.475550,0.516261,,False,89.3687,364116,0.944714,20130.39
12328,,pgmexplainer,0.99,facebook,False,True,False,100,2,data,...,False,2392.10,7.094828,0.522513,,False,87.7417,364116,0.948680,18686.40
12329,,pgmexplainer,0.99,facebook,False,True,False,100,3,data,...,False,2672.38,7.429812,0.492495,,False,91.9752,364116,0.957629,15427.86
12330,,pgmexplainer,0.99,facebook,False,True,False,100,4,data,...,False,2674.88,7.356349,0.536693,,False,87.2857,364116,0.951040,17826.97


In [None]:
df_nan = scores[scores.isna().any(axis=1)]


In [32]:
res = scores.groupby(by=['dataset', "true_label_as_target", "hard_mask", 'sparsity', 'explainer_name']).mean()
res = res.drop(columns=['seed'])
#res[['mask_size', "mask_entropy", "max_avg"]]
res

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,num_test,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,fidelity_gnn_acc+,fidelity_gnn_acc-,fidelity_gnn_prob+,fidelity_gnn_prob-,mask_sparsity,...,mask_size_init,mask_entropy_init,max_avg_init,mask_size_transf,mask_entropy_transf,max_avg_transf,time,number_of_edges,mask_sparsity_init,non_zero_values_init
dataset,true_label_as_target,hard_mask,sparsity,explainer_name,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
actor,False,False,0.000,basic_gnnexplainer,100.0,-1.000,-1.000,-1.000000,-1.000000,0.722,0.312,0.150069,0.088249,0.970945,...,1769.996,5.684779,0.713987,1769.996,5.684779,0.713987,3.79272,60918.0,0.970945,1769.996
actor,False,False,0.000,distance,100.0,-1.000,-1.000,-1.000000,-1.000000,0.820,0.687,0.182060,0.151183,0.000080,...,60913.117,10.958437,0.117579,60913.117,10.958437,0.117579,0.74688,60918.0,0.000080,60913.117
actor,False,False,0.000,gnnexplainer,100.0,-1.000,-1.000,-1.000000,-1.000000,0.836,0.753,0.178340,0.175748,0.970945,...,1769.996,5.007442,0.062815,1769.996,5.007442,0.062815,3.76496,60918.0,0.970945,1769.996
actor,False,False,0.000,gradcam,100.0,-1.000,-1.000,-1.000000,-1.000000,0.770,0.693,0.174446,0.152992,0.419769,...,35346.504,8.168174,0.116363,35346.504,8.168174,0.116363,0.00529,60918.0,0.419769,35346.504
actor,False,False,0.000,ig,100.0,-1.000,-1.000,-1.000000,-1.000000,0.828,0.640,0.180331,0.140814,0.030613,...,59053.126,10.742004,0.185476,59053.126,10.742004,0.185476,0.25437,60918.0,0.030613,59053.126
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wisconsin,True,True,0.997,gnnexplainer,100.0,0.666,0.458,0.545173,0.308689,-1.000,-1.000,-1.000000,-1.000000,0.997393,...,287.783,3.921350,0.066819,3.000,0.774409,0.659021,3.83499,1151.0,0.749971,287.783
wisconsin,True,True,0.997,ig,100.0,0.366,0.063,0.246022,-0.068371,-1.000,-1.000,-1.000000,-1.000000,0.997393,...,1120.830,6.850792,0.210837,3.000,1.079416,0.719721,0.13043,1151.0,0.026212,1120.830
wisconsin,True,True,0.997,occlusion,100.0,0.667,0.205,0.545069,0.080260,-1.000,-1.000,-1.000000,-1.000000,0.997393,...,287.783,5.029174,0.727468,3.000,1.086211,0.795199,0.35526,1151.0,0.749971,287.783
wisconsin,True,True,0.997,pagerank,100.0,0.290,0.077,0.171710,-0.056536,-1.000,-1.000,-1.000000,-1.000000,0.997393,...,1087.551,5.931450,0.001056,3.000,1.088098,0.758350,0.02136,1151.0,0.055125,1087.551


In [33]:
res = res.reset_index()
res.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_sparsity_avg10expe.csv', index=False)

## Topk

In [6]:
def get_info(logdir):
    infos, fidelity, initial_mask_infos, transformed_mask_infos= {}, {}, {}, {}
    for filename in os.listdir(logdir):
        if filename.endswith(".stdout") and filename.startswith("_"):
            _, infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__infos:')
            _, initial_mask_infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__initial_edge_mask_infos:')
            _, transformed_mask_infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__transformed_mask_infos:')
            _, fidelity[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__fidelity:')             
    return(infos, initial_mask_infos, transformed_mask_infos, fidelity)



In [7]:
logdir = f'/cluster/work/zhang/kamara/checkpoints/node_classification/real/topk/topk_real_5/logs'
jsonpath = f'/cluster/home/kamara/Explain/configs/real/topk/config_real_topk_5expe.json'
infos, initial_mask_infos, transformed_mask_infos, fidelity = get_info(logdir)
gnn_train, gnn_test = get_gnn_info(logdir)

print(list(fidelity.keys())[1])
ranges = get_param_ranges(jsonpath)

_explainer_name=gradcam_dataset=squirrel_true_label_as_target=False_hard_mask=True_seed=1


In [8]:
#name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_dataset={dataset}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}'
name = '{none}_explainer_name={explainer_name}_dataset={dataset}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}_seed={seed}'


In [9]:
df_infos = get_df_results(infos, ranges, name, metrics = ['time', "number_of_edges"])#, "edge_mask_sparsity_init", "edge_mask_size_init", "edge_mask_connected_init"])
df_initial_mask_infos = get_df_results(initial_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg"])
df_transformed_mask_infos = get_df_results(transformed_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg", "cc_ratio", "topk"], selection="all")

df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 
                                                           'fidelity_gnn_acc+', 'fidelity_gnn_acc-', 'fidelity_gnn_prob+', 'fidelity_gnn_prob-', 'topk'], selection="all")

In [11]:
df_mask_infos = pd.merge(df_initial_mask_infos, df_transformed_mask_infos, suffixes=['_init', '_transf'], on=["dataset", "explainer_name", "data_save_dir", "seed", "true_label_as_target", "hard_mask"])
scores = df_fid
scores = pd.merge(scores, df_mask_infos, on=["dataset", "explainer_name", "data_save_dir", "seed", "true_label_as_target", "hard_mask", "topk"])
scores = pd.merge(scores, df_infos, on=["dataset", "explainer_name", "data_save_dir", "seed", "true_label_as_target", "hard_mask"])
scores
#scores.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_sparsity_all.csv', index=False)


Unnamed: 0,none_x,explainer_name,dataset,params_list_x,strategy_x,true_label_as_target,hard_mask,explain_graph_x,num_test_x,seed,...,mask_entropy_transf,max_avg_transf,cc_ratio,none_y,params_list_y,strategy_y,explain_graph_y,num_test_y,time,number_of_edges
0,,random,cora,151015202550100,topk,True,True,False,100,0,...,0.000000,1.000000,0.590000,,151015202550100,topk,False,100,0.0002,13264
1,,random,cora,151015202550100,topk,True,True,False,100,0,...,1.609438,0.999720,0.556905,,151015202550100,topk,False,100,0.0002,13264
2,,random,cora,151015202550100,topk,True,True,False,100,0,...,2.302585,0.999468,0.553509,,151015202550100,topk,False,100,0.0002,13264
3,,random,cora,151015202550100,topk,True,True,False,100,0,...,2.708050,0.999279,0.551098,,151015202550100,topk,False,100,0.0002,13264
4,,random,cora,151015202550100,topk,True,True,False,100,0,...,2.995732,0.999034,0.548972,,151015202550100,topk,False,100,0.0002,13264
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19845,,subgraphx,squirrel,151015202550100,topk,False,True,False,100,3,...,0.000000,1.000000,1.000000,,151015202550100,topk,False,100,36816.8081,401907
19846,,subgraphx,squirrel,151015202550100,topk,False,True,False,100,3,...,0.000000,1.000000,1.000000,,151015202550100,topk,False,100,36816.8081,401907
19847,,subgraphx,squirrel,151015202550100,topk,False,True,False,100,3,...,0.000000,1.000000,1.000000,,151015202550100,topk,False,100,36816.8081,401907
19848,,subgraphx,squirrel,151015202550100,topk,False,True,False,100,3,...,0.000000,1.000000,1.000000,,151015202550100,topk,False,100,36816.8081,401907


In [12]:
res = scores.groupby(by=['dataset', "true_label_as_target", "hard_mask", 'topk', 'explainer_name']).apply(lambda x: x.mean(skipna=True))
res = res.drop(columns=['seed', 'topk'])
#res[['mask_size', "mask_entropy", "max_avg"]]
res

  res = scores.groupby(by=['dataset', "true_label_as_target", "hard_mask", 'topk', 'explainer_name']).apply(lambda x: x.mean(skipna=True))


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,num_test_x,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,fidelity_gnn_acc+,fidelity_gnn_acc-,fidelity_gnn_prob+,fidelity_gnn_prob-,num_test_init,...,mask_entropy_init,max_avg_init,num_test_transf,mask_size_transf,mask_entropy_transf,max_avg_transf,cc_ratio,num_test_y,time,number_of_edges
dataset,true_label_as_target,hard_mask,topk,explainer_name,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
actor,False,False,1.0,basic_gnnexplainer,100.0,-1.000,-1.000,-1.000000,-1.000000,0.012,0.352,0.001845,0.117761,100.0,...,5.728821,0.725068,100.0,1.000,0.000000,1.000000,0.616000,100.0,5.66230,60918.0
actor,False,False,1.0,distance,100.0,-1.000,-1.000,-1.000000,-1.000000,0.062,0.344,0.012301,0.108912,100.0,...,10.959247,0.116227,100.0,1.000,0.000000,1.000000,0.619000,100.0,0.79156,60918.0
actor,False,False,1.0,gnnexplainer,100.0,-1.000,-1.000,-1.000000,-1.000000,0.522,0.346,0.075682,0.106058,100.0,...,5.047437,0.059011,100.0,1.000,0.000000,1.000000,0.607000,100.0,4.56018,60918.0
actor,False,False,1.0,gradcam,100.0,-1.000,-1.000,-1.000000,-1.000000,0.130,0.294,0.028060,0.087587,100.0,...,7.954254,0.151472,100.0,1.000,0.000000,1.000000,1.000000,100.0,0.00598,60918.0
actor,False,False,1.0,ig,100.0,-1.000,-1.000,-1.000000,-1.000000,0.400,0.400,0.108639,0.122489,100.0,...,10.405149,0.242850,100.0,1.000,0.000000,1.000000,1.000000,100.0,0.35788,60918.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wisconsin,True,True,100.0,pgexplainer,100.0,0.160,0.340,0.016246,0.048882,-1.000,-1.000,-1.000000,-1.000000,100.0,...,3.403713,0.001061,100.0,91.280,3.382241,0.002399,0.882448,100.0,8.39312,1151.0
wisconsin,True,True,100.0,pgmexplainer,100.0,0.448,0.094,0.085688,-0.007603,-1.000,-1.000,-1.000000,-1.000000,100.0,...,5.632501,0.273505,100.0,92.298,4.385277,0.508757,0.179930,100.0,2.26486,1151.0
wisconsin,True,True,100.0,random,100.0,0.136,0.424,0.003373,0.076974,-1.000,-1.000,-1.000000,-1.000000,100.0,...,6.853584,0.430377,100.0,100.000,4.604825,0.945740,0.332773,100.0,0.00010,1151.0
wisconsin,True,True,100.0,sa,100.0,0.542,0.382,0.129789,0.039249,-1.000,-1.000,-1.000000,-1.000000,100.0,...,6.324730,0.607385,100.0,97.860,4.490462,0.686671,0.175028,100.0,0.00410,1151.0


In [13]:
res.columns

Index(['num_test_x', 'fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+',
       'fidelity_prob-', 'fidelity_gnn_acc+', 'fidelity_gnn_acc-',
       'fidelity_gnn_prob+', 'fidelity_gnn_prob-', 'num_test_init',
       'mask_size_init', 'mask_entropy_init', 'max_avg_init',
       'num_test_transf', 'mask_size_transf', 'mask_entropy_transf',
       'max_avg_transf', 'cc_ratio', 'num_test_y', 'time', 'number_of_edges'],
      dtype='object')

In [14]:
res = res.reset_index()
res.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/topk/nc_real_topk_5expe.csv', index=False)

## Test Prediction Type: wrong or correct

In [6]:
def get_info(logdir):
    infos, fidelity, initial_mask_infos, transformed_mask_infos = {}, {}, {}, {}
    for filename in os.listdir(logdir):
        if filename.endswith(".stdout") and filename.startswith("_"):
            _, infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__infos:')
            _, initial_mask_infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__initial_edge_mask_infos:')
            _, transformed_mask_infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__transformed_mask_infos:')
            _, fidelity[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__fidelity:')             
    return(infos, initial_mask_infos, transformed_mask_infos, fidelity)



In [26]:
logdir = f'/cluster/work/zhang/kamara/checkpoints/node_classification/real/prediction_type/pred_type_real_1/logs'
jsonpath = f'/cluster/home/kamara/Explain/configs/real/config_real_1expe_testpred.json'
infos, initial_mask_infos, transformed_mask_infos, fidelity = get_info(logdir)
gnn_train, gnn_test = get_gnn_info(logdir)

print(list(fidelity.keys())[1])
ranges = get_param_ranges(jsonpath)

_explainer_name=gradcam_dataset=facebook_testing_pred=wrong_true_label_as_target=True_hard_mask=True


In [27]:
#name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_dataset={dataset}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}'
name = '{none}_explainer_name={explainer_name}_dataset={dataset}_testing_pred={testing_pred}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}'


In [28]:
df_infos = get_df_results(infos, ranges, name, metrics = ['time', 'number_of_edges'])
df_initial_mask_infos = get_df_results(initial_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg"])
df_tranformed_mask_infos = get_df_results(transformed_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg", "topk"], selection="all")
df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 
                                                           'fidelity_gnn_acc+', 'fidelity_gnn_acc-', 'fidelity_gnn_prob+', 'fidelity_gnn_prob-', 'topk'], selection="all")

In [30]:
df_mask_infos = pd.merge(df_initial_mask_infos, df_tranformed_mask_infos, suffixes=['_init', '_transf'], on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask", "testing_pred"])
scores = df_fid
scores = pd.merge(scores, df_mask_infos, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask", "topk", "testing_pred"])
scores = pd.merge(scores, df_infos, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask", "testing_pred"])
scores
#scores.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_sparsity_all.csv', index=False)


Unnamed: 0,none_x,explainer_name,params_list_x,strategy_x,dataset,testing_pred,true_label_as_target,hard_mask,explain_graph_x,num_test,...,explain_graph_transf,mask_size_transf,mask_entropy_transf,max_avg_transf,none_y,params_list_y,strategy_y,explain_graph_y,time,number_of_edges
0,,random,1251020100,topk,cora,correct,True,True,False,50,...,False,1.000000,0.000000,1.000000,,1251020100,topk,False,0.0002,13264
1,,random,1251020100,topk,cora,correct,True,True,False,50,...,False,2.000000,0.693147,0.999929,,1251020100,topk,False,0.0002,13264
2,,random,1251020100,topk,cora,correct,True,True,False,50,...,False,5.000000,1.609438,0.999720,,1251020100,topk,False,0.0002,13264
3,,random,1251020100,topk,cora,correct,True,True,False,50,...,False,10.000000,2.302585,0.999468,,1251020100,topk,False,0.0002,13264
4,,random,1251020100,topk,cora,correct,True,True,False,50,...,False,20.000000,2.995732,0.999034,,1251020100,topk,False,0.0002,13264
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3839,,subgraphx,1251020100,topk,chameleon,wrong,True,True,False,50,...,False,1.428571,0.297063,1.000000,,1251020100,topk,False,5117.4368,65019
3840,,subgraphx,1251020100,topk,chameleon,wrong,True,True,False,50,...,False,2.571429,0.657881,1.000000,,1251020100,topk,False,5117.4368,65019
3841,,subgraphx,1251020100,topk,chameleon,wrong,True,True,False,50,...,False,3.142857,0.754016,1.000000,,1251020100,topk,False,5117.4368,65019
3842,,subgraphx,1251020100,topk,chameleon,wrong,True,True,False,50,...,False,3.142857,0.754016,1.000000,,1251020100,topk,False,5117.4368,65019


In [31]:
res = scores.groupby(by=['dataset', "true_label_as_target", "hard_mask", 'topk', 'explainer_name', 'testing_pred']).mean()
res = res.drop(columns=['seed'])
#res[['mask_size', "mask_entropy", "max_avg"]]
res

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,num_test,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,fidelity_gnn_acc+,fidelity_gnn_acc-,fidelity_gnn_prob+,fidelity_gnn_prob-,mask_size_init,mask_entropy_init,max_avg_init,mask_size_transf,mask_entropy_transf,max_avg_transf,time,number_of_edges
dataset,true_label_as_target,hard_mask,topk,explainer_name,testing_pred,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
actor,False,False,1.0,basic_gnnexplainer,correct,50.0,-1.00,-1.00,-1.000000,-1.000000,0.00,0.42,4.172325e-09,0.113389,2052.28,5.825808,0.729936,1.00,0.000000,1.000000,7.6932,60918.0
actor,False,False,1.0,basic_gnnexplainer,wrong,50.0,-1.00,-1.00,-1.000000,-1.000000,0.02,0.42,2.128666e-03,0.098162,2052.28,5.825808,0.729936,1.00,0.000000,1.000000,7.6932,60918.0
actor,False,False,1.0,distance,correct,50.0,-1.00,-1.00,-1.000000,-1.000000,0.07,0.39,1.693453e-02,0.106466,60913.74,10.960461,0.110511,1.00,0.000000,1.000000,0.9330,60918.0
actor,False,False,1.0,distance,wrong,50.0,-1.00,-1.00,-1.000000,-1.000000,0.00,0.42,1.022220e-07,0.097939,60913.74,10.960461,0.110511,1.00,0.000000,1.000000,0.9330,60918.0
actor,False,False,1.0,gnnexplainer,correct,50.0,-1.00,-1.00,-1.000000,-1.000000,0.41,0.41,6.899185e-02,0.096872,1208.43,4.693933,0.051251,1.00,0.000000,1.000000,7.2809,60918.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
facebook,True,True,100.0,occlusion,correct,50.0,0.81,0.09,0.212708,-0.000063,-1.00,-1.00,-1.000000e+00,-1.000000,7477.30,6.379203,0.358977,79.98,3.739757,0.367421,32.3461,364116.0
facebook,True,True,100.0,occlusion,wrong,50.0,0.11,0.24,-0.000172,-0.040893,-1.00,-1.00,-1.000000e+00,-1.000000,7477.30,6.379203,0.358977,79.98,3.739757,0.367421,32.3461,364116.0
facebook,True,True,100.0,pgmexplainer,wrong,50.0,0.11,0.24,-0.000172,-0.041453,-1.00,-1.00,-1.000000e+00,-1.000000,18851.14,8.422239,0.294665,99.36,4.585377,0.851328,142.4974,364116.0
facebook,True,True,100.0,sa,correct,50.0,0.86,0.50,0.238503,0.118860,-1.00,-1.00,-1.000000e+00,-1.000000,252675.78,11.283742,0.560497,100.00,4.573198,0.748864,0.0119,364116.0


In [33]:
res = res.reset_index()
res.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/prediction_type/nc_real_predtype_1expe.csv', index=False)

## Node Classification - Real world - 10 seeds

In [6]:
datasets = [
      "cora",
      "pubmed",
      "citeseer",
      "cornell",
      "texas",
      "wisconsin",
      "actor",
      "chameleon",
    "squirrel",
    "facebook"
    ]

### Sparsity

In [7]:
#name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_dataset={dataset}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}'
#name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_dataset={dataset}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}_seed={seed}'
name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}_seed={seed}'
#name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}'


In [8]:
versions = ["v0", "v1", "v2", "v3", "v4", "v5"]
SCORES = []
for v in version:

    logdir = f'/cluster/work/zhang/kamara/checkpoints/node_classification/real/topk/topk_real_5_{data}/logs'
    jsonpath = f'/cluster/home/kamara/Explain/configs/real/topk/config_real_topk_5expe_{data}.json'
    ranges = get_param_ranges(jsonpath)
    infos, initial_mask_infos, transformed_mask_infos, fidelity = get_info(logdir)
    
    df_infos = get_df_results(infos, ranges, name, metrics = ['dataset', 'time', "number_of_edges", "mask_sparsity_init", "non_zero_values_init"])
    print(df_infos)
    df_initial_mask_infos = get_df_results(initial_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg"])
    df_tranformed_mask_infos = get_df_results(transformed_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg"])
    df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 
                                                           'fidelity_gnn_acc+', 'fidelity_gnn_acc-', 'fidelity_gnn_prob+', 'fidelity_gnn_prob-',
                                                           'mask_sparsity', 'expl_edges'])
    
    df_mask_infos = pd.merge(df_initial_mask_infos, df_tranformed_mask_infos, suffixes=['_init', '_transf'], on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "sparsity", "true_label_as_target", "hard_mask"])
    scores = df_fid
    scores = pd.merge(scores, df_mask_infos, on=["explainer_name", "num_test", "data_save_dir", "seed", "sparsity", "true_label_as_target", "hard_mask"])
    scores = pd.merge(scores, df_infos, on=["explainer_name", "num_test", "data_save_dir", "seed", "sparsity", "true_label_as_target", "hard_mask"])
    res = scores.groupby(by=['dataset', "true_label_as_target", "hard_mask", 'sparsity', 'explainer_name']).mean()
    res = res.drop(columns=['seed'])
    print('res', res)

    SCORES.append(res)
    print(SCORES)
                                                                                                                     


     none explainer_name  sparsity dataset true_label_as_target hard_mask  \
0                 random     0.000    cora                 True      True   
1                 random     0.000    cora                 True      True   
2                 random     0.000    cora                 True      True   
3                 random     0.000    cora                 True      True   
4                 random     0.000    cora                 True      True   
...   ...            ...       ...     ...                  ...       ...   
1595        pgmexplainer     0.997    cora                False     False   
1596        pgmexplainer     0.997    cora                False     False   
1597        pgmexplainer     0.997    cora                False     False   
1598        pgmexplainer     0.997    cora                False     False   
1599        pgmexplainer     0.997    cora                False     False   

     explain_graph  num_test  seed data_save_dir     time  number_of_edges 

     none explainer_name  sparsity dataset true_label_as_target hard_mask  \
0                 random     0.000  pubmed                 True      True   
1                 random     0.000  pubmed                 True      True   
2                 random     0.000  pubmed                 True      True   
3                 random     0.000  pubmed                 True      True   
4                 random     0.000  pubmed                 True      True   
...   ...            ...       ...     ...                  ...       ...   
1595        pgmexplainer     0.997  pubmed                False     False   
1596        pgmexplainer     0.997  pubmed                False     False   
1597        pgmexplainer     0.997  pubmed                False     False   
1598        pgmexplainer     0.997  pubmed                False     False   
1599        pgmexplainer     0.997  pubmed                False     False   

     explain_graph  num_test  seed data_save_dir     time  number_of_edges 

     none explainer_name  sparsity   dataset true_label_as_target hard_mask  \
0                 random     0.000  citeseer                 True      True   
1                 random     0.000  citeseer                 True      True   
2                 random     0.000  citeseer                 True      True   
3                 random     0.000  citeseer                 True      True   
4                 random     0.000  citeseer                 True      True   
...   ...            ...       ...       ...                  ...       ...   
1593        pgmexplainer     0.997  citeseer                False     False   
1594        pgmexplainer     0.997  citeseer                False     False   
1595        pgmexplainer     0.997  citeseer                False     False   
1596        pgmexplainer     0.997  citeseer                False     False   
1597        pgmexplainer     0.997  citeseer                False     False   

     explain_graph  num_test  seed data_save_dir   

     none explainer_name  sparsity  dataset true_label_as_target hard_mask  \
0                 random     0.000  cornell                 True      True   
1                 random     0.000  cornell                 True      True   
2                 random     0.000  cornell                 True      True   
3                 random     0.000  cornell                 True      True   
4                 random     0.000  cornell                 True      True   
...   ...            ...       ...      ...                  ...       ...   
1595        pgmexplainer     0.997  cornell                False     False   
1596        pgmexplainer     0.997  cornell                False     False   
1597        pgmexplainer     0.997  cornell                False     False   
1598        pgmexplainer     0.997  cornell                False     False   
1599        pgmexplainer     0.997  cornell                False     False   

     explain_graph  num_test  seed data_save_dir    time  numbe

     none explainer_name  sparsity dataset true_label_as_target hard_mask  \
0                 random     0.000   texas                 True      True   
1                 random     0.000   texas                 True      True   
2                 random     0.000   texas                 True      True   
3                 random     0.000   texas                 True      True   
4                 random     0.000   texas                 True      True   
...   ...            ...       ...     ...                  ...       ...   
1595        pgmexplainer     0.997   texas                False     False   
1596        pgmexplainer     0.997   texas                False     False   
1597        pgmexplainer     0.997   texas                False     False   
1598        pgmexplainer     0.997   texas                False     False   
1599        pgmexplainer     0.997   texas                False     False   

     explain_graph  num_test  seed data_save_dir    time  number_of_edges  

     none explainer_name  sparsity    dataset true_label_as_target hard_mask  \
0                 random     0.000  wisconsin                 True      True   
1                 random     0.000  wisconsin                 True      True   
2                 random     0.000  wisconsin                 True      True   
3                 random     0.000  wisconsin                 True      True   
4                 random     0.000  wisconsin                 True      True   
...   ...            ...       ...        ...                  ...       ...   
1591        pgmexplainer     0.997  wisconsin                False     False   
1592        pgmexplainer     0.997  wisconsin                False     False   
1593        pgmexplainer     0.997  wisconsin                False     False   
1594        pgmexplainer     0.997  wisconsin                False     False   
1595        pgmexplainer     0.997  wisconsin                False     False   

     explain_graph  num_test  seed data

     none explainer_name  sparsity dataset true_label_as_target hard_mask  \
0                 random     0.000   actor                 True      True   
1                 random     0.000   actor                 True      True   
2                 random     0.000   actor                 True      True   
3                 random     0.000   actor                 True      True   
4                 random     0.000   actor                 True      True   
...   ...            ...       ...     ...                  ...       ...   
1595        pgmexplainer     0.997   actor                False     False   
1596        pgmexplainer     0.997   actor                False     False   
1597        pgmexplainer     0.997   actor                False     False   
1598        pgmexplainer     0.997   actor                False     False   
1599        pgmexplainer     0.997   actor                False     False   

     explain_graph  num_test  seed data_save_dir     time  number_of_edges 

     none explainer_name  sparsity    dataset true_label_as_target hard_mask  \
0                 random     0.000  chameleon                 True      True   
1                 random     0.000  chameleon                 True      True   
2                 random     0.000  chameleon                 True      True   
3                 random     0.000  chameleon                 True      True   
4                 random     0.000  chameleon                 True      True   
...   ...            ...       ...        ...                  ...       ...   
1595        pgmexplainer     0.997  chameleon                False     False   
1596        pgmexplainer     0.997  chameleon                False     False   
1597        pgmexplainer     0.997  chameleon                False     False   
1598        pgmexplainer     0.997  chameleon                False     False   
1599        pgmexplainer     0.997  chameleon                False     False   

     explain_graph  num_test  seed data

     none explainer_name  sparsity   dataset true_label_as_target hard_mask  \
0                 random     0.000  squirrel                 True      True   
1                 random     0.000  squirrel                 True      True   
2                 random     0.000  squirrel                 True      True   
3                 random     0.000  squirrel                 True      True   
4                 random     0.000  squirrel                 True      True   
...   ...            ...       ...       ...                  ...       ...   
1433        pgmexplainer     0.997  squirrel                False     False   
1434        pgmexplainer     0.997  squirrel                False     False   
1435        pgmexplainer     0.997  squirrel                False     False   
1436        pgmexplainer     0.997  squirrel                False     False   
1437        pgmexplainer     0.997  squirrel                False     False   

     explain_graph  num_test  seed data_save_dir   

     none explainer_name  sparsity   dataset true_label_as_target hard_mask  \
0                 random     0.000  facebook                 True      True   
1                 random     0.000  facebook                 True      True   
2                 random     0.000  facebook                 True      True   
3                 random     0.000  facebook                 True      True   
4                 random     0.000  facebook                 True      True   
...   ...            ...       ...       ...                  ...       ...   
1595        pgmexplainer     0.997  facebook                False     False   
1596        pgmexplainer     0.997  facebook                False     False   
1597        pgmexplainer     0.997  facebook                False     False   
1598        pgmexplainer     0.997  facebook                False     False   
1599        pgmexplainer     0.997  facebook                False     False   

     explain_graph  num_test  seed data_save_dir   

In [9]:
avg_scores = pd.concat(SCORES)
avg_scores

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,num_test,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,fidelity_gnn_acc+,fidelity_gnn_acc-,fidelity_gnn_prob+,fidelity_gnn_prob-,mask_sparsity,...,mask_size_init,mask_entropy_init,max_avg_init,mask_size_transf,mask_entropy_transf,max_avg_transf,time,number_of_edges,mask_sparsity_init,non_zero_values_init
dataset,true_label_as_target,hard_mask,sparsity,explainer_name,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
cora,False,False,0.000,basic_gnnexplainer,100.0,-1.000,-1.000,-1.000000,-1.000000,0.376,0.379,0.384231,0.413214,0.987620,...,164.210,3.292074,0.368996,164.210,3.292074,0.368996,3.63883,13264.0,0.987620,164.210
cora,False,False,0.000,distance,100.0,-1.000,-1.000,-1.000000,-1.000000,0.162,0.068,0.539109,0.311883,0.122430,...,11640.092,8.849912,0.169638,11640.092,8.849912,0.169638,0.28118,13264.0,0.122430,11640.092
cora,False,False,0.000,gnnexplainer,100.0,-1.000,-1.000,-1.000000,-1.000000,0.493,0.373,0.541627,0.601882,0.987620,...,164.208,2.920812,0.141127,164.208,2.920812,0.141127,3.62851,13264.0,0.987620,164.208
cora,False,False,0.000,gradcam,100.0,-1.000,-1.000,-1.000000,-1.000000,0.215,0.036,0.498248,0.365465,0.761656,...,3161.393,4.747347,0.135885,3161.393,4.747347,0.135885,0.00431,13264.0,0.761656,3161.393
cora,False,False,0.000,ig,100.0,-1.000,-1.000,-1.000000,-1.000000,0.337,0.031,0.556903,0.264937,0.251848,...,9923.491,7.807732,0.146000,9923.491,7.807732,0.146000,0.15615,13264.0,0.251848,9923.491
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
facebook,True,True,0.997,occlusion,100.0,0.862,0.012,0.751350,-0.059571,-1.000,-1.000,-1.000000,-1.000000,0.998051,...,7832.505,7.007714,0.889634,709.722,6.057597,0.889796,21.22537,364116.0,0.978489,7832.505
facebook,True,True,0.997,pagerank,100.0,0.786,0.044,0.685483,-0.027353,-1.000,-1.000,-1.000000,-1.000000,0.997045,...,23741.544,8.251815,0.001836,1076.078,6.233838,0.027866,2.46246,364116.0,0.934797,23741.544
facebook,True,True,0.997,pgmexplainer,100.0,0.550,0.040,0.454271,-0.032108,-1.000,-1.000,-1.000000,-1.000000,0.997471,...,18670.273,8.227726,0.371260,920.693,6.582991,0.643379,88.90563,364116.0,0.948724,18670.273
facebook,True,True,0.997,random,100.0,0.072,0.785,0.002038,0.683861,-1.000,-1.000,-1.000000,-1.000000,0.997001,...,363750.047,12.612083,0.503269,1092.000,6.995766,0.998317,0.00279,364116.0,0.001005,363750.047


In [10]:
avg_scores = avg_scores.reset_index()
avg_scores.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_sparsity_avg10expe.csv', index=False)

### GNN scores

In [11]:
GNN_SCORES = []
for data in datasets:

    logdir = f'/cluster/home/kamara/checkpoints/node_classification/real/sparsity_real_avg10expe_{data}/logs'
    jsonpath = f'/cluster/home/kamara/Explain/configs/config_real_sparsity_avg10expe_{data}.json'
    ranges = get_param_ranges(jsonpath)
    gnn_train, gnn_test = get_gnn_info(logdir)
    
    df_gnn_train = get_df_results(gnn_train, ranges, name, metrics = ["prec", "recall", "acc"])
    df_gnn_test = get_df_results(gnn_test, ranges, name, metrics = ["prec", "recall", "acc"])
    df_gnn_train['f1_score'] = 2*df_gnn_train['recall']*df_gnn_train['prec']/(df_gnn_train['recall']+df_gnn_train['prec'])
    df_gnn_test['f1_score'] = 2*df_gnn_test['recall']*df_gnn_test['prec']/(df_gnn_test['recall']+df_gnn_test['prec'])

    df_gnn_train = df_gnn_train.groupby(by=['dataset']).mean().reset_index()
    df_gnn_test = df_gnn_test.groupby(by=['dataset']).mean().reset_index()

    gnn_scores = pd.merge(df_gnn_train, df_gnn_test, on=['dataset', 'sparsity', 'num_test'], suffixes=['_train', '_test'])
    gnn_scores = gnn_scores.drop(columns=["sparsity", "num_test", "seed_train", "seed_test"])
    print('res', gnn_scores)

    GNN_SCORES.append(gnn_scores)
    print(GNN_SCORES)

res   dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0    cora         1.0           1.0        1.0             1.0   0.781341   

   recall_test  acc_test  f1_score_test  
0     0.817384    0.8035       0.798947  
[  dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0    cora         1.0           1.0        1.0             1.0   0.781341   

   recall_test  acc_test  f1_score_test  
0     0.817384    0.8035       0.798947  ]
res   dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0  pubmed         1.0           1.0        1.0             1.0   0.771646   

   recall_test  acc_test  f1_score_test  
0     0.781766    0.7785        0.77667  
[  dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0    cora         1.0           1.0        1.0             1.0   0.781341   

   recall_test  acc_test  f1_score_test  
0     0.817384    0.8035       0.798947  ,   dataset  prec_train  recall_tr

res      dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0  chameleon    0.817928      0.806324    0.80641        0.812068   0.646635   

   recall_test  acc_test  f1_score_test  
0     0.634489  0.632237       0.640485  
[  dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0    cora         1.0           1.0        1.0             1.0   0.781341   

   recall_test  acc_test  f1_score_test  
0     0.817384    0.8035       0.798947  ,   dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0  pubmed         1.0           1.0        1.0             1.0   0.771646   

   recall_test  acc_test  f1_score_test  
0     0.781766    0.7785        0.77667  ,     dataset  prec_train  recall_train  acc_train  f1_score_train  prec_test  \
0  citeseer         1.0           1.0        1.0             1.0   0.651307   

   recall_test  acc_test  f1_score_test  
0     0.652012    0.6764       0.651656  ,    dataset  prec_train  re

In [12]:
avg_gnn_scores = pd.concat(GNN_SCORES)
avg_gnn_scores

Unnamed: 0,dataset,prec_train,recall_train,acc_train,f1_score_train,prec_test,recall_test,acc_test,f1_score_test
0,cora,1.0,1.0,1.0,1.0,0.781341,0.817384,0.8035,0.798947
0,pubmed,1.0,1.0,1.0,1.0,0.771646,0.781766,0.7785,0.77667
0,citeseer,1.0,1.0,1.0,1.0,0.651307,0.652012,0.6764,0.651656
0,cornell,0.927224,0.908135,0.974713,0.917484,0.354914,0.338861,0.532432,0.344153
0,texas,0.972625,0.966848,0.991954,0.969666,0.296897,0.277057,0.510811,0.284965
0,wisconsin,0.980174,0.955045,0.975833,0.967295,0.397882,0.406152,0.535294,0.39667
0,actor,0.594783,0.480918,0.538185,0.531375,0.260683,0.249017,0.285921,0.253762
0,chameleon,0.817928,0.806324,0.80641,0.812068,0.646635,0.634489,0.632237,0.640485
0,squirrel,0.469282,0.474848,0.475527,0.467952,0.333126,0.373366,0.376114,0.350042
0,facebook,0.931204,0.926199,0.933335,0.928694,0.923283,0.917607,0.926346,0.920435


In [13]:
#gnn_scores.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_gnn_scores.csv', index=False)

## Node Classification - Synthetic

In [6]:
def get_info_syn(logdir):
    infos, initial_mask_infos, transformed_mask_infos, accuracy, fidelity = {}, {}, {}, {}, {}
    for filename in os.listdir(logdir):
        if filename.endswith(".stdout") and filename.startswith("_"):
            _, infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__infos:')
            _, initial_mask_infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__initial_edge_mask_infos:')
            _, transformed_mask_infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__transformed_mask_infos:')
            _, fidelity[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__fidelity:')             
            _, accuracy[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__accuracy:')             
    return(infos, initial_mask_infos, transformed_mask_infos, accuracy, fidelity)



### Topk

In [7]:
logdir = f'/cluster/work/zhang/kamara/checkpoints/node_classification/syn/topk/topk_syn_5/logs'
jsonpath = f'/cluster/home/kamara/Explain/configs/syn/topk/config_syn_topk_5expe.json'
infos, initial_mask_infos, transformed_mask_infos, accuracy, fidelity = get_info_syn(logdir)
gnn_train, gnn_test = get_gnn_info(logdir)

print(list(fidelity.keys())[1])
ranges = get_param_ranges(jsonpath)

_explainer_name=sa_dataset=syn6_true_label_as_target=True_hard_mask=True_seed=0


In [8]:
#name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_dataset={dataset}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}'
name = '{none}_explainer_name={explainer_name}_dataset={dataset}_true_label_as_target={true_label_as_target}_hard_mask={hard_mask}_seed={seed}'


#### GNN scores

In [9]:
df_gnn_train = get_df_results(gnn_train, ranges, name, metrics = ["prec", "recall", "f1_score", "acc"])
df_gnn_test = get_df_results(gnn_test, ranges, name, metrics = ["prec", "recall", "f1_score", "acc"])

df_gnn_train = df_gnn_train.groupby(by=['dataset']).mean().reset_index()
df_gnn_test = df_gnn_test.groupby(by=['dataset']).mean().reset_index()
df_gnn_train['f1_score'] = 2*df_gnn_train['recall']*df_gnn_train['prec']/(df_gnn_train['recall']+df_gnn_train['prec'])
df_gnn_test['f1_score'] = 2*df_gnn_test['recall']*df_gnn_test['prec']/(df_gnn_test['recall']+df_gnn_test['prec'])


gnn_scores = pd.merge(df_gnn_train, df_gnn_test, on=['dataset', 'num_test'], suffixes=['_train', '_test'])
gnn_scores = gnn_scores.drop(columns=["num_test", "seed_train", "seed_test"])
gnn_scores

Unnamed: 0,dataset,prec_train,recall_train,f1_score_train,acc_train,prec_test,recall_test,f1_score_test,acc_test
0,syn1,0.971151,0.976283,0.97371,0.979439,0.972222,0.979167,0.975682,0.985714
1,syn3,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,syn4,0.98612,0.983051,0.984583,0.984962,1.0,1.0,1.0,1.0
3,syn5,0.902598,0.884369,0.89339,0.896809,0.925287,0.87,0.896792,0.895161
4,syn6,0.988189,0.987705,0.987947,0.988785,1.0,1.0,1.0,1.0


In [29]:
#gnn_scores.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_gnn_scores.csv', index=False)

#### Explainers

In [9]:
df_infos = get_df_results(infos, ranges, name, metrics = ['time', "number_of_edges"])
df_initial_mask_infos = get_df_results(initial_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg"])
df_tranformed_mask_infos = get_df_results(transformed_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg", "cc_ratio", "topk"], selection="all")
df_acc = get_df_results(accuracy, ranges, name, metrics = ["f1_score", "precision", "recall", "topk"], selection="all")
df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 
                                                           'fidelity_gnn_acc+', 'fidelity_gnn_acc-', 'fidelity_gnn_prob+', 'fidelity_gnn_prob-', 'topk'], selection="all")

In [10]:
df_mask_infos = pd.merge(df_initial_mask_infos, df_tranformed_mask_infos, suffixes=['_init', '_transf'], on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask"])
scores = pd.merge(df_fid, df_acc, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask", "topk"])
scores = pd.merge(scores, df_mask_infos, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask", "topk"])
scores = pd.merge(scores, df_infos, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask"])
scores
#scores.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_sparsity_all.csv', index=False)


Unnamed: 0,none_x,explainer_name,params_list_x,dataset,true_label_as_target,hard_mask,explain_graph_x,num_test,seed,data_save_dir,...,explain_graph_transf,mask_size_transf,mask_entropy_transf,max_avg_transf,cc_ratio,none,params_list,explain_graph,time,number_of_edges
0,,random,151015202550100,syn1,True,True,False,100,0,data,...,False,1.00,0.000000,1.000000,0.500000,,151015202550100,False,0.0001,4110
1,,random,151015202550100,syn1,True,True,False,100,0,data,...,False,5.00,1.609438,0.999137,0.492778,,151015202550100,False,0.0001,4110
2,,random,151015202550100,syn1,True,True,False,100,0,data,...,False,10.00,2.302585,0.998306,0.484598,,151015202550100,False,0.0001,4110
3,,random,151015202550100,syn1,True,True,False,100,0,data,...,False,15.00,2.708050,0.997599,0.478049,,151015202550100,False,0.0001,4110
4,,random,151015202550100,syn1,True,True,False,100,0,data,...,False,20.00,2.995731,0.997061,0.469671,,151015202550100,False,0.0001,4110
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8187,,subgraphx,151015202550100,syn6,False,False,False,100,3,data,...,False,6.36,1.745893,1.000000,0.277167,,151015202550100,False,64.2504,3948
8188,,subgraphx,151015202550100,syn6,False,False,False,100,3,data,...,False,6.36,1.745893,1.000000,0.277167,,151015202550100,False,64.2504,3948
8189,,subgraphx,151015202550100,syn6,False,False,False,100,3,data,...,False,6.36,1.745893,1.000000,0.277167,,151015202550100,False,64.2504,3948
8190,,subgraphx,151015202550100,syn6,False,False,False,100,3,data,...,False,6.36,1.745893,1.000000,0.277167,,151015202550100,False,64.2504,3948


In [11]:
res = scores.groupby(by=['dataset', "true_label_as_target", "hard_mask", 'topk', 'explainer_name']).mean()
res = res.drop(columns=['seed'])
#res[['mask_size', "mask_entropy", "max_avg"]]
res

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,num_test,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,fidelity_gnn_acc+,fidelity_gnn_acc-,fidelity_gnn_prob+,fidelity_gnn_prob-,f1_score,...,recall,mask_size_init,mask_entropy_init,max_avg_init,mask_size_transf,mask_entropy_transf,max_avg_transf,cc_ratio,time,number_of_edges
dataset,true_label_as_target,hard_mask,topk,explainer_name,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
syn1,False,False,1.0,basic_gnnexplainer,100.0,-1.000,-1.000,-1.000000,-1.000000,0.054,0.712,0.039625,0.614392,0.093714,...,0.054667,231.120,3.327350,0.320548,1.000,0.000000,1.000000,0.500000,3.29116,4110.0
syn1,False,False,1.0,distance,100.0,-1.000,-1.000,-1.000000,-1.000000,0.200,0.716,0.121379,0.618316,0.265143,...,0.154667,4057.256,8.182173,0.076772,1.000,0.000000,1.000000,0.500000,0.01372,4110.0
syn1,False,False,1.0,gnnexplainer,100.0,-1.000,-1.000,-1.000000,-1.000000,0.064,0.686,0.043124,0.601242,0.084000,...,0.049000,231.120,3.320252,0.289070,1.000,0.000000,1.000000,0.500000,3.21740,4110.0
syn1,False,False,1.0,ig,100.0,-1.000,-1.000,-1.000000,-1.000000,0.408,0.752,0.336523,0.645657,0.220000,...,0.128333,3897.432,7.975210,0.439951,1.000,0.000000,1.000000,0.500000,0.28142,4110.0
syn1,False,False,1.0,occlusion,100.0,-1.000,-1.000,-1.000000,-1.000000,0.356,0.716,0.289072,0.617895,0.127429,...,0.074333,4100.900,8.301666,0.873993,1.000,0.000000,1.000000,0.500000,0.51370,4110.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
syn6,True,True,100.0,pgexplainer,100.0,0.238,0.526,0.204853,0.414221,-1.000,-1.000,-1.000000,-1.000000,0.022283,...,0.124000,752.132,5.416001,0.007118,100.000,4.605009,0.967905,0.500000,70.08234,3948.0
syn6,True,True,100.0,pgmexplainer,100.0,0.616,0.058,0.510130,0.054867,-1.000,-1.000,-1.000000,-1.000000,0.143500,...,0.856000,810.576,6.040893,0.281390,99.828,4.557152,0.516066,0.030631,3.78876,3948.0
syn6,True,True,100.0,random,100.0,0.014,0.618,0.008127,0.518846,-1.000,-1.000,-1.000000,-1.000000,0.004898,...,0.050800,3942.996,8.086957,0.459848,100.000,4.605142,0.983331,0.355481,0.00010,3948.0
syn6,True,True,100.0,sa,100.0,0.280,0.614,0.271012,0.538878,-1.000,-1.000,-1.000000,-1.000000,0.018546,...,0.143600,3582.228,7.844675,0.814871,100.000,4.476540,0.827015,0.050861,0.00374,3948.0


In [13]:
res = res.reset_index()
res.to_csv('/cluster/home/kamara/Explain/csv/node_classification/syn/topk/nc_syn_topk_5expe.csv', index=False)

### Mask property (size, entropy, avg max value, connected components)

In [15]:
logdir = f'/cluster/work/zhang/kamara/checkpoints/node_classification/syn/topk/topk_syn_cc/logs'
jsonpath = f'/cluster/home/kamara/Explain/configs/syn/topk/config_syn_topk_cc.json'
infos, initial_mask_infos, transformed_mask_infos, accuracy, fidelity = get_info_syn(logdir)
gnn_train, gnn_test = get_gnn_info(logdir)

print(list(fidelity.keys())[1])
ranges = get_param_ranges(jsonpath)

_explainer_name=occlusion


In [16]:
name = '{none}_explainer_name={explainer_name}'

In [17]:
df_infos = get_df_results(infos, ranges, name, metrics = ['time', "number_of_edges"])
df_initial_mask_infos = get_df_results(initial_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg", "cc_ratio"])
df_tranformed_mask_infos = get_df_results(transformed_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg", "cc_ratio", "topk"], selection="all")
df_acc = get_df_results(accuracy, ranges, name, metrics = ["f1_score", "precision", "recall", "topk"], selection="all")
df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 
                                                           'fidelity_gnn_acc+', 'fidelity_gnn_acc-', 'fidelity_gnn_prob+', 'fidelity_gnn_prob-', 'topk'], selection="all")

In [18]:
df_mask_infos = pd.merge(df_initial_mask_infos, df_tranformed_mask_infos, suffixes=['_init', '_transf'], on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask"])
scores = pd.merge(df_fid, df_acc, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask", "topk"])
scores = pd.merge(scores, df_mask_infos, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask", "topk"])
scores = pd.merge(scores, df_infos, on=["dataset", "explainer_name", "num_test", "data_save_dir", "seed", "true_label_as_target", "hard_mask"])
scores
#scores.to_csv('/cluster/home/kamara/Explain/csv/node_classification/real/nc_real_sparsity_all.csv', index=False)


Unnamed: 0,none_x,explainer_name,params_list_x,dataset,true_label_as_target,hard_mask,explain_graph_x,num_test,seed,data_save_dir,...,explain_graph_transf,mask_size_transf,mask_entropy_transf,max_avg_transf,cc_ratio_transf,none,params_list,explain_graph,time,number_of_edges
0,,random,1234567891015202550100,syn1,True,True,False,20,0,data,...,False,1.0,0.000000,1.000000,0.500000,,1234567891015202550100,False,0.0002,4110
1,,random,1234567891015202550100,syn1,True,True,False,20,0,data,...,False,2.0,0.693147,0.999795,0.500000,,1234567891015202550100,False,0.0002,4110
2,,random,1234567891015202550100,syn1,True,True,False,20,0,data,...,False,3.0,1.098612,0.999626,0.495000,,1234567891015202550100,False,0.0002,4110
3,,random,1234567891015202550100,syn1,True,True,False,20,0,data,...,False,4.0,1.386294,0.999397,0.492857,,1234567891015202550100,False,0.0002,4110
4,,random,1234567891015202550100,syn1,True,True,False,20,0,data,...,False,5.0,1.609438,0.999178,0.494444,,1234567891015202550100,False,0.0002,4110
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,,subgraphx,1234567891015202550100,syn1,True,True,False,20,0,data,...,False,9.6,1.990807,1.000000,0.276667,,1234567891015202550100,False,0.1252,4110
161,,subgraphx,1234567891015202550100,syn1,True,True,False,20,0,data,...,False,9.6,1.990807,1.000000,0.276667,,1234567891015202550100,False,0.1252,4110
162,,subgraphx,1234567891015202550100,syn1,True,True,False,20,0,data,...,False,9.6,1.990807,1.000000,0.276667,,1234567891015202550100,False,0.1252,4110
163,,subgraphx,1234567891015202550100,syn1,True,True,False,20,0,data,...,False,9.6,1.990807,1.000000,0.276667,,1234567891015202550100,False,0.1252,4110


In [19]:
res = scores.groupby(by=['dataset', "true_label_as_target", "hard_mask", 'topk', 'explainer_name']).mean()
res = res.drop(columns=['seed'])
#res[['mask_size', "mask_entropy", "max_avg"]]
res

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,num_test,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,fidelity_gnn_acc+,fidelity_gnn_acc-,fidelity_gnn_prob+,fidelity_gnn_prob-,f1_score,...,mask_size_init,mask_entropy_init,max_avg_init,cc_ratio_init,mask_size_transf,mask_entropy_transf,max_avg_transf,cc_ratio_transf,time,number_of_edges
dataset,true_label_as_target,hard_mask,topk,explainer_name,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
syn1,True,True,1.0,basic_gnnexplainer,20.0,0.05,0.55,0.021082,0.434927,-1.0,-1.0,-1.0,-1.0,0.085714,...,281.8,3.813970,0.425575,0.064982,1.0,0.000000,1.000000,0.500000,3.2343,4110.0
syn1,True,True,1.0,distance,20.0,0.10,0.55,-0.024823,0.450872,-1.0,-1.0,-1.0,-1.0,0.242857,...,4052.1,8.178819,0.075163,0.001429,1.0,0.000000,1.000000,0.500000,0.0145,4110.0
syn1,True,True,1.0,gnnexplainer,20.0,0.05,0.55,-0.004908,0.450872,-1.0,-1.0,-1.0,-1.0,0.114286,...,281.8,3.693550,0.325276,0.064982,1.0,0.000000,1.000000,0.500000,3.1841,4110.0
syn1,True,True,1.0,ig,20.0,0.10,0.55,0.002297,0.459073,-1.0,-1.0,-1.0,-1.0,0.028571,...,4097.1,8.204257,0.892313,0.001643,1.0,0.000000,1.000000,0.500000,0.1724,4110.0
syn1,True,True,1.0,occlusion,20.0,0.45,0.55,0.326566,0.447400,-1.0,-1.0,-1.0,-1.0,0.128571,...,4109.0,8.309863,0.924946,0.001429,1.0,0.000000,1.000000,0.500000,0.6812,4110.0
syn1,True,True,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
syn1,True,True,100.0,pgexplainer,20.0,0.25,0.55,0.122988,0.409935,-1.0,-1.0,-1.0,-1.0,0.015821,...,923.7,5.823172,0.016531,0.194258,100.0,4.604380,0.883109,0.500000,49.9946,4110.0
syn1,True,True,100.0,pgmexplainer,20.0,0.45,0.15,0.363850,0.146160,-1.0,-1.0,-1.0,-1.0,0.234723,...,802.3,5.698170,0.249729,0.020465,85.7,4.290599,0.510493,0.044795,3.1267,4110.0
syn1,True,True,100.0,random,20.0,0.05,0.55,0.006008,0.450486,-1.0,-1.0,-1.0,-1.0,0.007647,...,4104.7,8.126336,0.426719,0.001429,100.0,4.605144,0.982484,0.367692,0.0002,4110.0
syn1,True,True,100.0,sa,20.0,0.55,0.55,0.354240,0.455535,-1.0,-1.0,-1.0,-1.0,0.015490,...,3728.3,7.837630,0.891943,0.002128,100.0,4.475744,0.900255,0.058967,0.0036,4110.0


In [20]:
res = res.reset_index()
res.to_csv('/cluster/home/kamara/Explain/csv/node_classification/syn/topk/nc_syn_topk_mask_properties.csv', index=False)

### Accuracy top

In [10]:
def get_info_top(logdir):
    infos, initial_mask_infos, accuracy_top = {}, {}, {}
    for filename in os.listdir(logdir):
        if filename.endswith(".stdout") and filename.startswith("_"):
            _, infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__infos:')
            _, initial_mask_infos[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__initial_edge_mask_infos:')
            _, accuracy_top[filename.replace(".stdout", "")] = parseLogs(join(logdir, filename), kw='__accuracy_top:')
    return(infos, initial_mask_infos, accuracy_top)


In [12]:
logdir = f'/cluster/work/zhang/kamara/checkpoints/node_classification/syn/topk/top_edges/logs'
jsonpath = f'/cluster/home/kamara/Explain/configs/syn/topk/config_top_edges.json'
infos, initial_mask_infos, accuracy_top = get_info_top(logdir)

ranges = get_param_ranges(jsonpath)
print(list(accuracy_top.keys())[1])

Unable to replace NaNs
__infos:{"dataset": "syn5", "explainer": "subgraphx", "number_of_edges": 3410, "num_test": 20, "num_test_final": 20, "groundtruth target": "True", "time": 4.651, "edge_mask_sparsity_init": 0.9968035190615836, "edge_mask_size_init": 10.9, "edge_mask_connected_init": null}

Unable to replace NaNs
__initial_edge_mask_infos:{"mask_size": 10.9, "mask_entropy": 2.4763032054249767, "max_avg": 1.0, "cc_ratio": null}

Unable to replace NaNs
__infos:{"dataset": "syn5", "explainer": "subgraphx", "number_of_edges": 3410, "num_test": 20, "num_test_final": 20, "groundtruth target": "True", "time": 5.6368, "edge_mask_sparsity_init": 0.99524926686217, "edge_mask_size_init": 16.2, "edge_mask_connected_init": null}

Unable to replace NaNs
__initial_edge_mask_infos:{"mask_size": 16.2, "mask_entropy": 2.697263310621022, "max_avg": 1.0, "cc_ratio": null}

Unable to replace NaNs
__infos:{"dataset": "syn5", "explainer": "subgraphx", "number_of_edges": 3410, "num_test": 20, "num_test_fi

In [13]:
name = '{none}_explainer_name={explainer_name}_dataset={dataset}_seed={seed}'

In [14]:
df_infos = get_df_results(infos, ranges, name, metrics = ['time', "number_of_edges"])
df_initial_mask_infos = get_df_results(initial_mask_infos, ranges, name, metrics = ['mask_size', "mask_entropy", "max_avg", "cc_ratio"])
df_acc_top = get_df_results(accuracy_top, ranges, name, metrics = ['f1_score', 'recall', 'precision'])

In [16]:
scores = df_acc_top
scores = pd.merge(scores, df_initial_mask_infos, on=["dataset", "explainer_name", "data_save_dir", "seed"])
scores = pd.merge(scores, df_infos, on=["dataset", "explainer_name", "data_save_dir", "seed"])
scores

Unnamed: 0,none_x,explainer_name,dataset,top_acc_x,seed,num_test_x,data_save_dir,f1_score,recall,precision,...,num_test_y,mask_size,mask_entropy,max_avg,cc_ratio,none,top_acc,num_test,time,number_of_edges
0,,random,syn1,True,0,20,data,0.000000,0.000000,0.000000,...,20,4104.70,8.126336,0.426719,0.001429,,True,20,0.0002,4110
1,,random,syn1,True,1,20,data,0.008333,0.008333,0.008333,...,20,4104.55,8.126173,0.504091,0.001429,,True,20,0.0002,4110
2,,random,syn1,True,2,20,data,0.008333,0.008333,0.008333,...,20,4105.15,8.126489,0.508101,0.001429,,True,20,0.0003,4110
3,,random,syn1,True,3,20,data,0.000000,0.000000,0.000000,...,20,4104.80,8.127410,0.483644,0.001429,,True,20,0.0002,4110
4,,random,syn1,True,4,20,data,0.008333,0.008333,0.008333,...,20,4104.10,8.126385,0.496112,0.001429,,True,20,0.0002,4110
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255,,subgraphx,syn4,True,3,20,data,0.251364,0.208333,0.327500,...,20,8.60,2.124361,1.000000,0.195833,,True,20,4.9707,1950
256,,subgraphx,syn5,True,3,20,data,0.238430,0.200000,0.298683,...,20,11.80,2.122123,1.000000,0.234311,,True,20,3.9769,3410
257,,subgraphx,syn6,True,2,20,data,0.627778,0.550000,0.875000,...,20,6.80,1.730496,1.000000,0.290000,,True,20,273.4840,3948
258,,subgraphx,syn6,True,3,20,data,0.583333,0.490000,0.820833,...,20,6.20,1.709424,1.000000,0.288333,,True,20,54.9894,3948


In [17]:
res = scores.groupby(by=['dataset', 'explainer_name']).mean()
res

Unnamed: 0_level_0,Unnamed: 1_level_0,seed,num_test_x,f1_score,recall,precision,num_test_y,mask_size,mask_entropy,max_avg,cc_ratio,num_test,time,number_of_edges
dataset,explainer_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
syn1,basic_gnnexplainer,2.0,20.0,0.309697,0.303333,0.317333,20.0,234.96,3.415766,0.329041,0.065935,20.0,4.25918,4110.0
syn1,distance,2.0,20.0,0.678455,0.668333,0.691,20.0,4055.18,8.180945,0.077491,0.001429,20.0,0.01904,4110.0
syn1,gnnexplainer,2.0,20.0,0.297576,0.291667,0.304667,20.0,234.96,3.436876,0.309713,0.065935,20.0,5.60452,4110.0
syn1,ig,2.0,20.0,0.446576,0.443333,0.450667,20.0,3917.76,8.06259,0.427868,0.001903,20.0,0.214,4110.0
syn1,occlusion,2.0,20.0,0.171667,0.171667,0.171667,20.0,4108.97,8.308191,0.875819,0.001429,20.0,0.72046,4110.0
syn1,pagerank,2.0,20.0,0.861667,0.861667,0.861667,20.0,3007.76,6.364215,0.001006,0.002749,20.0,0.02858,4110.0
syn1,pgexplainer,2.0,20.0,0.017,0.016667,0.0175,20.0,934.08,5.879527,0.020612,0.193272,20.0,67.99226,4110.0
syn1,pgmexplainer,2.0,20.0,0.543333,0.543333,0.543333,20.0,744.8,5.711957,0.253675,0.015661,20.0,4.82576,4110.0
syn1,random,2.0,20.0,0.005,0.005,0.005,20.0,4104.66,8.126559,0.483733,0.001429,20.0,0.00022,4110.0
syn1,sa,2.0,20.0,0.043333,0.043333,0.043333,20.0,3995.06,8.179686,0.92767,0.00162,20.0,0.00462,4110.0


In [19]:
res = res.reset_index()
res.to_csv('/cluster/home/kamara/Explain/csv/node_classification/syn/topk/nc_syn_top_edges.csv', index=False)

### Gc Layers

In [50]:
expe = "gc_layers"
logdir = f'/cluster/home/kamara/checkpoints/{expe}/logs'
jsonpath = f'/cluster/home/kamara/Explain/config_{expe}.json'
infos, accuracy, fidelity = get_info(logdir)
ranges = get_param_ranges(jsonpath)

print(list(accuracy.keys())[1])
name = '{none}_explainer_name={explainer_name}_num_gc_layers={num_gc_layers}'


_explainer_name=pagerank_num_gc_layers=1


In [51]:
df_acc = get_df_results(accuracy, ranges, name, metrics = ['f1_score', 'recall', 'precision', 'ged', 'auc'])
df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-'])
scores = pd.merge(df_acc, df_fid, on=["dataset", "num_gc_layers", "explainer_name", "num_test_nodes", "data_save_dir"])
scores = scores.groupby(by=['num_gc_layers', 'explainer_name']).mean()
scores

Unnamed: 0_level_0,Unnamed: 1_level_0,num_test_nodes,f1_score,recall,precision,ged,auc,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-
num_gc_layers,explainer_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2,gnnexplainer,200.0,0.682879,0.6525,0.720833,4.065,0.998892,0.835,0.0,0.650817,0.0
2,pagerank,200.0,0.864167,0.864167,0.864167,2.435,0.999885,0.835,0.0,0.650817,0.0
3,gnnexplainer,200.0,0.329136,0.323333,0.33625,8.16,0.972214,0.61,0.015,0.582833,0.014268
3,pagerank,200.0,0.864167,0.864167,0.864167,2.43,0.999885,0.61,0.0,0.589879,-2.1e-05
4,gnnexplainer,200.0,0.055833,0.055833,0.055833,12.475,0.834369,0.75,0.29,0.571764,0.246716
4,pagerank,200.0,0.86,0.86,0.86,2.515,0.999885,0.83,0.0,0.640806,0.005695
5,gnnexplainer,200.0,0.006667,0.006667,0.006667,14.085,0.656783,0.55,0.575,0.512901,0.548674
5,pagerank,200.0,0.859167,0.859167,0.859167,2.525,0.999883,0.64,0.005,0.60821,-0.001958
6,gnnexplainer,200.0,0.003333,0.003333,0.003333,14.345,0.57559,0.61,0.645,0.545983,0.581469
6,pagerank,200.0,0.859167,0.859167,0.859167,2.525,0.999883,0.64,0.005,0.579183,-0.000794


## Graph Classification

### Sparsity

In [13]:
logdir = f'/cluster/home/kamara/checkpoints/graph_classification/sparsity_mask/logs'
jsonpath = f'/cluster/home/kamara/checkpoints/graph_classification/sparsity_mask/sweep.json'
infos, accuracy, fidelity = get_info(logdir)

print(list(accuracy.keys())[1])
name = '{none}_explainer_name={explainer_name}_sparsity={sparsity}_hard_mask={hard_mask}'

ranges = get_param_ranges(jsonpath)
df_infos = get_df_results(infos, ranges, name, metrics = ['time', "number_of_edges", "mask_sparsity_init", "non_zero_values_init"])
#df_acc_top = get_df_results(accuracy_top, ranges, name, metrics = ['f1_score', 'recall', 'precision', 'ged', 'auc'])
df_acc = get_df_results(accuracy, ranges, name, metrics = ['f1_score', 'recall', 'precision'])
df_fid = get_df_results(fidelity, ranges, name, metrics = ['fidelity_acc+', 'fidelity_acc-', 'fidelity_prob+', 'fidelity_prob-', 'mask_sparsity', 'expl_edges'])

#scores = pd.concat([df_acc, df_fid], axis=1, join="inner")
#scores = pd.merge(df_acc_top, df_acc, suffixes=['_top', ''], on=["dataset", "explainer_name", "num_test_nodes", "data_save_dir", "gpu", "sparsity"])
scores = df_acc
print(scores)
scores = pd.merge(scores, df_fid, on=["explainer_name", "num_test", "data_save_dir", "sparsity", "hard_mask"])
scores = pd.merge(scores, df_infos, on=["explainer_name", "num_test", "data_save_dir", "sparsity", "hard_mask"])
scores


_explainer_name=random_sparsity=0.7_hard_mask=True
Empty DataFrame
Columns: []
Index: []


KeyError: 'explainer_name'

In [7]:
res = scores.groupby(by=['dataset', 'sparsity', 'explainer_name', 'hard_mask']).mean()
res

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,num_test,f1_score,recall,precision,fidelity_acc+,fidelity_acc-,fidelity_prob+,fidelity_prob-,mask_sparsity,expl_edges,time,number_of_edges,mask_sparsity_init,non_zero_values_init
dataset,sparsity,explainer_name,hard_mask,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
syn1,0.000,distance,False,100.0,0.005823,1.000000,0.002920,0.28,0.64,0.102925,0.358078,0.010139,4068.33,0.0127,4110.0,0.010139,4068.33
syn1,0.000,distance,True,100.0,0.005823,1.000000,0.002920,0.76,0.00,0.511743,0.000000,0.010139,4068.33,0.0135,4110.0,0.010139,4068.33
syn1,0.000,gnnexplainer,False,100.0,0.444157,1.000000,0.328940,0.56,0.56,0.296190,0.348878,0.953431,191.40,2.6306,4110.0,0.953431,191.40
syn1,0.000,gnnexplainer,True,100.0,0.444157,1.000000,0.328940,0.76,0.00,0.511743,0.000000,0.953431,191.40,2.7273,4110.0,0.953431,191.40
syn1,0.000,ig,False,100.0,0.005099,0.873333,0.002557,0.35,0.64,0.042806,0.412742,0.002487,4099.78,0.1824,4110.0,0.002487,4099.78
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
syn6,0.997,random,True,100.0,0.002500,0.004000,0.001818,0.00,0.64,-0.000110,0.599028,0.997214,11.00,0.0000,3948.0,0.000253,3947.00
syn6,0.997,sa,False,100.0,0.000000,0.000000,0.000000,0.00,0.64,0.001296,0.599028,0.997214,11.00,0.0031,3948.0,0.000750,3945.04
syn6,0.997,sa,True,100.0,0.000000,0.000000,0.000000,0.00,0.64,0.001296,0.599028,0.997214,11.00,0.0032,3948.0,0.000750,3945.04
syn6,0.997,subgraphx,False,100.0,0.661056,0.634000,0.700500,0.54,0.20,0.493473,0.183070,0.997829,8.57,0.8231,3948.0,0.997822,8.60


In [8]:
res = res.reset_index()
res.to_csv('/cluster/home/kamara/Explain/csv/node_classification/sparsity_mask_nc.csv', index=False)