In [1]:
%load_ext autoreload
%autoreload 2

# Import libraries

In [2]:
import pandas as pd
from tabulate import tabulate
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
eval_metrics_path = 'eval_metrics/'
results_path = 'results/'
runs = 10

In [4]:
def compute_mean_std(csv_files, output_file, save=False):
    """
    Reads multiple CSV files, computes the mean and standard deviation of each column,
    and prints the results in a formatted table.
    
    Parameters:
        csv_files (list): List of file paths to CSV files.
    """
    # Read all dataframes into a list
    dataframes = [pd.read_csv(file, header=[0, 1], index_col=[0, 1]) for file in csv_files]
    
    # Concatenate along a new axis to align indices and columns
    combined_data = pd.concat(dataframes, axis=0, keys=range(len(csv_files)), names=['Run'])
    
    # Compute mean and std
    mean_df = combined_data.groupby(level=[1, 2]).mean()
    std_df = combined_data.groupby(level=[1, 2]).std()
    
    # Create formatted results
    if save:
        formatted_results = mean_df.copy()
        for col in mean_df.columns:
            formatted_results[col] = mean_df[col].map(lambda x: f"{x:.4f}") + " ± " + std_df[col].map(lambda x: f"{x:.4f}")
        
        # Convert to tabulated format
        table_str = tabulate(formatted_results, headers='keys', tablefmt='grid')
        # Save to text file
        with open(output_file, "w") as f:
            f.write("Results Summary (Mean ± Std)\n\n")
            f.write(table_str)
        
        print(f"Results saved to {output_file}")
    
    return mean_df, std_df, combined_data

In [5]:
def overall_rank(df: pd.DataFrame, metrics=None) -> pd.DataFrame:
    # List of metric columns to process.
    if metrics is None:
        metrics = ['accuracy', 'iou', 'fid+', 'fid-', 'characterization_score']
    
    # Initialize a dictionary to hold cumulative points per explainer.
    scores = {explainer: 0 for explainer in df['Explainer'].unique()}
    
    # Count the total number of groups (Model x Dataset x Metric) for scaling.
    count_groups = 0

    # Create a copy of the input dataframe to hold the point values.
    df_points = df.copy()

    # Iterate over each combination of Model and Dataset.
    for model in df['Model'].unique():
        for dataset in df['Dataset'].unique():
            # Get the subset for this combination.
            mask = (df['Model'] == model) & (df['Dataset'] == dataset)
            subset = df_points.loc[mask, :]
            # Process each metric.
            for metric in metrics:
                count_groups += 1
                # For 'fid-' lower is better; for all other metrics, higher is better.
                ascending = True if metric == 'fid-' else False
                # Compute dense ranking for the metric in the current group.
                ranks = subset[metric].rank(method='dense', ascending=ascending)
                # Convert rank to points: best (rank 1) gets 4, rank 2 gets 3, etc.
                # Points = 4 - (rank - 1)
                points = 4 - (ranks - 1)
                # Update the metric column in df_points with the computed points.
                df_points.loc[mask, metric] = points
                # Also accumulate points for the overall score per explainer.
                for idx, rank in ranks.items():
                    pts = 4 - (rank - 1)
                    explainer = df_points.loc[idx, 'Explainer']
                    scores[explainer] += pts

    # Maximum possible points across all groups.
    max_points = 4 * count_groups
    # Scale the cumulative points to a 0 to 5 range.
    final_scores = {explainer: (points / max_points) * 5 for explainer, points in scores.items()}
    # Compute the average inference time for each explainer.
    avg_inference = df.groupby('Explainer')['inference_time'].mean().to_dict()
    
    # Build the final ranking dataframe.
    result_df = pd.DataFrame([
        {'Explainer': explainer, 'Score': score, 'Avg_Inference_Time': avg_inference.get(explainer, None)}
        for explainer, score in final_scores.items()
    ])
    result_df = result_df.sort_values(by='Score', ascending=False).reset_index(drop=True)
    
    # Return both dataframes.
    return result_df, df_points

# Node classification

In [81]:
common_name = '_nc_metrics_'
nc_flat = []

## Random Explainer

In [9]:
explainer_name = 'random_explainer'
filepath = f'{eval_metrics_path}{explainer_name}{common_name}{0}.csv'
eval_df = pd.read_csv(filepath, header=[0, 1], index_col=[0, 1])
eval_df

Unnamed: 0_level_0,Dataset,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_community,ba_community,...,tree_cycles,tree_cycles,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid
Unnamed: 0_level_1,Metric,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time,accuracy,auroc,...,characterization_score,inference_time,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
Explainer,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
random_explainer,gcn,0.461018,0.441665,0.254073,0.47619,0.5,0.639424,0.487805,0.000284,0.496683,0.510893,...,0.107359,0.000285,0.518371,0.369671,0.479461,0.013699,0.0,0.0,0.027027,0.000284
random_explainer,graphsage,0.499523,0.486359,0.280671,0.095238,0.119048,0.37371,0.171893,0.000276,0.486872,0.47757,...,0.107937,0.000281,0.509192,0.349238,0.470256,0.0,0.0,0.0,0.0,0.000284
random_explainer,gat,0.539426,0.538457,0.315361,0.428571,0.5,0.485381,0.461538,0.000279,0.491706,0.495885,...,0.107359,0.000293,0.530805,0.376686,0.490907,0.068493,0.013699,0.0,0.128091,0.000301
random_explainer,gin,0.528903,0.530531,0.303469,0.428571,0.404762,0.618871,0.498339,0.000342,0.507687,0.511287,...,0.284082,0.000357,0.496613,0.343376,0.455248,0.068493,0.109589,0.0,0.127202,0.000359


In [10]:
csv_files = [f'{eval_metrics_path}{explainer_name}{common_name}{i}.csv' for i in range(runs)]
output_file = f'{results_path}{explainer_name}{common_name}.txt'
rnd_mean, rnd_std, rnd_combined = compute_mean_std(csv_files, output_file)

Results saved to results/random_explainer_nc_metrics_.txt


In [60]:
rnd_flat = rnd_mean.stack(level='Dataset').reset_index()
nc_flat.append(rnd_flat)
rnd_flat.head()

  rnd_flat = rnd_mean.stack(level='Dataset').reset_index()


Metric,Explainer,Model,Dataset,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
0,random_explainer,gat,ba_community,0.498543,0.50012,0.192,0.53619,0.530476,0.600946,0.500534,0.000286
1,random_explainer,gat,ba_shapes,0.502459,0.485745,0.276251,0.435714,0.430952,0.551012,0.49146,0.000285
2,random_explainer,gat,tree_cycles,0.507439,0.450684,0.426426,0.082857,0.077143,0.0,0.149759,0.000282
3,random_explainer,gat,tree_grid,0.509687,0.365865,0.472222,0.046575,0.034247,0.0,0.088078,0.000286
4,random_explainer,gcn,ba_community,0.502229,0.500916,0.196751,0.306667,0.311429,0.747378,0.424188,0.00028


## GNNExplainer

In [50]:
explainer_name = 'gnnexplainer'
filepath = f'{eval_metrics_path}{explainer_name}{common_name}{0}.csv'
eval_df = pd.read_csv(filepath, header=[0, 1], index_col=[0, 1])
eval_df

Unnamed: 0_level_0,Dataset,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_community,ba_community,...,tree_cycles,tree_cycles,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid
Unnamed: 0_level_1,Metric,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time,accuracy,auroc,...,characterization_score,inference_time,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
Explainer,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
gnnexplainer,gcn,0.741706,0.882697,0.469492,0.857143,0.0,0.160748,0.923077,0.511251,0.733793,0.625971,...,0.372093,0.535952,0.160293,0.470224,0.037815,0.123288,0.0,0.0,0.219512,0.519916
gnnexplainer,graphsage,0.665665,0.636917,0.186602,0.190476,0.0,0.075634,0.32,0.350071,0.704885,0.5916,...,0.25,0.360251,0.145685,0.426857,0.01285,0.0,0.0,0.0,0.0,0.340981
gnnexplainer,gat,0.734155,0.800888,0.36342,0.857143,0.0,0.204113,0.923077,0.62932,0.76349,0.589794,...,0.372093,0.639199,0.185832,0.525875,0.076438,0.136986,0.0,0.0,0.240964,0.642041
gnnexplainer,gin,0.685555,0.724772,0.310692,0.619048,0.0,0.278256,0.764706,0.331286,0.548054,0.529453,...,0.331707,0.348196,0.19065,0.479028,0.083641,0.136986,0.013699,0.0,0.240561,0.377107


In [51]:
csv_files = [f'{eval_metrics_path}{explainer_name}{common_name}{i}.csv' for i in range(runs)]
output_file = f'{results_path}{explainer_name}{common_name}.txt'
gnn_mean, gnn_std, gnn_combined = compute_mean_std(csv_files, output_file)

In [61]:
gnn_flat = gnn_mean.stack(level='Dataset').reset_index()
nc_flat.append(gnn_flat)
gnn_flat.head()

  gnn_flat = gnn_mean.stack(level='Dataset').reset_index()


Metric,Explainer,Model,Dataset,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
0,gnnexplainer,gat,ba_community,0.763954,0.601594,0.155657,0.533333,0.561905,0.389495,0.481046,0.635675
1,gnnexplainer,gat,ba_shapes,0.734542,0.807019,0.365208,0.857143,0.0,0.207722,0.923077,0.625128
2,gnnexplainer,gat,tree_cycles,0.496197,0.654484,0.345502,0.228571,0.0,0.0,0.372093,0.625312
3,gnnexplainer,gat,tree_grid,0.185758,0.520721,0.076457,0.136986,0.0,0.0,0.240964,0.62874
4,gnnexplainer,gcn,ba_community,0.733309,0.629905,0.239633,0.428571,0.295238,0.395531,0.533013,0.524321


## PGExplainer

In [62]:
explainer_name = 'pgexplainer'
filepath = f'{eval_metrics_path}{explainer_name}{common_name}{0}.csv'
eval_df = pd.read_csv(filepath, header=[0, 1], index_col=[0, 1])
eval_df

Unnamed: 0_level_0,Dataset,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_community,ba_community,...,tree_cycles,tree_cycles,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid
Unnamed: 0_level_1,Metric,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time,accuracy,auroc,...,characterization_score,inference_time,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
Explainer,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
pgexplainer,gcn,0.413121,0.505789,0.405751,0.0,0.571429,0.736564,0.0,0.00291,0.407486,0.536988,...,0.0,0.002859,0.861814,0.356164,0.861814,0.0,0.0,0.0,0.0,0.002861
pgexplainer,graphsage,0.403842,0.5,0.403842,0.0,0.119048,0.245204,0.0,0.002158,0.399106,0.522157,...,0.0,0.002128,0.861814,0.356164,0.861814,0.0,0.0,0.0,0.0,0.00212
pgexplainer,gat,0.403842,0.5,0.403842,0.0,0.404762,0.786625,0.0,0.00344,0.325811,0.485933,...,0.0,0.003321,0.861814,0.356164,0.861814,0.0,0.0,0.0,0.0,0.003338
pgexplainer,gin,0.191323,0.209867,0.052373,0.0,0.619048,0.663773,0.0,0.002343,0.430218,0.592865,...,0.0,0.002331,0.753798,0.303612,0.753798,0.0,0.986301,0.0,0.0,0.002355


In [64]:
csv_files = [f'{eval_metrics_path}{explainer_name}{common_name}{i}.csv' for i in range(runs)]
output_file = f'{results_path}{explainer_name}{common_name}.txt'
pge_mean, pge_std, pge_combined = compute_mean_std(csv_files, output_file)

In [65]:
pge_flat = pge_mean.stack(level='Dataset').reset_index()
nc_flat.append(pge_flat)
pge_flat.head()

  pge_flat = pge_mean.stack(level='Dataset').reset_index()


Metric,Explainer,Model,Dataset,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
0,pgexplainer,gat,ba_community,0.326722,0.493658,0.219914,0.0,0.533333,0.795337,0.0,0.00333
1,pgexplainer,gat,ba_shapes,0.403842,0.5,0.403842,0.0,0.404762,0.786622,0.0,0.003304
2,pgexplainer,gat,tree_cycles,0.727444,0.442857,0.727444,0.0,0.0,0.0,0.0,0.003272
3,pgexplainer,gat,tree_grid,0.861814,0.356164,0.861814,0.0,0.0,0.0,0.0,0.003258
4,pgexplainer,gcn,ba_community,0.363395,0.533533,0.252387,0.010476,0.602857,0.919676,0.020114,0.002837


## SubgraphX

In [66]:
from data_store import nc_datasets
datasets = nc_datasets
datasets.remove('ba_community')
datasets, nc_datasets

(['ba_shapes', 'tree_cycles', 'tree_grid'],
 ['ba_shapes', 'tree_cycles', 'tree_grid'])

In [67]:
explainer_name = 'subgraphX'
filepath = f'{eval_metrics_path}{explainer_name}{common_name}{0}.csv'
eval_df = pd.read_csv(filepath, header=[0, 1], index_col=[0, 1])
eval_df

FileNotFoundError: [Errno 2] No such file or directory: 'eval_metrics/subgraphX_nc_metrics_0.csv'

In [69]:
subx_means = []
for dataset in datasets:
    csv_files = [f'{eval_metrics_path}{explainer_name}{common_name}{dataset}_{i}.csv' for i in range(runs)]
    output_file = f'{results_path}{explainer_name}{common_name}{dataset}.txt'
    subx_mean, subx_std, subx_combined = compute_mean_std(csv_files, output_file)
    subx_means.append(subx_mean)

In [74]:
subx_flat = pd.concat([subx_mean.stack(level='Dataset').reset_index() for subx_mean in subx_means], axis=0).reset_index(drop=True)
nc_flat.append(subx_flat)
subx_flat.head()

  subx_flat = pd.concat([subx_mean.stack(level='Dataset').reset_index() for subx_mean in subx_means], axis=0).reset_index(drop=True)
  subx_flat = pd.concat([subx_mean.stack(level='Dataset').reset_index() for subx_mean in subx_means], axis=0).reset_index(drop=True)
  subx_flat = pd.concat([subx_mean.stack(level='Dataset').reset_index() for subx_mean in subx_means], axis=0).reset_index(drop=True)


Metric,Explainer,Model,Dataset,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
0,subgraphx,gat,ba_shapes,0.764747,0.678469,0.495948,0.797619,0.359524,0.255479,0.70991,18.96497
1,subgraphx,gcn,ba_shapes,0.780949,0.684938,0.519742,0.85,0.385714,0.37037,0.712221,17.306501
2,subgraphx,gin,ba_shapes,0.834538,0.753091,0.652661,0.883333,0.092857,0.126145,0.893297,15.541903
3,subgraphx,graphsage,ba_shapes,0.793288,0.725661,0.547145,0.659524,0.0,0.002561,0.794682,15.272357
4,subgraphx,gat,tree_cycles,0.757308,0.704224,0.673102,0.591429,0.022857,0.0,0.735778,0.225922


## CIExplainer

In [75]:
explainer_name = 'ciexplainer'
filepath = f'{eval_metrics_path}{explainer_name}{common_name}{0}.csv'
eval_df = pd.read_csv(filepath, header=[0, 1], index_col=[0, 1])
eval_df

Unnamed: 0_level_0,Dataset,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_shapes,ba_community,ba_community,...,tree_cycles,tree_cycles,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid,tree_grid
Unnamed: 0_level_1,Metric,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time,accuracy,auroc,...,characterization_score,inference_time,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
Explainer,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
ciexplainer,gcn,0.380453,0.77276,0.589558,0.0,0.0,0.53729,0.0,0.089431,0.106709,0.594899,...,0.0,0.03751,0.880755,0.683772,0.935489,0.0,0.0,0.0,0.0,0.03924
ciexplainer,graphsage,0.380453,0.872378,0.752646,0.0,0.0,0.812944,0.0,0.054234,0.106709,0.600988,...,0.157895,0.023266,0.880755,0.673573,0.930941,0.0,0.0,0.0,0.0,0.024334
ciexplainer,gat,0.380453,0.679372,0.435025,0.0,0.0,0.765896,0.0,0.117609,0.106709,0.570315,...,0.0,0.048991,0.880755,0.646745,0.911652,0.013699,0.0,0.0,0.027027,0.052016
ciexplainer,gin,0.380453,0.802617,0.625736,0.0,0.0,0.62811,0.0,0.057344,0.10657,0.594459,...,0.571429,0.023936,0.769795,0.676928,0.870401,0.69863,0.0,0.0,0.822581,0.025334


In [76]:
csv_files = [f'{eval_metrics_path}{explainer_name}{common_name}{i}.csv' for i in range(runs)]
output_file = f'{results_path}{explainer_name}{common_name}.txt'
ci_mean, ci_std, ci_combined = compute_mean_std(csv_files, output_file)

In [77]:
ci_flat = ci_mean.stack(level='Dataset').reset_index()
nc_flat.append(ci_flat)
ci_flat.head()

  ci_flat = ci_mean.stack(level='Dataset').reset_index()


Metric,Explainer,Model,Dataset,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
0,ciexplainer,gat,ba_community,0.106709,0.576003,0.180856,0.0,0.0,0.751071,0.0,5.04674
1,ciexplainer,gat,ba_shapes,0.380453,0.745986,0.535092,0.0,0.0,0.722992,0.0,0.119378
2,ciexplainer,gat,tree_cycles,0.680646,0.770457,0.826939,0.0,0.0,0.0,0.0,0.046867
3,ciexplainer,gat,tree_grid,0.880755,0.659811,0.919534,0.009589,0.0,0.0,0.018847,0.049912
4,ciexplainer,gcn,ba_community,0.106709,0.583855,0.202321,0.0,0.0,0.905675,0.0,3.766492


## Ranking Explainers for Node Classification

In [83]:
nc_flat = pd.concat(nc_flat, axis=0).reset_index(drop=True)
nc_flat = nc_flat[~nc_flat['Dataset'].isin(['ba_community', 'tree_cycles'])].reset_index(drop=True).drop(['auroc', 'unfaithfulness'], axis=1)
nc_flat.head()

array(['ba_shapes', 'tree_grid'], dtype=object)

In [189]:
nc_rank, nc_points = overall_rank(nc_flat)
# nc_points = nc_points[['Model', 'Explainer', 'Dataset', 'accuracy', 'iou', 'fid+', 'fid-', 'characterization_score', 'inference_time']]
# nc_points.sort_values(by=['Dataset', 'Model']).to_csv('nc_points.csv', index=False)
# nc_rank.to_csv('nc_rank.csv', index=False)
nc_rank

Unnamed: 0,Explainer,Score,Avg_Inference_Time
0,subgraphx,3.75,8.497048
1,ciexplainer,3.25,0.056781
2,gnnexplainer,3.125,0.460729
3,random_explainer,2.1875,0.000298
4,pgexplainer,1.9375,0.002607


# Graph classification

In [11]:
common_name = '_gc_metrics_'
gc_flat = []

## Random Explainer

In [12]:
explainer_name = 'random_explainer'
filepath = f'{eval_metrics_path}{explainer_name}{common_name}{0}.csv'
eval_df = pd.read_csv(filepath, header=[0, 1], index_col=[0, 1])
eval_df

Unnamed: 0_level_0,Dataset,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,mutag,mutag,mutag,mutag,mutag,mutag,mutag,mutag
Unnamed: 0_level_1,Metric,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
Explainer,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
random_explainer,gcn,0.4984,0.4936,0.170185,0.0,0.0,0.0,0.0,0.000264,0.503845,0.511774,0.146086,0.598039,0.627451,0.0,0.459101,0.000258
random_explainer,graphsage,0.49,0.4754,0.158976,0.06,0.05,0.0,0.112871,0.000267,0.498666,0.515265,0.15063,0.676471,0.656863,0.0,0.455317,0.00026
random_explainer,gat,0.5124,0.5161,0.181516,0.07,0.08,0.0,0.130101,0.000254,0.496342,0.513759,0.151074,0.607843,0.568627,0.0,0.504624,0.000253
random_explainer,gin,0.4852,0.4669,0.152349,0.08,0.04,0.0,0.147692,0.000332,0.515654,0.49886,0.141601,0.872549,0.882353,0.0,0.207338,0.000329


In [13]:
csv_files = [f'{eval_metrics_path}{explainer_name}{common_name}{i}.csv' for i in range(runs)]
output_file = f'{results_path}{explainer_name}{common_name}.txt'
rnd_mean, rnd_std, rnd_combined = compute_mean_std(csv_files, output_file)

In [14]:
rnd_flat = rnd_mean.stack(level='Dataset').reset_index()
gc_flat.append(rnd_flat)
rnd_flat.head()

  rnd_flat = rnd_mean.stack(level='Dataset').reset_index()


Metric,Explainer,Model,Dataset,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
0,random_explainer,gat,ba_2motif,0.4982,0.4992,0.170766,0.053,0.055,0.0,0.099748,0.000254
1,random_explainer,gat,mutag,0.501376,0.50051,0.14695,0.557843,0.560784,0.0,0.490966,0.000256
2,random_explainer,gcn,ba_2motif,0.4948,0.49353,0.166638,0.002,0.0,0.0,0.00396,0.000257
3,random_explainer,gcn,mutag,0.496521,0.495646,0.140957,0.584314,0.559804,0.0,0.501393,0.000256
4,random_explainer,gin,ba_2motif,0.5,0.49127,0.1668,0.068,0.074,0.0,0.126122,0.000327


## GNNExplainer

In [27]:
explainer_name = 'gnnexplainer'
filepath = f'{eval_metrics_path}{explainer_name}{common_name}{0}.csv'
eval_df = pd.read_csv(filepath, header=[0, 1], index_col=[0, 1])
eval_df

Unnamed: 0_level_0,Dataset,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif
Unnamed: 0_level_1,Metric,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
Explainer,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
gnnexplainer,gcn,0.4296,0.3321,0.018791,0.07,0.0,0.0,0.130841,1.586144
gnnexplainer,graphsage,0.5468,0.4522,0.003024,0.02,0.0,0.0,0.039216,1.009598
gnnexplainer,gat,0.6216,0.4437,0.030756,0.0,0.0,0.0,0.0,1.806368
gnnexplainer,gin,0.7972,0.4869,0.0,0.0,0.26,0.0,0.0,1.024715


In [28]:
csv_files = [f'{eval_metrics_path}{explainer_name}{common_name}{i}.csv' for i in range(runs)]
output_file = f'{results_path}{explainer_name}{common_name}.txt'
gnn_mean, gnn_std, gnn_combined = compute_mean_std(csv_files, output_file)

Results saved to results/gnnexplainer_ba2_gc_metrics_.txt


In [23]:
gnn_flat = gnn_mean.stack(level='Dataset').reset_index()
gc_flat.append(gnn_flat)
gnn_flat.head()

Metric,Explainer,Model,Dataset,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
0,gnnexplainer,gat,mutag,0.728559,0.519515,0.158449,0.54902,0.418627,0.0,0.563478,0.601023
1,gnnexplainer,gcn,mutag,0.885144,0.918891,0.448298,0.584314,0.272549,0.0,0.647648,0.51855
2,gnnexplainer,gin,mutag,0.6041,0.39502,0.051281,0.934314,0.940196,0.0,0.11226,0.346988
3,gnnexplainer,graphsage,mutag,0.75772,0.691789,0.084475,0.498039,0.035294,0.0,0.656428,0.33642
4,gnnexplainer,gat,ba_2motif,0.61816,0.4281,0.030247,0.0,0.0,0.0,0.0,1.831169


## PGExplainer

In [29]:
explainer_name = 'pgexplainer'
filepath = f'{eval_metrics_path}{explainer_name}{common_name}{0}.csv'
eval_df = pd.read_csv(filepath, header=[0, 1], index_col=[0, 1])
eval_df

Unnamed: 0_level_0,Dataset,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,mutag,mutag,mutag,mutag,mutag,mutag,mutag,mutag
Unnamed: 0_level_1,Metric,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
Explainer,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
pgexplainer,gcn,0.9792,0.948,0.896,0.0,0.0,0.0,0.0,0.002145,0.794918,0.689089,0.303918,0.196078,0.284314,0.0,0.307822,0.002215
pgexplainer,graphsage,0.9796,0.9865,0.914667,0.0,0.47,0.0,0.0,0.001421,0.208067,0.314066,0.091442,0.029412,0.764706,0.0,0.052288,0.001475
pgexplainer,gat,0.8132,0.88325,0.711524,0.0,0.57,0.0,0.0,0.00258,0.544204,0.496015,0.240211,0.0,0.588235,0.0,0.0,0.002652
pgexplainer,gin,0.5336,0.69875,0.336089,0.0,0.55,0.0,0.0,0.001573,0.453216,0.275335,0.000613,0.0,0.960784,0.0,0.0,0.001643


In [31]:
csv_files = [f'{eval_metrics_path}{explainer_name}{common_name}{i}.csv' for i in range(runs)]
output_file = f'{results_path}{explainer_name}{common_name}.txt'
pge_mean, pge_std, pge_combined = compute_mean_std(csv_files, output_file)

In [33]:
pge_flat = pge_mean.stack(level='Dataset').reset_index()
gc_flat.append(pge_flat)
pge_flat.head()

Metric,Explainer,Model,Dataset,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
0,pgexplainer,gat,mutag,0.568331,0.612259,0.311178,0.0,0.588235,0.0,0.0,0.002654
1,pgexplainer,gcn,mutag,0.823391,0.51682,0.030392,0.019608,0.584314,0.0,0.030782,0.00221
2,pgexplainer,gin,mutag,0.411994,0.334913,0.042408,0.14902,0.785294,0.0,0.163578,0.00164
3,pgexplainer,graphsage,mutag,0.261614,0.493933,0.15059,0.787255,0.178431,0.0,0.794844,0.001469
4,pgexplainer,gat,ba_2motif,0.2,0.5,0.2,0.0,0.0,0.0,0.0,0.002614


## SubgraphX

In [34]:
explainer_name = 'subgraphx'
filepath = f'{eval_metrics_path}{explainer_name}{common_name}{0}.csv'
eval_df = pd.read_csv(filepath, header=[0, 1], index_col=[0, 1])
eval_df

Unnamed: 0_level_0,Dataset,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,mutag,mutag,mutag,mutag,mutag,mutag,mutag,mutag
Unnamed: 0_level_1,Metric,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
Explainer,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
subgraphx,gcn,0.7028,0.515,0.182698,0.0,0.14,0.0,0.0,2.109811,0.680954,0.453722,0.081307,0.156863,0.5,0.0,0.238806,2.051497
subgraphx,graphsage,0.7172,0.5375,0.223321,0.0,0.22,0.0,0.0,1.849006,0.693008,0.463227,0.088524,0.127451,0.470588,0.0,0.205443,1.895684
subgraphx,gat,0.7112,0.53075,0.214024,0.01,0.23,0.0,0.019744,2.348726,0.680667,0.449292,0.075117,0.264706,0.470588,0.0,0.352941,2.24622
subgraphx,gin,0.7272,0.55125,0.243738,0.0,0.27,0.0,0.0,1.942103,0.670749,0.435641,0.054928,0.156863,0.872549,0.0,0.140636,1.900374


In [36]:
csv_files = [f'{eval_metrics_path}{explainer_name}{common_name}{i}.csv' for i in range(runs)]
output_file = f'{results_path}{explainer_name}{common_name}.txt'
subx_mean, subx_std, subx_combined = compute_mean_std(csv_files, output_file)

In [38]:
subx_flat = subx_mean.stack(level='Dataset').reset_index()
gc_flat.append(subx_flat)
subx_flat.head()

Metric,Explainer,Model,Dataset,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
0,subgraphx,gat,mutag,0.681245,0.447141,0.07174,0.256863,0.485294,0.0,0.342126,2.216659
1,subgraphx,gcn,mutag,0.687051,0.457653,0.080293,0.17549,0.47451,0.0,0.261608,2.094001
2,subgraphx,gin,mutag,0.680503,0.444928,0.065365,0.166667,0.836275,0.0,0.163633,1.950709
3,subgraphx,graphsage,mutag,0.686904,0.458026,0.081083,0.134314,0.503922,0.0,0.210319,1.933733
4,subgraphx,gat,ba_2motif,0.72096,0.5406,0.223126,0.0,0.264,0.0,0.0,1.051946


## CIExplainer

In [40]:
explainer_name = 'ciexplainer'
filepath = f'{eval_metrics_path}{explainer_name}{common_name}{0}.csv'
eval_df = pd.read_csv(filepath, header=[0, 1], index_col=[0, 1])
eval_df

Unnamed: 0_level_0,Dataset,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,ba_2motif,mutag,mutag,mutag,mutag,mutag,mutag,mutag,mutag
Unnamed: 0_level_1,Metric,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
Explainer,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
ciexplainer,gcn,0.2,0.47315,0.106111,0.0,0.0,0.0,0.0,0.102509,0.411281,0.413316,0.092463,0.0,0.0,0.0,0.0,0.049631
ciexplainer,graphsage,0.2,0.52145,0.195476,0.0,0.0,0.0,0.0,0.057841,0.411281,0.393242,0.082924,0.0,0.0,0.0,0.0,0.029857
ciexplainer,gat,0.2,0.4947,0.166786,0.0,0.0,0.0,0.0,0.122662,0.411281,0.416959,0.094535,0.0,0.0,0.0,0.0,0.064182
ciexplainer,gin,0.2,0.51035,0.183095,0.0,0.0,0.0,0.0,0.057581,0.411281,0.394822,0.083929,0.0,0.0,0.0,0.0,0.030125


In [41]:
csv_files = [f'{eval_metrics_path}{explainer_name}{common_name}{i}.csv' for i in range(runs)]
output_file = f'{results_path}{explainer_name}{common_name}.txt'
ci_mean, ci_std, ci_combined = compute_mean_std(csv_files, output_file)

In [42]:
ci_flat = ci_mean.stack(level='Dataset').reset_index()
gc_flat.append(ci_flat)
ci_flat.head()

  ci_flat = ci_mean.stack(level='Dataset').reset_index()


Metric,Explainer,Model,Dataset,accuracy,auroc,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
0,ciexplainer,gat,ba_2motif,0.2,0.50598,0.171794,0.0,0.0,0.0,0.0,0.123686
1,ciexplainer,gat,mutag,0.411281,0.407818,0.08946,0.0,0.0,0.0,0.0,0.065192
2,ciexplainer,gcn,ba_2motif,0.2,0.48764,0.151861,0.0,0.0,0.0,0.0,0.095096
3,ciexplainer,gcn,mutag,0.409615,0.408473,0.089303,0.0,0.0,0.0,0.0,0.049808
4,ciexplainer,gin,ba_2motif,0.2,0.541795,0.230476,0.0,0.0,0.0,0.0,0.058015


## Ranking Explainers for Graph Classification

In [43]:
gc_flat = pd.concat(gc_flat, axis=0).reset_index(drop=True)
gc_flat.drop(['auroc', 'unfaithfulness'], axis=1, inplace=True)
gc_flat.head()

Metric,Explainer,Model,Dataset,accuracy,iou,fid+,fid-,characterization_score,inference_time
0,random_explainer,gat,ba_2motif,0.4982,0.170766,0.053,0.055,0.099748,0.000254
1,random_explainer,gat,mutag,0.501376,0.14695,0.557843,0.560784,0.490966,0.000256
2,random_explainer,gcn,ba_2motif,0.4948,0.166638,0.002,0.0,0.00396,0.000257
3,random_explainer,gcn,mutag,0.496521,0.140957,0.584314,0.559804,0.501393,0.000256
4,random_explainer,gin,ba_2motif,0.5,0.1668,0.068,0.074,0.126122,0.000327


In [45]:
gc_rank, gc_points = overall_rank(gc_flat)
# gc_points = gc_points[['Model', 'Explainer', 'Dataset', 'accuracy', 'iou', 'fid+', 'fid-', 'characterization_score', 'inference_time']]
# gc_points.sort_values(by=['Dataset', 'Model']).to_csv('gc_points.csv', index=False)
# gc_rank.to_csv('gc_rank.csv', index=False)
gc_rank

Unnamed: 0,Explainer,Score,Avg_Inference_Time
0,gnnexplainer,3.34375,0.904211
1,random_explainer,3.1875,0.000275
2,subgraphx,2.9375,1.476803
3,pgexplainer,2.59375,0.001967
4,ciexplainer,2.28125,0.063673


# Link Prediction

In [10]:
common_name = '_lp_metrics_'
lp_flat = []

## Random Explainer

In [133]:
explainer_name = 'random_explainer'
filepath = f'{eval_metrics_path}{explainer_name}{common_name}{0}.csv'
eval_df = pd.read_csv(filepath, header=[0, 1], index_col=[0, 1])
eval_df

Unnamed: 0_level_0,Dataset,medref,medref,medref,medref,medref,cora,cora,cora,cora,cora
Unnamed: 0_level_1,Metric,fid+,fid-,unfaithfulness,characterization_score,inference_time,fid+,fid-,unfaithfulness,characterization_score,inference_time
Explainer,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
random_explainer,gcn,0.03,0.025,0.0,0.058209,0.000262,0.21,0.2,0.0,0.332673,0.000257
random_explainer,graphsage,0.025,0.025,0.0,0.04875,0.000268,0.135,0.105,0.0,0.234612,0.000508
random_explainer,gat,0.03,0.03,0.0,0.0582,0.000266,0.09,0.08,0.0,0.16396,0.000329
random_explainer,gin,0.04,0.035,0.0,0.076816,0.000313,0.145,0.125,0.0,0.248775,0.000282


In [134]:
csv_files = [f'{eval_metrics_path}{explainer_name}{common_name}{i}.csv' for i in range(runs)]
output_file = f'{results_path}{explainer_name}{common_name}.txt'
rnd_mean, rnd_std, rnd_combined = compute_mean_std(csv_files, output_file)

In [135]:
rnd_flat = rnd_mean.stack(level='Dataset').reset_index()
lp_flat.append(rnd_flat)
rnd_flat.head()

  rnd_flat = rnd_mean.stack(level='Dataset').reset_index()


Metric,Explainer,Model,Dataset,fid+,fid-,unfaithfulness,characterization_score,inference_time
0,random_explainer,gat,cora,0.0765,0.073,0.0,0.141129,0.000264
1,random_explainer,gat,medref,0.03,0.03,0.0,0.0582,0.000256
2,random_explainer,gcn,cora,0.2045,0.21,0.0,0.324554,0.000254
3,random_explainer,gcn,medref,0.0285,0.028,0.0,0.055257,0.000255
4,random_explainer,gin,cora,0.1415,0.1375,0.0,0.242785,0.000285


## GNNExplainer

In [6]:
explainer_name = 'gnnexplainer'
filepath = f'{eval_metrics_path}{explainer_name}_2{common_name}{0}.csv'
eval_df = pd.read_csv(filepath, header=[0, 1], index_col=[0, 1])
eval_df

Unnamed: 0_level_0,Dataset,cora,cora,cora,cora,cora,cora,medref,medref,medref,medref,medref,medref
Unnamed: 0_level_1,Metric,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
Explainer,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
gnnexplainer,gcn,0,0.135,0.065,0.0,0.235935,1.373453,0,0.015,0.015,0.0,0.02955,1.367822
gnnexplainer,graphsage,0,0.035,0.07,0.0,0.067461,2.953802,0,0.025,0.025,0.0,0.04875,0.868911
gnnexplainer,gat,0,0.04,0.08,0.0,0.076667,1.536021,0,0.03,0.03,0.0,0.0582,1.72224
gnnexplainer,gin,0,0.085,0.18,0.0,0.154033,3.019116,0,0.06,0.03,0.0,0.11301,0.932831


In [9]:
csv_files = [f'{eval_metrics_path}{explainer_name}_2{common_name}{i}.csv' for i in range(runs)]
output_file = f'{results_path}{explainer_name}_2{common_name}.txt'
gnn_mean, gnn_std, gnn_combined = compute_mean_std(csv_files, output_file, save=True)

Results saved to results/gnnexplainer_2_lp_metrics_.txt


In [138]:
gnn_flat = gnn_mean.stack(level='Dataset').reset_index()
lp_flat.append(gnn_flat)
gnn_flat.head()

  gnn_flat = gnn_mean.stack(level='Dataset').reset_index()


Metric,Explainer,Model,Dataset,fid+,fid-,unfaithfulness,characterization_score,inference_time
0,gnnexplainer,gat,cora,0.046,0.1,0.0,0.087462,0.534283
1,gnnexplainer,gat,medref,0.03,0.03,0.0,0.0582,0.583074
2,gnnexplainer,gcn,cora,0.1335,0.1,0.0,0.232481,0.448388
3,gnnexplainer,gcn,medref,0.01,0.015,0.0,0.019799,0.485992
4,gnnexplainer,gin,cora,0.1895,0.215,0.0,0.305277,1.003182


## PGExplainer

In [11]:
explainer_name = 'pgexplainer'
filepath = f'{eval_metrics_path}{explainer_name}_2{common_name}{0}.csv'
eval_df = pd.read_csv(filepath, header=[0, 1], index_col=[0, 1])
eval_df

Unnamed: 0_level_0,Dataset,cora,cora,cora,cora,cora,cora,medref,medref,medref,medref,medref,medref
Unnamed: 0_level_1,Metric,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
Explainer,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
pgexplainer,gcn,0,0.0,0.095,0.0,0.0,0.002368,0,0.0,0.015,0.0,0.0,0.002591
pgexplainer,graphsage,0,0.0,0.19,0.0,0.0,0.003596,0,0.0,0.025,0.0,0.0,0.00206
pgexplainer,gat,0,0.0,0.09,0.0,0.0,0.002748,0,0.0,0.03,0.0,0.0,0.002786
pgexplainer,gin,0,0.0,0.175,0.0,0.0,0.003704,0,0.0,0.03,0.0,0.0,0.002146


In [13]:
csv_files = [f'{eval_metrics_path}{explainer_name}_2{common_name}{i}.csv' for i in range(runs)]
output_file = f'{results_path}{explainer_name}_2{common_name}.txt'
pge_mean, pge_std, pge_combined = compute_mean_std(csv_files, output_file, save=True)

Results saved to results/pgexplainer_2_lp_metrics_.txt


In [141]:
pge_flat = pge_mean.stack(level='Dataset').reset_index()
lp_flat.append(pge_flat)
pge_flat.head()

  pge_flat = pge_mean.stack(level='Dataset').reset_index()


Metric,Explainer,Model,Dataset,fid+,fid-,unfaithfulness,characterization_score,inference_time
0,pgexplainer,gat,cora,0.0,0.105,0.0,0.0,0.002744
1,pgexplainer,gat,medref,0.0,0.03,0.0,0.0,0.002882
2,pgexplainer,gcn,cora,0.0,0.14,0.0,0.0,0.002355
3,pgexplainer,gcn,medref,0.0,0.015,0.0,0.0,0.002576
4,pgexplainer,gin,cora,0.0,0.205,0.0,0.0,0.003704


## CIExplainer

In [142]:
explainer_name = 'ciexplainer'
filepath = f'{eval_metrics_path}{explainer_name}{common_name}{0}.csv'
eval_df = pd.read_csv(filepath, header=[0, 1], index_col=[0, 1])
eval_df

FileNotFoundError: [Errno 2] No such file or directory: 'eval_metrics/ciexplainer_lp_metrics_0.csv'

In [144]:
from data_store import lp_datasets
ci_means = []
for dataset in lp_datasets:
    csv_files = [f'{eval_metrics_path}{explainer_name}{common_name}{dataset}_{i}.csv' for i in range(runs)]
    output_file = f'{results_path}{explainer_name}{common_name}{dataset}.txt'
    ci_mean, ci_std, ci_combined = compute_mean_std(csv_files, output_file)
    ci_means.append(ci_mean)

In [145]:
ci_flat = pd.concat([ci_mean.stack(level='Dataset').reset_index() for ci_mean in ci_means], axis=0).reset_index(drop=True)
lp_flat.append(ci_flat)
ci_flat.head()

  ci_flat = pd.concat([ci_mean.stack(level='Dataset').reset_index() for ci_mean in ci_means], axis=0).reset_index(drop=True)
  ci_flat = pd.concat([ci_mean.stack(level='Dataset').reset_index() for ci_mean in ci_means], axis=0).reset_index(drop=True)


Metric,Explainer,Model,Dataset,iou,fid+,fid-,unfaithfulness,characterization_score,inference_time
0,ciexplainer,gat,cora,0.0,0.0,0.0,0.0,0.0,0.150288
1,ciexplainer,gcn,cora,0.0,0.0,0.0,0.0,0.0,0.11505
2,ciexplainer,gin,cora,0.0,0.0,0.0055,0.0,0.0,0.073897
3,ciexplainer,graphsage,cora,0.0,0.0,0.0,0.0,0.0,0.076307
4,ciexplainer,gat,medref,0.0,0.002,0.03,0.0,0.003979,6.9057


## Ranking Explainers for Link Prediction

In [148]:
lp_flat = pd.concat(lp_flat, axis=0).reset_index(drop=True)
lp_flat.drop(['unfaithfulness', 'iou'], axis=1, inplace=True)
lp_flat.head()

Metric,Explainer,Model,Dataset,fid+,fid-,characterization_score,inference_time,iou
0,random_explainer,gat,cora,0.0765,0.073,0.141129,0.000264,
1,random_explainer,gat,medref,0.03,0.03,0.0582,0.000256,
2,random_explainer,gcn,cora,0.2045,0.21,0.324554,0.000254,
3,random_explainer,gcn,medref,0.0285,0.028,0.055257,0.000255,
4,random_explainer,gin,cora,0.1415,0.1375,0.242785,0.000285,


In [191]:
lp_rank, lp_points = overall_rank(lp_flat, metrics=['fid+', 'fid-', 'characterization_score'])
# lp_points = lp_points[['Model', 'Explainer', 'Dataset', 'fid+', 'fid-', 'characterization_score', 'inference_time']]
# lp_points.sort_values(by=['Dataset', 'Model']).to_csv('lp_points.csv', index=False)
# lp_rank.to_csv('lp_rank.csv', index=False)
lp_rank

Unnamed: 0,Explainer,Score,Avg_Inference_Time
0,gnnexplainer,4.270833,0.586458
1,random_explainer,4.166667,0.00027
2,ciexplainer,3.4375,2.446676
3,pgexplainer,2.8125,0.00277


In [194]:
medref_flat = lp_flat[lp_flat['Dataset'] == 'medref'].reset_index(drop=True)
medref_rank, medref_points = overall_rank(medref_flat, metrics=['fid+', 'fid-', 'characterization_score'])
medref_rank

Unnamed: 0,Explainer,Score,Avg_Inference_Time
0,gnnexplainer,4.791667,0.422761
1,random_explainer,4.375,0.000271
2,ciexplainer,3.541667,4.789466
3,pgexplainer,3.333333,0.002445
