In [2]:
import pandas as pd
import numpy as np

# Number of runs stored in each results folder. The loader functions below read
# the files "<i>_fidelities.csv", "<i>_sparsities.csv", "<i>_validity_plus.csv"
# and "<i>_validity_minus.csv" for every i in range(no_of_runs).
no_of_runs = 10

## MODIFIED GNN-SUBNET

The following cell reads all data from the results files and pre-processes it. The pre-processed data is then aggregated into a single table of results.

In [3]:
def obtain_results_from_folder(path_to_results, runs=None):
    """
    Collect the per-run result files of one folder into a single table.

    For every run index ``i`` in ``range(runs)`` the headerless files
    ``{i}_fidelities.csv``, ``{i}_sparsities.csv``, ``{i}_validity_plus.csv``
    and ``{i}_validity_minus.csv`` are read from ``path_to_results``.

    * Validity files: columns 0-3 are named ``Iteration``, ``Model Accuracy``,
      ``Threshold Value`` and ``validity+``; column 3 of the validity- file is
      attached as ``validity-``. The two files of a run are paired row by row,
      and rows repeated across runs are kept only once.
    * Fidelity files: column 0 is used as the iteration and column 2 as the
      fidelity value. The value is looked up by iteration number (first
      occurrence wins), not by row position.
    * Sparsity files: reduced to the mean of all their entries; the value of
      run ``i`` is attached to the rows of iteration ``i``.

    Parameters
    ----------
    path_to_results : str
        Folder containing the result files.
    runs : int, optional
        Number of runs to read. Defaults to the notebook-level ``no_of_runs``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Iteration``, ``Model Accuracy``, ``Threshold Value``,
        ``validity+``, ``validity-``, ``fidelity`` and ``sparsity``. The index
        keeps the row labels of the validity files the rows were read from.

    Raises
    ------
    ValueError
        If ``runs`` is not positive, or if the validity+ and validity- files of
        a run do not have the same number of rows.
    KeyError
        If an iteration of the validity files has no row in the fidelity files.
    IndexError
        If an iteration number has no matching sparsity file (iteration >= runs).
    """
    if runs is None:
        runs = no_of_runs
    if runs <= 0:
        raise ValueError(f'runs must be a positive number of runs, got {runs}')

    validity_names = {0: 'Iteration', 1: 'Model Accuracy', 2: 'Threshold Value', 3: 'validity+'}

    fidelities = []
    sparsities = []
    validities = []

    for i in range(runs):

        # Save the fidelity
        fidelity_file = f'{path_to_results}/{i}_fidelities.csv'
        fidelities.append(pd.read_csv(fidelity_file, index_col=False, header=None))

        # Save the sparsity: one scalar per run. np.nanmean over the raw values
        # gives the overall mean on every pandas version (DataFrame.mean with
        # axis=None only reduces over both axes from pandas 2.0 on).
        sparsity_file = f'{path_to_results}/{i}_sparsities.csv'
        current_sparsity = pd.read_csv(sparsity_file, index_col=False, header=None)
        sparsities.append(np.nanmean(current_sparsity.to_numpy(dtype=float)))

        # Read Validity+ and Validity- of this run
        validity_plus_file = f'{path_to_results}/{i}_validity_plus.csv'
        current_validity_plus = pd.read_csv(validity_plus_file, index_col=False, header=None)

        validity_minus_file = f'{path_to_results}/{i}_validity_minus.csv'
        current_validity_minus = pd.read_csv(validity_minus_file, index_col=False, header=None)

        if len(current_validity_plus) != len(current_validity_minus):
            raise ValueError(
                f'{validity_plus_file} has {len(current_validity_plus)} rows but '
                f'{validity_minus_file} has {len(current_validity_minus)} rows'
            )

        # Pair the two validity files of the same run row by row, so the pairing
        # never depends on how the concatenated index labels happen to line up.
        current_validities = current_validity_plus.rename(columns=validity_names)
        current_validities['validity-'] = current_validity_minus.iloc[:, 3].to_numpy()
        validities.append(current_validities)

    # Obtain a single data frame; rows repeated across runs are kept once.
    final_table = pd.concat(validities).drop_duplicates().copy()

    # Fidelity keyed by iteration number (first occurrence wins). A positional
    # lookup would silently shift as soon as one iteration had two distinct rows.
    fidelities = pd.concat(fidelities, ignore_index=True)
    fidelity_by_iteration = pd.Series(
        fidelities.iloc[:, 2].to_numpy(),
        index=fidelities.iloc[:, 0].astype(int).to_numpy(),
    )
    fidelity_by_iteration = fidelity_by_iteration[
        ~fidelity_by_iteration.index.duplicated(keep='first')
    ]

    iterations = final_table['Iteration'].astype(int).to_numpy()
    # .loc raises KeyError for an iteration without a fidelity row.
    final_table['fidelity'] = fidelity_by_iteration.loc[iterations].to_numpy()
    final_table['sparsity'] = [sparsities[iteration] for iteration in iterations]

    return final_table

The table below shows the four metrics (validity+, validity-, fidelity and sparsity) for each iteration of the explainer, at each of the two threshold values, 30 and 50.

In [4]:
# Folder with the final runs of the modified GNN-SubNet explainer.
path_to_results_modified = "results_modified_alg_final"

# Load every run of that folder into one table and display it.
final_table_modified_alg = obtain_results_from_folder(
    path_to_results=path_to_results_modified,
)
final_table_modified_alg

Unnamed: 0,Iteration,Model Accuracy,Threshold Value,validity+,validity-,fidelity,sparsity
0,0.0,86.25,30.0,0.15,0.95,0.8725,0.01638
1,0.0,86.25,50.0,0.1125,0.9125,0.8725,0.01638
2,1.0,68.75,30.0,0.1125,0.9,0.9425,0.015952
3,1.0,68.75,50.0,0.1125,0.9125,0.9425,0.015952
4,2.0,76.25,30.0,0.85,0.9375,0.9425,0.013023
5,2.0,76.25,50.0,0.925,0.9375,0.9425,0.013023
6,3.0,78.75,30.0,0.25,0.8375,0.86375,0.025844
7,3.0,78.75,50.0,0.5875,0.85,0.86375,0.025844
8,4.0,62.5,30.0,0.175,0.9125,0.92875,0.02552
9,4.0,62.5,50.0,0.175,0.925,0.92875,0.02552


This table contains the mean of every metric across all runs, grouped by threshold value.

In [5]:
# Average every metric over all iterations, separately for each threshold value;
# the averaged iteration number carries no meaning and is dropped.
final_table_reduced_modified_alg = (
    final_table_modified_alg
    .groupby(['Threshold Value'])
    .mean()
    .drop(columns=['Iteration'])
)
final_table_reduced_modified_alg

Unnamed: 0_level_0,Model Accuracy,validity+,validity-,fidelity,sparsity
Threshold Value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
30.0,75.75,0.32375,0.815,0.861375,0.019166
50.0,75.75,0.3975,0.8725,0.861375,0.019166


## GNN-SUBNET

The following code cells repeat the process to extract data related to gnn-subnet.

In [6]:
# Folder with the final runs of the original GNN-SubNet explainer.
path_to_results_gnn_subnet = "results_gnn_subnet_final"

# Load every run of that folder into one table and display it.
final_table_gnn_subnet = obtain_results_from_folder(
    path_to_results=path_to_results_gnn_subnet,
)
final_table_gnn_subnet

Unnamed: 0,Iteration,Model Accuracy,Threshold Value,validity+,validity-,fidelity,sparsity
0,0.0,63.75,30.0,0.1125,0.5625,0.67,0.04468
1,0.0,63.75,50.0,0.075,0.65,0.67,0.04468
2,1.0,76.25,30.0,0.2375,0.775,0.83625,0.050157
3,1.0,76.25,50.0,0.25,0.9,0.83625,0.050157
4,2.0,86.25,30.0,0.5375,0.95,0.97375,0.038774
5,2.0,86.25,50.0,0.875,0.9875,0.97375,0.038774
6,3.0,76.25,30.0,0.2125,0.4375,0.87875,0.029608
7,3.0,76.25,50.0,0.4875,0.8875,0.87875,0.029608
8,4.0,73.75,30.0,0.2875,0.625,0.85625,0.045901
9,4.0,73.75,50.0,0.675,0.7875,0.85625,0.045901


In [7]:
# Average every metric over all iterations, separately for each threshold value;
# the averaged iteration number carries no meaning and is dropped.
final_table_reduced_gnn_subnet = (
    final_table_gnn_subnet
    .groupby(['Threshold Value'])
    .mean()
    .drop(columns=['Iteration'])
)
final_table_reduced_gnn_subnet

Unnamed: 0_level_0,Model Accuracy,validity+,validity-,fidelity,sparsity
Threshold Value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
30.0,75.0,0.25875,0.73625,0.855625,0.042664
50.0,75.0,0.47125,0.85375,0.855625,0.042664


## Other results

The code cells below obtain the results from other runs of the two explainers, then pre-process and aggregate them.
These results require more pre-processing than those in the other two folders.

In [8]:
def validity_plus_aggregation(validity_plus_row):
    """
    Validity+ of one result row, derived from the confusion-matrix counts that
    the row stores in entries 3 to 6:
        validity+ = (mat[1,0] + mat[0,1] ) / (mat[0,0] + mat[0,1] + mat[1,0] + mat[1,1])

    Entries 4 and 5 hold the two off-diagonal counts; the denominator is the
    sum of all four counts.
    """
    total_count = validity_plus_row[3:7].sum()
    off_diagonal_count = validity_plus_row[4] + validity_plus_row[5]
    return off_diagonal_count / total_count

def validity_minus_aggregation(validity_minus_row):
    """
    Validity- of one result row, derived from the confusion-matrix counts that
    the row stores in entries 3 to 6:
        validity- = (mat[0,0] + mat[1,1] ) /  (mat[0,0] + mat[0,1] + mat[1,0] + mat[1,1])

    Entries 3 and 6 hold the two diagonal counts; the denominator is the sum of
    all four counts.
    """
    total_count = validity_minus_row[3:7].sum()
    diagonal_count = validity_minus_row[3] + validity_minus_row[6]
    return diagonal_count / total_count

In [47]:
def obtain_processed_results(path_to_results, pre_process_fidelities, pre_process_validities, runs=None):
    """
    Collect the per-run result files of one folder into a single table,
    pre-processing the runs whose files are stored in a raw format.

    For every run index ``i`` in ``range(runs)`` the headerless files
    ``{i}_fidelities.csv``, ``{i}_sparsities.csv``, ``{i}_validity_plus.csv``
    and ``{i}_validity_minus.csv`` are read from ``path_to_results``.

    * Fidelity: for runs listed in ``pre_process_fidelities`` the whole file is
      reduced to its mean and assigned to iteration ``i``. For all other runs,
      column 0 is the iteration and column 2 the fidelity. Values are looked up
      by iteration number; if an iteration occurs more than once, rows read
      from files take precedence over pre-processed means, and the first
      occurrence wins.
    * Validity: for runs listed in ``pre_process_validities`` the value is
      computed per row from the counts in columns 3-6 with
      ``validity_plus_aggregation`` / ``validity_minus_aggregation``. For all
      other runs, column 3 already holds the value. The validity+ and
      validity- files of a run are paired row by row.
    * Sparsity: each file is reduced to the mean of all its entries; the value
      of run ``i`` is attached to the rows of iteration ``i``.

    Parameters
    ----------
    path_to_results : str
        Folder containing the result files.
    pre_process_fidelities : collection of int
        Run indices whose fidelity file has to be reduced to its mean.
    pre_process_validities : collection of int
        Run indices whose validity files have to be aggregated from counts.
    runs : int, optional
        Number of runs to read. Defaults to the notebook-level ``no_of_runs``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Iteration``, ``Model Accuracy``, ``Threshold Value``,
        ``Validity+``, ``validity-``, ``fidelity`` and ``sparsity``. The index
        keeps the row labels of the validity files the rows were read from.

    Raises
    ------
    ValueError
        If ``runs`` is not positive, or if the validity+ and validity- files of
        a run do not have the same number of rows.
    KeyError
        If an iteration of the validity files has no fidelity value.
    IndexError
        If an iteration number has no matching sparsity file (iteration >= runs).
    """
    if runs is None:
        runs = no_of_runs
    if runs <= 0:
        raise ValueError(f'runs must be a positive number of runs, got {runs}')

    key_names = {0: 'Iteration', 1: 'Model Accuracy', 2: 'Threshold Value'}
    count_columns = [3, 4, 5, 6]

    fidelities = []
    fidelity_values_only = []
    sparsities = []
    validities = []

    for i in range(runs):

        # Save the fidelity
        fidelity_file = f'{path_to_results}/{i}_fidelities.csv'
        current_fidelity = pd.read_csv(fidelity_file, index_col=False, header=None)
        if i in pre_process_fidelities:
            # np.nanmean over the raw values gives the overall mean on every
            # pandas version (DataFrame.mean(axis=None) only does so from 2.0 on).
            mean_fidelity = np.nanmean(current_fidelity.to_numpy(dtype=float))
            fidelity_values_only.append(pd.DataFrame({'Iteration': [i], 'Fidelity': [mean_fidelity]}))
        else:
            named_fidelity = current_fidelity.rename(columns={0: 'Iteration', 2: 'Fidelity'})
            fidelities.append(named_fidelity[['Iteration', 'Fidelity']])

        # Save the sparsity: one scalar per run.
        sparsity_file = f'{path_to_results}/{i}_sparsities.csv'
        current_sparsity = pd.read_csv(sparsity_file, index_col=False, header=None)
        sparsities.append(np.nanmean(current_sparsity.to_numpy(dtype=float)))

        validity_plus_file = f'{path_to_results}/{i}_validity_plus.csv'
        current_validity_plus = pd.read_csv(validity_plus_file, index_col=False, header=None)

        validity_minus_file = f'{path_to_results}/{i}_validity_minus.csv'
        current_validity_minus = pd.read_csv(validity_minus_file, index_col=False, header=None)

        if len(current_validity_plus) != len(current_validity_minus):
            raise ValueError(
                f'{validity_plus_file} has {len(current_validity_plus)} rows but '
                f'{validity_minus_file} has {len(current_validity_minus)} rows'
            )

        if i not in pre_process_validities:
            # Column 3 already holds the validity values.
            current_validities = current_validity_plus.rename(columns={**key_names, 3: 'Validity+'})
            validity_minus_values = current_validity_minus.iloc[:, 3]
        else:
            # Columns 3-6 hold counts: aggregate them per row, then drop them.
            validity_plus_values = current_validity_plus.apply(validity_plus_aggregation, axis=1)
            validity_minus_values = current_validity_minus.apply(validity_minus_aggregation, axis=1)
            current_validities = current_validity_plus.drop(columns=count_columns).rename(columns=key_names)
            current_validities['Validity+'] = validity_plus_values.to_numpy()

        # Pair the two validity files of the same run row by row, so the pairing
        # never depends on how the concatenated index labels happen to line up.
        current_validities['validity-'] = validity_minus_values.to_numpy()
        validities.append(current_validities)

    # Obtain a single data frame; rows repeated across runs are kept once.
    final_table = pd.concat(validities).drop_duplicates().copy()

    # Either list may be empty (e.g. every run is pre-processed), so the two are
    # concatenated together; with runs >= 1 the combined list is never empty.
    fidelity_table = pd.concat(fidelities + fidelity_values_only, ignore_index=True)
    fidelity_by_iteration = pd.Series(
        fidelity_table['Fidelity'].to_numpy(),
        index=fidelity_table['Iteration'].astype(int).to_numpy(),
    )
    fidelity_by_iteration = fidelity_by_iteration[
        ~fidelity_by_iteration.index.duplicated(keep='first')
    ]

    iterations = final_table['Iteration'].astype(int).to_numpy()
    # .loc raises KeyError for an iteration without a fidelity value.
    final_table['fidelity'] = fidelity_by_iteration.loc[iterations].to_numpy()
    final_table['sparsity'] = [sparsities[iteration] for iteration in iterations]

    return final_table

In [43]:
# Earlier runs of the original GNN-SubNet explainer. No fidelity file needs
# pre-processing; the validity files of runs 5-9 are aggregated from columns 3-6.
path_to_results = "results_gnn_subnet"
final_table_gnn_subnet2 = obtain_processed_results(
    path_to_results,
    pre_process_fidelities=[],
    pre_process_validities=[5, 6, 7, 8, 9],
)

final_table_gnn_subnet2

   Iteration  Fidelity
0        0.0   0.93000
0        0.0   0.93000
1        1.0   0.73000
0        0.0   0.93000
1        1.0   0.73000
2        2.0   0.90125
0        0.0   0.93000
1        1.0   0.73000
2        2.0   0.90125
3        3.0   0.94250
0        0.0   0.93000
1        1.0   0.73000
2        2.0   0.90125
3        3.0   0.94250
4        4.0   0.93500
0        5.0   0.95000
0        5.0   0.95000
1        6.0   0.86875
0        5.0   0.95000
1        6.0   0.86875
2        7.0   0.85125
0        8.0   0.81375
0        8.0   0.81375
1        9.0   0.82250
[]


Unnamed: 0,Iteration,Model Accuracy,Threshold Value,Validity+,validity-,fidelity,sparsity
0,0.0,78.75,30.0,0.4375,0.7625,0.93,0.044254
1,0.0,78.75,50.0,0.7375,0.9375,0.93,0.044254
2,1.0,62.5,30.0,0.0625,0.575,0.73,0.03315
3,1.0,62.5,50.0,0.15,0.6375,0.73,0.03315
4,2.0,65.0,30.0,0.2125,0.9,0.90125,0.03754
5,2.0,65.0,50.0,0.375,0.95,0.90125,0.03754
6,3.0,83.75,30.0,0.8,0.925,0.9425,0.041623
7,3.0,83.75,50.0,0.925,0.925,0.9425,0.041623
8,4.0,80.0,30.0,0.4,0.925,0.935,0.042375
9,4.0,80.0,50.0,0.6375,0.9375,0.935,0.042375


In [48]:
# Average every metric over all iterations, separately for each threshold value.
# NOTE(review): this rebinds final_table_reduced_gnn_subnet, which earlier held
# the aggregate of final_table_gnn_subnet; that earlier table is no longer
# reachable under this name after this cell runs.
final_table_reduced_gnn_subnet = (
    final_table_gnn_subnet2
    .groupby(['Threshold Value'])
    .mean()
    .drop(columns=['Iteration'])
)
final_table_reduced_gnn_subnet

Unnamed: 0_level_0,Model Accuracy,Validity+,validity-,fidelity,sparsity
Threshold Value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
30.0,73.0,0.43,0.85,0.8745,0.043467
50.0,73.0,0.585,0.86625,0.8745,0.043467


In [50]:
# Earlier runs of the modified explainer. Every run except 5 and 6 needs both
# its fidelity file and its validity files pre-processed.
path_to_results = "results_modified_alg"
final_table_modified_alg2 = obtain_processed_results(
    path_to_results,
    pre_process_fidelities=[0, 1, 2, 3, 4, 7, 8, 9],
    pre_process_validities=[0, 1, 2, 3, 4, 7, 8, 9],
)

final_table_modified_alg2

   Iteration  Model Accuracy  Threshold Value  Validity+
0        0.0           80.00             30.0     0.0500
1        0.0           80.00             50.0     0.2375
2        1.0           70.00             30.0     0.2000
3        1.0           70.00             50.0     0.2750
4        2.0           83.75             30.0     0.3250
5        2.0           83.75             50.0     0.3375
6        3.0           76.25             30.0     0.3750
7        3.0           76.25             50.0     0.3750
0        4.0           73.75             30.0     0.2125
1        4.0           73.75             50.0     0.5375
0        5.0           81.25             30.0     0.2500
1        5.0           81.25             50.0     0.3875
2        6.0           83.75             30.0     0.9125
3        6.0           83.75             50.0     0.9250
0        7.0           72.50             30.0     0.4125
1        7.0           72.50             50.0     0.7500
0        8.0           75.00   

Unnamed: 0,Iteration,Model Accuracy,Threshold Value,Validity+,validity-,fidelity,sparsity
0,0.0,80.0,30.0,0.05,0.8875,0.86375,0.021414
1,0.0,80.0,50.0,0.2375,0.95,0.86375,0.021414
2,1.0,70.0,30.0,0.2,0.8375,0.79,0.018214
3,1.0,70.0,50.0,0.275,0.8375,0.79,0.018214
4,2.0,83.75,30.0,0.325,0.675,0.83875,0.019441
5,2.0,83.75,50.0,0.3375,0.7625,0.83875,0.019441
6,3.0,76.25,30.0,0.375,0.8875,0.85125,0.013724
7,3.0,76.25,50.0,0.375,0.9125,0.85125,0.013724
0,4.0,73.75,30.0,0.2125,0.575,0.8825,0.023686
1,4.0,73.75,50.0,0.5375,0.925,0.8825,0.023686


In [51]:
# Average every metric over all iterations, separately for each threshold value.
# NOTE(review): this rebinds final_table_reduced_modified_alg, which earlier
# held the aggregate of final_table_modified_alg; that earlier table is no
# longer reachable under this name after this cell runs.
final_table_reduced_modified_alg = (
    final_table_modified_alg2
    .groupby(['Threshold Value'])
    .mean()
    .drop(columns=['Iteration'])
)
final_table_reduced_modified_alg

Unnamed: 0_level_0,Model Accuracy,Validity+,validity-,fidelity,sparsity
Threshold Value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
30.0,77.75,0.3525,0.82875,0.874,0.019571
50.0,77.75,0.5175,0.89375,0.874,0.019571
