In [1]:
import pandas as pd
import numpy as np

# Number of explainer runs to load: the loading loops below read the result
# files `{i}_*.csv` for every i in range(0, no_of_runs).
no_of_runs = 10

In [2]:
def validity_plus_aggregation(validity_plus_row):
    """
    Computes Validity+ from a result row that carries a flattened 2x2 confusion matrix:
        validity+ = (mat[1,0] + mat[0,1] ) / (mat[0,0] + mat[0,1] + mat[1,0] + mat[1,1])

    The four confusion-matrix cells are taken from positions 3..6 of the row;
    positions 4 and 5 are the off-diagonal cells that form the numerator.

    Parameters
    ----------
    validity_plus_row : list, tuple, numpy array or pandas Series
        Row with at least 7 entries. Entries are always read by position,
        never by label, so the result does not depend on a Series' index.

    Returns
    -------
    numpy scalar
        Fraction of off-diagonal entries. If the four cells sum to zero the
        division follows numpy semantics (nan/inf plus a RuntimeWarning).
    """
    # Use .iloc for pandas objects so that an integer key can never be
    # interpreted as an index label; plain sequences are sliced directly.
    if hasattr(validity_plus_row, "iloc"):
        confusion_cells = validity_plus_row.iloc[3:7]
    else:
        confusion_cells = validity_plus_row[3:7]
    confusion_cells = np.asarray(confusion_cells)

    # confusion_cells[1] / [2] are the original row positions 4 / 5.
    return (confusion_cells[1] + confusion_cells[2]) / confusion_cells.sum()

def validity_minus_aggregation(validity_minus_row):
    """
    Computes Validity- from a result row that carries a flattened 2x2 confusion matrix:
        validity- = (mat[0,0] + mat[1,1] ) /  (mat[0,0] + mat[0,1] + mat[1,0] + mat[1,1])

    The four confusion-matrix cells are taken from positions 3..6 of the row;
    positions 3 and 6 are the diagonal cells that form the numerator.

    Parameters
    ----------
    validity_minus_row : list, tuple, numpy array or pandas Series
        Row with at least 7 entries. Entries are always read by position,
        never by label, so the result does not depend on a Series' index.

    Returns
    -------
    numpy scalar
        Fraction of diagonal entries. If the four cells sum to zero the
        division follows numpy semantics (nan/inf plus a RuntimeWarning).
    """
    # Use .iloc for pandas objects so that an integer key can never be
    # interpreted as an index label; plain sequences are sliced directly.
    if hasattr(validity_minus_row, "iloc"):
        confusion_cells = validity_minus_row.iloc[3:7]
    else:
        confusion_cells = validity_minus_row[3:7]
    confusion_cells = np.asarray(confusion_cells)

    # confusion_cells[0] / [3] are the original row positions 3 / 6.
    return (confusion_cells[0] + confusion_cells[3]) / confusion_cells.sum()

## MODIFIED GNN-SUBNET

The following cell reads all data from the results files and pre-computes it. The pre-processed data will then be aggregated into a single table of results.

In [6]:
path_to_results_modified = "results_modified_alg_final"

# Per-run accumulators. `fidelities`, `validities_plus` and `validities_minus`
# collect one raw frame per run and are rebound to a single DataFrame after the
# loop; `sparsities` stays a plain list holding one mean value per run.
fidelities = []
sparsities = []
validities_plus = []
validities_minus = []

for i in range(no_of_runs):

    # Save the fidelity (raw frame of run i)
    fidelity_file = f'{path_to_results_modified}/{i}_fidelities.csv'
    current_fidelity = pd.read_csv(fidelity_file, index_col=False, header=None)
    fidelities.append(current_fidelity)

    # Save the sparsity: the whole file of run i is reduced to one mean over all cells
    sparsity_file = f'{path_to_results_modified}/{i}_sparsities.csv'
    current_sparsity = pd.read_csv(sparsity_file, index_col=False, header=None)
    sparsities.append(current_sparsity.mean(axis=None))

    # Save Validity+ (raw frame of run i)
    validity_plus_file = f'{path_to_results_modified}/{i}_validity_plus.csv'
    current_validity_plus = pd.read_csv(validity_plus_file, index_col=False, header=None)
    validities_plus.append(current_validity_plus)

    # Save Validity- (raw frame of run i)
    validity_minus_file = f'{path_to_results_modified}/{i}_validity_minus.csv'
    current_validity_minus = pd.read_csv(validity_minus_file, index_col=False, header=None)
    validities_minus.append(current_validity_minus)

# Obtain a single data frame per metric.
# drop_duplicates removes rows that occur in more than one run file
# (presumably the files are cumulative -- TODO confirm).
# The per-file row labels are discarded (ignore_index / reset_index) so that
# every frame gets a unique 0..n-1 index: the table-building cell assigns the
# validity- column by index label, which raises or misaligns when labels repeat.
validities_plus = (
    pd.concat(validities_plus, ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)

validities_minus = (
    pd.concat(validities_minus, ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)

fidelities = (
    pd.concat(fidelities, ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)

sparsities

[0.01637989266073474,
 0.0159515081775615,
 0.01302330640739981,
 0.02584427372519224,
 0.02551984140854875,
 0.02182792921683577,
 0.01556192674999296,
 0.01798695200716152,
 0.01265484935079548,
 0.026909672432245336]

The table below lists the four metrics (validity+, validity-, fidelity and sparsity) for each iteration of the explainer, with one row per threshold value (30 and 50).

In [10]:
# Start from the Validity+ frame; rename() returns a new frame with readable column names.
final_table_modified_alg = validities_plus.rename(
    columns={0: 'Iteration', 1: 'Model Accuracy', 2: 'Threshold Value', 3: 'validity+'}
)

# The Validity- value is the 4th column of its frame; the assignment aligns on the row index.
final_table_modified_alg['validity-'] = validities_minus.iloc[:, 3]

# Every table row looks up fidelity and sparsity by the integer position given
# in its 'Iteration' entry. `sparsities` holds one value per run; `fidelities`
# is assumed to hold one row per run in run order -- TODO confirm.
run_positions = [np.int_(iteration) for iteration in final_table_modified_alg['Iteration']]
fidelities_to_append = [fidelities.iloc[position, 2] for position in run_positions]
sparsities_to_append = [sparsities[position] for position in run_positions]
final_table_modified_alg['fidelity'] = fidelities_to_append
final_table_modified_alg['sparsity'] = sparsities_to_append

final_table_modified_alg

Unnamed: 0,Iteration,Model Accuracy,Threshold Value,validity+,validity-,fidelity,sparsity
0,0.0,86.25,30.0,0.15,0.95,0.8725,0.01638
1,0.0,86.25,50.0,0.1125,0.9125,0.8725,0.01638
2,1.0,68.75,30.0,0.1125,0.9,0.9425,0.015952
3,1.0,68.75,50.0,0.1125,0.9125,0.9425,0.015952
4,2.0,76.25,30.0,0.85,0.9375,0.9425,0.013023
5,2.0,76.25,50.0,0.925,0.9375,0.9425,0.013023
6,3.0,78.75,30.0,0.25,0.8375,0.86375,0.025844
7,3.0,78.75,50.0,0.5875,0.85,0.86375,0.025844
8,4.0,62.5,30.0,0.175,0.9125,0.92875,0.02552
9,4.0,62.5,50.0,0.175,0.925,0.92875,0.02552


This table aggregates all metrics by taking their mean over all runs, separately for each threshold value.

In [11]:
# Mean of every column over all rows sharing a threshold value; the averaged
# 'Iteration' number is dropped from the result.
final_table_reduced_modified_alg = (
    final_table_modified_alg
    .groupby(['Threshold Value'])
    .mean()
    .drop(columns=['Iteration'])
)
final_table_reduced_modified_alg

Unnamed: 0_level_0,Model Accuracy,validity+,validity-,fidelity,sparsity
Threshold Value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
30.0,75.75,0.32375,0.815,0.861375,0.019166
50.0,75.75,0.3975,0.8725,0.861375,0.019166


## GNN-SUBNET

The following code cells repeat the process to extract data related to gnn-subnet.

In [12]:
path_to_results_gnn_subnet = "results_gnn_subnet_final"

# Per-run accumulators. `fidelities`, `validities_plus` and `validities_minus`
# collect one raw frame per run and are rebound to a single DataFrame after the
# loop; `sparsities` stays a plain list holding one mean value per run.
fidelities = []
sparsities = []
validities_plus = []
validities_minus = []

for i in range(no_of_runs):

    # Save the fidelity (raw frame of run i)
    fidelity_file = f'{path_to_results_gnn_subnet}/{i}_fidelities.csv'
    current_fidelity = pd.read_csv(fidelity_file, index_col=False, header=None)
    fidelities.append(current_fidelity)

    # Save the sparsity: the whole file of run i is reduced to one mean over all cells
    sparsity_file = f'{path_to_results_gnn_subnet}/{i}_sparsities.csv'
    current_sparsity = pd.read_csv(sparsity_file, index_col=False, header=None)
    sparsities.append(current_sparsity.mean(axis=None))

    # Save Validity+ (raw frame of run i)
    validity_plus_file = f'{path_to_results_gnn_subnet}/{i}_validity_plus.csv'
    current_validity_plus = pd.read_csv(validity_plus_file, index_col=False, header=None)
    validities_plus.append(current_validity_plus)

    # Save Validity- (raw frame of run i)
    validity_minus_file = f'{path_to_results_gnn_subnet}/{i}_validity_minus.csv'
    current_validity_minus = pd.read_csv(validity_minus_file, index_col=False, header=None)
    validities_minus.append(current_validity_minus)

# Obtain a single data frame per metric.
# drop_duplicates removes rows that occur in more than one run file
# (presumably the files are cumulative -- TODO confirm).
# The per-file row labels are discarded (ignore_index / reset_index) so that
# every frame gets a unique 0..n-1 index: the table-building cell assigns the
# validity- column by index label, which raises or misaligns when labels repeat.
validities_plus = (
    pd.concat(validities_plus, ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)

validities_minus = (
    pd.concat(validities_minus, ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)

fidelities = (
    pd.concat(fidelities, ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)

validities_plus

Unnamed: 0,0,1,2,3
0,0.0,63.75,30.0,0.1125
1,0.0,63.75,50.0,0.075
2,1.0,76.25,30.0,0.2375
3,1.0,76.25,50.0,0.25
4,2.0,86.25,30.0,0.5375
5,2.0,86.25,50.0,0.875
6,3.0,76.25,30.0,0.2125
7,3.0,76.25,50.0,0.4875
8,4.0,73.75,30.0,0.2875
9,4.0,73.75,50.0,0.675


In [13]:
# Start from the Validity+ frame; rename() returns a new frame with readable column names.
# NOTE(review): this column is spelled 'Validity+' while the modified GNN-SubNet
# table uses 'validity+' -- left unchanged here, confirm which spelling is intended.
final_table_gnn_subnet = validities_plus.rename(
    columns={0: 'Iteration', 1: 'Model Accuracy', 2: 'Threshold Value', 3: 'Validity+'}
)

# The Validity- value is the 4th column of its frame; the assignment aligns on the row index.
final_table_gnn_subnet['validity-'] = validities_minus.iloc[:, 3]

# Every table row looks up fidelity and sparsity by the integer position given
# in its 'Iteration' entry. `sparsities` holds one value per run; `fidelities`
# is assumed to hold one row per run in run order -- TODO confirm.
run_positions = [np.int_(iteration) for iteration in final_table_gnn_subnet['Iteration']]
fidelities_to_append = [fidelities.iloc[position, 2] for position in run_positions]
sparsities_to_append = [sparsities[position] for position in run_positions]
final_table_gnn_subnet['fidelity'] = fidelities_to_append
final_table_gnn_subnet['sparsity'] = sparsities_to_append

final_table_gnn_subnet

Unnamed: 0,Iteration,Model Accuracy,Threshold Value,Validity+,validity-,fidelity,sparsity
0,0.0,63.75,30.0,0.1125,0.5625,0.67,0.04468
1,0.0,63.75,50.0,0.075,0.65,0.67,0.04468
2,1.0,76.25,30.0,0.2375,0.775,0.83625,0.050157
3,1.0,76.25,50.0,0.25,0.9,0.83625,0.050157
4,2.0,86.25,30.0,0.5375,0.95,0.97375,0.038774
5,2.0,86.25,50.0,0.875,0.9875,0.97375,0.038774
6,3.0,76.25,30.0,0.2125,0.4375,0.87875,0.029608
7,3.0,76.25,50.0,0.4875,0.8875,0.87875,0.029608
8,4.0,73.75,30.0,0.2875,0.625,0.85625,0.045901
9,4.0,73.75,50.0,0.675,0.7875,0.85625,0.045901


In [14]:
# Mean of every column over all rows sharing a threshold value; the averaged
# 'Iteration' number is dropped from the result.
final_table_reduced_gnn_subnet = (
    final_table_gnn_subnet
    .groupby(['Threshold Value'])
    .mean()
    .drop(columns=['Iteration'])
)
final_table_reduced_gnn_subnet

Unnamed: 0_level_0,Model Accuracy,Validity+,validity-,fidelity,sparsity
Threshold Value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
30.0,75.0,0.25875,0.73625,0.855625,0.042664
50.0,75.0,0.47125,0.85375,0.855625,0.042664
