In [1]:
# Experiment configuration -- see description above for what these variables represent
import os

big_loop_iterations = 10
explainer_runs = 10
thresholds = [30, 50]
samples = [10]

# Label used in the name of the results directory
date = "modified_alg_final"

# Set up directory for result files.
# makedirs(..., exist_ok=True) replaces the exists()/mkdir() pair: it is
# idempotent on re-runs and cannot fail if the directory appears between the
# existence check and the creation.
# NOTE: `dir` shadows the builtin of the same name; the name is kept because
# other cells may refer to it.
dir = f'./results_{date}'
os.makedirs(dir, exist_ok=True)

In [2]:
from GNNSubNet import GNNSubNet as gnn
import pandas as pd
import numpy as np

# Data set selection: exactly one of the two groups below should be active.
# Each group defines the base location (loc), the PPI network file (ppi),
# the list of feature files (feats, one per modality) and the target file (targ).

# # Kidney data set (currently active)  ------------------------- #
loc   = "../TCGA"
ppi   = f'{loc}/KIDNEY_RANDOM_PPI.txt'
feats = [f'{loc}/KIDNEY_RANDOM_Methy_FEATURES.txt', f'{loc}/KIDNEY_RANDOM_mRNA_FEATURES.txt']
targ  = f'{loc}/KIDNEY_RANDOM_TARGET.txt'

# # Synthetic data set (uncomment to use instead)  ------------------------- #
# loc   = "../GNNSubNet/datasets/synthetic/"
# ppi   = f'{loc}/NETWORK_synthetic.txt'
# feats = [f'{loc}/FEATURES_synthetic.txt']
# targ  = f'{loc}/TARGET_synthetic.txt'

# Read in whichever data set is selected above (currently the kidney data)
g = gnn.GNNSubNet(loc, ppi, feats, targ, normalize=False)

# Get some general information about the data dimension
g.summary()

Graph is connected  False
Calculate subgraph ...
Number of subgraphs:  118
Size of subgraph:  2049
Graph is connected  True
##################
# DATASET LOADED #
##################

Number of nodes: 2049
Number of edges: 13588
Number of modalities: 2


In [9]:
def obtain_BAGEL_scores(loc, ppi, feats, targ, gnn_subnet, start_iteration=0):
    """Train, explain and score a fresh GNNSubNet model once per iteration.

    For every iteration ``i`` in ``range(start_iteration, big_loop_iterations)``
    a new model is built and trained, the explainer is run ``explainer_runs``
    times, and fidelity, sparsity and validity (once per entry of
    ``thresholds``) are evaluated. Result files are written to
    ``results_{date}/{i}_*.csv`` at the end of every iteration.

    Reads the notebook-level names ``big_loop_iterations``, ``explainer_runs``,
    ``thresholds`` and ``date``. The ``results_{date}`` directory must already
    exist.

    Parameters
    ----------
    loc, ppi, feats, targ
        Passed unchanged to ``gnn.GNNSubNet``.
    gnn_subnet : bool
        Forwarded to ``g.explain`` to select the explainer.
    start_iteration : int, optional
        Index of the first iteration to run (default 0). Pass a larger value
        to resume an interrupted experiment without repeating earlier
        iterations. The cumulative tables then only contain rows from
        ``start_iteration`` onwards.

    Returns
    -------
    dict
        The summary tables collected over all iterations, as lists of rows:
        ``"fidelity"`` and ``"sparsity"`` hold ``[i, accuracy, mean]``;
        ``"validity_plus"`` and ``"validity_minus"`` hold
        ``[i, accuracy, threshold, value]``; ``"validity_plus_matrix"`` and
        ``"validity_minus_matrix"`` hold
        ``[i, accuracy, threshold, m[0,0], m[0,1], m[1,0], m[1,1]]``.
    """
    fidelity = []
    sparsity = []
    validity_plus = []
    validity_minus = []
    validity_plus_matrix = []
    validity_minus_matrix = []

    # The start index used to be hard-coded to 9, which silently skipped
    # iterations 0-8; it is now an explicit, documented parameter.
    for i in range(start_iteration, big_loop_iterations):
        print(i)
        g = gnn.GNNSubNet(loc, ppi, feats, targ, normalize=False)
        g.train()

        # Check the performance of the classifier
        accuracy = g.accuracy

        # Run the explainer the desired number of times
        g.explain(explainer_runs, gnn_subnet=gnn_subnet)

        # Fidelity
        f = g.evaluate_RDT_fidelity_soft()
        fidelity.append([i, accuracy, np.mean(f)])
        # Writes the cumulative [i, accuracy, mean fidelity] rows gathered so far.
        # NOTE(review): the raw values in `f` are not saved, unlike the raw
        # sparsities below -- confirm which of the two is intended here.
        filename = f"results_{date}/{i}_fidelities.csv"
        np.savetxt(filename, fidelity, delimiter=',', fmt ='% s')

        # Sparsity
        sparsities = g.evaluate_sparsity()
        # Save raw results in case needed for further analysis
        filename = f"results_{date}/{i}_sparsities.csv"
        np.savetxt(filename, sparsities, delimiter=',', fmt ='% s')
        # Keep the mean for the summary table that is returned to the caller
        sparsity.append([i, accuracy, np.mean(sparsities)])

        # Validity at varying thresholds
        for t in thresholds:
            v_plus, v_minus, mat_plus, mat_minus = g.evaluate_validity(threshold=t, confusion_matrix=True)
            validity_plus.append([i, accuracy, t, v_plus])
            validity_minus.append([i, accuracy, t, v_minus])
            # Flatten each 2x2 confusion matrix row by row
            validity_plus_matrix.append([i, accuracy, t, mat_plus[0,0], mat_plus[0,1], mat_plus[1,0], mat_plus[1,1]])
            validity_minus_matrix.append([i, accuracy, t, mat_minus[0,0], mat_minus[0,1], mat_minus[1,0], mat_minus[1,1]])

        # Cumulative validity rows for all iterations run so far
        filename = f"results_{date}/{i}_validity_plus.csv"
        np.savetxt(filename, validity_plus, delimiter=',', fmt ='% s')

        filename = f"results_{date}/{i}_validity_minus.csv"
        np.savetxt(filename, validity_minus, delimiter=',', fmt ='% s')

    # Previously these tables were built and then thrown away; hand them back
    # so the sparsity summary and confusion matrices are usable by the caller.
    return {
        "fidelity": fidelity,
        "sparsity": sparsity,
        "validity_plus": validity_plus,
        "validity_minus": validity_minus,
        "validity_plus_matrix": validity_plus_matrix,
        "validity_minus_matrix": validity_minus_matrix,
    }

In [10]:
# Sanity check: show the directory name used as the prefix for the result file paths
print(f"results_{date}")

results_modified_alg_final


In [None]:
# Run the full experiment for one explainer on the data set selected above.
# Flip `gnn_subnet` to switch explainers; the trailing ';' keeps the cell
# output free of any echoed return value.
obtain_BAGEL_scores(
    loc=loc,
    ppi=ppi,
    feats=feats,
    targ=targ,
    gnn_subnet=False,
);