# Collect and sort training (Tr), validation (Val), and test (Te) results from the IMPAC study

#### Conda environment:
#### /project/bioinformatics/DLLab/shared/CondaEnvironments/CooperAuttfGPUv4/bin/python

In [3]:
import os
import pandas as pd
import numpy as np
import pickle as pkl
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
# Path to the original set of experiments. The collection loop joins this root
# with each network name to locate that network's result files.
sDataPath = '/project/bioinformatics/DLLab/Cooper/Code/AutismProject/Parallelization/TrainedModels/ISBIRerun/'

In [9]:
# Build the (initially empty) nested container that will hold the results.
# Layout: dResults[network][modality] -> dict, where modality is every entry
# of lsModalities except the first ('anatomy'), which gets no entry here.
lsAtlases = [
    'basc064',
    'basc122',
    'basc197',
    'craddock_scorr_mean',
    'harvard_oxford_cort_prob_2mm',
    'msdl',
    'power_2011',
]
lsModalities = ['anatomy', 'connectivity', 'combined']
lsNetworks = ['Dense', 'LSTM', 'BrainNetCNN']
dResults = {
    sNet: {sMode: {} for sMode in lsModalities[1:]}
    for sNet in lsNetworks
}

In [17]:
def fFetch(sFile):
    """
    Try to load a pickled object from the specified file; return 0 on failure.

    The file is first unpickled with pickle's default settings. If that fails
    for any reason, a second attempt is made with ``encoding='bytes'`` (the
    pickle option intended for reading pickles written by Python 2). If the
    second attempt also fails (missing file, corrupt pickle, ...), 0 is
    returned instead of raising.

    NOTE: pickle can execute arbitrary code while loading; only use this on
    trusted files.

    :param sFile: the file location (full path)
    :return: either the unpickled object or 0 if unable to load with pickle
    """
    try:
        # Context manager guarantees the handle is closed even if load fails
        with open(sFile, 'rb') as fIn:
            return pkl.load(fIn)
    except Exception:
        # Deliberate best-effort fallback. `Exception` (rather than a bare
        # except) lets KeyboardInterrupt/SystemExit propagate.
        try:
            with open(sFile, 'rb') as fIn:
                return pkl.load(fIn, encoding='bytes')
        except Exception:
            return 0

In [33]:
def fFetchAcc(sType, iModelNum, sModality, sAtlas, iCV,
              sRootDir='/project/bioinformatics/DLLab/Cooper/Code/AutismProject/Parallelization/TrainedModels/ISBIRerun'):
    """
    Fetch the training accuracy of a network at its best validation epoch.

    Loads the pickled model-history object stored at
    ``<sRootDir>/<sType>/<sType>_<NN><sModality><sAtlas>ModelHistoryCrossVal<iCV>.p``
    (NN is the model number zero-padded to two digits) and returns the entry
    of ``history['acc']`` at the index where ``history['val_acc']`` is largest
    (the first such index if there are ties).

    NOTE: pickle can execute arbitrary code while loading; only use this on
    trusted files.

    :param: sType (str) network type; used as both sub-directory and file
        prefix (e.g. Dense, LSTM, BrainNetCNN)
    :param: iModelNum (int or str): model number, any of 0-49
    :param: sModality (str) anatomy, connectivity, or combined
    :param: sAtlas (str) atlas name ('' when no atlas is part of the file name)
    :param: iCV (int or str) number of the cross validation (1,2, or 3)
    :param: sRootDir (str) root directory holding the per-network folders;
        defaults to the ISBIRerun experiment directory
    :return: training accuracy at the epoch with the highest validation accuracy
    :raises: OSError (e.g. FileNotFoundError) if the history file cannot be
        opened; pickle errors if it cannot be unpickled
    """
    sModelHistFile = os.path.join(
        sRootDir,
        sType,
        f'{sType}_{int(iModelNum):02}{sModality}{sAtlas}ModelHistoryCrossVal{iCV}.p',
    )
    # Context manager so the file handle is always closed
    with open(sModelHistFile, 'rb') as fIn:
        cModelHist = pkl.load(fIn)
    # Index of the epoch with the best validation accuracy
    iBestEpoch = np.argmax(cModelHist.history['val_acc'])
    flTrainAcc = cModelHist.history['acc'][iBestEpoch]
    return flTrainAcc

In [None]:
# Collect, for every network type / atlas / modality, a table with one row per
# trained model (0-49) holding:
#   - the training metric averaged over the 3 cross-validation folds,
#   - the validation ROC AUC averaged over the 3 cross-validation folds,
#   - the test ROC AUC.
#
# Final dictionary is of the form:
# dict[dense/lstm/BrainNet][anatomy/connectivity/combined][atlas(no key for anatomy)]
# and each entry in this form is a pandas dataframe with row= model number,
# col = average CV Train ROC AUC, average CV Validation ROC AUC, test ROC AUC
#
# NOTE(review): the "train" value comes from fFetchAcc, which reads the 'acc'
# entry of the model history, so the 'Avg CV Train ROC AUC' column appears to
# hold accuracy rather than ROC AUC - confirm the intended label.
# NOTE(review): fFetch returns 0 for files it cannot load, so missing result
# files silently pull the averages towards 0.
lsResultColumns = ['Avg CV Train ROC AUC', 'Avg CV Validation ROC AUC', 'Test ROC AUC']
iNumModels = 50
iNumCV = 3

# loop through all NN types
for sNetworkName in lsNetworks:
    sNetworkPath = os.path.join(sDataPath, sNetworkName)
    # First, we loop over the possible atlases
    for sAtlas in lsAtlases:
        # then we loop over the possible modes (i.e. anatomical alone,
        # then connectivity matrices alone, then combined).
        # The anatomy file names do not contain the atlas, so the anatomy table
        # is rebuilt (with the same files) on every atlas iteration.
        for sModality in lsModalities:
            # initialize dataframe to hold results
            pdNetworkResults = pd.DataFrame(index=range(iNumModels), columns=lsResultColumns)

            # Register the table right away (it is filled in place below), so
            # partially filled results remain available if a later load raises.
            if sModality == 'anatomy':
                dResults[sNetworkName].update({sModality: pdNetworkResults})
            else:
                dResults[sNetworkName][sModality].update({sAtlas: pdNetworkResults})

            # The atlas is not part of the anatomy history-file name
            sTrainAtlas = '' if sModality == 'anatomy' else sAtlas

            # Loop over each model tested
            for iModel in range(iNumModels):
                # model number is zero-padded to two digits in the file names
                sNetwork = f'{sNetworkName}_{iModel:02d}'

                # generate correct file name
                if sModality == 'anatomy':
                    sNetwork = sNetwork + sModality + 'ROCScore'
                elif 'BrainNetCNN' not in sNetwork:
                    sNetwork = sNetwork + sModality + sAtlas + 'ROCScore'
                elif sModality == 'connectivity':
                    sNetwork = sNetwork + sAtlas + 'ROCScore'
                # NOTE(review): for BrainNetCNN with the 'combined' modality no
                # suffix is appended, so the score files looked up below are
                # '<name>CrossVal<k>.p' / '<name>Test.p' - confirm this is intended.

                flNetworkAvg = 0
                flNetworkTrainAvg = 0

                # fetch the average performance across 3 cross-validations (tr and val)
                for iCV in range(1, iNumCV + 1):
                    flNetworkTrainCV = fFetchAcc(sNetworkName, iModel, sModality, sTrainAtlas, iCV)
                    flNetworkCV = fFetch(os.path.join(sNetworkPath, sNetwork) + 'CrossVal' + str(iCV) + '.p')

                    flNetworkTrainAvg = flNetworkTrainAvg + flNetworkTrainCV
                    flNetworkAvg = flNetworkAvg + flNetworkCV

                # Fetch the results of a specified model, modality, and atlas
                flNetworkTrainAvg = flNetworkTrainAvg / float(iNumCV)
                flNetworkAvg = flNetworkAvg / float(iNumCV)
                flNetworkTest = fFetch(os.path.join(sNetworkPath, sNetwork) + 'Test.p')

                # Single .loc[row, col] assignment: chained indexing
                # (df.iloc[i][col] = ...) may write to a temporary copy and
                # leave the dataframe unchanged.
                pdNetworkResults.loc[iModel, 'Avg CV Train ROC AUC'] = flNetworkTrainAvg
                pdNetworkResults.loc[iModel, 'Avg CV Validation ROC AUC'] = flNetworkAvg
                pdNetworkResults.loc[iModel, 'Test ROC AUC'] = flNetworkTest

In [20]:
# Display the nested results dictionary as the cell output
dResults

{'Dense': {'connectivity': {'basc064':    Avg CV ROC AUC Test ROC AUC
   0        0.722138     0.761905
   1        0.721744     0.748557
   2        0.721063     0.752165
   3        0.705363     0.688793
   4        0.717488     0.726431
   5        0.712993          0.5
   6        0.705097     0.753728
   7        0.724253     0.760823
   8        0.723369     0.758778
   9        0.672802     0.579245
   10       0.714288     0.738456
   11       0.606732     0.654762
   12       0.721043     0.755291
   13        0.71948      0.73545
   14       0.726077     0.741222
   15       0.722002     0.742785
   16       0.713196     0.754089
   17       0.715383          0.5
   18       0.721514     0.744468
   19       0.638118     0.708995
   20       0.614104     0.699735
   21       0.719004      0.76419
   22       0.723271     0.742665
   23       0.724634     0.746753
   24       0.616548     0.569565
   25       0.722431     0.722222
   26       0.704587      0.71645
   27       

## Now, we do some plotting

In [None]:
# Load seaborn's "attention" example dataset and show the type of the
# returned object
att=sns.load_dataset("attention")
type(att)

In [None]:
# One facet per value of the "subject" column, wrapped after 5 columns,
# each facet 1.5 units high
g=sns.FacetGrid(att, col="subject", col_wrap=5, height=1.5)

In [None]:
# In every facet, plot "score" against "solutions" with '.' markers
g=g.map(plt.plot, "solutions", "score", marker='.')