Retrieve experiment results

In [1]:
import json
from os.path import join
import os

import pandas as pd

from cogspaces.pipeline import get_output_dir

from json import JSONDecodeError

basedir_ids = [25, 28]
basedirs = [join(get_output_dir(), 'predict_multi', str(_id), 'run') for _id in basedir_ids]
res_list = []
for basedir in basedirs:
    for exp_dir in os.listdir(basedir):
        exp_dir = join(basedir, exp_dir)
        try:
            config = json.load(open(join(exp_dir, 'config.json'), 'r'))
            info = json.load(open(join(exp_dir, 'info.json'), 'r'))
        except (JSONDecodeError, FileNotFoundError):
            continue
        datasets = config['datasets']
        datasets = '__'.join(datasets)
        alpha = config['alpha']
        beta = config['beta']
        seed = config['seed']
        model = config['model']
        source = config['source']
        score = info['score']
        rank = info['rank']
        normalized = config['with_std']
        res = {'datasets': datasets, 'alpha': alpha, 'beta': beta, 'seed': seed, 'rank': rank,
               'source': source, 'normalized': normalized,
              'model': model}
        for key, value in score.items():
            res[key] = value
        if basedir == join(get_output_dir(), 'predict_multi', '11', 'run') and model == 'logistic':
            continue
        res_list.append(res)
res = pd.DataFrame(res_list)

Compare results with trace norm, with and without transfer

In [2]:
res.query("alpha == 0 and datasets == 'brainomics__archi__hcp'")

Unnamed: 0,alpha,beta,datasets,model,normalized,rank,seed,source,test_archi,test_brainomics,test_hcp,test_mean,train_archi,train_brainomics,train_hcp,train_mean
1565,0.0,0.0,brainomics__archi__hcp,trace,False,71,1230275727,hcp_rs_positive_single,0.867521,0.84279,0.920198,0.887034,1.0,1.0,0.992248,0.993103
1575,0.0,0.0,brainomics__archi__hcp,trace,False,71,636201818,hcp_rs_positive_single,0.859829,0.879433,0.924051,0.8946,1.0,1.0,0.993232,0.993979
1591,0.0,0.0,brainomics__archi__hcp,trace,False,71,1864370243,hcp_rs_positive_single,0.868376,0.87234,0.915245,0.891469,1.0,1.0,0.992494,0.993322
1602,0.0,0.0,brainomics__archi__hcp,trace,False,71,3330543729,hcp_rs_positive_single,0.840171,0.875887,0.931205,0.891208,1.0,1.0,0.992371,0.993213
1616,0.0,0.0,brainomics__archi__hcp,trace,False,71,1635570368,hcp_rs_positive_single,0.857265,0.867612,0.906371,0.882753,1.0,1.0,0.992438,0.993272
1630,0.0,0.0,brainomics__archi__hcp,trace,False,71,4166818564,hcp_rs_positive_single,0.854701,0.875887,0.917816,0.889266,1.0,1.0,0.992926,0.993707
1644,0.0,0.0,brainomics__archi__hcp,trace,False,71,1268797620,hcp_rs_positive_single,0.84359,0.836879,0.921299,0.878946,1.0,1.0,0.99274,0.993541
1658,0.0,0.0,brainomics__archi__hcp,trace,False,71,2525524634,hcp_rs_positive_single,0.852991,0.888889,0.920748,0.893034,1.0,1.0,0.992494,0.993322
1672,0.0,0.0,brainomics__archi__hcp,trace,False,71,3540609864,hcp_rs_positive_single,0.848718,0.855792,0.922013,0.884937,1.0,1.0,0.992498,0.993325
1686,0.0,0.0,brainomics__archi__hcp,trace,False,71,2832439569,hcp_rs_positive_single,0.846154,0.856974,0.930105,0.888338,1.0,1.0,0.992001,0.992884


In [3]:
df_agg = res.groupby(by=['normalized', 'source', 'model', 'datasets', 'alpha', 'beta']).aggregate(['mean', 'std'])

df_agg = df_agg.fillna(0)


dfs = {}
for dataset in ['brainomics', 'archi']:
    this_df = df_agg.query("datasets in ['%s__hcp', '%s']" % (dataset, dataset))
    test_mean = sum(this_df[('test_%s' % dataset, 'mean')] for dataset in [dataset, 'hcp']) / 2
    this_df = this_df.assign(test_mean=test_mean)
    indices = this_df[('test_%s' % dataset, 'mean')].groupby(level=['normalized', 'source', 'model', 'datasets']).aggregate('idxmax')
    this_df = this_df.loc[indices.values]
    dfs[dataset] = this_df

Compare results with 3 datasets

In [4]:
this_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,rank,rank,seed,seed,test_archi,test_archi,test_brainomics,test_brainomics,test_hcp,test_hcp,test_mean,test_mean,train_archi,train_archi,train_brainomics,train_brainomics,train_hcp,train_hcp,train_mean,train_mean
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
normalized,source,model,datasets,alpha,beta,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2
False,hcp_rs_positive_single,logistic,archi,0.0,0.0001,29.05,0.223607,2330081002,1242420000.0,0.866966,0.014498,0.0,0.0,0.0,0.0,0.433483,0.433483,0.984701,0.006081,0.0,0.0,0.0,0.0,0.0,0.0
False,hcp_rs_positive_single,trace,archi,0.001,0.0,18.85,0.67082,2330081002,1242420000.0,0.864274,0.013974,0.0,0.0,0.0,0.0,0.432137,0.432137,0.989786,0.004902,0.0,0.0,0.0,0.0,0.0,0.0
False,hcp_rs_positive_single,trace,archi__hcp,0.000316,0.0,38.05,1.050063,2330081002,1242420000.0,0.874615,0.011558,0.0,0.0,0.923511,0.008763,0.899063,0.899063,0.997479,0.002209,0.0,0.0,0.971923,0.000634,0.0,0.0


In [5]:
indices.values

array([(False, 'hcp_rs_positive_single', 'logistic', 'archi', 0.0, 0.0001),
       (False, 'hcp_rs_positive_single', 'trace', 'archi', 0.001, 0.0),
       (False, 'hcp_rs_positive_single', 'trace', 'archi__hcp', 0.00031622776601683794, 0.0)], dtype=object)

In [6]:
this_df = df_agg.query("datasets == 'brainomics__archi__hcp'")
test_mean = sum(this_df[('test_%s' % dataset, 'mean')] for dataset in ['brainomics', 'archi', 'hcp']) / 3
this_df = this_df.assign(test_mean=test_mean)
indices = this_df[('test_mean', 'mean')].groupby(level=['normalized', 'source', 'model', 'datasets']).aggregate('idxmax')
this_df = this_df.loc[indices.values]
dfs['archi+brainomics+hcp'] = this_df

In [7]:
result_df = pd.concat(dfs.values(), keys=dfs.keys(), names=['target'])

In [8]:
result_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,rank,rank,seed,seed,test_archi,test_archi,test_brainomics,test_brainomics,test_hcp,test_hcp,test_mean,test_mean,train_archi,train_archi,train_brainomics,train_brainomics,train_hcp,train_hcp,train_mean,train_mean
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
target,normalized,source,model,datasets,alpha,beta,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2
brainomics,False,hcp_rs_positive_single,logistic,brainomics,0.0,0.0,17.05,0.223607,2330081002,1242420000.0,0.0,0.0,0.903251,0.016384,0.0,0.0,0.451625,0.451625,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
brainomics,False,hcp_rs_positive_single,trace,brainomics,0.003162,0.0,9.5,0.512989,2330081002,1242420000.0,0.0,0.0,0.913652,0.01771,0.0,0.0,0.456826,0.456826,0.0,0.0,0.998582,0.001125,0.0,0.0,0.0,0.0
brainomics,False,hcp_rs_positive_single,trace,brainomics__hcp,0.003162,0.0,19.05,0.825578,2330081002,1242420000.0,0.0,0.0,0.914125,0.018616,0.87149,0.012074,0.892808,0.892808,0.0,0.0,0.990426,0.0022,0.880174,0.002216,0.0,0.0
archi,False,hcp_rs_positive_single,logistic,archi,0.0,0.0001,29.05,0.223607,2330081002,1242420000.0,0.866966,0.014498,0.0,0.0,0.0,0.0,0.433483,0.433483,0.984701,0.006081,0.0,0.0,0.0,0.0,0.0,0.0
archi,False,hcp_rs_positive_single,trace,archi,0.001,0.0,18.85,0.67082,2330081002,1242420000.0,0.864274,0.013974,0.0,0.0,0.0,0.0,0.432137,0.432137,0.989786,0.004902,0.0,0.0,0.0,0.0,0.0,0.0
archi,False,hcp_rs_positive_single,trace,archi__hcp,0.000316,0.0,38.05,1.050063,2330081002,1242420000.0,0.874615,0.011558,0.0,0.0,0.923511,0.008763,0.899063,0.899063,0.997479,0.002209,0.0,0.0,0.971923,0.000634,0.0,0.0
archi+brainomics+hcp,False,hcp_rs_positive_single,trace,brainomics__archi__hcp,0.000316,0.0,51.45,0.944513,2330081002,1242420000.0,0.877564,0.012765,0.904255,0.014971,0.922079,0.00886,0.901299,0.901299,0.990214,0.004636,1.0,0.0,0.960742,0.000595,0.964447,0.000606


In [21]:
df_agg.query("datasets == 'brainomics__archi__hcp'")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,rank,rank,seed,seed,test_archi,test_archi,test_brainomics,test_brainomics,test_hcp,test_hcp,train_archi,train_archi,train_brainomics,train_brainomics,train_hcp,train_hcp
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
normalized,source,model,datasets,alpha,beta,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
False,hcp_rs_positive_single,trace,brainomics__archi__hcp,0.0,0.0,71.0,0.0,1243615929,614192900.0,0.865242,0.004707,0.864854,0.019435,0.919831,0.004414,1.0,0.0,1.0,0.0,0.992658,0.000512
False,hcp_rs_positive_single,trace,brainomics__archi__hcp,1e-06,0.0,71.0,0.0,1243615929,614192900.0,0.865242,0.004707,0.864066,0.019885,0.920198,0.004953,1.0,0.0,1.0,0.0,0.992617,0.000606
False,hcp_rs_positive_single,trace,brainomics__archi__hcp,3e-06,0.0,71.0,0.0,1243615929,614192900.0,0.865527,0.005008,0.865642,0.019218,0.920382,0.005231,1.0,0.0,1.0,0.0,0.992576,0.000582
False,hcp_rs_positive_single,trace,brainomics__archi__hcp,1e-05,0.0,71.0,0.0,1243615929,614192900.0,0.865812,0.004759,0.869976,0.01953,0.921482,0.00554,1.0,0.0,1.0,0.0,0.992453,0.000462
False,hcp_rs_positive_single,trace,brainomics__archi__hcp,3.2e-05,0.0,71.0,0.0,1243615929,614192900.0,0.868376,0.003917,0.878251,0.017492,0.923867,0.004808,1.0,0.0,1.0,0.0,0.991632,0.000282
False,hcp_rs_positive_single,trace,brainomics__archi__hcp,0.0001,0.0,70.666667,0.57735,1243615929,614192900.0,0.876923,0.003419,0.895587,0.017061,0.926619,0.004414,1.0,0.0,1.0,0.0,0.986874,0.000233
False,hcp_rs_positive_single,trace,brainomics__archi__hcp,0.000316,0.0,52.0,1.0,1243615929,614192900.0,0.884615,0.008674,0.900709,0.013632,0.923867,0.005513,0.987179,0.000855,1.0,0.0,0.960787,0.000607
False,hcp_rs_positive_single,trace,brainomics__archi__hcp,0.001,0.0,28.0,0.0,1243615929,614192900.0,0.873789,0.005008,0.912136,0.020756,0.897633,0.005586,0.924501,0.006183,0.999212,0.000682,0.913144,0.000784
False,hcp_rs_positive_single,trace,brainomics__archi__hcp,0.003162,0.0,18.333333,1.154701,1243615929,614192900.0,0.822222,0.003419,0.908195,0.019109,0.833425,0.004275,0.84359,0.008882,0.980299,0.0038,0.84489,0.003004
False,hcp_rs_positive_single,trace,brainomics__archi__hcp,0.01,0.0,10.0,0.0,1243615929,614192900.0,0.624217,0.01135,0.851852,0.013074,0.671436,0.007705,0.630769,0.026163,0.907801,0.010099,0.668882,0.003271


In [22]:
df

NameError: name 'df' is not defined