# Import libraries

In [None]:
# System libraries
import sys
import os

# Processing libraries
import numpy as np
import pandas as pd
import scipy.io
import scipy.stats
sys.path.insert(0, '/home/leon_ooi/storage/CBIG_private/stable_projects/predict_phenotypes/Ooi2024_ME/curve_fitting')
import CBIG_ORSP_fns as orsp

# Plotting libraries
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.pylab as pl
import matplotlib.animation as animation
from matplotlib.ticker import ScalarFormatter
from matplotlib.ticker import FormatStrFormatter
import matplotlib.gridspec as gridspec
import seaborn as sns

In [None]:
# Reload the helper module so that edits to CBIG_ORSP_fns are picked up without
# restarting the kernel. The `imp` module is deprecated and no longer exists in
# Python 3.12+, so importlib is used instead.
import importlib
importlib.reload(orsp)

# Set directories and global variables

In [None]:
# Locations used throughout the notebook
# root of the replication outputs (keeps its trailing slash)
rep_dir = '/home/leon_ooi/storage/optimal_prediction/replication/'
# folder in which the manuscript figures are saved
img_dir = os.path.join(rep_dir, 'Manuscript_Figures')
# folder holding the text files with the phenotype names
varname_dir = '/home/leon_ooi/storage/optimal_prediction/github/Ooi2024_ORSP/utilities/variable_names'

In [None]:
### HCP settings
# palette for the real data, one shade for each N
HCP_subcolors = sns.color_palette("blend:powderblue,darkslategrey", n_colors=6)
# marker-only legend handles, one for each shade of the palette above
HCP_lgd = [plt.Line2D([], [], marker='.', color=shade, linestyle='None')
           for shade in HCP_subcolors]
# palette for the theoretical fit, one shade for each N
HCP_theor_subcolors = sns.color_palette("blend:dodgerblue,black", n_colors=6)
# phenotype names, read from the variable-name text files
HCP_scores = np.genfromtxt(os.path.join(varname_dir,'HCP_variables_short_names.txt'),
                           dtype=str, delimiter='\n')
HCP_scores_short = np.genfromtxt(os.path.join(varname_dir,'HCP_variables_legend_names.txt'),
                                 dtype=str, delimiter='\n')

# --- sets of score indices ---
# relaxed threshold
HCP_abv01_ind = [0,1,2,3,4,5,6,7,8,10,12,14,15,16,18,22,23,24,25,26,29,31,32,34,43,47,48,51]
HCP_sat = [0,1,2,3,4,5,6,7,8,10,14,15,16,18,22,23,25,26,29,31,32,34,43,47,48] # remove 12,24,51
# strict threshold, followed by its subsets
HCP_log_ind = [1,2,3,4,5,6,7,8,10,14,23,25,26,29,31,32,34,47]
HCP_cog_ind = [1,2,3,4,5,6,8,10,25,26,29,59]
HCP_emo_ind = [23]
HCP_pers_ind = [7,31,32,34]
HCP_phy_ind = [14]
HCP_wb_ind = [47]
# strict fit to theoretical model
HCP_behav_ind_fullstrict = np.array([31,47,9,10,16,5,6,7,59])
HCP_behav_ind_randomstrict = np.array([28,33,31,49,9,27,16,5,6,7,3,8,59])

# --- indices for the control conditions ---
HCP_rs_log_ind = [1,2,3,4,5,6,7,8,10,14,23,25,26,29,31,32,34,47,59]
HCP_mixdays_log_ind = [1,4,5,7,8,10,12,14,15,16,25,26,29,31,34,59]
HCP_SC_log_ind = [1,3,4,5,7,8,14,22,26,29,31,50,59]
HCP_1000_log_ind = [1,2,3,4,5,6,7,8,10,12,14,16,18,23,24,25,26,29,31,32,34,46,47,59]

# scores common to all four control conditions
HCP_intersect = np.intersect1d(
    np.intersect1d(np.intersect1d(HCP_rs_log_ind, HCP_mixdays_log_ind), HCP_SC_log_ind),
    HCP_1000_log_ind)
# NOTE(review): this replaces the strict-threshold list assigned to HCP_log_ind
# earlier in this cell. The intersection already contains 59, so any code that
# appends 59 to HCP_log_ind will hold that index twice -- confirm this is intended.
HCP_log_ind = HCP_intersect

In [None]:
### ABCD settings
# palette for the real data, one shade for each N
ABCD_subcolors = sns.color_palette("blend:mistyrose,darkred", n_colors=9)
# marker-only legend handles, one for each shade of the palette above
ABCD_lgd = [plt.Line2D([], [], marker='.', color=shade, linestyle='None')
            for shade in ABCD_subcolors]
# palette for the theoretical fit, one shade for each N
ABCD_theor_subcolors = sns.color_palette("blend:orangered,black", n_colors=9)
# phenotype names, read from the variable-name text files
ABCD_scores = np.genfromtxt(os.path.join(varname_dir,'ABCD_variables_short_names.txt'),
                            dtype=str, delimiter='\n')
ABCD_scores_short = np.genfromtxt(os.path.join(varname_dir,'ABCD_variables_legend_names.txt'),
                                  dtype=str, delimiter='\n')

# --- sets of score indices ---
# relaxed threshold
ABCD_abv01_ind = [3,5,6,8,9,10,11,12,13,14,15,16,17,21,24,25,28,29,30,31,32,33]
ABCD_sat = [3,5,6,8,9,10,11,12,13,14,15,16,17,21,24,25,28,29,30,31,32,33] # no scores removed
# strict threshold, followed by its subsets
ABCD_log_ind = [3,5,6,8,10,11,13,14,15,16,17,29,30,31,32,33]
ABCD_cog_ind = [8,10,11,13,14,15,16,17,30,31,32,33,36]
ABCD_mh_ind = [5,29,6,3]
# strict fit to theoretical model
ABCD_behav_ind_fullstrict = np.array([16,17,12,35])
ABCD_behav_ind_randomstrict = np.array([15,16,17,12,35])

# --- indices for the control conditions ---
ABCD_rs_log_ind = [3,5,6,8,10,11,13,14,15,16,17,29,30,31,32,33,36]
ABCD_MID_log_ind = [8,9,10,11,13,14,15,16,17,21,30,31,32,33,35,36]
ABCD_NBACK_log_ind = [5,8,9,10,11,13,14,15,16,17,25,27,28,30,31,32,33,35,36]
ABCD_SST_log_ind = [6,8,10,11,13,14,15,16,17,21,27,28,30,31,32,33,35,36]

ABCD_SC_log_ind = [6,8,10,13,14,15,16,17,25,29,31,32,33,36]
ABCD_1000_log_ind = [5,6,8,9,10,11,13,14,15,16,17,29,30,31,32,33,34,36]
# scores common to the rs, SST, NBACK and MID lists
ABCD_rst_intersect = np.intersect1d(
    np.intersect1d(np.intersect1d(ABCD_rs_log_ind, ABCD_SST_log_ind), ABCD_NBACK_log_ind),
    ABCD_MID_log_ind)
# scores common to the rs, SC and 1000 lists
ABCD_sc1000_intersect = np.intersect1d(
    np.intersect1d(ABCD_rs_log_ind, ABCD_SC_log_ind), ABCD_1000_log_ind)

In [None]:
# Colours, legend handles, score indices and score names of the remaining datasets.
# Every *_lgd list holds one marker-only handle per shade of the matching *_subcolors.

# SINGER
SINGER_subcolors = sns.color_palette("blend:thistle,deeppink", n_colors=6)
SINGER_theor_subcolors = sns.color_palette("blend:plum,darkmagenta", n_colors=6)
SINGER_lgd = [plt.Line2D([], [], marker='.', color=shade, linestyle='None')
              for shade in SINGER_subcolors]
SINGER_log_ind = [0,2,3,4,5,7,9,12,13,14,15,16,17,18]
SINGER_scores = np.genfromtxt(os.path.join(varname_dir,'SINGER_y_variables.txt'),
                              dtype=str, delimiter='\n')

# TCP
TCP_subcolors = sns.color_palette("blend:lemonchiffon,goldenrod", n_colors=6)
TCP_theor_subcolors = sns.color_palette("blend:khaki,darkgoldenrod", n_colors=6)
TCP_lgd = [plt.Line2D([], [], marker='.', color=shade, linestyle='None')
           for shade in TCP_subcolors]
TCP_log_ind = [0,5,7,9,10,14,18]
TCP_scores = np.genfromtxt(os.path.join(varname_dir,'TCP_y_variables.txt'),
                           dtype=str, delimiter='\n')

# MDD
MDD_subcolors = sns.color_palette("blend:linen,darkorange", n_colors=10)
MDD_theor_subcolors = sns.color_palette("blend:bisque,chocolate", n_colors=10)
MDD_lgd = [plt.Line2D([], [], marker='.', color=shade, linestyle='None')
           for shade in MDD_subcolors]
MDD_log_ind = [0,1,3,8,9,10,19]
MDD_scores = np.genfromtxt(os.path.join(varname_dir,'MDD_y_variables.txt'),
                           dtype=str, delimiter='\n')

# ADNI
ADNI_subcolors = sns.color_palette("blend:lightgreen,darkgreen", n_colors=5)
ADNI_theor_subcolors = sns.color_palette("blend:springgreen,darkolivegreen", n_colors=5)
ADNI_lgd = [plt.Line2D([], [], marker='.', color=shade, linestyle='None')
            for shade in ADNI_subcolors]
ADNI_log_ind = [0,1,3,4,5,6] # 2 is skipped because it is diagnosis
ADNI_scores = np.genfromtxt(os.path.join(varname_dir,'ADNI_y_variables.txt'),
                            dtype=str, delimiter='\n')

# Grey plots of what the original 36 phenotypes predict
def theoretical_curve(X_fit, Y, sub_lvl, k0, ax_id):
    """Draw the grey reference band projected from the original 36 HCP/ABCD phenotypes.

    For every phenotype the normalised theoretical curve
    ``sqrt(1 / (1 + w1/N + w2/(N*T)))`` is evaluated with ``N = Y[sub_lvl]`` and
    ``T = X_fit``, using the parameters in the last row of the fits returned by
    ``orsp.load_fits`` (described as the full-duration fit in this notebook).
    The curves are averaged over phenotypes, the mean is scaled by ``k0`` and
    plotted against ``N*T`` together with a shaded band of 1.96 standard errors.

    Parameters
    ----------
    X_fit : numpy array
        Values of T at which the curves are evaluated (presumably scan time per
        participant -- confirm against callers).
    Y : sequence
        Sample sizes; only ``Y[sub_lvl]`` is read.
    sub_lvl : int
        Index into ``Y`` and into the 8-shade grey palette, so it must be below 8.
    k0 : float
        Scale factor applied to the mean projected curve.
    ax_id : object with ``plot`` and ``fill_between`` (e.g. a matplotlib Axes)
        Receives the line and the shaded band.

    Relies on the notebook globals HCP_rs_log_ind, ABCD_rs_log_ind and rep_dir.
    """
    # The 36 phenotypes are the 18 HCP and 16 ABCD strict-threshold scores plus the
    # two cognition factors (59 and 36), which is exactly what the *_rs_log_ind
    # lists hold. They are read directly instead of appending the factor to
    # HCP_log_ind / ABCD_log_ind, because HCP_log_ind is reassigned in the settings
    # cell to an intersection that already contains 59; appending to it shrank the
    # HCP set and counted the cognition factor twice.
    fit_sets = (('HCP', np.asarray(HCP_rs_log_ind)),
                ('ABCD', np.asarray(ABCD_rs_log_ind)))
    n_pheno = sum(len(behav_ind) for _, behav_ind in fit_sets)
    theor_subcolors = sns.color_palette("blend:lightgray,dimgray", n_colors=8)
    n_sub = Y[sub_lvl]

    # one column per phenotype: HCP columns first, then ABCD
    proj_val = np.zeros((len(X_fit), n_pheno))
    col = 0
    for dataset, behav_ind in fit_sets:
        # only the second returned array (the weights used in the formula) is needed
        _, w_fit, *_ = orsp.load_fits(dataset, 'predacc', rep_dir)
        for i in behav_ind:
            # Tom's equation fit to full duration
            proj_val[:, col] = np.sqrt(1/(1 + (w_fit[i,-1,1]/n_sub) + (w_fit[i,-1,2]/(n_sub*X_fit))))
            col += 1

    proj_val_mean = k0 * np.mean(proj_val, 1)
    # NOTE(review): the band is computed from the unscaled curves while the mean is
    # multiplied by k0 -- confirm whether c_int should be scaled by k0 as well.
    c_int = 1.96 * np.std(proj_val, 1)/np.sqrt(n_pheno)
    total_duration = n_sub*X_fit
    ax_id.plot(total_duration, proj_val_mean, color=theor_subcolors[sub_lvl])
    ax_id.fill_between(total_duration, (proj_val_mean-c_int), (proj_val_mean+c_int),
                       color=theor_subcolors[sub_lvl], alpha=.3)


# Fig 1: Contour Plots (HCP & ABCD / KRR / full / acc / corr / cognition)

In [None]:
#################################################
# Fig 1: contour plots of the accuracy landscape
#################################################
# contour levels handed to orsp.plot_contour (shared by both datasets)
con_lines = [0.3, 0.4, 0.45, 0.5]

# ---- HCP (phenotype index 59) ----
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP', 'predacc', rep_dir)
manual_locations = [(1.5,0.3),(6.5,1.5),(12.5,1.5),(16,4)]
# transpose the landscape, then apply the two flips expected by the plotting helper
behav = np.flip(np.flip(HCP_res['acc_landscape'][:,:,59].T),1)
fig,ax = plt.subplots(figsize=(3.5,3.5))
orsp.plot_contour(behav, X, Y, con_lines, manual_locations, HCP_extent, fig, ax)
fig.savefig(os.path.join(img_dir, 'Fig1_' + 'HCP_KRR_full_acc_corr_cog_contour.svg'),
            bbox_inches='tight')

# ---- ABCD (phenotype index 36) ----
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD', 'predacc', rep_dir)
con_lines = [0.3, 0.4, 0.45, 0.5]
manual_locations = [(1,0.3),(4,1.5),(6,6),(8.5,8)]
behav = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,36].T),1)
fig,ax = plt.subplots(figsize=(3.5,3.5))
orsp.plot_contour(behav, X, Y, con_lines, manual_locations, ABCD_extent, fig, ax)
fig.savefig(os.path.join(img_dir, 'Fig1_' + 'ABCD_KRR_full_acc_corr_cog_contour.svg'),
            bbox_inches='tight')

# Fig 2: Scatter Plots (HCP & ABCD component scores)

In [None]:
#################################################
# Fig 2: prediction accuracy against total scan time
#################################################
# ---- HCP (phenotype index 59) ----
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP', 'predacc', rep_dir)
cog_all = np.flip(np.flip(HCP_res['acc_landscape'][:,:,59].T),1)
fig,ax = plt.subplots(figsize=(4, 3))
limit = 15
# same points drawn twice: plain first, then once more with outline='Y'
orsp.plot_scatter(6,cog_all,scan_duration,HCP_subcolors,limit,ax)
orsp.plot_scatter(6,cog_all,scan_duration,HCP_subcolors,limit,ax,outline='Y')
lgd = plt.legend(HCP_lgd, Y, markerscale=2, ncol=2, labelspacing=0.1,
                 handletextpad=0.05, frameon=False, fontsize=10)
orsp.format_scatter_plot(
    'Total scan duration (# training participants \nx scan time per participant)',
    'Prediction accuracy (r)', ax)
fig.savefig(os.path.join(img_dir,'Fig2_HCP_CogComp_58m.svg'), bbox_inches='tight')

# Two grid points with the same accuracy but a different total scan time
print(scan_duration[5, 6], cog_all[5, 6]) # 700 subjects, 14 mins
print(scan_duration[1, 28], cog_all[1, 28]) # 300 subjects, 58 mins

# ---- ABCD (phenotype index 36) ----
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD', 'predacc', rep_dir)
cog_all = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,36].T),1)
fig,ax = plt.subplots(figsize=(4, 3))
limit = 10
orsp.plot_scatter(9,cog_all,scan_duration,ABCD_subcolors,limit,ax)
lgd = plt.legend(ABCD_lgd, Y, markerscale=2, ncol=2, labelspacing=0.1,
                 handletextpad=0.05, frameon=False, fontsize=10)
orsp.format_scatter_plot(
    'Total scan duration (# training participants \nx scan time per participant)',
    'Prediction accuracy (r)', ax)
fig.savefig(os.path.join(img_dir, 'Fig2_ABCD_CogComp_20m.svg'), bbox_inches='tight')

In [None]:
#################################################
# Fig 2: normalised performance of the individual scores
#################################################
fig,ax = plt.subplots(figsize=(8.5, 3.5))
all_scores = []
legend_handle = []
lgd_handles = []
limit = 10

# One row per group of scores, listed in drawing order:
# (dataset, score indices, seaborn palette, number of shades, zorder)
score_groups = [
    ('ABCD', ABCD_cog_ind, "blend:bisque,orangered", 13, None),          # cognition
    ('HCP', HCP_cog_ind, "blend:bisque,orangered", 13, None),            # cognition
    ('ABCD', ABCD_mh_ind, "blend:lightgrey,darkgrey", 4, -1),            # mental health
    ('HCP', HCP_pers_ind, "blend:powderblue,darkslateblue", 4, None),    # personality
    ('HCP', HCP_phy_ind, "blend:gold,gold", 2, None),                    # physical
    ('HCP', HCP_emo_ind, "blend:forestgreen,forestgreen", 2, None),      # emotion
    ('HCP', HCP_wb_ind, "blend:magenta,magenta", 2, None),               # well being
]
for dataset, score_ind, palette, n_shades, z_level in score_groups:
    custom_colors = sns.color_palette(palette, n_colors=n_shades)
    orsp.plot_norm_scatter(dataset, 'predacc', rep_dir, 'full', limit,
                           score_ind, custom_colors, zorder=z_level)

# fitted curve drawn by the helper
orsp.plot_curve(200, 38000)

# axis limits, labels and output file
ax.set_ylim([7, 16])
orsp.format_scatter_plot(
    'Total scan duration (# training participants x scan time per participant)',
    'Norm. prediction performance', ax)
fig.savefig(os.path.join(img_dir, 'Fig2_ScanTime_AllBehavCurves_20m.svg'), bbox_inches='tight')

In [None]:
#################################################
# Fig 2: normalised performance of the individual scores (log scale)
#################################################
fig,ax = plt.subplots(figsize=(8.5, 2.5))
all_scores = []
legend_handle = []
lgd_handles = []
limit = 10

# One row per group of scores, listed in drawing order:
# (dataset, score indices, seaborn palette, number of shades, zorder, legend labels).
# The last entry of each cognition list gets a fixed label instead of its name from
# the *_scores_short array.
score_groups = [
    # cognition
    ('ABCD', ABCD_cog_ind, "blend:bisque,orangered", 13, None,
     np.concatenate((ABCD_scores_short[ABCD_cog_ind[:-1]], ['ABCD Cog. Factor']))),
    ('HCP', HCP_cog_ind, "blend:bisque,orangered", 13, None,
     np.concatenate((HCP_scores_short[HCP_cog_ind[:-1]], ['Cog Factor (H)']))),
    # mental health
    ('ABCD', ABCD_mh_ind, "blend:lightgrey,darkgrey", 4, -1,
     ABCD_scores_short[ABCD_mh_ind]),
    # personality
    ('HCP', HCP_pers_ind, "blend:powderblue,darkslateblue", 4, None,
     HCP_scores_short[HCP_pers_ind]),
    # physical
    ('HCP', HCP_phy_ind, "blend:gold,gold", 2, None,
     HCP_scores_short[HCP_phy_ind]),
    # emotion
    ('HCP', HCP_emo_ind, "blend:forestgreen,forestgreen", 2, None,
     HCP_scores_short[HCP_emo_ind]),
    # well being
    ('HCP', HCP_wb_ind, "blend:magenta,magenta", 2, None,
     HCP_scores_short[HCP_wb_ind]),
]
for dataset, score_ind, palette, n_shades, z_level, group_labels in score_groups:
    custom_colors = sns.color_palette(palette, n_colors=n_shades)
    orsp.plot_norm_scatter(dataset, 'predacc', rep_dir, 'full', limit,
                           score_ind, custom_colors, log_scale = True, zorder=z_level)
    # labels are collected in the same order in which the groups are drawn
    all_scores = np.concatenate((all_scores, group_labels))

# legend listing every plotted score
lgd = plt.legend(all_scores, handletextpad=0.01, bbox_to_anchor=[1.06, -0.27],
                 fontsize=9, ncol=6, columnspacing=0.5, frameon=False)

# reference line: log2 of the total scan duration plotted against itself
X_fit = np.linspace(350, 38000, num=100, dtype=int)
curve_val = np.log(X_fit) / np.log(2)
plt.plot(curve_val, curve_val, color='k')

# axis limits, labels and output file
ax.set_ylim([6, 16])
ax.set_xlim([8, 15.5])
orsp.format_scatter_plot('log\N{SUBSCRIPT TWO}(Total Scan Duration)',
                         'Norm. prediction performance', ax)

fig.savefig(os.path.join(img_dir, 'Fig2_ScanTime_Log_AllBehavCurves_20m.svg'), bbox_inches='tight')

In [None]:
### Mean COD of the log fit (column 10-3 of loss_log_all) for the 36 ABCD and HCP
### phenotypes; ABCD is reported first, then HCP
for dataset, score_ind in (('ABCD', ABCD_rs_log_ind), ('HCP', HCP_rs_log_ind)):
    log_cod = []
    w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits(dataset,'predacc',rep_dir)
    for b in score_ind:
        log_cod.append(loss_log_all[b,10-3])
    # number of phenotypes and their mean COD
    print(len(log_cod), np.mean(log_cod))

# Fig 3: Acc N and T are not equivalent

In [None]:
#################################################
# Violin plot comparing same total scan time
#################################################
# HCP: 6000 total scan time but different N and T
b = 59
n_seeds = 50
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','predacc',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP','predacc',rep_dir)
mat = os.path.join(HCP_img_dir,'acc_KRR_avg_indiv_corr_landscape.mat')
res = scipy.io.loadmat(mat)

# Grid cells with 6000m of total scan time:
# (first index, second index, 'Time' label, 'Subs' label).
# NOTE(review): the first index presumably selects the scan time and the second
# the sample size of the per-seed landscape -- confirm against the .mat layout.
same_total_cells = [
    (4, 1, '10', '600subs'),
    (9, 4, '20', '300subs'),
    (14, 5, '30', '200subs'),
    (19, 6, '40', '150subs'),
    (24, 7, '50', '120subs'),
    (28, 8, '~60', '100subs'),
]

# Collect one accuracy per seed and cell. The rows are gathered in a list and
# turned into a DataFrame once, instead of concatenating frames inside the loop.
acc_by_time = {time_lbl: [] for _, _, time_lbl, _ in same_total_cells}
records = []
for n in range(0,n_seeds):
    behav = res['acc_landscape'][:,:,n,b]
    for row, col, time_lbl, subs_lbl in same_total_cells:
        acc = behav[row, col]
        acc_by_time[time_lbl].append(acc)
        records.append({'acc': acc, 'Time': time_lbl, 'Subs': subs_lbl})
full_df = pd.DataFrame(records, columns=['acc', 'Time', 'Subs'])

# per-condition accuracy lists, one value per seed
tenm_vals = acc_by_time['10']
twenm_vals = acc_by_time['20']
thirm_vals = acc_by_time['30']
fourm_vals = acc_by_time['40']
fiftm_vals = acc_by_time['50']
sixtm_vals = acc_by_time['~60']

# plot violin plot
fig,ax = plt.subplots(figsize=(4, 3))
vp = sns.violinplot(data=full_df, x="Time", y="acc", palette="Blues_r",orient='v', width=0.9)
orsp.format_scatter_plot("", 'Prediction accuracy (r)', ax)
vp.set(xticklabels=[])
ax.set_xlim([-0.7,5.7])
fig.savefig(os.path.join(img_dir,'Fig3_HCP_not1to1_violin.svg'), bbox_inches='tight')

# mean accuracy
print('10m mean:', np.mean(tenm_vals))
print('20m mean:', np.mean(twenm_vals))
print('30m mean:', np.mean(thirm_vals))
print('40m mean:', np.mean(fourm_vals))
print('50m mean:', np.mean(fiftm_vals))
print('60m mean:', np.mean(sixtm_vals))

# stats: corrected resampled t-test between neighbouring conditions. Each test is
# computed once, printed, and stored in p_list, which later cells extend for FDR.
neighbour_tests = [
    ('10m vs 20m:', tenm_vals, twenm_vals),
    ('20m vs 30m:', twenm_vals, thirm_vals),
    ('30m vs 40m:', thirm_vals, fourm_vals),
    ('40m vs 50m:', fourm_vals, fiftm_vals),
    ('50m vs 58m:', fiftm_vals, sixtm_vals),
]
p_list = []
for test_lbl, first_vals, second_vals in neighbour_tests:
    seed_diffs = [first - second for first, second in zip(first_vals, second_vals)]
    test_out = orsp.corrected_resample_ttest(seed_diffs, 1/9, 0)
    print(test_lbl, test_out)
    p_list.append(test_out)

In [None]:
#################################################
# Tom's theoretical equations
#################################################
c_vers = 'full'
# HCP accuracies and the fitted model parameters
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP', 'predacc', rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP', 'predacc', rep_dir)
b = 59 # Cognition Factor
behav = np.flip(np.flip(HCP_res['acc_landscape'][:,:,b].T),1)

# observed accuracies
fig,ax = plt.subplots(figsize=(4, 3))
orsp.plot_scatter(len(Y),behav,scan_duration,HCP_subcolors,len(X), ax)

# theoretical curves: parameters from the last row of the fits (full duration),
# one curve for each entry of Y
w = w_pa_all[b,-1,:]
X_fit = np.linspace(2, 60, num=100, dtype=int)
for sub_lvl, n_subs in enumerate(Y):
    curve_val = w[0] * np.sqrt(1/(1 + (w[1]/n_subs) + (w[2]/(n_subs*X_fit))))
    plt.plot(n_subs*X_fit, curve_val, color=HCP_theor_subcolors[sub_lvl])

lgd = plt.legend(HCP_lgd, Y, markerscale=2, ncol=2, labelspacing=0.1,
                 handletextpad=0.05, frameon=False, fontsize=10)
orsp.format_scatter_plot(
    'Total scan duration (# training participants \nx scan time per participant)',
    'Prediction accuracy (r)', ax)
fig.savefig(os.path.join(img_dir,'Fig3_HCP_CogFac_Theoretical_acc.svg'), bbox_inches='tight')

In [None]:
### Mean COD of the theoretical model (last column of loss_pa_all) for the 36 ABCD
### and HCP phenotypes; ABCD is reported first, then HCP
for dataset, score_ind in (('ABCD', ABCD_rs_log_ind), ('HCP', HCP_rs_log_ind)):
    theor_cod = []
    w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits(dataset,'predacc',rep_dir)
    for b in score_ind:
        theor_cod.append(loss_pa_all[b,-1])
    # number of phenotypes and their mean COD
    print(len(theor_cod), np.mean(theor_cod))

In [None]:
### Paired t-test between the log and theoretical model COD (last column of each
### loss array) for the 36 ABCD and HCP phenotypes; ABCD first, then HCP
for dataset, score_ind in (('ABCD', ABCD_rs_log_ind), ('HCP', HCP_rs_log_ind)):
    w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits(dataset, 'predacc', rep_dir)
    theor_vals = []
    log_vals = []
    for b in score_ind:
        theor_vals.append(loss_pa_all[b,-1])
        log_vals.append(loss_log_all[b,-1])
    # number of phenotypes, mean theoretical COD, mean log COD
    print(len(theor_vals), np.mean(theor_vals),np.mean(log_vals))
    print(scipy.stats.ttest_rel(log_vals, theor_vals))

# Fig 4: Why behaviors don't follow log curve

In [None]:
#################################################
# Example of high prediction acc vs low prediction acc
#################################################
c_vers = 'full'
# load ABCD data
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD', 'predacc', rep_dir, vers=c_vers)

# One scatter plot per phenotype index; the output names mark index 8 as the
# high-accuracy and index 0 as the low-accuracy example.
for b, svg_name in ((8, 'Fig4_ABCD_Vocabulary_HiAcc.svg'),
                    (0, 'Fig4_ABCD_AnxDep_LoAcc.svg')):
    behav = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,b].T),1)
    fig,ax = plt.subplots(figsize=(4, 3))
    orsp.plot_scatter(9,behav,scan_duration,ABCD_subcolors,10,ax)
    lgd = plt.legend(ABCD_lgd, Y, markerscale=2, ncol=2, labelspacing=0.1,
                     handletextpad=0.05, columnspacing=0.7, frameon=False, fontsize=10)
    orsp.format_scatter_plot(
        'Total scan duration (# training participants \nx scan time per participant)',
        'Prediction accuracy (r)', ax, fontsz=10)
    fig.savefig(os.path.join(img_dir, svg_name), bbox_inches='tight')

In [None]:
#################################################
# Example of state effect
#################################################
b=43
# Same phenotype plotted from the 'full' and from the 'random' version of the
# HCP results, each saved to its own file.
for vers_name, svg_name in (('full', 'Fig5_HCP_AngAgr_NoShuff.svg'),
                            ('random', 'Fig5_HCP_AngAgr_Shuff.svg')):
    HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP', 'predacc', rep_dir, vers=vers_name)
    cog_all = np.flip(np.flip(HCP_res['acc_landscape'][:,:,b].T),1)
    fig,ax = plt.subplots(figsize=(4,3))
    orsp.plot_scatter(6,cog_all,scan_duration,HCP_subcolors,58,ax)
    lgd = plt.legend(HCP_lgd, Y, markerscale=2, ncol=2, labelspacing=0.1,
                     handletextpad=0.05, columnspacing=0.7, frameon=False, fontsize=10)
    orsp.format_scatter_plot(
        'Total scan duration (# training participants \nx scan time per participant)',
        'Prediction accuracy (r)', ax, fontsz=10)
    fig.savefig(os.path.join(img_dir, svg_name), bbox_inches='tight')

In [None]:
# COD of phenotype 43 for the 'full' and then the 'random' version:
# log fit at columns 10-3 and 29-3, theoretical fit at column 29-3
b=43
for vers_name in ('full', 'random'):
    w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP','predacc',rep_dir,vers=vers_name)
    print(loss_log_all[b,10-3], loss_log_all[b,29-3], loss_pa_all[b,29-3])

In [None]:
#################################################
# Acc against fit (ABCD)
#################################################
c_vers = 'full'
# load ABCD data
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','predacc',rep_dir, vers=c_vers)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD', 'predacc', rep_dir, vers=c_vers)
# score classifications (positions written 1-based, converted to 0-based indices)
bpass = []
cog_ind = np.array([9,10,11,12,13,14,15,16,17,18,31,32,33,34,35,36,37])-1
per_ind = np.array([19,20,21,22,23,24,25,26,27])-1
mh_ind = np.array([1,2,3,4,5,6,7,8,28,29,30])-1
acc_all = np.array([])
log_loss = np.array([])
pa_loss = np.array([])
log_limit = 10
pa_limit = 10
# get scores with looser threshold
for b_idx in range(0,37):
    behav = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,b_idx].T),1)
    # Keep a score when fewer than 10 entries of its landscape are negative.
    # NOTE(review): this is an absolute count; it only matches the intended
    # "10% negative predictions" rule for a landscape of about 90 entries.
    if (np.sum(behav.flatten() < 0) < 10):
        bpass.append(b_idx)
    acc_all = np.append(acc_all, behav[8,9])
    log_loss = np.append(log_loss, loss_log_all[b_idx,log_limit-3])
    pa_loss = np.append(pa_loss, loss_pa_all[b_idx,pa_limit-3])
# restrict every domain to the scores that passed the threshold
cog_ind = cog_ind[np.isin(cog_ind, bpass)]
per_ind = per_ind[np.isin(per_ind, bpass)]
mh_ind = mh_ind[np.isin(mh_ind, bpass)]
# final list of scores for reference
print("Scores < 10% negative:", bpass)

# log fit at 20m
# rank correlation between goodness of fit and accuracy (computed once, reused for FDR)
log_corr = scipy.stats.spearmanr(log_loss[bpass],acc_all[bpass])
print("Log Fit:", log_corr)
fig,ax = plt.subplots(figsize=(4,3))
plt.scatter(acc_all[cog_ind],log_loss[cog_ind], c='orangered')
plt.scatter(acc_all[per_ind],log_loss[per_ind], c='darkslateblue')
plt.scatter(acc_all[mh_ind],log_loss[mh_ind], c='darkgray')
# The font size is set inside `prop`: matplotlib ignores the separate `fontsize`
# argument whenever `prop` is given.
plt.legend(['Cognition', 'Personality', 'Mental Health'], loc="lower right", frameon=False,
           prop={'family' : 'Arial', 'size' : 10}, labelspacing=0.1, handletextpad=0.05)
orsp.format_scatter_plot('Prediction accuracy (r)','Goodness of fit (COD)',ax, fontsz=10)
# plot trend line: the fixed curve 1 - exp(-10 * accuracy) over the observed range.
# (A degree-4 polynomial fit used to be computed here, but its result was
# overwritten before plotting, so it has been removed.)
y = log_loss[bpass]
x = acc_all[bpass]
x_new = np.linspace(np.min(x), np.max(x), 50)
y_new = 1 - np.exp(-10*x_new)
plt.plot(x_new, y_new, color='k',linestyle='dashed')
fig.savefig(os.path.join(img_dir,'Fig4_ABCD_AccvsFit_Log.svg'), bbox_inches='tight')
# save pvals for FDR (p_list is created in the violin-plot cell)
c,p = log_corr
p_list.append(p)

# Nichols fit at 20m
theor_corr = scipy.stats.spearmanr(pa_loss[bpass],acc_all[bpass])
print("Theoretical Fit:", theor_corr)
fig,ax = plt.subplots(figsize=(4,3))
plt.scatter(acc_all[cog_ind],pa_loss[cog_ind], c='orangered')
plt.scatter(acc_all[per_ind],pa_loss[per_ind], c='darkslateblue')
plt.scatter(acc_all[mh_ind],pa_loss[mh_ind], c='darkgray')
plt.legend(['Cognition', 'Personality', 'Mental Health'], loc="lower right", frameon=False,
           prop={'family' : 'Arial', 'size' : 10}, labelspacing=0.1, handletextpad=0.05)
orsp.format_scatter_plot('Prediction accuracy (r)','Goodness of fit (COD)',ax, fontsz=10)
# plot trend line (same fixed curve as in the log-fit panel)
x_new = np.linspace(np.min(x), np.max(x), 50)
y_new = 1 - np.exp(-10*x_new)
plt.plot(x_new, y_new, color='k',linestyle='dashed')
fig.savefig(os.path.join(img_dir,'Fig4_ABCD_AccvsFit_Theoretical.svg'), bbox_inches='tight')
# save pvals for FDR
c,p = theor_corr
p_list.append(p)

#################################################
# Improvement to fit after shuffling (ABCD)
#################################################
# load ABCD data
w_r_f,w_pa_f,zk_f,loss_r_f,loss_pa_f,loss_log_f = orsp.load_fits('ABCD','predacc',rep_dir,vers='full')
w_r_r,w_pa_r,zk_r,loss_r_r,loss_pa_r,loss_log_r = orsp.load_fits('ABCD','predacc',rep_dir,vers='random')

# COD of the selected behaviors; both fits are read at column `pa_limit-3`
fit_col = pa_limit-3
log_orig, log_rand = loss_log_f[bpass,fit_col], loss_log_r[bpass,fit_col]
pa_orig, pa_rand = loss_pa_f[bpass,fit_col], loss_pa_r[bpass,fit_col]

# paired t test for selected behaviors (computed once, reused for the FDR list)
log_p = scipy.stats.ttest_rel(log_orig, log_rand)
print("Log:", log_p)
theor_p = scipy.stats.ttest_rel(pa_orig, pa_rand)
print("Theoretical:", theor_p)

# long-format table for the box plot, built with a single concat
full_df = pd.concat([
    pd.DataFrame({'COD': log_orig, 'Domain': 'Log Fit', 'Class': 'Original'}),
    pd.DataFrame({'COD': log_rand, 'Domain': 'Log Fit', 'Class': 'Randomized'}),
    pd.DataFrame({'COD': pa_orig, 'Domain': 'Theoretical Fit', 'Class': 'Original'}),
    pd.DataFrame({'COD': pa_rand, 'Domain': 'Theoretical Fit', 'Class': 'Randomized'}),
])
# plot box plot
fig,ax = plt.subplots(figsize=(4, 3))
sns.boxplot(data=full_df, x="Domain", y="COD", hue="Class",palette="Reds",orient='v')
plt.legend(frameon=False, fontsize=10,  prop={'family' : 'Arial'}, bbox_to_anchor=(0.68,0.23))
orsp.format_scatter_plot('','Goodness of fit (COD)',ax, fontsz=10)
fig.savefig(os.path.join(img_dir,'Fig5_ABCD_origvsrandom.svg'), bbox_inches='tight')
# mean COD (original, randomized) for each fit
print(np.mean(log_orig), np.mean(log_rand))
print(np.mean(pa_orig), np.mean(pa_rand))
# save pvals for FDR
p_list.append(log_p[1])
p_list.append(theor_p[1])

In [None]:
# Mean goodness of fit (COD) of the ABCD prediction-accuracy fits for three score sets
w_r_f,w_pa_f,zk_f,loss_r_f,loss_pa_f,loss_log_f = orsp.load_fits('ABCD','predacc',rep_dir,vers='full')
timelimit = 10
fit_col = timelimit-3

# NOTE(review): index 37 is appended to the strict / log sets here, while the
# ABCD correlation cell in this notebook uses b = 36 -- confirm which score is intended.
score_sets = [("(loose threshold) log:", bpass),
              ("(strict threshold) log:", np.append(ABCD_abv01_ind, 37)),
              ("(log ind) log:", np.append(ABCD_log_ind, 37))]
for set_tag, b_set in score_sets:
    mean_log = np.mean(loss_log_f[b_set,fit_col])
    mean_theor = np.mean(loss_pa_f[b_set,fit_col])
    print(set_tag, mean_log, "theoretical:", mean_theor)

In [None]:
# Mean goodness of fit (COD) of the ABCD 'tstats' fits for three score sets;
# the "theoretical" value is read from loss_r_f here (not loss_pa_f).
w_r_f,w_pa_f,zk_f,loss_r_f,loss_pa_f,loss_log_f = orsp.load_fits('ABCD','tstats',rep_dir,vers='full')
timelimit = 5
fit_col = timelimit-3

# NOTE(review): index 37 is appended to the strict / log sets here, while the
# ABCD correlation cell in this notebook uses b = 36 -- confirm which score is intended.
score_sets = [("(loose threshold) log:", bpass),
              ("(strict threshold) log:", np.append(ABCD_abv01_ind, 37)),
              ("(log ind) log:", np.append(ABCD_log_ind, 37))]
for set_tag, b_set in score_sets:
    mean_log = np.mean(loss_log_f[b_set,fit_col])
    mean_theor = np.mean(loss_r_f[b_set,fit_col])
    print(set_tag, mean_log, "theoretical:", mean_theor)

In [None]:
#################################################
# Acc against fit (HCP)
#################################################
c_vers = 'full'
# load HCP data
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','predacc',rep_dir, vers=c_vers)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP', 'predacc', rep_dir, vers=c_vers)
# score classifications, given as 1-based score numbers and converted to 0-based indices
bpass = []
cog_ind = np.array([1,2,3,4,5,6,7,9,10,11,12,13,14,25,26,27,28,29,30,60])-1
per_ind = np.array([8,31,32,33,34,35])-1
emo_ind = np.array([24,36,37,38,39,40,41,42,43,44,45,46,47])-1
phy_ind = np.array([15,16,17,18,19,20,21,22,23])-1
wb_ind = np.array([48,49,50,51,52,53,54,55,56,57,58])-1
# the loss arrays are indexed with column `limit-3` below
log_limit = 10
pa_limit = 29
# get scores with looser threshold
acc_vals, log_vals, pa_vals = [], [], []
for b_idx in range(0,60):
    behav = np.flip(np.flip(HCP_res['acc_landscape'][:,:,b_idx].T),1)
    # remove scores with > 10% negative predictions; index 58 is always excluded
    if (np.sum(behav.flatten() < 0) < 18) and b_idx != 58:
        bpass.append(b_idx)
    # accuracy at entry [5,28] of the flipped landscape
    # (presumably largest N / longest T -- TODO confirm against orsp.load_data)
    acc_vals.append(behav[5,28])
    log_vals.append(loss_log_all[b_idx,log_limit-3])
    pa_vals.append(loss_pa_all[b_idx,pa_limit-3])
acc_all = np.array(acc_vals, dtype=float)
log_loss = np.array(log_vals, dtype=float)
pa_loss = np.array(pa_vals, dtype=float)
# keep only the scores of each domain that passed the threshold
cog_ind = cog_ind[np.isin(cog_ind, bpass)]
per_ind = per_ind[np.isin(per_ind, bpass)]
emo_ind = emo_ind[np.isin(emo_ind, bpass)]
phy_ind = phy_ind[np.isin(phy_ind, bpass)]
wb_ind = wb_ind[np.isin(wb_ind, bpass)]
# final list of scores for reference
print("Scores < 10% negative:", bpass)

# scatter colour per domain; order must match the legend labels
domain_groups = [(cog_ind, 'orangered'), (per_ind, 'darkslateblue'), (emo_ind, 'forestgreen'),
                 (phy_ind, 'goldenrod'), (wb_ind, 'deeppink')]
domain_labels = ['Cognition','Personality','Emotion', 'Physical','Well-being']

# trend lines: fixed reference curves drawn over the range of passing accuracies
# (the polynomial fits that used to be computed here were overwritten before use)
x = acc_all[bpass]
x_new = np.linspace(np.min(x), np.max(x), 50)

# log fit at 20m, then Nichols (theoretical) fit at 58m
fit_panels = [("Log Fit:", log_loss, 0.95 - 1.3*np.exp(-14.5*x_new), 'Fig4_HCP_AccvsFit_Log.svg'),
              ("Theoretical Fit:", pa_loss, 1 - np.exp(-10*x_new), 'Fig4_HCP_AccvsFit_Theoretical.svg')]
for fit_tag, fit_loss, y_new, fig_name in fit_panels:
    # rank correlation between goodness of fit and accuracy (computed once)
    fit_res = scipy.stats.spearmanr(fit_loss[bpass],acc_all[bpass])
    print(fit_tag, fit_res)
    fig,ax = plt.subplots(figsize=(4,3))
    for dom_ind, dom_color in domain_groups:
        plt.scatter(acc_all[dom_ind],fit_loss[dom_ind], c=dom_color)
    plt.legend(domain_labels, prop={'family' : 'Arial'},
               loc="lower right", labelspacing=0.1,handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Prediction accuracy (r)','Goodness of fit (COD)',ax, fontsz=10)
    plt.plot(x_new, y_new, color='k',linestyle='dashed')
    fig.savefig(os.path.join(img_dir,fig_name), bbox_inches='tight')
    # save pvals for FDR
    c,p = fit_res
    p_list.append(p)


#################################################
# Improvement to fit after shuffling (HCP)
#################################################
# load HCP data
w_r_f,w_pa_f,zk_f,loss_r_f,loss_pa_f,loss_log_f = orsp.load_fits('HCP','predacc',rep_dir,vers='full')
w_r_r,w_pa_r,zk_r,loss_r_r,loss_pa_r,loss_log_r = orsp.load_fits('HCP','predacc',rep_dir,vers='random')

# use all 58 min for comparison: both fits are read at column `pa_limit-3`
fit_col = pa_limit-3
log_orig, log_rand = loss_log_f[bpass,fit_col], loss_log_r[bpass,fit_col]
pa_orig, pa_rand = loss_pa_f[bpass,fit_col], loss_pa_r[bpass,fit_col]

# paired t test for selected behaviors (computed once, reused for the FDR list)
log_p = scipy.stats.ttest_rel(log_orig, log_rand)
print("Log:", log_p)
theor_p = scipy.stats.ttest_rel(pa_orig, pa_rand)
print("Theoretical:", theor_p)

# long-format table for the box plot, built with a single concat
full_df = pd.concat([
    pd.DataFrame({'COD': log_orig, 'Domain': 'Log Fit', 'Class': 'Original'}),
    pd.DataFrame({'COD': log_rand, 'Domain': 'Log Fit', 'Class': 'Randomized'}),
    pd.DataFrame({'COD': pa_orig, 'Domain': 'Theoretical Fit', 'Class': 'Original'}),
    pd.DataFrame({'COD': pa_rand, 'Domain': 'Theoretical Fit', 'Class': 'Randomized'}),
])
# plot box plot
fig,ax = plt.subplots(figsize=(4, 3))
sns.boxplot(data=full_df, x="Domain", y="COD", hue="Class",palette="Blues",orient='v')
plt.legend(frameon=False,  prop={'family' : 'Arial'}, fontsize=10)
orsp.format_scatter_plot('','Goodness of fit (COD)',ax,fontsz=10)
fig.savefig(os.path.join(img_dir,'Fig5_HCP_origvsrandom.svg'), bbox_inches='tight')
# mean COD (original, randomized) for each fit
print(np.mean(log_orig), np.mean(log_rand))
print(np.mean(pa_orig), np.mean(pa_rand))
# save pvals for FDR
p_list.append(log_p[1])
p_list.append(theor_p[1])

In [None]:
# Mean goodness of fit (COD) of the HCP prediction-accuracy fits for three score sets
w_r_f,w_pa_f,zk_f,loss_r_f,loss_pa_f,loss_log_f = orsp.load_fits('HCP','predacc',rep_dir,vers='full')
timelimit = 10
fit_col = timelimit-3

# the strict / log index sets are each extended with score index 59
score_sets = [("(loose threshold) log:", bpass),
              ("(strict threshold) log:", np.append(HCP_abv01_ind, 59)),
              ("(log ind) log:", np.append(HCP_log_ind, 59))]
for set_tag, b_set in score_sets:
    mean_log = np.mean(loss_log_f[b_set,fit_col])
    mean_theor = np.mean(loss_pa_f[b_set,fit_col])
    print(set_tag, mean_log, "theoretical:", mean_theor)

In [None]:
# Mean goodness of fit (COD) of the HCP 'Haufe' fits for three score sets;
# the "theoretical" value is read from loss_r_f here (not loss_pa_f).
# NOTE(review): this loads vers='random' into the *_f names, whereas the ABCD
# counterpart loads 'tstats' with vers='full' -- confirm this is intended.
w_r_f,w_pa_f,zk_f,loss_r_f,loss_pa_f,loss_log_f = orsp.load_fits('HCP','Haufe',rep_dir,vers='random')
timelimit = 5
fit_col = timelimit-3

# the strict / log index sets are each extended with score index 59
score_sets = [("(loose threshold) log:", bpass),
              ("(strict threshold) log:", np.append(HCP_abv01_ind, 59)),
              ("(log ind) log:", np.append(HCP_log_ind, 59))]
for set_tag, b_set in score_sets:
    mean_log = np.mean(loss_log_f[b_set,fit_col])
    mean_theor = np.mean(loss_r_f[b_set,fit_col])
    print(set_tag, mean_log, "theoretical:", mean_theor)

# Fig 6: Theoretical N vs T improvement

In [None]:
#################################################
# Tom's theoretical equations without overhead cost
#################################################
theoretical_extent = [1,120,10,10000]
# grid of sample sizes N (spaced evenly in log units) and scan times T
theoretical_Y = np.exp(np.linspace(2, 14, num=10000))
theoretical_X = np.linspace(1, 200, num=1000)

ABCD_theor = 0
HCP_theor = 0
num_scores = 0
training_factor = 0.9

# per-dataset index aliases (kept as notebook-level names, as before)
HCP_behav_ind = HCP_rs_log_ind
SINGER_behav_ind = SINGER_log_ind
TCP_behav_ind = TCP_log_ind
MDD_behav_ind = MDD_log_ind
ADNI_behav_ind = ADNI_log_ind

# (dataset, extra load_fits keyword arguments, score indices to average over)
fit_sources = [
    ('HCP', {}, HCP_behav_ind),
    ('ABCD', {}, ABCD_rs_log_ind),
    ('SINGER', {}, SINGER_behav_ind),
    ('TCP', {}, TCP_behav_ind),
    ('MDD', {}, MDD_behav_ind),
    ('ADNI', {}, ADNI_behav_ind),
    ('ABCD', {'vers': 'full_MID'}, ABCD_MID_log_ind),
    ('ABCD', {'vers': 'full_NBACK'}, ABCD_NBACK_log_ind),
    ('ABCD', {'vers': 'full_SST'}, ABCD_SST_log_ind),
]
# previously reassigned once per ABCD variant; this is its final value
ABCD_behav_ind = ABCD_SST_log_ind


def _frac_max_acc(w, n_train, t):
    """Evaluate sqrt(1 / (1 + w[1]/n_train + w[2]/(n_train*t))).

    `w` is one row of fitted weights (only w[1] and w[2] are used); `n_train`
    and `t` may be scalars or broadcastable arrays.
    """
    return np.sqrt(1/(1 + (w[1]/n_train) + (w[2]/(n_train*t))))


# training sample size for every N on the grid, as a column for broadcasting against T
training_y = np.floor(training_factor*theoretical_Y)[:, np.newaxis]
theor_vals = np.zeros((len(theoretical_Y), len(theoretical_X)))

for dataset, load_kwargs, behav_ind in fit_sources:
    w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits(
        dataset, 'predacc', rep_dir, **load_kwargs)
    for b in behav_ind:
        # Tom's equation fit to full duration
        w = w_pa_all[b,-1,:]
        # whole N x T grid in one broadcasted evaluation
        theor_vals += _frac_max_acc(w, training_y, theoretical_X)
        # single-point values at (N=2565, T=20) and (N=792, T=57.6)
        ABCD_theor += _frac_max_acc(w, 2565, 20)
        HCP_theor += _frac_max_acc(w, 792, 57.6)
        num_scores += 1

# average over all scores
theor_vals = theor_vals / num_scores
theor_vals = np.round(theor_vals,2) + 0.01

ABCD_theor = ABCD_theor / num_scores
HCP_theor = HCP_theor / num_scores

print(num_scores)
print("ABCD:", ABCD_theor, "HCP:", HCP_theor)

# contour levels and the manually chosen positions of their inline labels
con_lines = [0.4,0.5,0.6,
             0.7,0.8,0.9,1]

manual_loc = [(5,2),(10,3.5),(10,4.6),
              (10,5.2),(10,6),(10,7),
              (10,11)]

## plot contour plot
fig,ax = plt.subplots(figsize=(8, 4))
# y extent is expressed in natural-log units of the participant grid
extent=[0-0.5, theoretical_X[:-1].max(),0-0.5, np.log(theoretical_Y[:-1]).max()]
theor_vals_flipped = np.flip(np.flip(theor_vals),1)
contours = plt.contour(theor_vals_flipped[::-1], con_lines,extent=extent, colors='black')
plt.clabel(contours, inline=True, manual=manual_loc, fontsize=12)
# overlay colours
plt.imshow(theor_vals_flipped[::-1], extent=extent, origin='lower',
            aspect='auto', cmap='rainbow', alpha=0.5)
ax.set_yscale("log")
ax.yaxis.set_major_formatter(ScalarFormatter())
ax.minorticks_off()
# tick positions in log units; labels are exp(position) rounded to the nearest 10
custom_y_ticks = [2.3, 3.0, 3.401, 3.912, 4.605, 5.521,
                  6.2146, 6.908, 8.294, 9.6803, 11.1562,
                  12.89922]
ax.set_yticks(np.array(custom_y_ticks),
                np.round(np.exp(custom_y_ticks),-1).astype(int),
                fontsize=11)
# replace the second automatic x tick with 2
xtic = ax.get_xticks()
xtic[1] = 2
# text properties passed to set_xticks only apply when labels are given,
# so the label size is set through tick_params instead
ax.set_xticks(xtic)
ax.tick_params(axis='x', labelsize=11)
ax.set_ylim([2.3, 14])
ax.set_xlim([2, 200])
plt.xlabel('Scan Time per Participant (mins)', fontsize=11)
plt.ylabel('# Participants', fontsize=11)
cbar = plt.colorbar(pad=0.02)

prefix = os.path.join(img_dir,'Fig6_Theoretical_PredAcc_Graph_')
fig.savefig(prefix + 'contour.svg', bbox_inches='tight')

In [None]:
#################################################
# Calculate correlation to ABCD and HCP contour plot in Fig 1
#################################################
theor_vals_unravel = np.flip(np.flip(theor_vals_flipped,1))

# N
theoretical_Y = np.exp(np.linspace(2, 14, num=10000))
# T
theoretical_X = np.linspace(1, 200, num=1000)


def _pair_theory_with_landscape(behav, T_vals, N_vals):
    """Pair each observed landscape entry with the nearest theoretical grid value.

    For every (T, N) combination the closest points on theoretical_X and
    theoretical_Y are looked up in theor_vals_unravel. Returns two lists,
    (actual, predicted), ordered T-major.
    """
    actual, pred = [], []
    for t_idx, T_val in enumerate(T_vals):
        T_closest = np.argmin(np.abs(theoretical_X - T_val))
        for n_idx, N_val in enumerate(N_vals):
            N_closest = np.argmin(np.abs(theoretical_Y - N_val))
            pred.append(theor_vals_unravel[N_closest,T_closest])
            actual.append(behav[n_idx,t_idx])
    return actual, pred


def _plot_theory_vs_actual(actual, pred, xy_line, xlabel):
    """Scatter predicted vs. actual values with a regression line and the Pearson r.

    The correlation is computed from the data passed in, so each figure shows
    its own r. Returns (fig, ax, linregress result, correlation matrix).
    """
    fig,ax = plt.subplots(figsize=(6, 6))
    plt.scatter(actual,pred)
    res = scipy.stats.linregress(actual,pred)
    plt.plot(xy_line , res.intercept + res.slope*xy_line , 'k', linestyle='--')
    orsp.format_scatter_plot(xlabel, 'Predicted Frac of Max Accuracy', ax)
    corr_val = np.corrcoef(actual,pred)
    ax.text(0.8,0.1,'r = ' + str(np.round(corr_val[0][1],2)), transform=ax.transAxes, size=12)
    return fig, ax, res, corr_val


# load HCP data
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP', 'predacc', rep_dir)
b = 59
behav = np.flip(np.flip(HCP_res['acc_landscape'][:,:,b].T),1)
actual_sav, pred_sav = _pair_theory_with_landscape(behav, X, Y)
fig, ax, res, corr_val = _plot_theory_vs_actual(
    actual_sav, pred_sav, np.linspace(0.2,0.55,100), 'HCP Prediction Accuracy (r)')

# load ABCD data
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD', 'predacc', rep_dir)
b = 36
behav = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,b].T),1)
actual_sav, pred_sav = _pair_theory_with_landscape(behav, X, Y)
# corr_val is recomputed here; previously the ABCD figure reused the HCP value
fig, ax, res, corr_val = _plot_theory_vs_actual(
    actual_sav, pred_sav, np.linspace(0.2,0.5,100), 'ABCD Prediction Accuracy (r)')

In [None]:
def calc_avgAcc(N, T, trainingsize = 0.9, rd = False):
    """Average the theoretical accuracy curve over all selected scores of all datasets.

    For every selected score, the last-duration weights ``w_pa_all[b,-1,:]`` are
    plugged into ``sqrt(1 / (1 + w[1]/n + w[2]/(n*T)))`` with
    ``n = trainingsize * N``.

    Parameters
    ----------
    N : scalar or array
        Total sample size(s).
    T : scalar or array
        Scan time(s); must broadcast against ``N``.
    trainingsize : float, optional
        Fraction of ``N`` used for training.
    rd : bool, optional
        If True, round the training sample size down to an integer.

    Returns
    -------
    mean_acc : scalar or array
        Mean over scores (axis 0) of the per-score values.
    c_int : scalar or array
        ``1.96 * std / sqrt(n_scores)``, taken over scores (axis 0) so that it
        has the same shape as ``mean_acc``. For scalar ``N`` and ``T`` this is
        identical to the pooled standard deviation.
    """
    training_N = trainingsize * N
    if rd:
        training_N = np.floor(training_N)

    # (dataset, extra load_fits keyword arguments, score indices to average over)
    fit_sources = [
        ('HCP', {}, HCP_rs_log_ind),
        ('ABCD', {}, ABCD_rs_log_ind),
        ('SINGER', {}, SINGER_log_ind),
        ('TCP', {}, TCP_log_ind),
        ('MDD', {}, MDD_log_ind),
        ('ADNI', {}, ADNI_log_ind),
        ('ABCD', {'vers': 'full_MID'}, ABCD_MID_log_ind),
        ('ABCD', {'vers': 'full_NBACK'}, ABCD_NBACK_log_ind),
        ('ABCD', {'vers': 'full_SST'}, ABCD_SST_log_ind),
    ]

    theor_vals = []
    for dataset, load_kwargs, behav_ind in fit_sources:
        w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits(
            dataset, 'predacc', rep_dir, **load_kwargs)
        for b in behav_ind:
            # Tom's equation fit to full duration
            w = w_pa_all[b,-1,:]
            b_acc = np.sqrt(1/(1 + (w[1]/training_N) + (w[2]/(training_N*T))))
            theor_vals.append(b_acc)

    # get confidence interval across scores (same axis as the mean below)
    c_int = 1.96 * np.std(theor_vals,0)/np.sqrt((len(theor_vals)))

    return np.mean(theor_vals,0),c_int

#################################################
# Tom's theoretical equations
#################################################
budgets = [10000000, 1000000, 100000]
# scanner cost per hour converted to cost per minute
scanner_costs = np.array([500, 1000, 2000]) / 60
recruitment_costs = [0, 500, 1000, 2000, 5000]
max_T = 200
theoretical_X = np.linspace(1, max_T, num=1000)
y_bottom_lim = [0.65, 0.25, 0.1]
perc = 1
fig = plt.figure(figsize=(8,6))
gspec = gridspec.GridSpec(3,3,hspace=0.22,wspace=0.28)

# (overhead cost per participant, line colour, tag for the printed report or None)
# bounds are printed for the $500, $2000 and $5000 settings only
overhead_settings = [
    (recruitment_costs[1], 'blue', "P$500"),
    (recruitment_costs[2], 'green', None),
    (recruitment_costs[3], 'orange', "P$2k"),
    (recruitment_costs[4], 'red', "P$5k"),
]

# one panel per (budget row, scanner cost column)
for curr_y, budget in enumerate(budgets):
    for curr_x, scanner_cost in enumerate(scanner_costs):

        ax = plt.subplot(gspec[curr_y,curr_x])

        curve_handles = []
        for overhead, c, report_tag in overhead_settings:
            cost_per_participant = theoretical_X*scanner_cost + overhead
            # smooth curve (fractional sample size)
            remaining_Y = budget / cost_per_participant
            final_predacc, c_int = calc_avgAcc(remaining_Y,theoretical_X)
            curve, = ax.plot(theoretical_X,final_predacc, color=c, zorder=0)
            curve_handles.append(curve)
            # limits using rounded down sample size; rd=True is now applied to
            # every overhead setting (previously only the $500 curve used it)
            remaining_Y = np.floor(budget / cost_per_participant)
            final_predacc, c_int = calc_avgAcc(remaining_Y,theoretical_X,rd=True)
            orsp.plot_max_range(final_predacc,perc,theoretical_X,c,ax)
            if report_tag is not None:
                lb,rb = orsp.calc_percOfmax(final_predacc,perc)
                best = np.argmax(final_predacc)
                print("B$"+str(budget)+", S"+ str(np.round(scanner_cost*60)) + ", " + report_tag)
                print("Left bound: N=", remaining_Y[lb], " T=", theoretical_X[lb])
                print("Optima: N=", remaining_Y[best], " T=", theoretical_X[best])
                print("Right bound: N=", remaining_Y[rb], " T=", theoretical_X[rb])
        l1,l2,l3,l4 = curve_handles

        # modify plotting settings
        ax.set_ylabel('')
        ax.set_xlabel('')
        ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
        ax.set_ylim([y_bottom_lim[curr_y],1.03])
        ax.set_xlim([0,max_T])
        ax.spines[['right', 'top']].set_visible(False)

# legend uses the line handles of the last panel drawn
plt.legend(handles=[l1,l2,l3,l4],
           labels=['$500', '$1000', '$2000', '$5000'],
           title="Overhead Cost Per participant",title_fontproperties={'family' : 'Arial', 'weight':'bold', 'size': 11},
           frameon=False, bbox_to_anchor=[0.5, -0.5], ncol=5)

fig.savefig(os.path.join(img_dir,'Fig6_BudgetvsAcc.svg'), bbox_inches='tight')

# Supplementary xlsx of prediction accuracies

In [None]:
## Save prediction accuracy results
def _save_acc_landscapes(res, xlsx_path, sheets, row_labels, col_labels):
    """Write one sheet per score into a single workbook.

    `sheets` is an iterable of (score index, sheet name). Each sheet holds the
    transposed slice res['acc_landscape'][:,:,b] with the given row / column
    labels. The workbook is opened once in write mode, so any existing file at
    `xlsx_path` is replaced and no append-capable Excel engine is required.
    """
    with pd.ExcelWriter(xlsx_path) as writer:
        for b, sheet_name in sheets:
            df = pd.DataFrame(res['acc_landscape'][:,:,b].T)
            df.index = row_labels
            df.columns = col_labels
            df.to_excel(writer, sheet_name = sheet_name)


# load HCP data
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP', 'predacc', rep_dir)
scores_names = np.append(HCP_scores, ['Dissatisfaction Factor Score',
                         'Cognition Factor Score', 'Emotion Factor Score'])
xlsx_path = os.path.join(img_dir,'CBIG_ME_HCP_PredAcc.xlsx')
# scores 0..57 are written as sheets 1..58; score 59 is written as sheet 59
# (score index 58 is skipped)
hcp_sheets = [(b, 'HCP score '+str(59 if b == 59 else b+1))
              for b in list(range(0,58)) + [59]]
_save_acc_landscapes(HCP_res, xlsx_path, hcp_sheets,
                     ['N=700','N=600','N=500','N=400','N=300','N=200'],
                     ['T=%dmin' % t for t in range(2,59,2)])

# load ABCD rest data
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD', 'predacc', rep_dir)
scores_names = np.append(ABCD_scores, ['Cognition Factor Score',
                         'Mental Health Factor Score', 'Personality Factor Score'])
xlsx_path = os.path.join(img_dir,'CBIG_ME_ABCD_rest_PredAcc.xlsx')
abcd_sheets = [(b, 'ABCD score '+str(b+1)) for b in range(0,37)]
_save_acc_landscapes(ABCD_res, xlsx_path, abcd_sheets,
                     ['N=1800','N=1600','N=1400','N=1200','N=1000',
                      'N=800','N=600','N=400','N=200'],
                     ['T=%dmin' % t for t in range(2,21,2)])

# load SINGER data
SINGER_img_dir,SINGER_res,X,Y,SINGER_extent,scan_duration = orsp.load_data('SINGER', 'predacc', rep_dir)
scores_names = SINGER_scores
xlsx_path = os.path.join(img_dir,'CBIG_ME_SINGER_PredAcc.xlsx')
singer_sheets = [(b, 'SINGER score '+str(b+1)) for b in range(0,19)]
_save_acc_landscapes(SINGER_res, xlsx_path, singer_sheets,
                     ['N=580','N=500','N=400','N=300','N=200','N=100'],
                     ['T=%dmin' % t for t in range(2,11)])
    
# load TCP data
TCP_img_dir,TCP_res,X,Y,TCP_extent,scan_duration = orsp.load_data('TCP', 'predacc', rep_dir)
scores_names = TCP_scores
xlsx_path = os.path.join(img_dir,'CBIG_ME_TCP_PredAcc.xlsx')
mode = 'w'
for b in range(0,19):
    behav = TCP_res['acc_landscape'][:,:,b].T
    df = pd.DataFrame(behav)
    df.index = ['N=175','N=150','N=125','N=100',
                'N=75','N=50']
    df.columns = ['T=2min','T=4min','T=6min','T=8min','T=10min',
                       'T=12min','T=14min','T=16min','T=18min',
                       'T=20min','T=22min','T=24min','T=26min',]
    with pd.ExcelWriter(xlsx_path,mode=mode) as writer: 
        df.to_excel(writer, sheet_name = 'TCP score '+str(b+1))
        mode = 'a'
    create_file = 0
    
# load MDD data
MDD_img_dir,MDD_res,X,Y,MDD_extent,scan_duration = orsp.load_data('MDD', 'predacc', rep_dir)
scores_names = SINGER_scores
xlsx_path = os.path.join(img_dir,'CBIG_ME_MDD_PredAcc.xlsx')
mode = 'w'
for b in range(0,20):
    behav = MDD_res['acc_landscape'][:,:,b].T
    df = pd.DataFrame(behav)
    df.index = ['N=260','N=250','N=225','N=200',
                'N=175','N=150','N=125','N=100',
                'N=75','N=50']
    df.columns = ['T=3min','T=5min','T=7min','T=9min','T=11min',
                       'T=13min','T=15min','T=17min','T=19min',
                       'T=21min','T=23min']
    with pd.ExcelWriter(xlsx_path,mode=mode) as writer: 
        df.to_excel(writer, sheet_name = 'MDD score '+str(b+1))
        mode = 'a'
    create_file = 0
    
# load ADNI data
ADNI_img_dir,ADNI_res,X,Y,ADNI_extent,scan_duration = orsp.load_data('ADNI', 'predacc', rep_dir)
scores_names = ADNI_scores
xlsx_path = os.path.join(img_dir,'CBIG_ME_ADNI_PredAcc.xlsx')
mode = 'w'
for b in np.append(range(0,1),range(3,7)):
    behav = ADNI_res['acc_landscape'][:,:,b].T
    df = pd.DataFrame(behav)
    df.index = ['N=500','N=400','N=300','N=200',
                'N=100']
    df.columns = ['T=2min','T=3min','T=4min','T=5min','T=6min',
                       'T=7min','T=8min','T=9min']
    with pd.ExcelWriter(xlsx_path,mode=mode) as writer: 
        if b < 2:
            df.to_excel(writer, sheet_name = 'ADNI score '+str(b))
        else:
            df.to_excel(writer, sheet_name = 'ADNI score '+str(b+1))
        mode = 'a'
    create_file = 0

# load ABCD MID data
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD', 'predacc', rep_dir,vers='full_MID')
scores_names = np.append(ABCD_scores, ['Cognition Factor Score',
                         'Mental Health Factor Score', 'Personality Factor Score'])
xlsx_path = os.path.join(img_dir,'CBIG_ME_ABCD_MID_PredAcc.xlsx')
mode = 'w'
for b in np.append(range(0,36),36):
    behav = ABCD_res['acc_landscape'][:,:,b].T
    df = pd.DataFrame(behav)
    df.index = ['N=1600','N=1400','N=1200','N=1000',
                'N=800','N=600','N=400','N=200']
    df.columns = ['T=2min','T=3min','T=4min','T=5min','T=6min',
                       'T=7min','T=8min','T=9min','T=10min','T=11min']
    with pd.ExcelWriter(xlsx_path,mode=mode) as writer: 
        df.to_excel(writer, sheet_name = 'ABCD score '+str(b+1))
        mode = 'a'
        
# load ABCD NBACK data
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD', 'predacc', rep_dir,vers='full_NBACK')
scores_names = np.append(ABCD_scores, ['Cognition Factor Score',
                         'Mental Health Factor Score', 'Personality Factor Score'])
xlsx_path = os.path.join(img_dir,'CBIG_ME_ABCD_NBACK_PredAcc.xlsx')
mode = 'w'
for b in np.append(range(0,36),36):
    behav = ABCD_res['acc_landscape'][:,:,b].T
    df = pd.DataFrame(behav)
    df.index = ['N=1600','N=1400','N=1200','N=1000',
                'N=800','N=600','N=400','N=200']
    df.columns = ['T=2min','T=3min','T=4min','T=5min','T=6min',
                       'T=7min','T=8min','T=9min','T=10min']
    with pd.ExcelWriter(xlsx_path,mode=mode) as writer: 
        df.to_excel(writer, sheet_name = 'ABCD score '+str(b+1))
        mode = 'a'
        
# load ABCD SST data
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD', 'predacc', rep_dir,vers='full_SST')
scores_names = np.append(ABCD_scores, ['Cognition Factor Score',
                         'Mental Health Factor Score', 'Personality Factor Score'])
xlsx_path = os.path.join(img_dir,'CBIG_ME_ABCD_SST_PredAcc.xlsx')
mode = 'w'
for b in np.append(range(0,36),36):
    behav = ABCD_res['acc_landscape'][:,:,b].T
    df = pd.DataFrame(behav)
    df.index = ['N=1600','N=1400','N=1200','N=1000',
                'N=800','N=600','N=400','N=200']
    df.columns = ['T=2min','T=3min','T=4min','T=5min','T=6min',
                       'T=7min','T=8min','T=9min','T=10min','T=11min','T=12min']
    with pd.ExcelWriter(xlsx_path,mode=mode) as writer: 
        df.to_excel(writer, sheet_name = 'ABCD score '+str(b+1))
        mode = 'a'

# Supplementary xlsx of theoretical model parameters

In [None]:
## Save theoretical model parameters (K0, K1, K2) of the prediction-accuracy
## fits for every dataset / version into a single worksheet
xlsx_path = os.path.join(img_dir,'CBIG_ME_TheoreticalModel_Params.xlsx')

# categories
# ABCD
abcd_mh = [0,1,2,3,4,5,6,7,27,28,29]
abcd_cog = [8,9,10,11,12,13,14,15,16,17,30,31,32,33,34,35,36]
abcd_pers=[18,19,20,21,22,23,24,25,26]
# HCP
hcp_cog = [0,1,2,3,4,5,6,8,9,10,11,12,13,24,25,26,27,28,29,59]
hcp_pers = [7,30,31,32,33,34]
hcp_phy = [14,15,16,17,18,19,20,21,22]
hcp_emo = [23,35,36,37,38,39,40,41,42,43,44,45,46]
hcp_wb = [47,48,49,50,51,52,53,54,55,56,57]
# SINGER
singer_phy = [0,3,4,5]
singer_cog = [1,2,6,7,8,9,10,11,12,13,14,15,16,17,18]
# TCP
tcp_phy = [0,18]
tcp_mh = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]
# MDD
mdd_phy = [0,18]
mdd_mh = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,19]
# ADNI
adni_phy = [0,1]
adni_cog = [3,4]
adni_pet = [5,6]

# (label, member indices) pairs, listed in the order in which they are tested
abcd_categories = [("Cognition", abcd_cog), ("Mental Health", abcd_mh),
                   ("Personality", abcd_pers)]
hcp_categories = [("Cognition", hcp_cog), ("Personality", hcp_pers),
                  ("Physical", hcp_phy), ("Emotion", hcp_emo),
                  ("Well Being", hcp_wb)]
singer_categories = [("Cognition", singer_cog), ("Physical", singer_phy)]
tcp_categories = [("Physical", tcp_phy), ("Mental Health", tcp_mh)]
mdd_categories = [("Physical", mdd_phy), ("Mental Health", mdd_mh)]
adni_categories = [("Cognition", adni_cog), ("Physical", adni_phy),
                   ("PET", adni_pet)]

# phenotype names including the three factor scores appended at the end
abcd_names = np.append(ABCD_scores, ['Cognition Factor Score',
                       'Mental Health Factor Score', 'Personality Factor Score'])
hcp_names = np.append(HCP_scores, ['Dissatisfaction Factor Score',
                      'Cognition Factor Score', 'Emotion Factor Score'])

# (dataset, version, phenotype names, phenotype indices to export, categories)
# in the order the rows appear in the worksheet
param_sources = [
    ('ABCD',   'full',       abcd_names,    ABCD_rs_log_ind,    abcd_categories),
    ('HCP',    'full',       hcp_names,     HCP_rs_log_ind,     hcp_categories),
    ('SINGER', 'full',       SINGER_scores, SINGER_log_ind,     singer_categories),
    ('TCP',    'full',       TCP_scores,    TCP_log_ind,        tcp_categories),
    ('MDD',    'full',       MDD_scores,    MDD_log_ind,        mdd_categories),
    ('ADNI',   'full',       ADNI_scores,   ADNI_log_ind,       adni_categories),
    ('ABCD',   'full_MID',   abcd_names,    ABCD_MID_log_ind,   abcd_categories),
    ('ABCD',   'full_NBACK', abcd_names,    ABCD_NBACK_log_ind, abcd_categories),
    ('ABCD',   'full_SST',   abcd_names,    ABCD_SST_log_ind,   abcd_categories),
]


def _phenotype_category(b, categories):
    """Return the label of the first category whose index list contains `b`.

    Falls back to 'Uncategorized' when `b` is in none of the lists. The
    previous fallback was an empty list, which cannot fill a one-row frame
    and therefore made the row construction fail for such an index.
    """
    for label, members in categories:
        if b in members:
            return label
    return "Uncategorized"


# Collect plain records and build the frame once instead of concatenating a
# one-row DataFrame per phenotype.
param_rows = []
for dataset, c_vers, scores_names, log_ind, categories in param_sources:
    w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits(dataset, 'predacc', rep_dir,vers=c_vers)
    for b in log_ind:
        # last entry along the second axis of the fitted weights for phenotype b
        w = w_pa_all[b,-1,:]
        param_rows.append({'Dataset': dataset, 'Version': c_vers,
                           'Phenotype': scores_names[b],
                           'Category': _phenotype_category(b, categories),
                           'K0': w[0], 'K1': w[1], 'K2': w[2]})

full_df = pd.DataFrame(param_rows, columns=['Dataset', 'Version', 'Phenotype',
                                            'Category', 'K0', 'K1', 'K2'])

with pd.ExcelWriter(xlsx_path,mode='w') as writer:
    full_df.to_excel(writer, sheet_name = 'Theoretical Model Params', index=False)
    


In [None]:
## Append the BWAS and HCP parameter sheets to the theoretical-model workbook.
# The workbook must already exist (it is opened in append mode). The path and
# the name lists are set here explicitly instead of being inherited from
# whichever cell ran last: running this cell after another export cell would
# otherwise append to that cell's workbook and label ABCD rows with the
# wrong phenotype names.
xlsx_path = os.path.join(img_dir,'CBIG_ME_TheoreticalModel_Params.xlsx')
abcd_names = np.append(ABCD_scores, ['Cognition Factor Score',
                       'Mental Health Factor Score', 'Personality Factor Score'])
hcp_names = np.append(HCP_scores, ['Dissatisfaction Factor Score',
                      'Cognition Factor Score', 'Emotion Factor Score'])

# (dataset, analysis, phenotype names, base indices, extra index, sheet name)
# 'predacc' rows are read from w_pa_all, 'tstats' / 'Haufe' rows from w_r_all.
bwas_sheets = [
    ('ABCD', 'tstats',  abcd_names, ABCD_log_ind, 36, 'ABCD univariate BWAS param'),
    ('ABCD', 'Haufe',   abcd_names, ABCD_log_ind, 36, 'ABCD multivariate BWAS param'),
    ('HCP',  'predacc', hcp_names,  HCP_log_ind,  59, 'HCP pred acc param'),
    ('HCP',  'tstats',  hcp_names,  HCP_log_ind,  59, 'HCP univariate BWAS param'),
    ('HCP',  'Haufe',   hcp_names,  HCP_log_ind,  59, 'HCP multivariate BWAS param'),
]

# Open the workbook once for all five sheets instead of re-reading and
# re-saving it for every sheet.
with pd.ExcelWriter(xlsx_path,mode='a') as writer:
    for dataset, analysis, scores_names, base_ind, extra_ind, sheet in bwas_sheets:
        w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits(dataset, analysis, rep_dir)
        weights = w_pa_all if analysis == 'predacc' else w_r_all
        sheet_rows = []
        for b in np.append(base_ind, extra_ind):
            w = weights[b,-1,:]
            sheet_rows.append({'Phenotype': scores_names[b], 'K0': w[0],
                               'K1': w[1], 'K2': w[2]})
        # build the frame once from the records rather than concatenating
        # one-row frames
        full_df = pd.DataFrame(sheet_rows, columns=['Phenotype', 'K0', 'K1', 'K2'])
        full_df.to_excel(writer, sheet_name = sheet, index=False)

In [None]:
## Save prediction accuracy results (data loaded with vers='random')
# ABCD and HCP go into the same workbook, one sheet per phenotype; rows are
# sample sizes (N) and columns are scan durations (T).
xlsx_path = os.path.join(img_dir,'PredAcc_randomized.xlsx')

# load ABCD data
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD', 'predacc', rep_dir, vers='random')
abcd_index = ['N=' + str(n) for n in (1800, 1600, 1400, 1200, 1000, 800, 600, 400, 200)]
abcd_columns = ['T=' + str(t) + 'min' for t in range(2, 22, 2)]

# load HCP data (X, Y and scan_duration end up holding the HCP values)
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP', 'predacc', rep_dir, vers='random')
scores_names = np.append(HCP_scores, ['Dissatisfaction Factor Score',
                         'Cognition Factor Score', 'Emotion Factor Score'])
hcp_index = ['N=' + str(n) for n in (700, 600, 500, 400, 300, 200)]
hcp_columns = ['T=' + str(t) + 'min' for t in range(2, 60, 2)]

# Open the workbook once and add every sheet to it. Re-opening it in append
# mode for each sheet re-reads and re-saves the whole file every time, and
# the HCP loop depended on `mode` left behind by the ABCD loop.
with pd.ExcelWriter(xlsx_path, mode='w') as writer:
    # ABCD: indices 0-36 become sheets 1-37
    for b in range(37):
        df = pd.DataFrame(ABCD_res['acc_landscape'][:,:,b].T,
                          index=abcd_index, columns=abcd_columns)
        df.to_excel(writer, sheet_name = 'ABCD score '+str(b+1))
    # HCP: indices 0-57 become sheets 1-58; index 58 is skipped and index 59
    # is written as sheet 59
    for b in np.append(range(0,58),59):
        sheet_no = 59 if b == 59 else b + 1
        df = pd.DataFrame(HCP_res['acc_landscape'][:,:,b].T,
                          index=hcp_index, columns=hcp_columns)
        df.to_excel(writer, sheet_name = 'HCP score '+str(sheet_no))

In [None]:
## Save univariate BWAS landscapes (the 'tstats_icc_landscape' arrays)
# ABCD and HCP go into the same workbook, one sheet per phenotype; rows are
# sample sizes (N) and columns are scan durations (T).
xlsx_path = os.path.join(img_dir,'univariate_BWAS.xlsx')

# load ABCD data
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD', 'tstats', rep_dir)
abcd_index = ['N=' + str(n) for n in (1200, 1000, 800, 600, 400, 200)]
abcd_columns = ['T=' + str(t) + 'min' for t in range(2, 22, 2)]

# load HCP data (X, Y and scan_duration end up holding the HCP values)
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP', 'tstats', rep_dir)
scores_names = np.append(HCP_scores, ['Dissatisfaction Factor Score',
                         'Cognition Factor Score', 'Emotion Factor Score'])
hcp_index = ['N=' + str(n) for n in (400, 350, 300, 250, 200, 150)]
hcp_columns = ['T=' + str(t) + 'min' for t in range(2, 60, 2)]

# Open the workbook once and add every sheet to it. Re-opening it in append
# mode for each sheet re-reads and re-saves the whole file every time, and
# the HCP loop depended on `mode` left behind by the ABCD loop.
with pd.ExcelWriter(xlsx_path, mode='w') as writer:
    # ABCD: indices 0-36 become sheets 1-37
    for b in range(37):
        df = pd.DataFrame(ABCD_res['tstats_icc_landscape'][:,:,b].T,
                          index=abcd_index, columns=abcd_columns)
        df.to_excel(writer, sheet_name = 'ABCD score '+str(b+1))
    # HCP: indices 0-57 become sheets 1-58; index 58 is skipped and index 59
    # is written as sheet 59
    for b in np.append(range(0,58),59):
        sheet_no = 59 if b == 59 else b + 1
        df = pd.DataFrame(HCP_res['tstats_icc_landscape'][:,:,b].T,
                          index=hcp_index, columns=hcp_columns)
        df.to_excel(writer, sheet_name = 'HCP score '+str(sheet_no))

In [None]:
## Save multivariate BWAS landscapes (the 'fi_icc_landscape' arrays)
# ABCD and HCP go into the same workbook, one sheet per phenotype; rows are
# sample sizes (N) and columns are scan durations (T).
xlsx_path = os.path.join(img_dir,'multivariate_BWAS.xlsx')

# load ABCD data
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD', 'Haufe', rep_dir)
abcd_index = ['N=' + str(n) for n in (1200, 1000, 800, 600, 400, 200)]
abcd_columns = ['T=' + str(t) + 'min' for t in range(2, 22, 2)]

# load HCP data (X, Y and scan_duration end up holding the HCP values)
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP', 'Haufe', rep_dir)
scores_names = np.append(HCP_scores, ['Dissatisfaction Factor Score',
                         'Cognition Factor Score', 'Emotion Factor Score'])
hcp_index = ['N=' + str(n) for n in (400, 350, 300, 250, 200, 150)]
hcp_columns = ['T=' + str(t) + 'min' for t in range(2, 60, 2)]

# Open the workbook once and add every sheet to it. Re-opening it in append
# mode for each sheet re-reads and re-saves the whole file every time, and
# the HCP loop depended on `mode` left behind by the ABCD loop.
with pd.ExcelWriter(xlsx_path, mode='w') as writer:
    # ABCD: indices 0-36 become sheets 1-37
    for b in range(37):
        df = pd.DataFrame(ABCD_res['fi_icc_landscape'][:,:,b].T,
                          index=abcd_index, columns=abcd_columns)
        df.to_excel(writer, sheet_name = 'ABCD score '+str(b+1))
    # HCP: indices 0-57 become sheets 1-58; index 58 is skipped and index 59
    # is written as sheet 59
    for b in np.append(range(0,58),59):
        sheet_no = 59 if b == 59 else b + 1
        df = pd.DataFrame(HCP_res['fi_icc_landscape'][:,:,b].T,
                          index=hcp_index, columns=hcp_columns)
        df.to_excel(writer, sheet_name = 'HCP score '+str(sheet_no))

In [None]:
## Save prediction accuracy results
xlsx_path = os.path.join(img_dir,'Theoretical_Calculator_randomized.xlsx')

# load ABCD predacc data
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD', 'predacc', rep_dir, vers='random')
scores_names = np.append(ABCD_scores, ['Cognition Factor Score',
                         'Mental Health Factor Score', 'Personality Factor Score'])
full_df = pd.DataFrame()
ABCD_behav_ind = [8, 10, 11, 13, 14, 15, 16, 17, 29, 30, 31, 32, 33, 36]
for b in np.append(ABCD_log_ind, 36):
    w = w_pa_all[b,-1,:]
    new_row = pd.DataFrame({'Phenotype':scores_names[b], 'K0':w[0], 
                            'K1':w[1], 'K2':w[2]}, index=[0])
    full_df = pd.concat([full_df, new_row]).reset_index(drop=True)
with pd.ExcelWriter(xlsx_path,mode='w') as writer: 
    full_df.to_excel(writer, sheet_name = 'ABCD pred acc param', index=False)
    
# load ABCD reliability data
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD', 'tstats', rep_dir, vers='random')
full_df = pd.DataFrame()
for b in np.append(ABCD_log_ind, 36):
    w = w_r_all[b,-1,:]
    new_row = pd.DataFrame({'Phenotype':scores_names[b], 'K0':w[0], 
                            'K1':w[1], 'K2':w[2]}, index=[0])
    full_df = pd.concat([full_df, new_row]).reset_index(drop=True)
with pd.ExcelWriter(xlsx_path,mode='a') as writer: 
    full_df.to_excel(writer, sheet_name = 'ABCD univariate BWAS param', index=False)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD', 'Haufe', rep_dir, vers='random')
full_df = pd.DataFrame()
for b in np.append(ABCD_log_ind, 36):
    w = w_r_all[b,-1,:]
    new_row = pd.DataFrame({'Phenotype':scores_names[b], 'K0':w[0], 
                            'K1':w[1], 'K2':w[2]}, index=[0])
    full_df = pd.concat([full_df, new_row]).reset_index(drop=True)
with pd.ExcelWriter(xlsx_path,mode='a') as writer: 
    full_df.to_excel(writer, sheet_name = 'ABCD multivariate BWAS param', index=False)

# load HCP predacc data
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits(
    'HCP', 'predacc', rep_dir, vers='random')
# Phenotype names followed by the three factor-score names.
hcp_factor_names = ['Dissatisfaction Factor Score',
                    'Cognition Factor Score', 'Emotion Factor Score']
scores_names = np.append(HCP_scores, hcp_factor_names)
#HCP_behav_ind = [1, 2, 3, 4, 5, 6, 7, 8, 22, 23, 24, 25, 26, 29, 32, 47, 59]
# Rows written: every index in HCP_log_ind followed by index 59.
hcp_rows = np.append(HCP_log_ind, 59)
hcp_pa_weights = [w_pa_all[b,-1,:] for b in hcp_rows]
full_df = pd.DataFrame({'Phenotype': [scores_names[b] for b in hcp_rows],
                        'K0': [wts[0] for wts in hcp_pa_weights],
                        'K1': [wts[1] for wts in hcp_pa_weights],
                        'K2': [wts[2] for wts in hcp_pa_weights]})
with pd.ExcelWriter(xlsx_path, mode='a') as writer:
    full_df.to_excel(writer, sheet_name='HCP pred acc param', index=False)

# load HCP reliability data
# One sheet per BWAS analysis, appended to the workbook at xlsx_path.
# NOTE(review): the original 'Haufe' call for HCP omitted vers='random' while
# every other load_fits call in this cell passes it; the loop below passes it
# for both analyses so all sheets come from the same version. Confirm that the
# 'random' HCP Haufe fits are the ones intended for this table.
for hcp_analysis, hcp_sheet in [('tstats', 'HCP univariate BWAS param'),
                                ('Haufe', 'HCP multivariate BWAS param')]:
    w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits(
        'HCP', hcp_analysis, rep_dir, vers='random')
    # Build all rows first and construct the frame once (no concat in a loop).
    # K0-K2 come from the last entry along the second axis of w_r_all.
    param_rows = [{'Phenotype': scores_names[b],
                   'K0': w_r_all[b,-1,0],
                   'K1': w_r_all[b,-1,1],
                   'K2': w_r_all[b,-1,2]}
                  for b in np.append(HCP_log_ind, 59)]
    full_df = pd.DataFrame(param_rows)
    with pd.ExcelWriter(xlsx_path, mode='a') as writer:
        full_df.to_excel(writer, sheet_name=hcp_sheet, index=False)

# Fig S1-S5: Contour Plots (HCP & ABCD different preprocessing)

In [None]:
# Fig S1: cognition accuracy contours for the 'full' / KRR / COD results.
fig, axs = plt.subplots(1, 2, figsize=(8.5, 4.5))
fig.tight_layout(pad=7)
landscape_opts = dict(vers='full', reg='KRR', metric='COD')

### HCP (drawn on the second axis)
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data(
    'HCP', 'predacc', rep_dir, **landscape_opts)
# Cognition: slice 59 of the landscape
con_lines, manual_locations = [0.05, 0.1, 0.2], [(1.5,0.3), (6.5,1.5), (11,2.5)]
hcp_cog = HCP_res['acc_landscape'][:,:,59].T
behav = np.flip(np.flip(hcp_cog), 1)
orsp.plot_contour(behav, X, Y, con_lines, manual_locations, HCP_extent,
                  fig, axs[1], Ax_Ttl='HCP')

### ABCD (drawn on the first axis)
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data(
    'ABCD', 'predacc', rep_dir, **landscape_opts)
# Cognition: slice 36 of the landscape
con_lines, manual_locations = [0.05, 0.15, 0.2], [(1,0.3), (4,1.5), (8.5,3)]
abcd_cog = ABCD_res['acc_landscape'][:,:,36].T
behav = np.flip(np.flip(abcd_cog), 1)
orsp.plot_contour(behav, X, Y, con_lines, manual_locations, ABCD_extent,
                  fig, axs[0], Ax_Ttl='ABCD')

fig.savefig(os.path.join(img_dir, 'FigS1_KRR_full_acc_COD_contour.svg'),
            bbox_inches='tight')

In [None]:
# Fig S2: cognition accuracy contours for the 'uncensored_only' / KRR / corr results.
fig, axs = plt.subplots(1, 2, figsize=(8.5, 4.5))
fig.tight_layout(pad=7)
landscape_opts = dict(vers='uncensored_only', reg='KRR', metric='corr')

### HCP (drawn on the second axis)
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data(
    'HCP', 'predacc', rep_dir, **landscape_opts)
# Cognition: slice 59 of the landscape
con_lines, manual_locations = [0.3, 0.4, 0.5], [(1.5,0.3), (6.5,1.5), (20,5)]
hcp_cog = HCP_res['acc_landscape'][:,:,59].T
behav = np.flip(np.flip(hcp_cog), 1)
orsp.plot_contour(behav, X, Y, con_lines, manual_locations, HCP_extent,
                  fig, axs[1], Ax_Ttl='HCP')

### ABCD (drawn on the first axis)
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data(
    'ABCD', 'predacc', rep_dir, **landscape_opts)
# Cognition: slice 36 of the landscape
con_lines, manual_locations = [0.3, 0.4, 0.45], [(1,0.3), (4,1.5), (5,3)]
abcd_cog = ABCD_res['acc_landscape'][:,:,36].T
behav = np.flip(np.flip(abcd_cog), 1)
orsp.plot_contour(behav, X, Y, con_lines, manual_locations, ABCD_extent,
                  fig, axs[0], Ax_Ttl='ABCD')

fig.savefig(os.path.join(img_dir, 'FigS2_KRR_uncensored_only_acc_corr_contour.svg'),
            bbox_inches='tight')

In [None]:
# Fig S3: cognition accuracy contours for the 'no_censoring' / KRR / corr results.
fig, axs = plt.subplots(1, 2, figsize=(8.5, 4.5))
fig.tight_layout(pad=7)
landscape_opts = dict(vers='no_censoring', reg='KRR', metric='corr')

### HCP (drawn on the second axis)
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data(
    'HCP', 'predacc', rep_dir, **landscape_opts)
# Cognition: slice 59 of the landscape
con_lines, manual_locations = [0.3, 0.4, 0.5], [(1.5,0.3), (6.5,1.5), (12.5,3)]
hcp_cog = HCP_res['acc_landscape'][:,:,59].T
behav = np.flip(np.flip(hcp_cog), 1)
orsp.plot_contour(behav, X, Y, con_lines, manual_locations, HCP_extent,
                  fig, axs[1], Ax_Ttl='HCP')

### ABCD (drawn on the first axis)
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data(
    'ABCD', 'predacc', rep_dir, **landscape_opts)
# Cognition: slice 36 of the landscape
con_lines, manual_locations = [0.3, 0.4, 0.5], [(1,0.3), (4,1.5), (8.5,8)]
abcd_cog = ABCD_res['acc_landscape'][:,:,36].T
behav = np.flip(np.flip(abcd_cog), 1)
orsp.plot_contour(behav, X, Y, con_lines, manual_locations, ABCD_extent,
                  fig, axs[0], Ax_Ttl='ABCD')

fig.savefig(os.path.join(img_dir, 'FigS3_KRR_no_censoring_acc_corr_contour.svg'),
            bbox_inches='tight')

In [None]:
# Fig S4: cognition accuracy contours for the 'full' / LRR / corr results.
fig, axs = plt.subplots(1, 2, figsize=(8.5, 4.5))
fig.tight_layout(pad=7)
landscape_opts = dict(vers='full', reg='LRR', metric='corr')

### HCP (drawn on the second axis)
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data(
    'HCP', 'predacc', rep_dir, **landscape_opts)
# Cognition: slice 59 of the landscape
con_lines, manual_locations = [0.3, 0.4, 0.5], [(1.5,0.3), (6.5,1.5), (18,5)]
hcp_cog = HCP_res['acc_landscape'][:,:,59].T
behav = np.flip(np.flip(hcp_cog), 1)
orsp.plot_contour(behav, X, Y, con_lines, manual_locations, HCP_extent,
                  fig, axs[1], Ax_Ttl='HCP')

### ABCD (drawn on the first axis)
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data(
    'ABCD', 'predacc', rep_dir, **landscape_opts)
# Cognition: slice 36 of the landscape
con_lines, manual_locations = [0.3, 0.4], [(1,0.3), (4,1.5)]
abcd_cog = ABCD_res['acc_landscape'][:,:,36].T
behav = np.flip(np.flip(abcd_cog), 1)
orsp.plot_contour(behav, X, Y, con_lines, manual_locations, ABCD_extent,
                  fig, axs[0], Ax_Ttl='ABCD')

fig.savefig(os.path.join(img_dir, 'FigS4_LRR_full_acc_corr_contour.svg'),
            bbox_inches='tight')

In [None]:
# Fig S5: cognition accuracy contours for the 'full' / LRR / COD results.
fig, axs = plt.subplots(1, 2, figsize=(8.5, 4.5))
fig.tight_layout(pad=7)
landscape_opts = dict(vers='full', reg='LRR', metric='COD')

### HCP (drawn on the second axis)
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data(
    'HCP', 'predacc', rep_dir, **landscape_opts)
# Cognition: slice 59 of the landscape
con_lines, manual_locations = [0.05, 0.2], [(1.5,0.3), (18,5)]
hcp_cog = HCP_res['acc_landscape'][:,:,59].T
behav = np.flip(np.flip(hcp_cog), 1)
orsp.plot_contour(behav, X, Y, con_lines, manual_locations, HCP_extent,
                  fig, axs[1], Ax_Ttl='HCP')

### ABCD (drawn on the first axis)
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data(
    'ABCD', 'predacc', rep_dir, **landscape_opts)
# Cognition: slice 36 of the landscape
con_lines, manual_locations = [0.05, 0.15], [(1,0.3), (8,4)]
abcd_cog = ABCD_res['acc_landscape'][:,:,36].T
behav = np.flip(np.flip(abcd_cog), 1)
orsp.plot_contour(behav, X, Y, con_lines, manual_locations, ABCD_extent,
                  fig, axs[0], Ax_Ttl='ABCD')
fig.savefig(os.path.join(img_dir, 'FigS5_LRR_full_acc_COD_contour.svg'),
            bbox_inches='tight')

# Fig S6: Correlation between common points in ABCD and HCP contour plots (pred acc)

In [None]:
# Fig S6: scatter of HCP against ABCD cognition prediction accuracy at paired
# grid points, with a least-squares line and the Pearson r annotated.

# load HCP data
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP', 'predacc', rep_dir)
behav = np.flip(np.flip(HCP_res['acc_landscape'][:,:,59].T),1)
# Rows 0, 2, 4 and the first 10 columns of the HCP landscape.
# NOTE(review): presumably the settings shared with ABCD (per the section
# title "common points") -- confirm against the two sampling grids.
hcp_behav = behav[[0,2,4],:10].ravel()

# load ABCD data
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD', 'predacc', rep_dir)
behav = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,36].T),1)
# First three rows, all columns of the ABCD landscape.
abcd_behav = behav[:3,:].ravel()

fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(abcd_behav, hcp_behav)

# least-squares line drawn over a fixed x-range
res = scipy.stats.linregress(abcd_behav, hcp_behav)
xy_line = np.linspace(0.2, 0.5, 100)
ax.plot(xy_line, res.intercept + res.slope*xy_line, 'k', linestyle='--')

# axis labels (spelling of "Accuracy" corrected)
orsp.format_scatter_plot('ABCD Prediction Accuracy (r)',
                    'HCP Prediction Accuracy (r)', ax)

# annotate the Pearson correlation in axes coordinates
corr_val = np.corrcoef(abcd_behav, hcp_behav)
ax.text(0.8, 0.1, 'r = ' + str(np.round(corr_val[0][1],2)), transform=ax.transAxes, size=12)

fig.savefig(os.path.join(img_dir, 'FigS6_ABCD_HCP_KRR_commonpts_predacc_correlation.svg'), bbox_inches='tight')

# Fig S7: Scatter plots for ABCD behaviors

In [None]:
### settings for ABCD
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','predacc',rep_dir)

# phenotypes to plot and their display names
behav_ind = ABCD_rs_log_ind
scores_names = np.append(ABCD_scores, ['Cognition Factor Score',
                         'Mental Health Factor Score', 'Personality Factor Score'])

# One panel per phenotype, six panels (3 rows x 2 columns) per saved figure.
# Paging is done explicitly so that a figure is only created when it has at
# least one panel; the previous hand-rolled counters saved an extra, completely
# blank figure whenever the number of phenotypes was a multiple of six.
panels_per_fig = 6
limit = 10
for plot_num, first in enumerate(range(0, len(behav_ind), panels_per_fig), start=1):
    page_inds = behav_ind[first:first + panels_per_fig]
    fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
    fig.tight_layout(h_pad=5, w_pad=7)
    # row-major order: left to right, then top to bottom (same fill order as before)
    page_axes = axs.ravel()
    for panel_ax, n in zip(page_axes, page_inds):
        behav = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,n].T),1)
        orsp.plot_scatter(9,behav,scan_duration,ABCD_subcolors,limit,panel_ax)
        panel_ax.legend(ABCD_lgd, Y, markerscale=2, loc='lower right',
                 ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
        orsp.format_scatter_plot('Total scan duration (# training participants \nx scan time per participant)',
                            'Prediction accuracy (r)', panel_ax)
        # pad the y-range by 0.05 around this phenotype's accuracies
        panel_ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
        panel_ax.set_title(scores_names[n],fontname='Arial')
    # hide the panels left unused on a partially filled (last) page
    for unused_ax in page_axes[len(page_inds):]:
        unused_ax.axis('off')
    fig.savefig(os.path.join(img_dir, 'FigS7.' + str(plot_num) +
            '_ABCD_KRR_full_acc_corr_scatter.svg'), bbox_inches='tight')


# Fig S8: Scatter plots for HCP behaviors

In [None]:
### settings for HCP
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','predacc',rep_dir)

# phenotypes to plot and their display names
behav_ind = HCP_rs_log_ind
scores_names = np.append(HCP_scores, ['Dissatisfaction Factor Score',
                         'Cognition Factor Score', 'Emotion Factor Score'])

# Two panels per phenotype (limit 29, then limit 10), six panels
# (3 rows x 2 columns) per saved figure, i.e. three phenotypes per figure.
# Paging is done explicitly so that a figure is only created when it has at
# least one panel; the previous hand-rolled counters saved an extra, completely
# blank figure whenever the number of phenotypes was a multiple of three.
panel_specs = [(n, limit) for n in behav_ind for limit in [29, 10]]
panels_per_fig = 6
for plot_num, first in enumerate(range(0, len(panel_specs), panels_per_fig), start=1):
    page_specs = panel_specs[first:first + panels_per_fig]
    fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
    fig.tight_layout(h_pad=5, w_pad=7)
    # row-major order: left to right, then top to bottom (same fill order as before)
    page_axes = axs.ravel()
    for panel_ax, (n, limit) in zip(page_axes, page_specs):
        behav = np.flip(np.flip(HCP_res['acc_landscape'][:,:,n].T),1)
        orsp.plot_scatter(6,behav,scan_duration,HCP_subcolors,limit,panel_ax)
        panel_ax.legend(HCP_lgd, Y, markerscale=2, loc='lower right',
                 ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
        orsp.format_scatter_plot('Total scan duration (# training participants \nx scan time per participant)',
                            'Prediction accuracy (r)', panel_ax)
        panel_ax.set_xlim(0,40000)
        # pad the y-range by 0.05 around this phenotype's accuracies
        panel_ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
        # the title reports limit*2 as minutes
        panel_ax.set_title(scores_names[n] + ' (' + str(limit*2) + 'mins)', fontname='Arial')
    # hide the panels left unused on a partially filled (last) page
    for unused_ax in page_axes[len(page_specs):]:
        unused_ax.axis('off')
    fig.savefig(os.path.join(img_dir, 'FigS8.' + str(plot_num) +
            '_HCP_KRR_full_acc_corr_scatter.svg'), bbox_inches='tight')


# Fig S9: Log Plot (Cog Factors / Different regressions)

In [None]:
### plot for both HCP and ABCD
# Normalised cognition accuracy against total scan duration for every
# regression / metric pairing, HCP and ABCD overlaid on one axis.
fig, ax = plt.subplots(figsize=(8.5, 4.5))

### settings
output_vers = 'output'
vers = 'full'
regs = ['LRR', 'KRR']
metrics = ['COD', 'corr']

hcp_cp = sns.color_palette("blend:paleturquoise,teal",n_colors=4)
abcd_cp = sns.color_palette("blend:mistyrose,crimson",n_colors=4)
# legend proxies: the four HCP shades followed by the four ABCD shades
reg_lgd = [plt.Line2D([], [], marker='.', color=shade, linestyle='None')
           for shade in list(hcp_cp) + list(abcd_cp)]

# number of leading columns of the HCP landscape that are kept
limit = 15
# regression-major order, so the colour index matches the legend labels below
setting_pairs = [(reg, metric) for reg in regs for metric in metrics]
for n_c, (reg, metric) in enumerate(setting_pairs):
    # ---- HCP ----
    HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data(
        'HCP','predacc', rep_dir,reg=reg,metric=metric)
    cog = np.flip(np.flip(HCP_res['acc_landscape'][:,:,59].T),1)[:,:limit]
    curr_scan = scan_duration[:,:limit]
    # fit curve, then rescale the accuracies with the fitted z and k
    z,k = orsp.lst_sq_log(curr_scan.flatten(), cog.flatten())
    norm_acc = (cog - k) / z
    sns.scatterplot(x=curr_scan.flatten(), y=norm_acc.flatten(), color=hcp_cp[n_c], ax=ax)

    # ---- ABCD (whole landscape, no column limit) ----
    ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data(
        'ABCD', 'predacc', rep_dir,reg=reg,metric=metric)
    cog = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,36].T),1)
    curr_scan = scan_duration
    z,k = orsp.lst_sq_log(curr_scan.flatten(), cog.flatten())
    norm_acc = (cog - k) / z
    sns.scatterplot(x=curr_scan.flatten(), y=norm_acc.flatten(), color=abcd_cp[n_c], ax=ax)

labels = ['HCP: LRR (COD)', 'HCP: LRR (r)', 'HCP: KRR (COD)', 'HCP: KRR (r)',
          'ABCD: LRR (COD)', 'ABCD: LRR (r)', 'ABCD: KRR (COD)', 'ABCD: KRR (r)']
lgd = ax.legend(reg_lgd, labels, markerscale=2, loc='lower right',
                ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
# plot fitted curve
orsp.plot_curve(200, 37000)

# set custom limits
ax.set_ylim([7.5, 16.5])
orsp.format_scatter_plot('Total scan duration (# training participants x scan time per participant)',
                    'Norm. prediction performance', ax)
fig.savefig(os.path.join(img_dir, 'FigS9_ScanTime_AllReg_AllMetric.svg'), bbox_inches='tight')

In [None]:
### plot for both HCP and ABCD
# Same data as the linear-axis Fig S9 cell, with log2(total scan duration) on x.
fig, ax = plt.subplots(figsize=(8.5, 4.5))
### settings
output_vers = 'output'
vers = 'full'
regs = ['LRR', 'KRR']
metrics = ['COD', 'corr']

hcp_cp = sns.color_palette("blend:paleturquoise,teal",n_colors=4)
abcd_cp = sns.color_palette("blend:mistyrose,crimson",n_colors=4)
# legend proxies: the four HCP shades followed by the four ABCD shades
reg_lgd = [plt.Line2D([], [], marker='.', color=shade, linestyle='None')
           for shade in list(hcp_cp) + list(abcd_cp)]

# number of leading columns of the HCP landscape that are kept
limit = 15
# regression-major order, so the colour index matches the legend labels below
setting_pairs = [(reg, metric) for reg in regs for metric in metrics]
for n_c, (reg, metric) in enumerate(setting_pairs):
    # ---- HCP ----
    HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data(
        'HCP','predacc', rep_dir,reg=reg,metric=metric)
    cog = np.flip(np.flip(HCP_res['acc_landscape'][:,:,59].T),1)[:,:limit]
    curr_scan = scan_duration[:,:limit]
    # fit on the raw durations; only the plotted x-values are log2-transformed
    z,k = orsp.lst_sq_log(curr_scan.flatten(), cog.flatten())
    norm_acc = (cog - k) / z
    sns.scatterplot(x=np.log(curr_scan.flatten())/ np.log(2), y=norm_acc.flatten(),
                    color=hcp_cp[n_c], ax=ax)

    # ---- ABCD (whole landscape, no column limit) ----
    ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data(
        'ABCD', 'predacc', rep_dir,reg=reg,metric=metric)
    cog = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,36].T),1)
    curr_scan = scan_duration
    z,k = orsp.lst_sq_log(curr_scan.flatten(), cog.flatten())
    norm_acc = (cog - k) / z
    sns.scatterplot(x=np.log(curr_scan.flatten())/ np.log(2), y=norm_acc.flatten(),
                    color=abcd_cp[n_c], ax=ax)

labels = ['HCP: LRR (COD)', 'HCP: LRR (r)', 'HCP: KRR (COD)', 'HCP: KRR (r)',
          'ABCD: LRR (COD)', 'ABCD: LRR (r)', 'ABCD: KRR (COD)', 'ABCD: KRR (r)']
lgd = ax.legend(reg_lgd, labels, markerscale=2, loc='lower right',
                ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)

# plot log curve: y = log2(x) plotted against log2(x), i.e. the identity line
X_fit = np.linspace(350, 38000, num=100, dtype=int)
curve_val = np.log(X_fit) / np.log(2)
ax.plot(np.log(X_fit)/ np.log(2), curve_val, color='k')

# set custom limits
ax.set_ylim([6.5, 16.5])
orsp.format_scatter_plot('log\N{SUBSCRIPT TWO}(Total Scan Duration)',
                    'Norm. prediction performance', ax)
fig.savefig(os.path.join(img_dir, 'FigS9_ScanTime_AllReg_AllMetric_Log.svg'), bbox_inches='tight')

# Fig S10: Log plot (All scores, 30m)

In [None]:
#################################################
# plot individual scores
#################################################
# Fig S10 (linear x-axis): normalised prediction accuracy of every phenotype
# group against total scan duration, all groups overlaid on one axis.
fig,ax = plt.subplots(figsize=(8.5, 3.5))
HCP_cog_ind = [1,2,3,4,5,6,8,10,25,26,29,59]
HCP_emo_ind = [23]
HCP_pers_ind = [7,31,32,34]
HCP_phy_ind = [14]
HCP_wb_ind = [47]
ABCD_cog_ind = [8,10,11,13,14,15,16,17,30,31,32,33,36]
ABCD_mh_ind = [5,29,6,3]

# Number of leading landscape columns kept per dataset.
# The HCP well-being group previously used 10 here while every other HCP group
# (and the log-scale version of this figure) used 15; it now uses 15 as well.
ABCD_limit = 10
HCP_limit = 15


def plot_norm_group(res, durations, zk, indices, limit, palette, **scatter_kw):
    """Scatter normalised accuracy against total scan duration for one group.

    res        -- mapping holding the 'acc_landscape' array
    durations  -- scan-duration grid returned by orsp.load_data
    zk         -- fitted parameters; zk[n, limit-3, 0] is z and
                  zk[n, limit-3, 1] is k for phenotype n
    indices    -- phenotype indices to plot, one colour each
    limit      -- number of leading columns of the landscape to use
    palette    -- colours, consumed in order (needs >= len(indices) entries)
    scatter_kw -- forwarded to sns.scatterplot (e.g. zorder)
    """
    x_vals = durations[:,:limit].flatten()
    for shade, n in zip(palette, indices):
        behav = np.flip(np.flip(res['acc_landscape'][:,:,n].T),1)[:,:limit]
        z = zk[n,limit-3,0]
        k = zk[n,limit-3,1]
        norm_acc = (behav - k) / z
        sns.scatterplot(x=x_vals, y=norm_acc.flatten(), color=shade, ax=ax, **scatter_kw)


# Load each dataset once (the same files were previously re-read per group).
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,ABCD_scan = orsp.load_data('ABCD','predacc',rep_dir)
ABCD_zk = orsp.load_fits('ABCD','predacc',rep_dir)[2]
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','predacc',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP','predacc',rep_dir)

# Groups are drawn in the original order so overlapping points stack the same way.
### cognition (ABCD then HCP share one 13-step palette)
cog_colors = sns.color_palette("blend:bisque,orangered", n_colors=13)
plot_norm_group(ABCD_res, ABCD_scan, ABCD_zk, ABCD_cog_ind, ABCD_limit, cog_colors)
plot_norm_group(HCP_res, scan_duration, zk_all, HCP_cog_ind, HCP_limit, cog_colors)
### mental health (ABCD), pushed behind the other points
plot_norm_group(ABCD_res, ABCD_scan, ABCD_zk, ABCD_mh_ind, ABCD_limit,
                sns.color_palette("blend:lightgrey,darkgrey", n_colors=4), zorder=-1)
### personality (HCP)
plot_norm_group(HCP_res, scan_duration, zk_all, HCP_pers_ind, HCP_limit,
                sns.color_palette("blend:powderblue,darkslateblue", n_colors=4))
### physical (HCP)
plot_norm_group(HCP_res, scan_duration, zk_all, HCP_phy_ind, HCP_limit,
                sns.color_palette("blend:gold,gold", n_colors=2))
### emotion (HCP)
plot_norm_group(HCP_res, scan_duration, zk_all, HCP_emo_ind, HCP_limit,
                sns.color_palette("blend:forestgreen,forestgreen", n_colors=2))
### well being (HCP)
plot_norm_group(HCP_res, scan_duration, zk_all, HCP_wb_ind, HCP_limit,
                sns.color_palette("blend:magenta,magenta", n_colors=2))

# Phenotype labels in plotting order (no legend is drawn in this cell).
all_scores = np.concatenate((ABCD_scores_short[ABCD_cog_ind[:-1]], ['ABCD Cog. Factor'],
                             HCP_scores_short[HCP_cog_ind[:-1]], ['HCP Cog. Factor'],
                             ABCD_scores_short[ABCD_mh_ind],
                             HCP_scores_short[HCP_pers_ind],
                             HCP_scores_short[HCP_phy_ind],
                             HCP_scores_short[HCP_emo_ind],
                             HCP_scores_short[HCP_wb_ind]))

# plot fitted curve
orsp.plot_curve(200, 38000)

# figure parameters
ax.set_ylim([7, 16])
orsp.format_scatter_plot('Total scan duration (# training participants x scan time per participant)',
                    'Norm. prediction performance', ax)
fig.savefig(os.path.join(img_dir, 'FigS10_ScanTime_AllBehavCurves_30m.svg'), bbox_inches='tight')

In [None]:
#################################################
# plot individual scores
#################################################
# Fig S10 (log2 x-axis): normalised prediction accuracy of every phenotype
# group against log2(total scan duration), with a shared legend.
fig,ax = plt.subplots(figsize=(8.5, 2.5))
all_scores = []
legend_handle = []
lgd_handles = []
HCP_cog_ind = [1,2,3,4,5,6,8,10,25,26,29,59]
HCP_emo_ind = [23]
HCP_pers_ind = [7,31,32,34]
HCP_phy_ind = [14]
HCP_wb_ind = [47]
ABCD_cog_ind = [8,10,11,13,14,15,16,17,30,31,32,33,36]
ABCD_mh_ind = [5,29,6,3]


def scatter_group_log2(res, durations, zk, indices, limit, palette, **scatter_kw):
    """Scatter one phenotype group on the current axes.

    For each phenotype n in `indices`, the first `limit` columns of its
    accuracy landscape are rescaled as (acc - k) / z, with
    z = zk[n, limit-3, 0] and k = zk[n, limit-3, 1], and plotted against
    log2 of the matching scan durations. Colours are taken from `palette`
    in order; extra keyword arguments go to sns.scatterplot.
    """
    log2_duration = np.log(durations[:,:limit].flatten())/ np.log(2)
    for shade, n in zip(palette, indices):
        behav = np.flip(np.flip(res['acc_landscape'][:,:,n].T),1)[:,:limit]
        norm_acc = (behav - zk[n,limit-3,1]) / zk[n,limit-3,0]
        sns.scatterplot(x=log2_duration, y=norm_acc.flatten(), color=shade, **scatter_kw)


### cognition
## ABCD
limit = 10
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','predacc',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD','predacc',rep_dir)
custom_colors = sns.color_palette("blend:bisque,orangered", n_colors=13)
scatter_group_log2(ABCD_res, scan_duration, zk_all, ABCD_cog_ind, limit, custom_colors)
all_scores = np.concatenate((all_scores, ABCD_scores_short[ABCD_cog_ind[:-1]], ['Cog Factor (A)']))

## HCP (reuses the 13-step cognition palette defined just above)
limit = 15
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','predacc',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP','predacc',rep_dir)
scatter_group_log2(HCP_res, scan_duration, zk_all, HCP_cog_ind, limit, custom_colors)
all_scores = np.concatenate((all_scores, HCP_scores_short[HCP_cog_ind[:-1]], ['Cog Factor (H)']))

### mental health (ABCD), drawn behind the other points
limit = 10
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','predacc',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD','predacc',rep_dir)
custom_colors = sns.color_palette("blend:lightgrey,darkgrey", n_colors=4)
scatter_group_log2(ABCD_res, scan_duration, zk_all, ABCD_mh_ind, limit, custom_colors, zorder=-1)
all_scores = np.concatenate((all_scores, ABCD_scores_short[ABCD_mh_ind]))

### personality
## HCP
limit = 15
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','predacc',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP','predacc',rep_dir)
custom_colors = sns.color_palette("blend:powderblue,darkslateblue", n_colors=4)
scatter_group_log2(HCP_res, scan_duration, zk_all, HCP_pers_ind, limit, custom_colors)
all_scores = np.concatenate((all_scores, HCP_scores_short[HCP_pers_ind]))

### physical
limit = 15
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','predacc',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP','predacc',rep_dir)
custom_colors = sns.color_palette("blend:gold,gold", n_colors=2)
scatter_group_log2(HCP_res, scan_duration, zk_all, HCP_phy_ind, limit, custom_colors)
all_scores = np.concatenate((all_scores, HCP_scores_short[HCP_phy_ind]))

### emotion
## HCP
limit = 15
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','predacc',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP','predacc',rep_dir)
custom_colors = sns.color_palette("blend:forestgreen,forestgreen", n_colors=2)
scatter_group_log2(HCP_res, scan_duration, zk_all, HCP_emo_ind, limit, custom_colors)
all_scores = np.concatenate((all_scores, HCP_scores_short[HCP_emo_ind]))

### well being
limit = 15
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','predacc',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP','predacc',rep_dir)
custom_colors = sns.color_palette("blend:magenta,magenta", n_colors=2)
scatter_group_log2(HCP_res, scan_duration, zk_all, HCP_wb_ind, limit, custom_colors)
all_scores = np.concatenate((all_scores, HCP_scores_short[HCP_wb_ind]))

## Add legend
# Labels are given in plotting order; the legend is created before the
# reference line is drawn, so the line is not part of it.
lgd = plt.legend(all_scores, handletextpad=0.01, bbox_to_anchor=[1.06, -0.27],
           fontsize=9, ncol=6, columnspacing=0.5, frameon=False)

# plot log curve: y = log2(x) plotted against log2(x), i.e. the identity line
X_fit = np.linspace(350, 38000, num=100, dtype=int)
curve_val = np.log(X_fit) / np.log(2)
plt.plot(np.log(X_fit)/ np.log(2), curve_val, color='k')

# figure parameters
ax.set_ylim([6, 16])
ax.set_xlim([8, 15.5])
orsp.format_scatter_plot('log\N{SUBSCRIPT TWO}(Total Scan Duration)',
                    'Norm. prediction performance', ax)

fig.savefig(os.path.join(img_dir, 'FigS10_ScanTime_Log_AllBehavCurves_30m.svg'), bbox_inches='tight')

# Fig S11: ABCD/HCP - N and T are not equivalent (all phenotypes)

In [None]:
#################################################
# Violin plot comparing same total scan time
#################################################
# ABCD: 4000 total scan time but different N and T
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','predacc',rep_dir)
mat = os.path.join(ABCD_img_dir,'acc_KRR_indiv_corr_landscape.mat')
res = scipy.io.loadmat(mat)

# extract points with 4000m of total scan time
# Each entry: (landscape cell (row, col), 'Time' label, 'Subs' label).
same_total_points = [((4, 1), '4', '1000subs'),
                     ((1, 4), '10', '400subs'),
                     ((0, 8), '20', '200subs')]
# Collect every row first and build the frame once, rather than concatenating
# one-row frames onto an empty DataFrame inside the loop.
records = []
for b in np.append(ABCD_log_ind,36):
    # average over the third axis of the 4-D landscape for this phenotype
    behav = np.mean(res['acc_landscape'][:,:,:,b],2)
    for (row, col), time_label, subs_label in same_total_points:
        records.append({'acc': behav[row, col], 'Time': time_label, 'Subs': subs_label})
full_df = pd.DataFrame(records)
fourm_vals = [rec['acc'] for rec in records if rec['Time'] == '4']
tenm_vals = [rec['acc'] for rec in records if rec['Time'] == '10']
twenm_vals = [rec['acc'] for rec in records if rec['Time'] == '20']

# plot violin plot
fig,ax = plt.subplots(figsize=(4, 3))
vp = sns.violinplot(data=full_df, x="Time", y="acc", palette="Reds_r",orient='v')
orsp.format_scatter_plot('', 'Prediction accuracy (r)', ax)
ax.set_title('ABCD')
vp.set(xticklabels=[])
ax.set_ylim([-0.15,0.8])
fig.savefig(os.path.join(img_dir,'FigS11_ABCD_not1to1_violin.svg'), bbox_inches='tight')

# mean accuracy
print('4m mean:', np.mean(fourm_vals))
print('10m mean:', np.mean(tenm_vals))
print('20m mean:', np.mean(twenm_vals))

# stats
print('4m vs 10m:', scipy.stats.ttest_rel(fourm_vals, tenm_vals))
print('10m vs 20m:', scipy.stats.ttest_rel(tenm_vals, twenm_vals))

# Corrected resampled t-tests on the paired differences; each is evaluated
# once and the same result is both printed and recorded.
four_vs_ten = orsp.corrected_resample_ttest(
    [short - long for short, long in zip(fourm_vals, tenm_vals)], 3/7, 0)
ten_vs_twen = orsp.corrected_resample_ttest(
    [short - long for short, long in zip(tenm_vals, twenm_vals)], 3/7, 0)
print('4m vs 10m:', four_vs_ten)
print('10m vs 20m:', ten_vs_twen)
# NOTE(review): p_list is created outside this cell, so re-running this cell
# appends these two results again.
p_list.append(four_vs_ten)
p_list.append(ten_vs_twen)

In [None]:
#################################################
# Violin plot comparing same total scan time
#################################################
# HCP: 6000 total scan time but different N and T
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','predacc',rep_dir)
mat = os.path.join(HCP_img_dir,'acc_KRR_avg_indiv_corr_landscape.mat')
res = scipy.io.loadmat(mat)

# (Time label, Subs label, index into the averaged landscape) for each combination compared
# NOTE(review): the index -> (T, N) mapping is taken as given from the labels;
# confirm against the layout of the .mat file
conditions = [('10', '600subs', (4, 1)),
              ('20', '300subs', (9, 4)),
              ('30', '200subs', (14, 5)),
              ('40', '150subs', (19, 6)),
              ('50', '120subs', (24, 7)),
              ('~60', '100subs', (28, 8))]

# extract points with 6000m of total scan time
# rows are gathered in a list and converted to a DataFrame once, instead of
# growing the frame with pd.concat on every iteration
records = []
tenm_vals = []
twenm_vals = []
thirm_vals = []
fourm_vals = []
fiftm_vals = []
sixtm_vals = []
# accuracy lists in the same order as `conditions`
vals_per_condition = [tenm_vals, twenm_vals, thirm_vals, fourm_vals, fiftm_vals, sixtm_vals]
for b in np.append(HCP_log_ind,59):
    # average the landscape over its third axis for phenotype b
    behav = np.mean(res['acc_landscape'][:,:,:,b],2)
    for (time_lbl, subs_lbl, idx), vals in zip(conditions, vals_per_condition):
        acc = behav[idx]
        vals.append(acc)
        records.append({'acc': acc, 'Time': time_lbl, 'Subs': subs_lbl})
full_df = pd.DataFrame(records, columns=['acc', 'Time', 'Subs'])

# plot violin plot
fig,ax = plt.subplots(figsize=(4, 3))
vp=sns.violinplot(data=full_df, x="Time", y="acc", palette="Blues_r",orient='v')
orsp.format_scatter_plot('', 'Prediction accuracy (r)', ax)
ax.set_title('HCP')
vp.set(xticklabels=[])
ax.set_ylim([-0.15,0.7])
fig.savefig(os.path.join(img_dir,'FigS11_HCP_not1to1_violin.svg'), bbox_inches='tight')

# mean accuracy
# NOTE(review): the last condition is labelled '~60' / '60m' here but '58m' in the stats below
print('10m mean:', np.mean(tenm_vals))
print('20m mean:', np.mean(twenm_vals))
print('30m mean:', np.mean(thirm_vals))
print('40m mean:', np.mean(fourm_vals))
print('50m mean:', np.mean(fiftm_vals))
print('60m mean:', np.mean(sixtm_vals))

# stats
# corrected resampled t-test on the paired differences of consecutive conditions;
# each test is evaluated once so that the printed and the stored result are the same object
stat_labels = ['10m vs 20m:', '20m vs 30m:', '30m vs 40m:', '40m vs 50m:', '50m vs 58m:']
stat_results = []
for shorter, longer in zip(vals_per_condition[:-1], vals_per_condition[1:]):
    paired_diff = [a - b for a, b in zip(shorter, longer)]
    stat_results.append(orsp.corrected_resample_ttest(paired_diff, 1/9, 0))
for stat_label, stat_result in zip(stat_labels, stat_results):
    print(stat_label, stat_result)
# p_list is not created in this cell - it must already exist; re-running the
# cell appends the same results again
for stat_result in stat_results:
    p_list.append(stat_result)

# Fig S12: Theoretical fit for ABCD scores

In [None]:
### settings for ABCD
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','predacc',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD','predacc',rep_dir)

# phenotypes to plot; display names are the ABCD score names followed by the 3 factor scores
behav_ind = ABCD_rs_log_ind
scores_names = np.append(ABCD_scores, ['Cognition Factor Score',
                         'Mental Health Factor Score', 'Personality Factor Score'])

# plot scatter plots, 6 subplots (3 rows x 2 columns) per saved figure
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)
n_panels = axs.size
plot_num = 1
for behav_count, b in enumerate(behav_ind, start=1):
    # panels are filled row by row: (0,0), (0,1), (1,0), ...
    slot = (behav_count - 1) % n_panels
    curr_ax = axs.ravel()[slot]

    # transpose the landscape and reverse its row order (the two flips cancel along axis 1)
    behav = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,b].T),1)
    orsp.plot_scatter(9,behav,scan_duration,ABCD_subcolors,10,curr_ax)
    curr_ax.legend(ABCD_lgd, Y, markerscale=2, loc='lower right',
             ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Total scan duration (# training participants \nx scan time per participant)',
                        'Prediction accuracy (r)', curr_ax)

    # Tom's equation fit to full duration: w0 * sqrt(1 / (1 + w1/N + w2/(N*T))),
    # with N = Y[sub_lvl] and T = X_fit; one curve per entry of Y
    w = w_pa_all[b,-1,:]
    # dtype=int casts the 100 samples to whole numbers
    X_fit = np.linspace(2, 20, num=100, dtype=int)
    for sub_lvl in range(0,len(Y)):
        curve_val = w[0] * np.sqrt(1/(1 + (w[1]/Y[sub_lvl]) + (w[2]/(Y[sub_lvl]*X_fit))))
        curr_ax.plot(Y[sub_lvl]*X_fit, curve_val, color=ABCD_theor_subcolors[sub_lvl])
    curr_ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
    curr_ax.set_title(scores_names[b])

    # save once the page is full or the last phenotype has been drawn
    is_last = behav_count == len(behav_ind)
    if slot == n_panels - 1 or is_last:
        # hide the panels left empty on a partially filled final page
        for unused_ax in axs.ravel()[slot + 1:]:
            unused_ax.axis('off')
        fig.savefig(os.path.join(img_dir, 'FigS12.' + str(plot_num) +
                '_ABCD_KRR_full_acc_corr_scatter.svg'), bbox_inches='tight')
        # open a new page only while phenotypes remain, so that no all-blank
        # figure is written when the number of phenotypes is a multiple of 6
        if not is_last:
            fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
            fig.tight_layout(h_pad=5, w_pad=7)
            plot_num += 1


# Fig S13: Theoretical fit for HCP scores

In [None]:
### settings for HCP
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','predacc',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP','predacc',rep_dir,)

# phenotypes to plot; display names are the HCP score names followed by the 3 factor scores
behav_ind = HCP_rs_log_ind
scores_names = np.append(HCP_scores, ['Dissatisfaction Factor Score',
                         'Cognition Factor Score', 'Emotion Factor Score'])

# plot scatter plots, 6 subplots (3 rows x 2 columns) per saved figure
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)
n_panels = axs.size
plot_num = 1
for behav_count, b in enumerate(behav_ind, start=1):
    # panels are filled row by row: (0,0), (0,1), (1,0), ...
    slot = (behav_count - 1) % n_panels
    curr_ax = axs.ravel()[slot]

    # transpose the landscape and reverse its row order (the two flips cancel along axis 1)
    behav = np.flip(np.flip(HCP_res['acc_landscape'][:,:,b].T),1)
    orsp.plot_scatter(6,behav,scan_duration,HCP_subcolors,29,curr_ax)
    curr_ax.legend(HCP_lgd, Y, markerscale=2, loc='lower right',
             ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Total scan duration (# training participants \nx scan time per participant)',
                        'Prediction accuracy (r)', curr_ax)

    # Tom's equation fit to full duration: w0 * sqrt(1 / (1 + w1/N + w2/(N*T))),
    # with N = Y[sub_lvl] and T = X_fit; one curve per entry of Y
    w = w_pa_all[b,-1,:]
    # dtype=int casts the 100 samples to whole numbers
    X_fit = np.linspace(2, 58, num=100, dtype=int)
    for sub_lvl in range(0,len(Y)):
        curve_val = w[0] * np.sqrt(1/(1 + (w[1]/Y[sub_lvl]) + (w[2]/(Y[sub_lvl]*X_fit))))
        curr_ax.plot(Y[sub_lvl]*X_fit, curve_val, color=HCP_theor_subcolors[sub_lvl])
    curr_ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
    curr_ax.set_title(scores_names[b])

    # save once the page is full or the last phenotype has been drawn
    is_last = behav_count == len(behav_ind)
    if slot == n_panels - 1 or is_last:
        # hide the panels left empty on a partially filled final page
        for unused_ax in axs.ravel()[slot + 1:]:
            unused_ax.axis('off')
        fig.savefig(os.path.join(img_dir, 'FigS13.' + str(plot_num) +
                '_HCP_KRR_full_acc_corr_scatter.svg'), bbox_inches='tight')
        # open a new page only while phenotypes remain, so that no all-blank
        # figure is written when the number of phenotypes is a multiple of 6
        if not is_last:
            fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
            fig.tight_layout(h_pad=5, w_pad=7)
            plot_num += 1


# Fig S14: HCP log model vs theoretical model

In [None]:
### settings for HCP
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP', 'predacc', rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP', 'predacc', rep_dir)


# phenotypes to compare (indices into scores_names); display names are the
# HCP score names followed by the 3 factor scores
behav_ind = [59,8,47]
scores_names = np.append(HCP_scores, ['Dissatisfaction Factor Score',
                         'Cognition Factor Score', 'Emotion Factor Score'])

# one row per phenotype: log fit in the left column, theoretical fit in the right column
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)

# values at which both fitted curves are evaluated (identical for every panel,
# so built once); dtype=int casts the 100 samples to whole numbers
X_fit = np.linspace(2, 60, num=100, dtype=int)

for plot_y, b in enumerate(behav_ind):
    # transpose the landscape and reverse its row order (the two flips cancel along axis 1)
    behav = np.flip(np.flip(HCP_res['acc_landscape'][:,:,b].T),1)
    w = w_pa_all[b,-1,:]
    zk = zk_all[b,-1,:]

    # scatter plot with log equation fit
    curr_ax = axs[plot_y][0]
    orsp.plot_scatter(len(Y),behav,scan_duration,HCP_subcolors,len(X),curr_ax)
    # 58 min log model fit to full equation: zk0 * log2(N*T) + zk1
    for sub_lvl in range(0,len(Y)):
        log_val = zk[0] * np.log(Y[sub_lvl]*X_fit)/np.log(2) + zk[1]
        curr_ax.plot(Y[sub_lvl]*X_fit, log_val, color='k')
    curr_ax.legend(HCP_lgd, Y, markerscale=2, ncol=2, labelspacing=0.1,loc='lower right',
                     handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Total scan duration (# training participants \nx scan time per participant)',
                        'Prediction accuracy (r)', curr_ax)
    curr_ax.set_xlim(0,40000)
    curr_ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
    curr_ax.set_title(scores_names[b] + ' (Log)', fontname='Arial')

    # scatter plot with theoretical equation fit
    curr_ax = axs[plot_y][1]
    orsp.plot_scatter(len(Y),behav,scan_duration,HCP_subcolors,len(X),curr_ax)
    # Tom's equation fit to full duration: w0 * sqrt(1 / (1 + w1/N + w2/(N*T)))
    for sub_lvl in range(0,len(Y)):
        theor_val = w[0] * np.sqrt(1/(1 + (w[1]/Y[sub_lvl]) + (w[2]/(Y[sub_lvl]*X_fit))))
        curr_ax.plot(Y[sub_lvl]*X_fit, theor_val, color=HCP_theor_subcolors[sub_lvl])
    curr_ax.legend(HCP_lgd, Y, markerscale=2, ncol=2, labelspacing=0.1,loc='lower right',
                     handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Total scan duration (# training participants \nx scan time per participant)',
                    'Prediction accuracy (r)', curr_ax)
    curr_ax.set_xlim(0,40000)
    curr_ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
    curr_ax.set_title(scores_names[b] + ' (Theoretical)', fontname='Arial')

fig.savefig(os.path.join(img_dir, 'FigS14.HCP_LogVSTheor.svg'), bbox_inches='tight')

# Fig S15: ABCD scores after randomization

In [None]:
### settings for ABCD (vers='random' results)
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','predacc',rep_dir, vers='random')
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD','predacc',rep_dir, vers='random')

# phenotypes to plot; display names are the ABCD score names followed by the 3 factor scores
behav_ind = ABCD_rs_log_ind
scores_names = np.append(ABCD_scores, ['Cognition Factor Score',
                         'Mental Health Factor Score', 'Personality Factor Score'])

# plot scatter plots, 6 subplots (3 rows x 2 columns) per saved figure
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)
n_panels = axs.size
plot_num = 1
for behav_count, b in enumerate(behav_ind, start=1):
    # panels are filled row by row: (0,0), (0,1), (1,0), ...
    slot = (behav_count - 1) % n_panels
    curr_ax = axs.ravel()[slot]

    # transpose the landscape and reverse its row order (the two flips cancel along axis 1)
    behav = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,b].T),1)
    orsp.plot_scatter(9,behav,scan_duration,ABCD_subcolors,10,curr_ax)
    curr_ax.legend(ABCD_lgd, Y, markerscale=2, loc='lower right',
             ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Total scan duration (# training participants \nx scan time per participant)',
                        'Prediction accuracy (r)', curr_ax)

    # Tom's equation fit to full duration: w0 * sqrt(1 / (1 + w1/N + w2/(N*T))),
    # with N = Y[sub_lvl] and T = X_fit; one curve per entry of Y
    w = w_pa_all[b,-1,:]
    # dtype=int casts the 100 samples to whole numbers
    X_fit = np.linspace(2, 20, num=100, dtype=int)
    for sub_lvl in range(0,len(Y)):
        curve_val = w[0] * np.sqrt(1/(1 + (w[1]/Y[sub_lvl]) + (w[2]/(Y[sub_lvl]*X_fit))))
        # NOTE(review): curves use the scatter palette (ABCD_subcolors) here - confirm this is intended
        curr_ax.plot(Y[sub_lvl]*X_fit, curve_val, color=ABCD_subcolors[sub_lvl])

    curr_ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
    curr_ax.set_title(scores_names[b])

    # save once the page is full or the last phenotype has been drawn
    is_last = behav_count == len(behav_ind)
    if slot == n_panels - 1 or is_last:
        # hide the panels left empty on a partially filled final page
        for unused_ax in axs.ravel()[slot + 1:]:
            unused_ax.axis('off')
        fig.savefig(os.path.join(img_dir, 'FigS15.' + str(plot_num) +
                '_ABCD_KRR_full_acc_corr_scatter.svg'), bbox_inches='tight')
        # open a new page only while phenotypes remain, so that no all-blank
        # figure is written when the number of phenotypes is a multiple of 6
        if not is_last:
            fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
            fig.tight_layout(h_pad=5, w_pad=7)
            plot_num += 1

# Fig S16: HCP scores after randomization

In [None]:
### settings for HCP (vers='random' results)
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','predacc',rep_dir, vers='random')
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP','predacc',rep_dir,vers='random')

# phenotypes to plot; display names are the HCP score names followed by the 3 factor scores
behav_ind = HCP_rs_log_ind
scores_names = np.append(HCP_scores, ['Dissatisfaction Factor Score',
                         'Cognition Factor Score', 'Emotion Factor Score'])

# plot scatter plots, 6 subplots (3 rows x 2 columns) per saved figure
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)
n_panels = axs.size
plot_num = 1
for behav_count, b in enumerate(behav_ind, start=1):
    # panels are filled row by row: (0,0), (0,1), (1,0), ...
    slot = (behav_count - 1) % n_panels
    curr_ax = axs.ravel()[slot]

    # transpose the landscape and reverse its row order (the two flips cancel along axis 1)
    behav = np.flip(np.flip(HCP_res['acc_landscape'][:,:,b].T),1)
    orsp.plot_scatter(6,behav,scan_duration,HCP_subcolors,29,curr_ax)
    curr_ax.legend(HCP_lgd, Y, markerscale=2, loc='lower right',
             ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Total scan duration (# training participants \nx scan time per participant)',
                        'Prediction accuracy (r)', curr_ax)

    # Tom's equation fit to full duration: w0 * sqrt(1 / (1 + w1/N + w2/(N*T))),
    # with N = Y[sub_lvl] and T = X_fit; one curve per entry of Y
    w = w_pa_all[b,-1,:]
    # dtype=int casts the 100 samples to whole numbers
    X_fit = np.linspace(2, 58, num=100, dtype=int)
    for sub_lvl in range(0,len(Y)):
        curve_val = w[0] * np.sqrt(1/(1 + (w[1]/Y[sub_lvl]) + (w[2]/(Y[sub_lvl]*X_fit))))
        # NOTE(review): curves use the scatter palette (HCP_subcolors) here - confirm this is intended
        curr_ax.plot(Y[sub_lvl]*X_fit, curve_val, color=HCP_subcolors[sub_lvl])
    curr_ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
    curr_ax.set_title(scores_names[b])

    # save once the page is full or the last phenotype has been drawn
    is_last = behav_count == len(behav_ind)
    if slot == n_panels - 1 or is_last:
        # hide the panels left empty on a partially filled final page
        for unused_ax in axs.ravel()[slot + 1:]:
            unused_ax.axis('off')
        fig.savefig(os.path.join(img_dir, 'FigS16.' + str(plot_num) +
                '_HCP_KRR_full_acc_corr_scatter.svg'), bbox_inches='tight')
        # open a new page only while phenotypes remain, so that no all-blank
        # figure is written when the number of phenotypes is a multiple of 6
        if not is_last:
            fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
            fig.tight_layout(h_pad=5, w_pad=7)
            plot_num += 1


# Fig S17: SINGER scores

In [None]:
# load SINGER data
SINGER_img_dir,SINGER_res,X,Y,SINGER_extent,scan_duration = orsp.load_data('SINGER','predacc',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('SINGER','predacc',rep_dir)

# count how many scores r > 0.1
# (accuracy is read from the last row and column of the flipped landscape)
behav_ind = range(0,19)
abv01 = 0
avg_acc = []
for b in behav_ind:
    behav = np.flip(np.flip(SINGER_res['acc_landscape'][:,:,b].T),1)
    if behav[-1,-1] > 0.1:
        avg_acc.append(behav[-1,-1])
        abv01 += 1
print("r > 0.1 = ", str(abv01))
print(np.mean(avg_acc))

# initialize scores
behav_ind = SINGER_log_ind
scores_names = SINGER_scores

# plot scatter plots, 6 subplots (3 rows x 2 columns) per saved figure
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)
n_panels = axs.size
plot_num = 1
meanCOD = []
for behav_count, b in enumerate(behav_ind, start=1):
    # panels are filled row by row: (0,0), (0,1), (1,0), ...
    slot = (behav_count - 1) % n_panels
    curr_ax = axs.ravel()[slot]

    # transpose the landscape and reverse its row order (the two flips cancel along axis 1)
    behav = np.flip(np.flip(SINGER_res['acc_landscape'][:,:,b].T),1)
    orsp.plot_scatter(6,behav,scan_duration,SINGER_subcolors,9,curr_ax)
    curr_ax.legend(SINGER_lgd, Y, markerscale=2, loc='lower right',
            ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Total scan duration (# training participants \nx scan time per participant)',
            'Prediction accuracy (r)', curr_ax)

    # Tom's equation fit to full duration: w0 * sqrt(1 / (1 + w1/N + w2/(N*T))),
    # with N = Y[sub_lvl] and T = X_fit; one curve per entry of Y
    w = w_pa_all[b,-1,:]
    # the last entry of loss_pa_all for this phenotype is collected for the mean COD printed at the end
    meanCOD.append(loss_pa_all[b,-1])
    print(behav[-1,-1] > 0.1, '-', loss_pa_all[b,-1])
    # dtype=int casts the 100 samples to whole numbers
    X_fit = np.linspace(2, 10, num=100, dtype=int)
    for sub_lvl in range(0,len(Y)):
        curve_val = w[0] * np.sqrt(1/(1 + (w[1]/Y[sub_lvl]) + (w[2]/(Y[sub_lvl]*X_fit))))
        curr_ax.plot(Y[sub_lvl]*X_fit, curve_val, color=SINGER_theor_subcolors[sub_lvl])
    # additional settings
    curr_ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
    curr_ax.set_title(scores_names[b])

    # save once the page is full or the last phenotype has been drawn
    is_last = behav_count == len(behav_ind)
    if slot == n_panels - 1 or is_last:
        # hide the panels left empty on a partially filled final page
        for unused_ax in axs.ravel()[slot + 1:]:
            unused_ax.axis('off')
        fig.savefig(os.path.join(img_dir, 'FigS17.' + str(plot_num) +
                '_SINGER_KRR_acc_corr_scatter.svg'), bbox_inches='tight')
        # open a new page only while phenotypes remain, so that no all-blank
        # figure is written when the number of phenotypes is a multiple of 6
        if not is_last:
            fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
            fig.tight_layout(h_pad=5, w_pad=7)
            plot_num += 1
print("Mean COD = ", str(np.mean(meanCOD)))

# Fig S18: TCP scores

In [None]:
# load TCP data
TCP_img_dir,TCP_res,X,Y,TCP_extent,scan_duration = orsp.load_data('TCP','predacc',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('TCP','predacc',rep_dir)

# count how many scores r > 0.1
# (accuracy is read from the last row and column of the flipped landscape)
behav_ind = range(0,19)
abv01 = 0
avg_acc = []
for b in behav_ind:
    behav = np.flip(np.flip(TCP_res['acc_landscape'][:,:,b].T),1)
    if behav[-1,-1] > 0.1:
        avg_acc.append(behav[-1,-1])
        abv01 += 1
print("r > 0.1 = ", str(abv01))
print(np.mean(avg_acc))

# initialize scores
behav_ind = TCP_log_ind
scores_names = TCP_scores

# plot scatter plots, 6 subplots (3 rows x 2 columns) per saved figure
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)
n_panels = axs.size
plot_num = 1
meanCOD = []
for behav_count, b in enumerate(behav_ind, start=1):
    # panels are filled row by row: (0,0), (0,1), (1,0), ...
    slot = (behav_count - 1) % n_panels
    curr_ax = axs.ravel()[slot]

    # transpose the landscape and reverse its row order (the two flips cancel along axis 1)
    behav = np.flip(np.flip(TCP_res['acc_landscape'][:,:,b].T),1)
    orsp.plot_scatter(6,behav,scan_duration,TCP_subcolors,13,curr_ax)
    curr_ax.legend(TCP_lgd, Y, markerscale=2, loc='lower right',
            ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Total scan duration (# training participants \nx scan time per participant)',
            'Prediction accuracy (r)', curr_ax)

    # Tom's equation fit to full duration: w0 * sqrt(1 / (1 + w1/N + w2/(N*T))),
    # with N = Y[sub_lvl] and T = X_fit; one curve per entry of Y
    w = w_pa_all[b,-1,:]
    # the last entry of loss_pa_all for this phenotype is collected for the mean COD printed at the end
    meanCOD.append(loss_pa_all[b,-1])
    print(behav[-1,-1] > 0.1, '-', loss_pa_all[b,-1])
    # dtype=int casts the 100 samples to whole numbers
    X_fit = np.linspace(2, 26, num=100, dtype=int)
    for sub_lvl in range(0,len(Y)):
        curve_val = w[0] * np.sqrt(1/(1 + (w[1]/Y[sub_lvl]) + (w[2]/(Y[sub_lvl]*X_fit))))
        curr_ax.plot(Y[sub_lvl]*X_fit, curve_val, color=TCP_theor_subcolors[sub_lvl])

    curr_ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
    curr_ax.set_title(scores_names[b])

    # save once the page is full or the last phenotype has been drawn
    is_last = behav_count == len(behav_ind)
    if slot == n_panels - 1 or is_last:
        # hide the panels left empty on a partially filled final page
        for unused_ax in axs.ravel()[slot + 1:]:
            unused_ax.axis('off')
        fig.savefig(os.path.join(img_dir, 'FigS18.' + str(plot_num) +
                '_TCP_KRR_acc_corr_scatter.svg'), bbox_inches='tight')
        # open a new page only while phenotypes remain, so that no all-blank
        # figure is written when the number of phenotypes is a multiple of 6
        if not is_last:
            fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
            fig.tight_layout(h_pad=5, w_pad=7)
            plot_num += 1
print("Mean COD = ", str(np.mean(meanCOD)))

# Fig S19: MDD scores

In [None]:
# load MDD data
MDD_img_dir,MDD_res,X,Y,MDD_extent,scan_duration = orsp.load_data('MDD','predacc',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('MDD','predacc',rep_dir)


# count how many scores r > 0.1
# (accuracy is read from the last row and column of the flipped landscape)
behav_ind = range(0,20)
abv01 = 0
avg_acc = []
for b in behav_ind:
    behav = np.flip(np.flip(MDD_res['acc_landscape'][:,:,b].T),1)
    if behav[-1,-1] > 0.1:
        avg_acc.append(behav[-1,-1])
        abv01 += 1
print("r > 0.1 = ", str(abv01))
print(np.mean(avg_acc))

# initialize scores
behav_ind = MDD_log_ind
scores_names = MDD_scores

# plot scatter plots, 6 subplots (3 rows x 2 columns) per saved figure
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)
n_panels = axs.size
plot_num = 1
meanCOD = []
for behav_count, b in enumerate(behav_ind, start=1):
    # panels are filled row by row: (0,0), (0,1), (1,0), ...
    slot = (behav_count - 1) % n_panels
    curr_ax = axs.ravel()[slot]

    # transpose the landscape and reverse its row order (the two flips cancel along axis 1)
    behav = np.flip(np.flip(MDD_res['acc_landscape'][:,:,b].T),1)
    orsp.plot_scatter(10,behav,scan_duration,MDD_subcolors,11,curr_ax)
    curr_ax.legend(MDD_lgd, Y, markerscale=2, loc='lower right',
            ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Total scan duration (# training participants \nx scan time per participant)',
            'Prediction accuracy (r)', curr_ax)

    # Tom's equation fit to full duration: w0 * sqrt(1 / (1 + w1/N + w2/(N*T))),
    # with N = Y[sub_lvl] and T = X_fit; one curve per entry of Y
    w = w_pa_all[b,-1,:]
    # the last entry of loss_pa_all for this phenotype is collected for the mean COD printed at the end
    meanCOD.append(loss_pa_all[b,-1])
    print(behav[-1,-1] > 0.1, '-', loss_pa_all[b,-1])
    # dtype=int casts the 100 samples to whole numbers
    X_fit = np.linspace(3, 23, num=100, dtype=int)
    for sub_lvl in range(0,len(Y)):
        curve_val = w[0] * np.sqrt(1/(1 + (w[1]/Y[sub_lvl]) + (w[2]/(Y[sub_lvl]*X_fit))))
        curr_ax.plot(Y[sub_lvl]*X_fit, curve_val, color=MDD_theor_subcolors[sub_lvl])

    curr_ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
    curr_ax.set_title(scores_names[b])

    # save once the page is full or the last phenotype has been drawn
    is_last = behav_count == len(behav_ind)
    if slot == n_panels - 1 or is_last:
        # hide the panels left empty on a partially filled final page
        for unused_ax in axs.ravel()[slot + 1:]:
            unused_ax.axis('off')
        fig.savefig(os.path.join(img_dir, 'FigS19.' + str(plot_num) +
                '_MDD_KRR_acc_corr_scatter.svg'), bbox_inches='tight')
        # open a new page only while phenotypes remain, so that no all-blank
        # figure is written when the number of phenotypes is a multiple of 6
        if not is_last:
            fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
            fig.tight_layout(h_pad=5, w_pad=7)
            plot_num += 1
print("Mean COD = ", str(np.mean(meanCOD)))

# Fig S20: ADNI scores

In [None]:
# load ADNI data
ADNI_img_dir,ADNI_res,X,Y,ADNI_extent,scan_duration = orsp.load_data('ADNI','predacc',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ADNI','predacc',rep_dir)


# count how many scores r > 0.1
# (accuracy is read from the last row and column of the flipped landscape)
behav_ind = range(0,6)
abv01 = 0
avg_acc = []
for b in behav_ind:
    behav = np.flip(np.flip(ADNI_res['acc_landscape'][:,:,b].T),1)
    if behav[-1,-1] > 0.1:
        avg_acc.append(behav[-1,-1])
        abv01 += 1
print("r > 0.1 = ", str(abv01))
print(np.mean(avg_acc))

# initialize scores
behav_ind = ADNI_log_ind
scores_names = ADNI_scores

# plot scatter plots, 6 subplots (3 rows x 2 columns) per saved figure
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)
n_panels = axs.size
plot_num = 1
meanCOD = []
for behav_count, b in enumerate(behav_ind, start=1):
    # panels are filled row by row: (0,0), (0,1), (1,0), ...
    slot = (behav_count - 1) % n_panels
    curr_ax = axs.ravel()[slot]

    # transpose the landscape and reverse its row order (the two flips cancel along axis 1)
    behav = np.flip(np.flip(ADNI_res['acc_landscape'][:,:,b].T),1)
    orsp.plot_scatter(5,behav,scan_duration,ADNI_subcolors,8,curr_ax)
    curr_ax.legend(ADNI_lgd, Y, markerscale=2, loc='lower right',
            ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Total scan duration (# training participants \nx scan time per participant)',
            'Prediction accuracy (r)', curr_ax)

    # Tom's equation fit to full duration: w0 * sqrt(1 / (1 + w1/N + w2/(N*T))),
    # with N = Y[sub_lvl] and T = X_fit; one curve per entry of Y
    w = w_pa_all[b,-1,:]
    # the last entry of loss_pa_all for this phenotype is collected for the mean COD printed at the end
    meanCOD.append(loss_pa_all[b,-1])
    print(behav[-1,-1] > 0.1, '-', loss_pa_all[b,-1])
    # dtype=int casts the 100 samples to whole numbers
    X_fit = np.linspace(2, 9, num=100, dtype=int)
    for sub_lvl in range(0,len(Y)):
        curve_val = w[0] * np.sqrt(1/(1 + (w[1]/Y[sub_lvl]) + (w[2]/(Y[sub_lvl]*X_fit))))
        curr_ax.plot(Y[sub_lvl]*X_fit, curve_val, color=ADNI_theor_subcolors[sub_lvl])

    curr_ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
    curr_ax.set_title(scores_names[b])

    # save once the page is full or the last phenotype has been drawn
    is_last = behav_count == len(behav_ind)
    if slot == n_panels - 1 or is_last:
        # hide the panels left empty on a partially filled final page
        for unused_ax in axs.ravel()[slot + 1:]:
            unused_ax.axis('off')
        fig.savefig(os.path.join(img_dir, 'FigS20.' + str(plot_num) +
                '_ADNI_KRR_acc_corr_scatter.svg'), bbox_inches='tight')
        # open a new page only while phenotypes remain, so that no all-blank
        # figure is written when the number of phenotypes is a multiple of 6
        if not is_last:
            fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
            fig.tight_layout(h_pad=5, w_pad=7)
            plot_num += 1
print("Mean COD = ", str(np.mean(meanCOD)))

# Fig S21: ABCD Task MID scores

In [None]:
### settings for ABCD (MID task version)
c_vers = 'full_MID'
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','predacc',rep_dir, vers=c_vers)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD','predacc',rep_dir, vers=c_vers)

# count how many scores r > 0.1
# (accuracy is read from the last row and column of the flipped landscape)
behav_ind = np.append(range(0,36), 36)
abv01 = 0
avg_acc = []
# indices of the phenotypes that pass the threshold
abv01_beh = []
for b in behav_ind:
    behav = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,b].T),1)
    if behav[-1,-1] > 0.1:
        abv01_beh.append(b)
        avg_acc.append(behav[-1,-1])
        abv01 += 1
print("r > 0.1 = ", str(abv01))
print(np.mean(avg_acc))

# initialize scores; display names are the ABCD score names followed by the 3 factor scores
num_pts = 10
behav_ind = ABCD_MID_log_ind
scores_names = np.append(ABCD_scores, ['Cognition Factor Score',
                         'Mental Health Factor Score', 'Personality Factor Score'])

# plot scatter plots, 6 subplots (3 rows x 2 columns) per saved figure
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)
n_panels = axs.size
plot_num = 1
meanCOD = []
for behav_count, b in enumerate(behav_ind, start=1):
    # panels are filled row by row: (0,0), (0,1), (1,0), ...
    slot = (behav_count - 1) % n_panels
    curr_ax = axs.ravel()[slot]

    # transpose the landscape and reverse its row order (the two flips cancel along axis 1)
    behav = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,b].T),1)
    orsp.plot_scatter(8,behav,scan_duration,ABCD_subcolors,num_pts,curr_ax)
    curr_ax.legend(ABCD_lgd, Y, markerscale=2, loc='lower right',
             ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Total scan duration (# training participants \nx scan time per participant)',
                        'Prediction accuracy (r)', curr_ax)

    # Tom's equation fit to full duration: w0 * sqrt(1 / (1 + w1/N + w2/(N*T))),
    # with N = Y[sub_lvl] and T = X_fit; one curve per entry of Y
    w = w_pa_all[b,-1,:]
    print(b,loss_pa_all[b,-1])
    # the last entry of loss_pa_all for this phenotype is collected for the mean COD printed at the end
    meanCOD.append(loss_pa_all[b,-1])
    # dtype=int casts the 100 samples to whole numbers
    X_fit = np.linspace(2, num_pts+1, num=100, dtype=int)
    for sub_lvl in range(0,len(Y)):
        curve_val = w[0] * np.sqrt(1/(1 + (w[1]/Y[sub_lvl]) + (w[2]/(Y[sub_lvl]*X_fit))))
        curr_ax.plot(Y[sub_lvl]*X_fit, curve_val, color=ABCD_theor_subcolors[sub_lvl])
    curr_ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
    curr_ax.set_title(scores_names[b])

    # save once the page is full or the last phenotype has been drawn
    is_last = behav_count == len(behav_ind)
    if slot == n_panels - 1 or is_last:
        # hide the panels left empty on a partially filled final page
        for unused_ax in axs.ravel()[slot + 1:]:
            unused_ax.axis('off')
        fig.savefig(os.path.join(img_dir, 'FigS21.' + str(plot_num) +
                '_ABCD_MID_KRR_acc_corr_scatter.svg'), bbox_inches='tight')
        # open a new page only while phenotypes remain, so that no all-blank
        # figure is written when the number of phenotypes is a multiple of 6
        if not is_last:
            fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
            fig.tight_layout(h_pad=5, w_pad=7)
            plot_num += 1

print("Mean COD = ", str(np.mean(meanCOD)))

# Fig S22: ABCD Task NBACK scores

In [None]:
### settings for ABCD (task fMRI version: NBACK)
c_vers = 'full_NBACK'
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','predacc',rep_dir, vers=c_vers)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD','predacc',rep_dir, vers=c_vers)

# count how many scores have r > 0.1 at the last entry of the re-oriented
# accuracy landscape, and report the mean accuracy over those scores
# NOTE(review): behav[-1,-1] presumably corresponds to the largest sample size
# and longest scan time -- confirm against orsp.load_data
behav_ind = np.append(range(0,36), 36)
abv01 = 0
avg_acc = []
abv01_beh = []
for b in behav_ind:
    behav = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,b].T),1)
    if behav[-1,-1] > 0.1:
        abv01_beh.append(b)
        avg_acc.append(behav[-1,-1])
        abv01 += 1
print("r > 0.1 = ", str(abv01))
print(np.mean(avg_acc))

# initialize scores
num_pts = 9
behav_ind = ABCD_NBACK_log_ind
scores_names = np.append(ABCD_scores, ['Cognition Factor Score',
                         'Mental Health Factor Score', 'Personality Factor Score'])

# plot scatter plots, 6 panels (3 rows x 2 columns) per saved figure
panels_per_fig = 6
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)
plot_num = 1
meanCOD = []
n_behav = len(behav_ind)
for behav_count, b in enumerate(behav_ind, start=1):
    # row-major panel position on the current page: (0,0),(0,1),(1,0),...
    panel = (behav_count - 1) % panels_per_fig
    ax = axs.ravel()[panel]

    behav = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,b].T),1)
    orsp.plot_scatter(8,behav,scan_duration,ABCD_subcolors,num_pts,ax)
    ax.legend(ABCD_lgd, Y, markerscale=2, loc='lower right',
              ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Total scan duration (# training participants \nx scan time per participant)',
                        'Prediction accuracy (r)', ax)
    # Tom's equation fit to full duration
    w = w_pa_all[b,-1,:]
    print(b,loss_pa_all[b,-1])
    meanCOD.append(loss_pa_all[b,-1])
    X_fit = np.linspace(2, num_pts+1, num=100, dtype=int)
    # one theoretical curve per entry of Y, drawn against Y * X_fit
    for sub_lvl in range(0,len(Y)):
        curve_val = w[0] * np.sqrt(1/(1 + (w[1]/Y[sub_lvl]) + (w[2]/(Y[sub_lvl]*X_fit))))
        ax.plot(Y[sub_lvl]*X_fit, curve_val, color=ABCD_theor_subcolors[sub_lvl])
    ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
    ax.set_title(scores_names[b])

    if behav_count == n_behav:
        # last behavior: blank any unused panels and save the final page once.
        # When the page is exactly full there is nothing to blank, and no
        # extra empty figure is created or saved.
        for spare_ax in axs.ravel()[panel+1:]:
            spare_ax.axis('off')
        fig.savefig(os.path.join(img_dir, 'FigS22.' + str(plot_num) +
                '_ABCD_NBACK_KRR_acc_corr_scatter.svg'), bbox_inches='tight')
    elif panel == panels_per_fig - 1:
        # page full and more behaviors remain: save it and start a new page
        fig.savefig(os.path.join(img_dir, 'FigS22.' + str(plot_num) +
                '_ABCD_NBACK_KRR_acc_corr_scatter.svg'), bbox_inches='tight')
        fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
        fig.tight_layout(h_pad=5, w_pad=7)
        plot_num += 1

print("Mean COD = ", str(np.mean(meanCOD)))

# Fig S23: ABCD Task SST scores

In [None]:
### settings for ABCD (task fMRI version: SST)
c_vers = 'full_SST'
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','predacc',rep_dir, vers=c_vers)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD','predacc',rep_dir, vers=c_vers)

# count how many scores have r > 0.1 at the last entry of the re-oriented
# accuracy landscape, and report the mean accuracy over those scores
# NOTE(review): behav[-1,-1] presumably corresponds to the largest sample size
# and longest scan time -- confirm against orsp.load_data
behav_ind = np.append(range(0,36), 36)
abv01 = 0
avg_acc = []
abv01_beh = []
for b in behav_ind:
    behav = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,b].T),1)
    if behav[-1,-1] > 0.1:
        abv01_beh.append(b)
        avg_acc.append(behav[-1,-1])
        abv01 += 1
print("r > 0.1 = ", str(abv01))
print(np.mean(avg_acc))

# initialize scores
num_pts = 11
behav_ind = ABCD_SST_log_ind
scores_names = np.append(ABCD_scores, ['Cognition Factor Score',
                         'Mental Health Factor Score', 'Personality Factor Score'])

# plot scatter plots, 6 panels (3 rows x 2 columns) per saved figure
panels_per_fig = 6
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)
plot_num = 1
meanCOD = []
n_behav = len(behav_ind)
for behav_count, b in enumerate(behav_ind, start=1):
    # row-major panel position on the current page: (0,0),(0,1),(1,0),...
    panel = (behav_count - 1) % panels_per_fig
    ax = axs.ravel()[panel]

    behav = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,b].T),1)
    orsp.plot_scatter(8,behav,scan_duration,ABCD_subcolors,num_pts,ax)
    ax.legend(ABCD_lgd, Y, markerscale=2, loc='lower right',
              ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Total scan duration (# training participants \nx scan time per participant)',
                        'Prediction accuracy (r)', ax)
    # Tom's equation fit to full duration
    w = w_pa_all[b,-1,:]
    print(b,loss_pa_all[b,-1])
    meanCOD.append(loss_pa_all[b,-1])
    X_fit = np.linspace(2, num_pts+1, num=100, dtype=int)
    # one theoretical curve per entry of Y, drawn against Y * X_fit
    for sub_lvl in range(0,len(Y)):
        curve_val = w[0] * np.sqrt(1/(1 + (w[1]/Y[sub_lvl]) + (w[2]/(Y[sub_lvl]*X_fit))))
        ax.plot(Y[sub_lvl]*X_fit, curve_val, color=ABCD_theor_subcolors[sub_lvl])
    ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
    ax.set_title(scores_names[b])

    if behav_count == n_behav:
        # last behavior: blank any unused panels and save the final page once.
        # When the page is exactly full there is nothing to blank, and no
        # extra empty figure is created or saved.
        for spare_ax in axs.ravel()[panel+1:]:
            spare_ax.axis('off')
        fig.savefig(os.path.join(img_dir, 'FigS23.' + str(plot_num) +
                '_ABCD_SST_KRR_acc_corr_scatter.svg'), bbox_inches='tight')
    elif panel == panels_per_fig - 1:
        # page full and more behaviors remain: save it and start a new page
        fig.savefig(os.path.join(img_dir, 'FigS23.' + str(plot_num) +
                '_ABCD_SST_KRR_acc_corr_scatter.svg'), bbox_inches='tight')
        fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
        fig.tight_layout(h_pad=5, w_pad=7)
        plot_num += 1

print("Mean COD = ", str(np.mean(meanCOD)))

# Fig S24: Overhead cost for different datasets

In [None]:
#################################################
# Optimal accuracy including overhead (all datasets)
#################################################
budgets = [10000000, 1000000, 100000]
# NOTE(review): the division by 60 presumably converts an hourly scanner cost
# to a per-minute cost -- confirm against orsp.calc_datasetAcc
scanner_costs = np.array([500, 1000, 2000]) / 60
recruitment_costs = [500, 1000, 2000, 5000]
max_T = 200
theoretical_X = np.linspace(1, max_T, num=1000)
perc = 1
y_bottom_lim = [0.65, 0.25, 0.05]  # lower y-limit for each row (one row per budget)

# (dataset name passed to orsp, line color, phenotype indices), in drawing order
dataset_specs = [('ABCD', 'red', ABCD_rs_log_ind),
                 ('HCP', 'blue', HCP_rs_log_ind),
                 ('SINGER', 'purple', SINGER_log_ind),
                 ('TCP', 'goldenrod', TCP_log_ind),
                 ('MDD', 'orange', MDD_log_ind),
                 ('ADNI', 'green', ADNI_log_ind)]

### Iterate over plots: one figure per recruitment cost,
### rows = budgets, columns = scanner costs
for recruitment_cost in recruitment_costs:
    fig = plt.figure(figsize=(8,6))
    # define the grid here so the cell does not depend on a `gspec` left over
    # from another cell (same layout as the other overhead-cost figures)
    gspec = gridspec.GridSpec(3,3,hspace=0.22,wspace=0.25)
    for curr_y, budget in enumerate(budgets):
        for curr_x, scanner_cost in enumerate(scanner_costs):
            ax = plt.subplot(gspec[curr_y,curr_x])

            for dataset, c, log_ind in dataset_specs:
                # smooth curve
                final_predacc, c_int = orsp.calc_datasetAcc(budget,scanner_cost,recruitment_cost,
                                           theoretical_X,log_ind,dataset,rep_dir,'full',rd=None)
                ax.plot(theoretical_X,final_predacc, color=c, zorder=0, alpha=0.35)
                # limits using rounded down sample size
                final_predacc, c_int = orsp.calc_datasetAcc(budget,scanner_cost,recruitment_cost,
                                           theoretical_X,log_ind,dataset,rep_dir,'full',rd=1)
                orsp.plot_max_range(final_predacc,perc,theoretical_X,c,ax)

            # format plot
            ax.set_ylabel('')
            ax.set_xlabel('')
            ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
            ax.set_ylim([y_bottom_lim[curr_y],1.03])
            ax.set_xlim([-3,max_T])
            ax.spines[['right', 'top']].set_visible(False)

    # set legend
    # Create new legend handles with alpha=1 (no transparency)
    legend_handles = [plt.Line2D([0], [0], color=c, alpha=1, label=dataset)
                      for dataset, c, log_ind in dataset_specs]
    # plt.legend attaches to the current axes, i.e. the last panel created above
    plt.legend(handles=legend_handles, loc='lower right',
               title="Dataset", title_fontproperties={'family': 'Arial', 'weight': 'bold', 'size': 11},
               frameon=False, bbox_to_anchor=[1.00, -0.9], ncol=6)
    fig.savefig(os.path.join(img_dir, 'FigS24_ALLdatasets_OptimalAcc'+str(recruitment_cost)+'.svg'), bbox_inches='tight')
        

# Fig S25: Overhead cost for different phenotypic domains

In [None]:
def calc_domainAcc(N,T):
    """Average the theoretical-model accuracy within each phenotypic domain.

    For every phenotype index listed for a dataset, the fitted weights
    ``w = w_pa_all[b,-1,:]`` from ``orsp.load_fits`` are plugged into
    ``sqrt(1 / (1 + w[1]/n + w[2]/(n*T)))`` with ``n = floor(0.9 * N)``.
    The ``w[0]`` scale factor is not applied here.
    The resulting values are pooled per domain across HCP, ABCD (rest),
    SINGER, TCP, MDD, ADNI and the three ABCD task versions
    (``full_MID``, ``full_NBACK``, ``full_SST``), then averaged.

    Relies on module-level names: ``rep_dir``, ``orsp`` and the ``*_log_ind``
    index lists for each dataset.

    Parameters
    ----------
    N : scalar or array-like
        Total sample size; 90% of it (rounded down) is used as ``n`` above.
    T : scalar or array-like
        Scan time per participant; must broadcast against ``N``.

    Returns
    -------
    tuple
        ``(cog, phy, emo, pers, wb, mh, pet)``: the mean over phenotypes
        (``np.mean(..., 0)``) for the cognition, physical, emotion,
        personality, well-being, mental-health and PET domains. A domain with
        no phenotypes yields NaN, as ``np.mean`` of an empty list does.
    """
    training_N = np.floor(0.9 * N)

    # output order of the returned tuple
    domain_order = ('cog', 'phy', 'emo', 'pers', 'wb', 'mh', 'pet')
    domain_vals = {domain: [] for domain in domain_order}

    # phenotype index -> domain membership, per dataset
    abcd_domains = {
        'cog': [8,9,10,11,12,13,14,15,16,17,30,31,32,33,34,35,36],
        'pers': [18,19,20,21,22,23,24,25,26],
        'mh': [0,1,2,3,4,5,6,7,27,28,29],
    }
    hcp_domains = {
        'cog': [0,1,2,3,4,5,6,8,9,10,11,12,13,24,25,26,27,28,29,59],
        'pers': [7,30,31,32,33,34],
        'phy': [14,15,16,17,18,19,20,21,22],
        'emo': [23,35,36,37,38,39,40,41,42,43,44,45,46],
        'wb': [47,48,49,50,51,52,53,54,55,56,57],
    }
    singer_domains = {
        'cog': [1,2,6,7,8,9,10,11,12,13,14,15,16,17,18],
        'phy': [0,3,4,5],
    }
    tcp_domains = {
        'phy': [0,18],
        'mh': [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17],
    }
    mdd_domains = {
        'phy': [0,18],
        'mh': [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,19],
    }
    adni_domains = {
        'phy': [0,1],
        'cog': [3,4],
        'pet': [5,6],
    }

    # (dataset, extra load_fits kwargs, phenotype indices to use, domain map).
    # The order matters only for the order in which values are pooled.
    fit_sources = [
        ('HCP', {}, HCP_rs_log_ind, hcp_domains),
        ('ABCD', {}, ABCD_rs_log_ind, abcd_domains),
        ('SINGER', {}, SINGER_log_ind, singer_domains),
        ('TCP', {}, TCP_log_ind, tcp_domains),
        ('MDD', {}, MDD_log_ind, mdd_domains),
        ('ADNI', {}, ADNI_log_ind, adni_domains),
        ('ABCD', {'vers': 'full_MID'}, ABCD_MID_log_ind, abcd_domains),
        ('ABCD', {'vers': 'full_NBACK'}, ABCD_NBACK_log_ind, abcd_domains),
        ('ABCD', {'vers': 'full_SST'}, ABCD_SST_log_ind, abcd_domains),
    ]

    for dataset, load_kwargs, behav_ind, domains in fit_sources:
        w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits(
            dataset, 'predacc', rep_dir, **load_kwargs)
        for b in behav_ind:
            # Tom's equation fit to full duration
            w = w_pa_all[b,-1,:]
            b_acc = np.sqrt(1/(1 + (w[1]/training_N) + (w[2]/(training_N*T))))
            for domain, members in domains.items():
                if b in members:
                    domain_vals[domain].append(b_acc)

    return tuple(np.mean(domain_vals[domain],0) for domain in domain_order)

#################################################
# Tom's theoretical equations
#################################################
budgets = [10000000, 1000000, 100000]
# NOTE(review): the division by 60 presumably converts an hourly scanner cost
# to a per-minute cost -- confirm
scanner_costs = np.array([500, 1000, 2000]) / 60
recruitment_costs = [500, 1000, 2000, 5000]
max_T = 200
theoretical_X = np.linspace(1, max_T, num=1000)
y_bottom_lim = [0.55, 0.15, 0.05]  # lower y-limit for each row (one row per budget)
perc = 1

# line colors, in the order calc_domainAcc returns the domains:
# cognition, physical, emotion, personality, well-being, mental health, PET
domain_colors = ['blue', 'green', 'red', 'orange', 'purple', 'black', 'olive']

### Iterate over plots: one figure per recruitment cost,
### rows = budgets, columns = scanner costs
for recruitment_cost in recruitment_costs:
    fig = plt.figure(figsize=(8,6))
    gspec = gridspec.GridSpec(3,3,hspace=0.22,wspace=0.25)
    for curr_y, budget in enumerate(budgets):
        for curr_x, scanner_cost in enumerate(scanner_costs):

            ax = plt.subplot(gspec[curr_y,curr_x])

            # smooth curve: sample size affordable at each scan time.
            # Uses the recruitment cost of the current figure (previously the
            # fixed recruitment_costs[1], which made all saved figures identical).
            remaining_Y = budget / (theoretical_X*scanner_cost + recruitment_cost)
            domain_acc = calc_domainAcc(remaining_Y,theoretical_X)
            for vals, c in zip(domain_acc, domain_colors):
                ax.plot(theoretical_X,vals, color=c, zorder=0, alpha=0.5)
            # limits using rounded down sample size
            remaining_Y = np.floor(budget / (theoretical_X*scanner_cost + recruitment_cost))
            domain_acc = calc_domainAcc(remaining_Y,theoretical_X)
            for vals, c in zip(domain_acc, domain_colors):
                orsp.plot_max_range(vals,perc,theoretical_X,c,ax)

            # format plot
            ax.set_ylabel('')
            ax.set_xlabel('')
            ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
            ax.set_ylim([y_bottom_lim[curr_y],1.03])
            ax.set_xlim([-3,max_T])
            ax.spines[['right', 'top']].set_visible(False)

    # legend handles with alpha=1 (no transparency); counts in the labels are hard-coded
    legend_handles = [plt.Line2D([0], [0], color='blue', alpha=1, label='Cognition (39)'),
                      plt.Line2D([0], [0], color='green', alpha=1, label='Physical (10)'),
                      plt.Line2D([0], [0], color='red', alpha=1, label='Emotion (1)'),
                      plt.Line2D([0], [0], color='orange', alpha=1, label='Personality (6)'),
                      plt.Line2D([0], [0], color='purple', alpha=1, label='Well-Being (1)'),
                      plt.Line2D([0], [0], color='black', alpha=1, label='Mental Health (17)'),
                      plt.Line2D([0], [0], color='olive', alpha=1, label='PET (2)')]
    # plt.legend attaches to the current axes, i.e. the last panel created above
    plt.legend(handles=legend_handles,
               title="Domain",title_fontproperties={'family' : 'Arial', 'weight':'bold', 'size': 11},
               frameon=False, bbox_to_anchor=[1.05, -0.5], ncol=4)
    fig.savefig(os.path.join(img_dir, 'FigS25_ALLdomains_OptimalAcc'+str(recruitment_cost)+'.svg'), bbox_inches='tight')

# Fig S26: Overhead cost for task vs rest

In [None]:
#################################################
# Optimal accuracy including overhead (ABCD tasks)
#################################################
budgets = [10000000, 1000000, 100000]
# NOTE(review): the division by 60 presumably converts an hourly scanner cost
# to a per-minute cost -- confirm against orsp.calc_datasetAcc
scanner_costs = np.array([500, 1000, 2000]) / 60
recruitment_costs = [500, 1000, 2000, 5000]
max_T = 200
theoretical_X = np.linspace(1, max_T, num=1000)
perc = 1
y_bottom_lim = [0.65, 0.25, 0.1]  # lower y-limit for each row (one row per budget)

# (results version passed to orsp, line color, legend label), in drawing order;
# every condition uses the same phenotype set, ABCD_rst_intersect
condition_specs = [('full', 'black', 'Rest'),
                   ('full_MID', 'blue', 'Task: MID'),
                   ('full_NBACK', 'green', 'Task: NBACK'),
                   ('full_SST', 'purple', 'Task: SST')]

### Iterate over plots: one figure per recruitment cost,
### rows = budgets, columns = scanner costs
for recruitment_cost in recruitment_costs:
    fig = plt.figure(figsize=(8,6))
    # define the grid here so the cell does not depend on a `gspec` left over
    # from another cell (same layout as the other overhead-cost figures)
    gspec = gridspec.GridSpec(3,3,hspace=0.22,wspace=0.25)
    for curr_y, budget in enumerate(budgets):
        for curr_x, scanner_cost in enumerate(scanner_costs):
            ax = plt.subplot(gspec[curr_y,curr_x])

            for vers, c, label in condition_specs:
                # smooth curve
                final_predacc, c_int = orsp.calc_datasetAcc(budget,scanner_cost,recruitment_cost,
                                           theoretical_X,ABCD_rst_intersect,'ABCD',rep_dir,vers,rd=None)
                ax.plot(theoretical_X,final_predacc, color=c, zorder=0, alpha=0.35)
                # limits using rounded down sample size
                final_predacc, c_int = orsp.calc_datasetAcc(budget,scanner_cost,recruitment_cost,
                                           theoretical_X,ABCD_rst_intersect,'ABCD',rep_dir,vers,rd=1)
                orsp.plot_max_range(final_predacc,perc,theoretical_X,c,ax)

            # format plot
            ax.set_ylabel('')
            ax.set_xlabel('')
            ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
            ax.set_ylim([y_bottom_lim[curr_y],1.03])
            ax.set_xlim([-3,max_T])
            ax.spines[['right', 'top']].set_visible(False)

    # set legend
    # Create new legend handles with alpha=1 (no transparency)
    legend_handles = [plt.Line2D([0], [0], color=c, alpha=1, label=label)
                      for vers, c, label in condition_specs]
    # plt.legend attaches to the current axes, i.e. the last panel created above
    plt.legend(handles=legend_handles, loc='lower right',
               title="Condition", title_fontproperties={'family': 'Arial', 'weight': 'bold', 'size': 11},
               frameon=False, bbox_to_anchor=[0.7, -0.9], ncol=4)
    fig.savefig(os.path.join(img_dir, 'FigS26_ABCD_RS_Task_OptimalAcc'+str(recruitment_cost)+'.svg'), bbox_inches='tight')


# Fig S27: Strict set ABCD and HCP

In [None]:
#################################################
# Optimal accuracy for ABCD and HCP (controlling for fit to theoretical model)
#################################################
# Scenario grid: one figure per recruitment cost; inside a figure, one row
# per budget and one column per scanner cost.
budgets = [10000000, 1000000, 100000]
# NOTE(review): presumably an hourly scanner rate converted to a per-minute
# cost (x-axis would then be minutes) -- confirm units.
scanner_costs = np.array([500, 1000, 2000]) / 60
recruitment_costs = [500, 1000, 2000, 5000]
max_T = 200
theoretical_X = np.linspace(1, max_T, num=1000)  # x-axis grid shared by all curves
perc = 1                                         # forwarded as-is to orsp.plot_max_range
y_bottom_lim = [0.65, 0.25, 0.1]                 # lower y-limit of each budget row

# One entry per curve, drawn in this order on every panel:
# (line colour, run-order key passed to orsp, HCP phenotype indices,
#  ABCD phenotype indices, dot the maximum of the smooth curve?)
conditions = [
    ('black',  'full',   HCP_rs_log_ind,             ABCD_rs_log_ind,             False),
    # NOTE(review): only this condition marks its maximum with a dot; kept
    # exactly as in the original cell -- confirm this is intended.
    ('blue',   'full',   HCP_behav_ind_fullstrict,   ABCD_behav_ind_fullstrict,   True),
    ('red',    'random', HCP_rs_log_ind,             ABCD_rs_log_ind,             False),
    ('purple', 'random', HCP_behav_ind_randomstrict, ABCD_behav_ind_randomstrict, False),
]

# Legend entries (opaque proxies, because the plotted curves use alpha=0.35)
legend_entries = [('black', 'Original run order (36)'),
                  ('blue', 'Original run order strict (13)'),
                  ('red', 'Randomized run order (36)'),
                  ('purple', 'Randomized run order strict (17)')]

### Iterate over plots
for recruitment_cost in recruitment_costs:
    fig = plt.figure(figsize=(8,6))
    gspec = gridspec.GridSpec(3,3,hspace=0.22,wspace=0.25)
    for curr_y, budget in enumerate(budgets):
        for curr_x, scanner_cost in enumerate(scanner_costs):
            ax = plt.subplot(gspec[curr_y,curr_x])
            # cost of one participant at every point of the x-axis grid
            cost_per_participant = theoretical_X*scanner_cost + recruitment_cost

            for c, run_order, HCP_ind, ABCD_ind, mark_max in conditions:
                # smooth curve: fractional sample size the budget can afford
                remaining_Y = budget / cost_per_participant
                final_predacc, c_int = orsp.calc_avgHCPABCDAcc(remaining_Y,theoretical_X,run_order,
                                                          rep_dir,HCP_ind,ABCD_ind)
                ax.plot(theoretical_X,final_predacc, color=c, zorder=0, alpha=0.35)
                if mark_max:
                    ax.scatter(theoretical_X[np.argmax(final_predacc)],np.max(final_predacc),
                               color=c, zorder=1, clip_on=True)
                # limits using rounded down sample size
                remaining_Y = np.floor(budget / cost_per_participant)
                final_predacc, c_int = orsp.calc_avgHCPABCDAcc(remaining_Y,theoretical_X,run_order,
                                                          rep_dir,HCP_ind,ABCD_ind,rd=1)
                orsp.plot_max_range(final_predacc,perc,theoretical_X,c,ax)

            # format plot
            ax.set_ylabel('')
            ax.set_xlabel('')
            ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
            ax.set_ylim([y_bottom_lim[curr_y],1.03])
            ax.set_xlim([-3,max_T])
            ax.spines[['right', 'top']].set_visible(False)

    # set legend (attached to the last panel drawn, positioned below the grid)
    legend_handles = [plt.Line2D([0], [0], color=lgd_color, alpha=1, label=lgd_label)
                      for lgd_color, lgd_label in legend_entries]
    plt.legend(handles=legend_handles, loc='lower right',
               title="Condition", title_fontproperties={'family': 'Arial', 'weight': 'bold', 'size': 11},
               frameon=False, bbox_to_anchor=[0.83, -0.9], ncol=2)
    fig.savefig(os.path.join(img_dir, 'FigS27_ABCDHCP_StrictSet_OptimalAcc'+str(recruitment_cost)+'.svg'), bbox_inches='tight')


# Fig S28: ABCD control analyses

In [None]:
#################################################
# Optimal accuracy including overhead (ABCD control)
#################################################

# Scenario grid: one figure per recruitment cost; inside a figure, one row
# per budget and one column per scanner cost.
budgets = [10000000, 1000000, 100000]
scanner_costs = np.array([500, 1000, 2000]) / 60
recruitment_costs = [500, 1000, 2000, 5000]
max_T = 200
theoretical_X = np.linspace(1, max_T, num=1000)  # x-axis grid shared by all curves
perc = 1                                         # forwarded as-is to orsp.plot_max_range
y_bottom_lim = [0.65, 0.25, 0.1]                 # lower y-limit of each budget row

# Curves drawn on every panel, in order: (line colour, orsp variant key)
variants = [('black',  'full'),              # ABCD only - 400 parcels
            ('red',    'full_sc'),           # ABCD only - subcortical
            ('orange', 'full_1000parcels')]  # ABCD only - 1000 parcels

# Legend entries (opaque proxies, because the plotted curves use alpha=0.35)
legend_entries = [('black', '400+19 parcels (original)'),
                  ('red', '19 subcortical parcels'),
                  ('orange', '1000+19 parcels')]

### Iterate over plots
for recruitment_cost in recruitment_costs:
    fig = plt.figure(figsize=(8,6))
    # Build the 3x3 layout here so the cell does not depend on a `gspec`
    # variable left behind by a previously executed cell. Spacing matches the
    # grid used for the preceding optimal-accuracy figure.
    gspec = gridspec.GridSpec(3,3,hspace=0.22,wspace=0.25)
    for curr_y, budget in enumerate(budgets):
        for curr_x, scanner_cost in enumerate(scanner_costs):
            ax = plt.subplot(gspec[curr_y,curr_x])

            for c, variant in variants:
                # smooth curve
                final_predacc, c_int = orsp.calc_datasetAcc(budget,scanner_cost,recruitment_cost,
                                           theoretical_X,ABCD_intersect,'ABCD',rep_dir,variant,rd=None)
                ax.plot(theoretical_X,final_predacc, color=c, zorder=0, alpha=0.35)
                # limits using rounded down sample size
                final_predacc, c_int = orsp.calc_datasetAcc(budget,scanner_cost,recruitment_cost,
                                           theoretical_X,ABCD_intersect,'ABCD',rep_dir,variant,rd=1)
                orsp.plot_max_range(final_predacc,perc,theoretical_X,c,ax)

            # format plot
            ax.set_ylabel('')
            ax.set_xlabel('')
            ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
            ax.set_ylim([y_bottom_lim[curr_y],1.03])
            ax.set_xlim([-3,max_T])
            ax.spines[['right', 'top']].set_visible(False)

    # set legend (attached to the last panel drawn, positioned below the grid)
    legend_handles = [plt.Line2D([0], [0], color=lgd_color, alpha=1, label=lgd_label)
                      for lgd_color, lgd_label in legend_entries]
    plt.legend(handles=legend_handles, loc='lower right',
               title="Condition", title_fontproperties={'family': 'Arial', 'weight': 'bold', 'size': 11},
               frameon=False, bbox_to_anchor=[1.05, -0.9], ncol=4)
    fig.savefig(os.path.join(img_dir, 'FigS28_ABCD_Control_OptimalAcc'+str(recruitment_cost)+'.svg'), bbox_inches='tight')

# Fig S29: HCP control analyses

In [None]:
#################################################
# Optimal accuracy including overhead (HCP control)
#################################################

# Scenario grid: one figure per recruitment cost; inside a figure, one row
# per budget and one column per scanner cost.
budgets = [10000000, 1000000, 100000]
scanner_costs = np.array([500, 1000, 2000]) / 60
recruitment_costs = [500, 1000, 2000, 5000]
max_T = 200
theoretical_X = np.linspace(1, max_T, num=1000)  # x-axis grid shared by all curves
perc = 1                                         # forwarded as-is to orsp.plot_max_range
y_bottom_lim = [0.65, 0.25, 0.1]                 # lower y-limit of each budget row

# Curves drawn on every panel, in order: (line colour, orsp variant key)
variants = [('black',  'full'),              # HCP only - 400 parcels
            ('red',    'full_sc'),           # HCP only - subcortical
            ('orange', 'full_1000parcels'),  # HCP only - 1000 parcels
            ('blue',   'full_mixdays')]      # HCP only - mixdays

# Legend entries (opaque proxies, because the plotted curves use alpha=0.35)
legend_entries = [('black', '400+19 parcels (original)'),
                  ('red', '19 subcortical parcels'),
                  ('orange', '1000+19 parcels'),
                  ('blue', 'Two Sessions')]

### Iterate over plots
for recruitment_cost in recruitment_costs:
    fig = plt.figure(figsize=(8,6))
    # Build the 3x3 layout here so the cell does not depend on a `gspec`
    # variable left behind by a previously executed cell. Spacing matches the
    # grid used for the earlier optimal-accuracy figures.
    gspec = gridspec.GridSpec(3,3,hspace=0.22,wspace=0.25)
    for curr_y, budget in enumerate(budgets):
        for curr_x, scanner_cost in enumerate(scanner_costs):
            ax = plt.subplot(gspec[curr_y,curr_x])

            for c, variant in variants:
                # smooth curve
                final_predacc, c_int = orsp.calc_datasetAcc(budget,scanner_cost,recruitment_cost,
                                           theoretical_X,HCP_intersect,'HCP',rep_dir,variant)
                ax.plot(theoretical_X,final_predacc, color=c, zorder=0, alpha=0.35)
                # limits using rounded down sample size
                final_predacc, c_int = orsp.calc_datasetAcc(budget,scanner_cost,recruitment_cost,
                                           theoretical_X,HCP_intersect,'HCP',rep_dir,variant,rd=1)
                orsp.plot_max_range(final_predacc,perc,theoretical_X,c,ax)

            # format plot
            ax.set_ylabel('')
            ax.set_xlabel('')
            ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
            ax.set_ylim([y_bottom_lim[curr_y],1.03])
            ax.set_xlim([-3,max_T])
            ax.spines[['right', 'top']].set_visible(False)

    # set legend (attached to the last panel drawn, positioned below the grid)
    legend_handles = [plt.Line2D([0], [0], color=lgd_color, alpha=1, label=lgd_label)
                      for lgd_color, lgd_label in legend_entries]
    plt.legend(handles=legend_handles, loc='lower right',
               title="Condition", title_fontproperties={'family': 'Arial', 'weight': 'bold', 'size': 11},
               frameon=False, bbox_to_anchor=[1.55, -0.9], ncol=4)
    fig.savefig(os.path.join(img_dir, 'FigS29_HCP_Control_OptimalAcc'+str(recruitment_cost)+'.svg'), bbox_inches='tight')

# Fig S30: BWAS Contour plot for ABCD and HCP

In [None]:
#################################################
# Reliability (ICC) contour plots: ABCD on the left axes, HCP on the right
#################################################
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8.5, 4.5))
fig.tight_layout(pad=7)
ax_abcd, ax_hcp = axs

### HCP panel (drawn first, on the right-hand axes)
HCP_img_dir, HCP_res, X, Y, HCP_extent, scan_duration = orsp.load_data(
    'HCP', 'tstats', rep_dir)
# Cognition: slice 59 of the ICC landscape, transposed and re-oriented
hcp_slice = HCP_res['tstats_icc_landscape'][:, :, 59]
behav = np.flip(np.flip(hcp_slice.T), 1)
con_lines = [0.2, 0.3, 0.4]                          # contour levels to draw
manual_locations = [(3, 0.5), (6.5, 1.5), (25, 5)]   # where the contour labels go
orsp.plot_contour(behav, X, Y, con_lines, manual_locations, HCP_extent,
                  fig, ax_hcp, Yax_lbl='ICC', Ax_Ttl='HCP')

### ABCD panel (left-hand axes)
ABCD_img_dir, ABCD_res, X, Y, ABCD_extent, scan_duration = orsp.load_data(
    'ABCD', 'tstats', rep_dir)
# Cognition: slice 36 of the ICC landscape, transposed and re-oriented
abcd_slice = ABCD_res['tstats_icc_landscape'][:, :, 36]
behav = np.flip(np.flip(abcd_slice.T), 1)
con_lines = [0.3, 0.55, 0.7]
manual_locations = [(1, 0.3), (4, 2), (8, 6)]
orsp.plot_contour(behav, X, Y, con_lines, manual_locations, ABCD_extent,
                  fig, ax_abcd, Yax_lbl='ICC', Ax_Ttl='ABCD')

contour_svg = os.path.join(img_dir, 'FigS30_KRR_full_rel_BWAS_cog_contour.svg')
fig.savefig(contour_svg, bbox_inches='tight')

# Fig S31: Correlation between common points in ABCD and HCP contour plots (BWAS)

In [None]:
# HCP: re-oriented ICC landscape for slice 59; keep rows 1 and 5 and the
# first 10 columns, flattened to one vector
HCP_img_dir, HCP_res, X, Y, HCP_extent, scan_duration = orsp.load_data('HCP', 'tstats', rep_dir)
hcp_slice = HCP_res['tstats_icc_landscape'][:, :, 59]
behav = np.flip(np.flip(hcp_slice.T), 1)
hcp_behav = behav[[1, 5], :10].ravel()

# ABCD: re-oriented ICC landscape for slice 36; keep the first two rows,
# flattened to one vector
# NOTE(review): presumably these rows/columns are the (N, T) points the two
# datasets have in common -- confirm against the landscape grids.
ABCD_img_dir, ABCD_res, X, Y, ABCD_extent, scan_duration = orsp.load_data('ABCD', 'tstats', rep_dir)
abcd_slice = ABCD_res['tstats_icc_landscape'][:, :, 36]
behav = np.flip(np.flip(abcd_slice.T), 1)
abcd_behav = behav[:2, :].ravel()

# scatter of matched points plus the least-squares line
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(abcd_behav, hcp_behav)
res = scipy.stats.linregress(abcd_behav, hcp_behav)
xy_line = np.linspace(0.05, 0.55, 100)
fitted_line = res.intercept + res.slope*xy_line
ax.plot(xy_line, fitted_line, color='k', linestyle='--')
orsp.format_scatter_plot('ABCD Reliability (ICC)',
                    'HCP Reliability (ICC)', ax)

# annotate with the Pearson correlation, rounded to 2 decimals
corr_val = np.corrcoef(abcd_behav, hcp_behav)
r_rounded = np.round(corr_val[0][1], 2)
ax.text(0.8, 0.1, f'r = {r_rounded!s}', transform=ax.transAxes, size=12)
tick_positions = np.arange(0.1, 0.55, step=0.1)
ax.set_yticks(tick_positions)
ax.set_xticks(tick_positions)

fig.savefig(os.path.join(img_dir, 'FigS31_ABCD_HCP_KRR_commonpts_tstats_correlation.svg'), bbox_inches='tight')

# Fig S32: Scatter plots for reliability

In [None]:
#################################################
# plot scatter plot against total scan time
#################################################
# Columns [0, limit) of each landscape are drawn as plain markers; the
# remaining columns are drawn with a black outline.
limit = 5

# Load both datasets once. The ABCD load comes last so that X, Y and
# scan_duration hold the ABCD values afterwards, as in the original cell.
HCP_img_dir,HCP_res,X,HCP_Y,HCP_extent,HCP_scan_duration = orsp.load_data('HCP', 'tstats', rep_dir)
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD', 'tstats', rep_dir)

# One figure per dataset:
# (results, landscape slice, scan durations, legend labels,
#  per-row colours, legend proxy handles, output file name)
panels = [
    (HCP_res, 59, HCP_scan_duration, HCP_Y, HCP_subcolors, HCP_lgd, 'FigS32_HCP_BWA_icc.svg'),
    (ABCD_res, 36, scan_duration, Y, ABCD_subcolors, ABCD_lgd, 'FigS32_ABCD_BWA_icc.svg'),
]

for res, score_ind, scan_dur, size_labels, subcolors, proxy_handles, fig_name in panels:
    behav = np.flip(np.flip(res['tstats_icc_landscape'][:,:,score_ind].T),1)
    fig,ax = plt.subplots(figsize=(4, 3))

    # plain markers, rows drawn in reverse so that row 0 ends up on top
    for n_subs in range(0,6)[::-1]:
        beh = behav[n_subs, :limit]
        curr_scan = scan_dur[n_subs, :limit]
        sns.scatterplot(x=curr_scan.flatten(), y=beh.flatten(), ax=ax,
                        color=subcolors[n_subs], s=40)
    # outlined markers for the remaining columns
    for n_subs in range(0,6):
        beh = behav[n_subs, limit:]
        curr_scan = scan_dur[n_subs, limit:]
        sns.scatterplot(x=curr_scan.flatten(), y=beh.flatten(), ax=ax,
                        color=subcolors[n_subs], edgecolor="k", linewidth=0.75, s=30)

    orsp.format_scatter_plot('Total scan duration (# participants \nx scan time per participant)',
                        'Reliability (ICC)', ax)
    lgd = ax.legend(proxy_handles, size_labels, markerscale=2,
                    ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
    fig.savefig(os.path.join(img_dir, fig_name), bbox_inches='tight')

In [None]:
#################################################
# plot individual scores
#################################################
# Normalised reliability of every selected phenotype against total scan
# duration, all on one axes, followed by the fitted curve.
#
# The phenotype index lists (ABCD_cog_ind, ABCD_mh_ind, HCP_cog_ind,
# HCP_pers_ind, HCP_phy_ind, HCP_emo_ind, HCP_wb_ind) are not defined in this
# cell; they must already exist from an earlier cell.
fig,ax = plt.subplots(figsize=(8.5, 3.5))
all_scores = []   # legend names, accumulated in plotting order
limit = 5         # only the first `limit` columns of each landscape are plotted

# Load each dataset and its fits once instead of once per phenotype group.
# NOTE(review): assumes orsp.load_data / orsp.load_fits only read from disk
# and return the same values on every call -- confirm.
# The HCP loads come last so the generic names (X, Y, scan_duration, zk_all,
# ...) end up holding HCP values, as they did in the original cell.
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,ABCD_scan_duration = orsp.load_data('ABCD','tstats',rep_dir)
w_r_all,w_pa_all,ABCD_zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD','tstats',rep_dir)
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','tstats',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP','tstats',rep_dir)
sources = {
    'ABCD': (ABCD_res, ABCD_scan_duration, ABCD_zk_all, ABCD_scores_short),
    'HCP': (HCP_res, scan_duration, zk_all, HCP_scores_short),
}

# Groups in drawing order:
# (dataset, phenotype indices, seaborn palette, palette size,
#  extra scatter kwargs, legend name replacing the last phenotype or None)
groups = [
    ('ABCD', ABCD_cog_ind, "blend:bisque,orangered", 13, {}, 'ABCD Cog. Factor'),      # cognition
    ('HCP', HCP_cog_ind, "blend:bisque,orangered", 13, {}, 'HCP Cog. Factor'),         # cognition
    ('ABCD', ABCD_mh_ind, "blend:lightgrey,darkgrey", 4, {'zorder': -1}, None),        # mental health
    ('HCP', HCP_pers_ind, "blend:powderblue,darkslateblue", 4, {}, None),              # personality
    ('HCP', HCP_phy_ind, "blend:gold,gold", 2, {}, None),                              # physical
    ('HCP', HCP_emo_ind, "blend:forestgreen,forestgreen", 2, {}, None),                # emotion
    ('HCP', HCP_wb_ind, "blend:magenta,magenta", 2, {}, None),                         # well being
]

for dataset, inds, palette, n_colors, extra_kws, factor_label in groups:
    res, scan_dur, zk, scores_short = sources[dataset]
    custom_colors = sns.color_palette(palette, n_colors=n_colors)
    curr_scan = scan_dur[:,:limit]
    for n_c, n in enumerate(inds):
        behav = np.flip(np.flip(res['tstats_icc_landscape'][:,:,n].T),1)
        behav = behav[:,:limit]
        # normalise with this phenotype's fitted pair stored at [n, limit-3, 0/1]
        z = zk[n,limit-3,0]
        k = zk[n,limit-3,1]
        norm_acc = (behav - k) / z
        sns.scatterplot(x=curr_scan.flatten(), y=norm_acc.flatten(), ax=ax,
                        color=custom_colors[n_c], **extra_kws)
    if factor_label is None:
        all_scores = np.concatenate((all_scores, scores_short[inds]))
    else:
        # last index of a cognition list is named by the factor label instead
        all_scores = np.concatenate((all_scores, scores_short[inds[:-1]], [factor_label]))

# plot fitted curve
orsp.plot_curve(200, 15000)

# figure parameters
ax.set_ylim([6, 16])
orsp.format_scatter_plot('Total scan duration (# participants x scan time per participant)',
                    'Norm. reliability', ax)

fig.savefig(os.path.join(img_dir, 'FigS32_ScanTime_AllBehavCurves_Tstats_10m.svg'), bbox_inches='tight')

In [None]:
#################################################
# plot individual scores
#################################################
# Normalised reliability of every selected phenotype against log2(total scan
# duration), all on one axes, with a shared legend and the line y = x.
fig,ax = plt.subplots(figsize=(8.5, 2.5))
all_scores = []   # legend names, accumulated in plotting order
limit = 5         # only the first `limit` columns of each landscape are plotted

# phenotype indices per behavioural domain
HCP_cog_ind = [1,2,3,4,5,6,8,10,25,26,29,59]
HCP_emo_ind = [23]
HCP_pers_ind = [7,31,32,34]
HCP_phy_ind = [14]
HCP_wb_ind = [47]
ABCD_cog_ind = [8,10,11,13,14,15,16,17,30,31,32,33,36]
ABCD_mh_ind = [5,29,6,3]

# Load each dataset and its fits once instead of once per phenotype group.
# NOTE(review): assumes orsp.load_data / orsp.load_fits only read from disk
# and return the same values on every call -- confirm.
# The HCP loads come last so the generic names (X, Y, scan_duration, zk_all,
# ...) end up holding HCP values, as they did in the original cell.
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,ABCD_scan_duration = orsp.load_data('ABCD','tstats',rep_dir)
w_r_all,w_pa_all,ABCD_zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD','tstats',rep_dir)
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','tstats',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP','tstats',rep_dir)
sources = {
    'ABCD': (ABCD_res, ABCD_scan_duration, ABCD_zk_all, ABCD_scores_short),
    'HCP': (HCP_res, scan_duration, zk_all, HCP_scores_short),
}

# Groups in drawing order (the legend labels rely on this order):
# (dataset, phenotype indices, seaborn palette, palette size,
#  extra scatter kwargs, legend name replacing the last phenotype or None)
groups = [
    ('ABCD', ABCD_cog_ind, "blend:bisque,orangered", 13, {}, 'Cog Factor (A)'),        # cognition
    ('HCP', HCP_cog_ind, "blend:bisque,orangered", 13, {}, 'Cog Factor (H)'),          # cognition
    ('ABCD', ABCD_mh_ind, "blend:lightgrey,darkgrey", 4, {'zorder': -1}, None),        # mental health
    ('HCP', HCP_pers_ind, "blend:powderblue,darkslateblue", 4, {}, None),              # personality
    ('HCP', HCP_phy_ind, "blend:gold,gold", 2, {}, None),                              # physical
    ('HCP', HCP_emo_ind, "blend:forestgreen,forestgreen", 2, {}, None),                # emotion
    ('HCP', HCP_wb_ind, "blend:magenta,magenta", 2, {}, None),                         # well being
]

for dataset, inds, palette, n_colors, extra_kws, factor_label in groups:
    res, scan_dur, zk, scores_short = sources[dataset]
    custom_colors = sns.color_palette(palette, n_colors=n_colors)
    curr_scan = scan_dur[:,:limit]
    for n_c, n in enumerate(inds):
        behav = np.flip(np.flip(res['tstats_icc_landscape'][:,:,n].T),1)
        behav = behav[:,:limit]
        # normalise with this phenotype's fitted pair stored at [n, limit-3, 0/1]
        z = zk[n,limit-3,0]
        k = zk[n,limit-3,1]
        norm_acc = (behav - k) / z
        # x-axis is log2 of the total scan duration
        sns.scatterplot(x=np.log(curr_scan.flatten())/ np.log(2), y=norm_acc.flatten(), ax=ax,
                        color=custom_colors[n_c], **extra_kws)
    if factor_label is None:
        all_scores = np.concatenate((all_scores, scores_short[inds]))
    else:
        # last index of a cognition list is named by the factor label instead
        all_scores = np.concatenate((all_scores, scores_short[inds[:-1]], [factor_label]))

## Add legend
# Created before the reference line is drawn, so the labels map one-to-one
# onto the scatter series above.
lgd = ax.legend(all_scores, handletextpad=0.01, bbox_to_anchor=[1.06, -0.27],
           fontsize=9, ncol=6, columnspacing=0.5, frameon=False)

# plot log curve: x and y are both log2(X_fit), i.e. the line y = x
X_fit = np.linspace(200, 15000, num=100, dtype=int)
curve_val = np.log(X_fit) / np.log(2)
ax.plot(np.log(X_fit)/ np.log(2), curve_val, color='k')

# figure parameters
ax.set_ylim([6, 16])
orsp.format_scatter_plot('log\N{SUBSCRIPT TWO}(Total Scan Duration)',
                    'Norm. reliability', ax)

fig.savefig(os.path.join(img_dir, 'FigS32_ScanTime_Log_AllBehavCurves_Tstats_10m.svg'), bbox_inches='tight')

# Fig S33: BWAS scatter plot for ABCD

In [None]:
### settings for ABCD
# Fig S33: for every phenotype in ABCD_rs_log_ind, scatter reliability (ICC)
# against total scan duration twice (limit = 10 and limit = 5; the subplot
# title reports limit*2 minutes). Plots are paged into 3x2 figures, i.e. three
# phenotypes per saved file 'FigS33.<page>_ABCD_KRR_full_BWAS_scatter.svg'.
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','tstats',rep_dir)

# initialize cognition scores
behav_ind = ABCD_rs_log_ind
scores_names = np.append(ABCD_scores, ['Cognition Factor Score',
                         'Mental Health Factor Score', 'Personality Factor Score'])

# plot scatter plots into 6 subplots
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)
behav_count = 1   # 1-based position of the current phenotype in behav_ind
plot_num = 1      # page number used in the output file name
plot_x = 0        # column of the next free subplot
plot_y = 0        # row of the next free subplot
for n in behav_ind:
    behav = np.flip(np.flip(ABCD_res['tstats_icc_landscape'][:,:,n].T),1)
    for limit in [10,5]:
        orsp.plot_scatter(6,behav,scan_duration,ABCD_subcolors,limit,axs[plot_y][plot_x])
        axs[plot_y][plot_x].legend(ABCD_lgd, Y, markerscale=2, loc='lower right', \
                 ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
        orsp.format_scatter_plot('Total scan duration (# participants \nx scan time per participant)',
                            'Reliability (ICC)', axs[plot_y][plot_x])
        axs[plot_y][plot_x].set_xlim(0,26000)
        axs[plot_y][plot_x].set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
        axs[plot_y][plot_x].set_title(scores_names[n] + ' (' + str(limit*2) + 'mins)')
        
        # move to next plot (left -> right, then next row; wraps to (0,0) after 6 plots)
        if plot_x == 0:
            plot_x += 1
        else:
            plot_x = 0
            if plot_y != 2:
                plot_y += 1
            else: 
                plot_y = 0
        # move to next figure: a page is full after the second plot of every third phenotype
        if (behav_count % 3) == 0 and limit == 5:
            fig.savefig(os.path.join(img_dir, 'FigS33.' + str(plot_num) +
                    '_ABCD_KRR_full_BWAS_scatter.svg'), bbox_inches='tight')
            fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
            fig.tight_layout(h_pad=5, w_pad=7)
            plot_num += 1
    # continue behavior count
    # turn off remaining subplots if last behavior
    if behav_count == len(behav_ind):
        if plot_x == 0 and plot_y == 0:
            # The last phenotype exactly filled a page, which was already saved
            # above; the current figure is the freshly opened, empty one.
            # Discard it instead of saving an all-blank extra page.
            plt.close(fig)
        else:
            while plot_y != 3:
                axs[plot_y][plot_x].axis('off')
                if plot_x == 0:
                    plot_x += 1
                else:
                    plot_x = 0
                    plot_y += 1
            fig.savefig(os.path.join(img_dir, 'FigS33.' + str(plot_num) +
                    '_ABCD_KRR_full_BWAS_scatter.svg'), bbox_inches='tight')
    else:
        behav_count += 1


# Fig S34: BWAS scatter plot for HCP

In [None]:
### settings for HCP
# Fig S34: for every phenotype in HCP_rs_log_ind, scatter reliability (ICC)
# against total scan duration twice (limit = 29 and limit = 5; the subplot
# title reports limit*2 minutes). Plots are paged into 3x2 figures, i.e. three
# phenotypes per saved file 'FigS34.<page>_HCP_KRR_full_BWAS_scatter.svg'.
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','tstats',rep_dir)

# initialize cognition scores
behav_ind = HCP_rs_log_ind
scores_names = np.append(HCP_scores, ['Dissatisfaction Factor Score',
                         'Cognition Factor Score', 'Emotion Factor Score'])

# plot scatter plots into 6 subplots
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)
behav_count = 1   # 1-based position of the current phenotype in behav_ind
plot_num = 1      # page number used in the output file name
plot_x = 0        # column of the next free subplot
plot_y = 0        # row of the next free subplot
for n in behav_ind:
    behav = np.flip(np.flip(HCP_res['tstats_icc_landscape'][:,:,n].T),1)
    for limit in [29,5]:
        orsp.plot_scatter(6,behav,scan_duration,HCP_subcolors,limit,axs[plot_y][plot_x])
        axs[plot_y][plot_x].legend(HCP_lgd, Y, markerscale=2, loc='lower right', \
                 ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
        orsp.format_scatter_plot('Total scan duration (# participants \nx scan time per participant)',
                            'Reliability (ICC)', axs[plot_y][plot_x])
        # NOTE(review): same x-limit as the ABCD cell; confirm it does not clip
        # the longest HCP total scan durations when limit = 29.
        axs[plot_y][plot_x].set_xlim(0,26000)
        axs[plot_y][plot_x].set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
        axs[plot_y][plot_x].set_title(scores_names[n] + ' (' + str(limit*2) + 'mins)')
        
        # move to next plot (left -> right, then next row; wraps to (0,0) after 6 plots)
        if plot_x == 0:
            plot_x += 1
        else:
            plot_x = 0
            if plot_y != 2:
                plot_y += 1
            else: 
                plot_y = 0
        # move to next figure: a page is full after the second plot of every third phenotype
        if (behav_count % 3) == 0 and limit == 5:
            fig.savefig(os.path.join(img_dir, 'FigS34.' + str(plot_num) +
                    '_HCP_KRR_full_BWAS_scatter.svg'), bbox_inches='tight')
            fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
            fig.tight_layout(h_pad=5, w_pad=7)
            plot_num += 1
    # continue behavior count
    # turn off remaining subplots if last behavior
    if behav_count == len(behav_ind):
        if plot_x == 0 and plot_y == 0:
            # The last phenotype exactly filled a page, which was already saved
            # above; the current figure is the freshly opened, empty one.
            # Discard it instead of saving an all-blank extra page.
            plt.close(fig)
        else:
            while plot_y != 3:
                axs[plot_y][plot_x].axis('off')
                if plot_x == 0:
                    plot_x += 1
                else:
                    plot_x = 0
                    plot_y += 1
            fig.savefig(os.path.join(img_dir, 'FigS34.' + str(plot_num) +
                        '_HCP_KRR_full_BWAS_scatter.svg'), bbox_inches='tight')
    else:
        behav_count += 1


# Fig S35: Reliability N and T are not equivalent (BWAS - factor score)

In [None]:
#################################################
# Violin plot comparing same total scan time
#################################################
# ABCD: 2400 total scan time but different N and T
# For phenotype index b, collect one reliability value per seed at four
# (N, T) combinations that share the same total scan time, plot them as a
# violin plot, and compare neighbouring combinations statistically.
b = 36
n_seeds = 126
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','tstats',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD','tstats',rep_dir)
mat = os.path.join(ABCD_img_dir,'tstats_icc_indiv_landscape.mat')
res = scipy.io.loadmat(mat)

# extract points with 2400m of total scan time
# (landscape cell -> scan time label / sample size label;
#  presumably axis 0 indexes sample size and axis 1 scan time -- TODO confirm)
twom_vals = []
fourm_vals = []
sixm_vals = []
twelm_vals = []
records = []
for n in range(0,n_seeds):
    behav = res['tstats_landscape'][:,:,n,b]
    twom_vals.append(behav[5,0])
    fourm_vals.append(behav[2,1])
    sixm_vals.append(behav[1,2])
    twelm_vals.append(behav[0,5])
    records.extend([
        {'rel': behav[5,0], 'Time': '2', 'Subs': '1200subs'},
        {'rel': behav[2,1], 'Time': '4', 'Subs': '600subs'},
        {'rel': behav[1,2], 'Time': '6', 'Subs': '400subs'},
        {'rel': behav[0,5], 'Time': '12', 'Subs': '200subs'},
    ])
# build the long-format frame once (same row order as before, unique index)
full_df = pd.DataFrame(records, columns=['rel', 'Time', 'Subs'])

# plot violin plot
fig,ax = plt.subplots(figsize=(4, 3))
vp=sns.violinplot(data=full_df, x="Time", y="rel", palette="Reds_r",orient='v', ax=ax)
vp.set(xticklabels=[])
orsp.format_scatter_plot('', 'Reliability (ICC)', ax)
fig.savefig(os.path.join(img_dir,'FigS35_ABCD_not1to1_violin.svg'), bbox_inches='tight')

# mean accuracy
print('2m mean:', np.mean(twom_vals))
print('4m mean:', np.mean(fourm_vals))
print('6m mean:', np.mean(sixm_vals))
print('12m mean:', np.mean(twelm_vals))

# stats
print('2m vs 4m:', scipy.stats.ttest_rel(twom_vals, fourm_vals)) 
print('4m vs 6m:', scipy.stats.ttest_rel(fourm_vals, sixm_vals))
print('6m vs 12m:', scipy.stats.ttest_rel(sixm_vals, twelm_vals))

# corrected resampled t-test on the per-seed differences; computed once and
# reused for both the printout and the FDR list
p_2v4 = orsp.corrected_resample_ttest([x - y for x, y in zip(twom_vals, fourm_vals)], 1/1, 0)
p_4v6 = orsp.corrected_resample_ttest([x - y for x, y in zip(fourm_vals, sixm_vals)], 1/1, 0)
p_6v12 = orsp.corrected_resample_ttest([x - y for x, y in zip(sixm_vals, twelm_vals)], 1/1, 0)
print('2m vs 4m:', p_2v4)
print('4m vs 6m:', p_4v6)
print('6m vs 12m:', p_6v12)

# save pvals for FDR (p_list is created by an earlier cell)
p_list.append(p_2v4)
p_list.append(p_4v6)
p_list.append(p_6v12)

In [None]:
### Perform FDR
# Apply false-discovery-rate correction to the p-values collected in p_list.
# p_list is not created here; earlier cells append to it, so this cell must
# run after them. The bare last expression is shown as the cell output.
from statsmodels.stats.multitest import fdrcorrection
print(p_list)
fdrcorrection(p_list)

In [None]:
#################################################
# Tom's theoretical equations
#################################################
# For one phenotype per dataset, overlay the theoretical reliability curve
# (one line per sample-size level in Y) on the empirical ICC scatter.

# ---------- HCP ----------
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP', 'tstats', rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP', 'tstats', rep_dir)
b = 59
landscape = HCP_res['tstats_icc_landscape'][:,:,b]
behav = np.flip(np.flip(landscape.T), 1)
# empirical points
fig, ax = plt.subplots(figsize=(4, 3))
orsp.plot_scatter(len(Y), behav, scan_duration, HCP_subcolors, len(X), ax)
# parameters of the fit that used the full scan duration (last entry on axis 1)
w = w_r_all[b,-1,:]
X_fit = np.linspace(2, 58, num=100, dtype=int)
for sub_lvl in range(len(Y)):
    half_n = Y[sub_lvl]/2
    time_term = 2*w[1]/(1+(w[2]/X_fit))
    curve_val = w[0] / (w[0] + (1/half_n) * (1 - time_term))
    ax.plot(X_fit*Y[sub_lvl], curve_val, color=HCP_theor_subcolors[sub_lvl])
lgd = ax.legend(HCP_lgd, Y, markerscale=2, ncol=2, labelspacing=0.1,
                handletextpad=0.05, frameon=False, fontsize=10)
orsp.format_scatter_plot('Total scan duration (# participants \nx scan time per participant)',
                         'Reliability (ICC)', ax)
fig.savefig(os.path.join(img_dir, 'FigS35_HCP_CogFactor_Theoretical_rel.svg'), bbox_inches='tight')

# ---------- ABCD ----------
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD', 'tstats', rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD', 'tstats', rep_dir)
b = 36
landscape = ABCD_res['tstats_icc_landscape'][:,:,b]
behav = np.flip(np.flip(landscape.T), 1)
# empirical points
fig, ax = plt.subplots(figsize=(4, 3))
orsp.plot_scatter(len(Y), behav, scan_duration, ABCD_subcolors, len(X), ax)
# parameters of the fit that used the full scan duration (last entry on axis 1)
w = w_r_all[b,-1,:]
X_fit = np.linspace(2, 20, num=100, dtype=int)
for sub_lvl in range(len(Y)):
    half_n = Y[sub_lvl]/2
    time_term = 2*w[1]/(1+(w[2]/X_fit))
    curve_val = w[0] / (w[0] + (1/half_n) * (1 - time_term))
    ax.plot(X_fit*Y[sub_lvl], curve_val, color=ABCD_theor_subcolors[sub_lvl])
lgd = ax.legend(ABCD_lgd, Y, markerscale=2, ncol=2, labelspacing=0.1,
                handletextpad=0.05, frameon=False, fontsize=10)
orsp.format_scatter_plot('Total scan duration (# participants \nx scan time per participant)',
                         'Reliability (ICC)', ax)
fig.savefig(os.path.join(img_dir, 'FigS35_ABCD_CogFactor_Theoretical_rel.svg'), bbox_inches='tight')

# Fig S36: N and T are not equivalent (BWAS - all scores)

In [None]:
#################################################
# Violin plot comparing same total scan time
#################################################
# ABCD: 2400 total scan time but different N and T
# One value per phenotype in ABCD_rs_log_ind (mean over axis 2 of the
# per-seed landscape) at four (N, T) combinations sharing the same total
# scan time.
c_vers = 'full'
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','tstats',rep_dir,vers=c_vers)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD','tstats',rep_dir,vers=c_vers)
mat = os.path.join(ABCD_img_dir,'tstats_icc_indiv_landscape.mat')
res = scipy.io.loadmat(mat)

# extract points with 2400m of total scan time
twom_vals = []
fourm_vals = []
sixm_vals = []
twelm_vals = []
records = []
for b in ABCD_rs_log_ind:
    behav = np.mean(res['tstats_landscape'][:,:,:,b],2)
    twom_vals.append(behav[5,0])
    fourm_vals.append(behav[2,1])
    sixm_vals.append(behav[1,2])
    twelm_vals.append(behav[0,5])
    records.extend([
        {'rel': behav[5,0], 'Time': '2', 'Subs': '1200subs'},
        {'rel': behav[2,1], 'Time': '4', 'Subs': '600subs'},
        {'rel': behav[1,2], 'Time': '6', 'Subs': '400subs'},
        {'rel': behav[0,5], 'Time': '12', 'Subs': '200subs'},
    ])
# build the long-format frame once (same row order as before, unique index)
full_df = pd.DataFrame(records, columns=['rel', 'Time', 'Subs'])

# plot violin plot
fig,ax = plt.subplots(figsize=(4, 3))
vp=sns.violinplot(data=full_df, x="Time", y="rel", palette="Reds_r",orient='v', ax=ax)
orsp.format_scatter_plot('', 'Reliability (ICC)', ax)
vp.set(xticklabels=[])
ax.set_title('ABCD')
fig.savefig(os.path.join(img_dir,'FigS36_ABCD_not1to1_violin.svg'), bbox_inches='tight')

# mean accuracy
print('2m mean:', np.mean(twom_vals))
print('4m mean:', np.mean(fourm_vals))
print('6m mean:', np.mean(sixm_vals))
print('12m mean:', np.mean(twelm_vals))

# stats
print('2m vs 4m:', scipy.stats.ttest_rel(twom_vals, fourm_vals)) 
print('4m vs 6m:', scipy.stats.ttest_rel(fourm_vals, sixm_vals))
print('6m vs 12m:', scipy.stats.ttest_rel(sixm_vals, twelm_vals))

# corrected resampled t-test on the paired differences
print('2m vs 4m:', orsp.corrected_resample_ttest([x - y for x, y in zip(twom_vals, fourm_vals)], 1/1, 0))
print('4m vs 6m:', orsp.corrected_resample_ttest([x - y for x, y in zip(fourm_vals, sixm_vals)], 1/1, 0))
print('6m vs 12m:', orsp.corrected_resample_ttest([x - y for x, y in zip(sixm_vals, twelm_vals)], 1/1, 0))

In [None]:
#################################################
# Violin plot comparing same total scan time
#################################################
# HCP: 1600 total scan time but different N and T
# One value per phenotype in HCP_rs_log_ind (mean over axis 2 of the
# per-seed landscape) at three (N, T) combinations: 4 min x 400 subs,
# 8 min x 200 subs and ~10 min x 150 subs.
c_vers = 'full'
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','tstats',rep_dir,vers=c_vers)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP','tstats',rep_dir,vers=c_vers)
mat = os.path.join(HCP_img_dir,'tstats_icc_indiv_landscape.mat')
res = scipy.io.loadmat(mat)

# extract points with 1600m of total scan time
# NOTE: the list names are inherited from the ABCD cells; for HCP they hold
# twom_vals -> 4 min, fourm_vals -> 8 min, sixm_vals -> ~10 min.
twom_vals = []
fourm_vals = []
sixm_vals = []
twelm_vals = []   # not filled for HCP; reset so stale ABCD values do not linger
records = []
for b in HCP_rs_log_ind:
    behav = np.mean(res['tstats_landscape'][:,:,:,b],2)
    twom_vals.append(behav[5,1])
    fourm_vals.append(behav[1,3])
    sixm_vals.append(behav[0,4])
    records.extend([
        {'rel': behav[5,1], 'Time': '4', 'Subs': '400subs'},
        {'rel': behav[1,3], 'Time': '8', 'Subs': '200subs'},
        {'rel': behav[0,4], 'Time': '~10', 'Subs': '150subs'},
    ])
# build the long-format frame once (same row order as before, unique index)
full_df = pd.DataFrame(records, columns=['rel', 'Time', 'Subs'])

# plot violin plot
fig,ax = plt.subplots(figsize=(4, 3))
vp=sns.violinplot(data=full_df, x="Time", y="rel", palette="Blues_r",orient='v', ax=ax)
orsp.format_scatter_plot('', 'Reliability (ICC)', ax)
ax.set_title('HCP')
vp.set(xticklabels=[])
fig.savefig(os.path.join(img_dir,'FigS36_HCP_not1to1_violin.svg'), bbox_inches='tight')

# mean accuracy
print('4m mean:', np.mean(twom_vals))
print('8m mean:', np.mean(fourm_vals))
print('10m mean:', np.mean(sixm_vals))

# stats
print('4m vs 8m:', scipy.stats.ttest_rel(twom_vals, fourm_vals)) 
print('8m vs 10m:', scipy.stats.ttest_rel(fourm_vals, sixm_vals))

# corrected resampled t-test on the paired differences
print('4m vs 8m:', orsp.corrected_resample_ttest([x - y for x, y in zip(twom_vals, fourm_vals)], 1/1, 0))
print('8m vs 10m:', orsp.corrected_resample_ttest([x - y for x, y in zip(fourm_vals, sixm_vals)], 1/1, 0))

# Fig S37: Theoretical fit for ABCD scores (BWAS)

In [None]:
### settings for ABCD
# Fig S37: for every phenotype in ABCD_rs_log_ind, scatter reliability (ICC)
# against total scan duration and overlay the theoretical curve (one line per
# sample-size level in Y). Six phenotypes per 3x2 page, saved as
# 'FigS37.<page>_ABCD_KRR_full_Tstats_TheoreticalFit_scatter.svg'.
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','tstats',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD','tstats',rep_dir,)

# initialize cognition scores
behav_ind = ABCD_rs_log_ind
scores_names = np.append(ABCD_scores, ['Cognition Factor Score',
                         'Mental Health Factor Score', 'Personality Factor Score'])

# plot scatter plots into 6 subplots
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)
behav_count = 1   # 1-based position of the current phenotype in behav_ind
plot_num = 1      # page number used in the output file name
plot_x = 0        # column of the next free subplot
plot_y = 0        # row of the next free subplot
for b in behav_ind:
    behav = np.flip(np.flip(ABCD_res['tstats_icc_landscape'][:,:,b].T),1)
    ax_cur = axs[plot_y][plot_x]
    orsp.plot_scatter(6,behav,scan_duration,ABCD_subcolors,10,ax_cur)
    ax_cur.legend(ABCD_lgd, Y, markerscale=2, loc='lower right', \
             ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Total scan duration (# participants \nx scan time per participant)',
                            'Reliability (ICC)', ax_cur)
    # Tom's equation fit to full duration
    w = w_r_all[b,-1,:]
    X_fit = np.linspace(2, 20, num=100, dtype=int)
    for sub_lvl in range(0,len(Y)):
        curve_val = w[0] / (w[0] + (1/(Y[sub_lvl]/2)) * (1 - 2*w[1]/(1+(w[2]/X_fit))))
        ax_cur.plot(Y[sub_lvl]*X_fit, curve_val, color=ABCD_subcolors[sub_lvl])
    ax_cur.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
    ax_cur.set_title(scores_names[b])

    # move to next plot (left -> right, then next row; wraps to (0,0) after 6 plots)
    if plot_x == 0:
        plot_x += 1
    else:
        plot_x = 0
        if plot_y != 2:
            plot_y += 1
        else: 
            plot_y = 0
    # move to next figure: a page is full after every sixth phenotype
    if (behav_count % 6) == 0:
        fig.savefig(os.path.join(img_dir, 'FigS37.' + str(plot_num) +
                '_ABCD_KRR_full_Tstats_TheoreticalFit_scatter.svg'), bbox_inches='tight')
        fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
        fig.tight_layout(h_pad=5, w_pad=7)
        plot_num += 1
    # continue behavior count
    # turn off remaining subplots if last behavior
    if behav_count == len(behav_ind):
        if plot_x == 0 and plot_y == 0:
            # The last phenotype exactly filled a page, which was already saved
            # above; the current figure is the freshly opened, empty one.
            # Discard it instead of saving an all-blank extra page.
            plt.close(fig)
        else:
            while plot_y != 3:
                axs[plot_y][plot_x].axis('off')
                if plot_x == 0:
                    plot_x += 1
                else:
                    plot_x = 0
                    plot_y += 1
            fig.savefig(os.path.join(img_dir, 'FigS37.' + str(plot_num) +
                    '_ABCD_KRR_full_Tstats_TheoreticalFit_scatter.svg'), bbox_inches='tight')
    else:
        behav_count += 1


# Fig S38: Theoretical fit for HCP scores (reliability)

In [None]:
### settings for HCP
# Fig S38: for every phenotype in HCP_rs_log_ind, scatter reliability (ICC)
# against total scan duration and overlay the theoretical curve (one line per
# sample-size level in Y). Six phenotypes per 3x2 page, saved as
# 'FigS38.<page>_HCP_KRR_full_Tstats_TheoreticalFit_scatter.svg'.
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','tstats',rep_dir)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP','tstats',rep_dir,)

# initialize cognition scores
behav_ind = HCP_rs_log_ind
scores_names = np.append(HCP_scores, ['Dissatisfaction Factor Score',
                         'Cognition Factor Score', 'Emotion Factor Score'])

# plot scatter plots into 6 subplots
fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
fig.tight_layout(h_pad=5, w_pad=7)
behav_count = 1   # 1-based position of the current phenotype in behav_ind
plot_num = 1      # page number used in the output file name
plot_x = 0        # column of the next free subplot
plot_y = 0        # row of the next free subplot
for b in behav_ind:
    behav = np.flip(np.flip(HCP_res['tstats_icc_landscape'][:,:,b].T),1)
    ax_cur = axs[plot_y][plot_x]
    orsp.plot_scatter(6,behav,scan_duration,HCP_subcolors,29,ax_cur)
    ax_cur.legend(HCP_lgd, Y, markerscale=2, loc='lower right', \
             ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
    orsp.format_scatter_plot('Total scan duration (# participants \nx scan time per participant)',
                            'Reliability (ICC)', ax_cur)
    # Tom's equation fit to full duration
    w = w_r_all[b,-1,:]
    X_fit = np.linspace(2, 58, num=100, dtype=int)
    for sub_lvl in range(0,len(Y)):
        curve_val = w[0] / (w[0] + (1/(Y[sub_lvl]/2)) * (1 - 2*w[1]/(1+(w[2]/X_fit))))
        ax_cur.plot(Y[sub_lvl]*X_fit, curve_val, color=HCP_subcolors[sub_lvl])
    ax_cur.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
    ax_cur.set_title(scores_names[b])

    # move to next plot (left -> right, then next row; wraps to (0,0) after 6 plots)
    if plot_x == 0:
        plot_x += 1
    else:
        plot_x = 0
        if plot_y != 2:
            plot_y += 1
        else: 
            plot_y = 0
    # move to next figure: a page is full after every sixth phenotype
    if (behav_count % 6) == 0:
        fig.savefig(os.path.join(img_dir, 'FigS38.' + str(plot_num) +
                '_HCP_KRR_full_Tstats_TheoreticalFit_scatter.svg'), bbox_inches='tight')
        fig,axs = plt.subplots(3,2,figsize=(7.5,9.5))
        fig.tight_layout(h_pad=5, w_pad=7)
        plot_num += 1
    # continue behavior count
    # turn off remaining subplots if last behavior
    if behav_count == len(behav_ind):
        if plot_x == 0 and plot_y == 0:
            # The last phenotype exactly filled a page, which was already saved
            # above; the current figure is the freshly opened, empty one.
            # Discard it instead of saving an all-blank extra page.
            plt.close(fig)
        else:
            while plot_y != 3:
                axs[plot_y][plot_x].axis('off')
                if plot_x == 0:
                    plot_x += 1
                else:
                    plot_x = 0
                    plot_y += 1
            fig.savefig(os.path.join(img_dir, 'FigS38.' + str(plot_num) +
                    '_HCP_KRR_full_Tstats_TheoreticalFit_scatter.svg'), bbox_inches='tight')
    else:
        behav_count += 1


# Fig S39: Fit vs ICC
# Fig S40: Shuffling (Tstats)

In [None]:
#################################################
# Acc against fit (ABCD)
#################################################
# Relates each phenotype's reliability (ICC) to the goodness of fit (COD) of
# the logarithmic and theoretical models, then compares COD between the
# original ('full') and shuffled ('random') fits.
c_vers = 'full'
cod_thresh = 0.8
# load ABCD data
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','predacc',rep_dir, vers = c_vers)
ABCD_img_dir,rel_ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','tstats',rep_dir, vers = c_vers)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('ABCD', 'tstats', rep_dir, vers= c_vers)
# score classifications (1-based phenotype numbers converted to 0-based indices)
bpass = []
bfit = []
cog_ind = np.array([9,10,11,12,13,14,15,16,17,18,31,32,33,34,35,36,37])-1
per_ind = np.array([19,20,21,22,23,24,25,26,27])-1
mh_ind = np.array([1,2,3,4,5,6,7,8,28,29,30])-1
acc_all = np.array([])
rel_all = np.array([])
log_loss = np.array([])
r_loss = np.array([])
log_limit = 5
r_limit = 10
# remove unwanted scores
for b_idx in range(0,37):
    behav_acc = np.flip(np.flip(ABCD_res['acc_landscape'][:,:,b_idx].T),1)
    behav_rel = np.flip(np.flip(rel_ABCD_res['tstats_icc_landscape'][:,:,b_idx].T),1)
    # FIX: test this phenotype's own accuracy landscape. The previous code read
    # `behav`, a leftover from an earlier cell, so every phenotype got the same
    # verdict. NOTE(review): this can change which scores pass -- re-check output.
    if (np.sum(behav_acc.flatten() < 0) < 10): # remove scores with > 10% negative predictions
        bpass.append(b_idx)
        # FIX: use log_limit defined above rather than a leftover `limit`
        if loss_log_all[b_idx,log_limit-3] > cod_thresh:
            bfit.append(b_idx)
    acc_all = np.append(acc_all, behav_acc[8,9])
    rel_all = np.append(rel_all, behav_rel[5,9])
    log_loss = np.append(log_loss, loss_log_all[b_idx,log_limit-3])
    r_loss = np.append(r_loss , loss_r_all[b_idx,r_limit-3])
# keep only the category members that passed the filter
cog_ind = cog_ind[np.isin(cog_ind, bpass)]
per_ind = per_ind[np.isin(per_ind, bpass)]
mh_ind = mh_ind[np.isin(mh_ind, bpass)]
# final list of scores for reference
print("Scores < 10% negative:", bpass)
print("Chosen scores:", bfit)
        
# log fit at 10m
print("Log Fit:", scipy.stats.spearmanr(log_loss[bpass],rel_all[bpass]))
fig,ax = plt.subplots(figsize=(4,3))
plt.scatter(rel_all[cog_ind],log_loss[cog_ind], c='orangered')
plt.scatter(rel_all[per_ind],log_loss[per_ind], c='darkslateblue')
plt.scatter(rel_all[mh_ind],log_loss[mh_ind], c='darkgray')
plt.legend(['Cognition', 'Personality', 'Mental Health'], loc="lower right", frameon=False,
            prop={'family' : 'Arial'}, labelspacing=0.1,handletextpad=0.05, fontsize=10)
orsp.format_scatter_plot('Reliability (ICC)','Goodness of fit (COD)',ax, fontsz=10)
ax.set_title('ABCD (Logarithm)')
# plot trend line: fixed reference curve 1 - exp(-5x) over the observed ICC range
# (FIX: dropped `y_new = f(x_new)`; `f` is not defined in this cell and the
# result was overwritten on the next line anyway)
x = rel_all[bpass]
x_new = np.linspace(np.min(x), np.max(x), 50)
y_new = 1 - np.exp(-5*x_new)
plt.plot(x_new, y_new, color='k',linestyle='dashed')
fig.savefig(os.path.join(img_dir,'FigS39_ABCD_AccvsFit_Log.svg'), bbox_inches='tight')
# Nichols fit at 20m
print("Theoretical Fit:", scipy.stats.spearmanr(r_loss[bpass],rel_all[bpass]))
fig,ax = plt.subplots(figsize=(4,3))
plt.scatter(rel_all[cog_ind],r_loss[cog_ind], c='orangered')
plt.scatter(rel_all[per_ind],r_loss[per_ind], c='darkslateblue')
plt.scatter(rel_all[mh_ind],r_loss[mh_ind], c='darkgray')
plt.legend(['Cognition', 'Personality', 'Mental Health'], loc="lower right", frameon=False,
          prop={'family' : 'Arial'}, labelspacing=0.1,handletextpad=0.05, fontsize=10)
orsp.format_scatter_plot('Reliability (ICC)','Goodness of fit (COD)',ax, fontsz=10)
ax.set_xlim([-0.05,1])
ax.set_ylim([-0.1,1])
ax.set_title('ABCD (Theoretical)')
# plot trend line (same fixed reference curve as above)
x = rel_all[bpass]
x_new = np.linspace(np.min(x), np.max(x), 50)
y_new = 1 - np.exp(-5*x_new)
plt.plot(x_new, y_new, color='k',linestyle='dashed')
fig.savefig(os.path.join(img_dir,'FigS39_ABCD_AccvsFit_Theoretical.svg'), bbox_inches='tight')

#################################################
# Improvement to fit after shuffling (ABCD)
#################################################
# load ABCD data
w_r_f,w_pa_f,zk_f,loss_r_f,loss_pa_f,loss_log_f = orsp.load_fits('ABCD','tstats',rep_dir,vers='full')
w_r_r,w_pa_r,zk_r,loss_r_r,loss_pa_r,loss_log_r = orsp.load_fits('ABCD','tstats',rep_dir,vers='random')
full_df = pd.DataFrame()

# t test for selected behaviors
# (both models are compared at the r_limit column)
print("Log:", scipy.stats.ttest_rel(loss_log_f[bpass,r_limit-3], loss_log_r[bpass,r_limit-3]))
df = pd.DataFrame(data={'COD': loss_log_f[bpass,r_limit-3]})
df['Domain'] = 'Log Fit'
df['Class'] = 'Original'
dr = pd.DataFrame(data={'COD': loss_log_r[bpass,r_limit-3]})
dr['Domain'] = 'Log Fit'
dr['Class'] = 'Randomized'
full_df = pd.concat([full_df, df, dr])
print("Theoretical:", scipy.stats.ttest_rel(loss_r_f[bpass,r_limit-3], loss_r_r[bpass,r_limit-3]))
df = pd.DataFrame(data={'COD': loss_r_f[bpass,r_limit-3]})
df['Domain'] = 'Theoretical Fit'
df['Class'] = 'Original'
dr = pd.DataFrame(data={'COD': loss_r_r[bpass,r_limit-3]})
dr['Domain'] = 'Theoretical Fit'
dr['Class'] = 'Randomized'
full_df = pd.concat([full_df, df, dr])
# plot box plot
fig,ax = plt.subplots(figsize=(4, 3))
sns.boxplot(data=full_df, x="Domain", y="COD", hue="Class",palette="Reds",orient='v')
plt.legend(frameon=False, fontsize=10, bbox_to_anchor=(0.68,0.23))
orsp.format_scatter_plot('','Goodness of fit (COD)',ax, fontsz=10)
ax.set_title('ABCD')
ax.set_ylim([-0.1,1])
fig.savefig(os.path.join(img_dir,'FigS40_ABCD_origvsrandom.svg'), bbox_inches='tight')

In [None]:
#################################################
# Acc against fit (HCP)
#################################################
# Relates each phenotype's reliability (ICC) to the goodness of fit (COD) of
# the logarithmic and theoretical models, then compares COD between the
# original ('full') and shuffled ('random') fits.
c_vers = 'full'
cod_thresh = 0.8
# load HCP data
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','predacc',rep_dir, vers=c_vers)
HCP_img_dir,rel_HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','tstats',rep_dir, vers=c_vers)
w_r_all,w_pa_all,zk_all,loss_r_all,loss_pa_all,loss_log_all = orsp.load_fits('HCP', 'tstats', rep_dir, vers=c_vers)
# score classifications (1-based phenotype numbers converted to 0-based indices)
bpass = []
bfit = []
cog_ind = np.array([1,2,3,4,5,6,7,9,10,11,12,13,14,25,26,27,28,29,30,59])-1
per_ind = np.array([8,31,32,33,34,35])-1
emo_ind = np.array([24,36,37,38,39,40,41,42,43,44,45,46,47])-1
phy_ind = np.array([15,16,17,18,19,20,21,22,23])-1
wb_ind = np.array([48,49,50,51,52,53,54,55,56,57,58])-1
acc_all = np.array([])
# FIX: reset rel_all. It was previously left over from the ABCD cell, so the
# HCP values were appended after the ABCD ones and every rel_all[...] lookup
# below returned ABCD reliabilities.
rel_all = np.array([])
log_loss = np.array([])
r_loss = np.array([])
log_limit = 5
r_limit = 29
# remove unwanted scores
for b_idx in range(0,60):
    behav_acc = np.flip(np.flip(HCP_res['acc_landscape'][:,:,b_idx].T),1)
    behav_rel = np.flip(np.flip(rel_HCP_res['tstats_icc_landscape'][:,:,b_idx].T),1)
    # FIX: test this phenotype's own accuracy landscape. The previous code read
    # `behav`, a leftover from an earlier cell, so every phenotype got the same
    # verdict. NOTE(review): this can change which scores pass -- re-check output.
    if (np.sum(behav_acc.flatten() < 0) < 18) and b_idx != 58: # remove scores with > 10% negative predictions
        bpass.append(b_idx)
        # FIX: use log_limit defined above rather than a leftover `limit`
        if loss_log_all[b_idx,log_limit-3] > cod_thresh:
            bfit.append(b_idx)
    acc_all = np.append(acc_all, behav_acc[5,28])
    rel_all = np.append(rel_all, behav_rel[5,28])
    log_loss = np.append(log_loss, loss_log_all[b_idx,log_limit-3])
    r_loss = np.append(r_loss, loss_r_all[b_idx,r_limit-3])
# keep only the category members that passed the filter
cog_ind = cog_ind[np.isin(cog_ind, bpass)]
per_ind = per_ind[np.isin(per_ind, bpass)]
emo_ind = emo_ind[np.isin(emo_ind, bpass)]
phy_ind = phy_ind[np.isin(phy_ind, bpass)]
wb_ind = wb_ind[np.isin(wb_ind, bpass)]
# final list of scores for reference
print("Scores < 10% negative:", bpass)
print("Chosen scores:", bfit)
        
# log fit at 10m
print("Log Fit:", scipy.stats.spearmanr(log_loss[bpass],rel_all[bpass]))
fig,ax = plt.subplots(figsize=(4,3))
plt.scatter(rel_all[cog_ind],log_loss[cog_ind], c='orangered')
plt.scatter(rel_all[per_ind],log_loss[per_ind], c='darkslateblue')
plt.scatter(rel_all[emo_ind],log_loss[emo_ind], c='forestgreen')
plt.scatter(rel_all[phy_ind],log_loss[phy_ind], c='goldenrod')
plt.scatter(rel_all[wb_ind],log_loss[wb_ind], c='deeppink')
plt.legend(['Cognition','Personality','Emotion', 'Physical','Well-being'], prop={'family' : 'Arial'},
           loc="best", labelspacing=0.1,handletextpad=0.05, frameon=False, fontsize=10)
orsp.format_scatter_plot('Reliability (ICC)','Goodness of fit (COD)',ax, fontsz=10)
# plot trend line: fixed reference curve 1 - exp(-5x) over the observed ICC range
# (FIX: dropped `y_new = f(x_new)`; `f` is not defined in this cell and the
# result was overwritten on the next line anyway)
x = rel_all[bpass]
x_new = np.linspace(np.min(x), np.max(x), 50)
y_new = 1 - np.exp(-5*x_new)
plt.plot(x_new, y_new, color='k',linestyle='dashed')
ax.set_xlim([0,1])
ax.set_ylim([0,1])
ax.set_title('HCP (Logarithm)')
fig.savefig(os.path.join(img_dir,'FigS39_HCP_AccvsFit_Log.svg'), bbox_inches='tight')

# Nichols fit at 58m
print("Theoretical Fit:", scipy.stats.spearmanr(r_loss[bpass],rel_all[bpass]))
fig,ax = plt.subplots(figsize=(4,3))
plt.scatter(rel_all[cog_ind],r_loss[cog_ind], c='orangered')
plt.scatter(rel_all[per_ind],r_loss[per_ind], c='darkslateblue')
plt.scatter(rel_all[emo_ind],r_loss[emo_ind], c='forestgreen')
plt.scatter(rel_all[phy_ind],r_loss[phy_ind], c='goldenrod')
plt.scatter(rel_all[wb_ind],r_loss[wb_ind], c='deeppink')
plt.legend(['Cognition','Personality','Emotion', 'Physical','Well-being'], prop={'family' : 'Arial'},
           loc="lower right", labelspacing=0.1,handletextpad=0.05, frameon=False, fontsize=10)
orsp.format_scatter_plot('Reliability (ICC)','Goodness of fit (COD)',ax, fontsz=10)
# plot trend line (same fixed reference curve as above)
x = rel_all[bpass]
x_new = np.linspace(np.min(x), np.max(x), 50)
y_new = 1 - np.exp(-5*x_new)
plt.plot(x_new, y_new, color='k',linestyle='dashed')
ax.set_xlim([0,1])
ax.set_ylim([0,1])
ax.set_title('HCP (Theoretical)')
fig.savefig(os.path.join(img_dir,'FigS39_HCP_AccvsFit_Theoretical.svg'), bbox_inches='tight')

#################################################
# Improvement to fit after shuffling (HCP)
#################################################
# load HCP data
w_r_f,w_pa_f,zk_f,loss_r_f,loss_pa_f,loss_log_f = orsp.load_fits('HCP','tstats',rep_dir,vers='full')
w_r_r,w_pa_r,zk_r,loss_r_r,loss_pa_r,loss_log_r = orsp.load_fits('HCP','tstats',rep_dir,vers='random')
full_df = pd.DataFrame()

# t test for selected behaviors
# use all 58 min for comparison
print("Log:", scipy.stats.ttest_rel(loss_log_f[bpass,r_limit-3], loss_log_r[bpass,r_limit-3]))
df = pd.DataFrame(data={'COD': loss_log_f[bpass,r_limit-3]})
df['Domain'] = 'Log Fit'
df['Class'] = 'Original'
dr = pd.DataFrame(data={'COD': loss_log_r[bpass,r_limit-3]})
dr['Domain'] = 'Log Fit'
dr['Class'] = 'Randomized'
full_df = pd.concat([full_df, df, dr])

# FIX: compare the reliability-model losses (loss_r_*), matching r_loss above
# and the ABCD version of this analysis; the previous code used loss_pa_*.
# NOTE(review): confirm loss_r_* is the intended array for the tstats fits.
print("Theoretical:", scipy.stats.ttest_rel(loss_r_f[bpass,r_limit-3], loss_r_r[bpass,r_limit-3]))
df = pd.DataFrame(data={'COD': loss_r_f[bpass,r_limit-3]})
df['Domain'] = 'Theoretical Fit'
df['Class'] = 'Original'
dr = pd.DataFrame(data={'COD': loss_r_r[bpass,r_limit-3]})
dr['Domain'] = 'Theoretical Fit'
dr['Class'] = 'Randomized'
full_df = pd.concat([full_df, df, dr])
# plot box plot
fig,ax = plt.subplots(figsize=(4, 3))
sns.boxplot(data=full_df, x="Domain", y="COD", hue="Class",palette="Blues",orient='v')
plt.legend(frameon=False, bbox_to_anchor=[0.82, 0.1], fontsize=10)
orsp.format_scatter_plot('','Goodness of fit (COD)',ax,fontsz=10)
ax.set_title('HCP')
fig.savefig(os.path.join(img_dir,'FigS40_HCP_origvsrandom.svg'), bbox_inches='tight')

In [None]:
#lasthere

# Fig S41: Haufe Contour plots

In [None]:
#################################################
# plot contour plots
#################################################
# Two-panel figure of ICC landscapes: ABCD is drawn on the left axis (axs[0])
# and HCP on the right axis (axs[1]).
fig, axs = plt.subplots(1, 2, figsize=(8.5, 4.5))
fig.tight_layout(pad=7)

### settings for HCP
HCP_img_dir, HCP_res, X, Y, HCP_extent, scan_duration = orsp.load_data('HCP', 'Haufe', rep_dir)
# Cognition (slice 59 of the landscape)
# np.flip without an axis reverses every axis and the second flip undoes the
# reversal along axis 1, so for a 2-D slice only the first axis ends up reversed.
hcp_slice = HCP_res['fi_icc_landscape'][:, :, 59]
hcp_icc = np.flip(np.flip(hcp_slice.T), 1)
hcp_levels = [0.3, 0.4, 0.45]
hcp_label_pos = [(6.5, 1.5), (10, 3), (25, 5)]
orsp.plot_contour(hcp_icc, X, Y, hcp_levels, hcp_label_pos, HCP_extent,
                  fig, axs[1], Yax_lbl='ICC', Ax_Ttl='HCP')

### settings for ABCD
ABCD_img_dir, ABCD_res, X, Y, ABCD_extent, scan_duration = orsp.load_data('ABCD', 'Haufe', rep_dir)
# Cognition (slice 36 of the landscape)
abcd_slice = ABCD_res['fi_icc_landscape'][:, :, 36]
abcd_icc = np.flip(np.flip(abcd_slice.T), 1)
abcd_levels = [0.4, 0.65, 0.75]
abcd_label_pos = [(1, 0.3), (4, 2), (8, 6)]
orsp.plot_contour(abcd_icc, X, Y, abcd_levels, abcd_label_pos, ABCD_extent,
                  fig, axs[0], Yax_lbl='ICC', Ax_Ttl='ABCD')

contour_svg = os.path.join(img_dir, 'FigS41_' +
                           'KRR_full_rel_Haufe_cog_contour.svg')
fig.savefig(contour_svg, bbox_inches='tight')

# Fig S42: Correlation between common points in ABCD and HCP contour plots (Haufe)

In [None]:
# load HCP data
HCP_img_dir, HCP_res, X, Y, HCP_extent, scan_duration = orsp.load_data('HCP', 'Haufe', rep_dir)
hcp_icc = np.flip(np.flip(HCP_res['fi_icc_landscape'][:, :, 59].T), 1)
# keep rows 1 and 5 and the first 10 columns of the HCP landscape
hcp_behav = hcp_icc[[1, 5], :10].ravel()

# load ABCD data
ABCD_img_dir, ABCD_res, X, Y, ABCD_extent, scan_duration = orsp.load_data('ABCD', 'Haufe', rep_dir)
abcd_icc = np.flip(np.flip(ABCD_res['fi_icc_landscape'][:, :, 36].T), 1)
# keep the first two rows (all columns) of the ABCD landscape
abcd_behav = abcd_icc[:2, :].ravel()

# scatter the paired values and overlay the least-squares regression line
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(abcd_behav, hcp_behav)
res = scipy.stats.linregress(abcd_behav, hcp_behav)
xy_line = np.linspace(0.05, 0.65, 100)
fitted_line = res.intercept + res.slope * xy_line
ax.plot(xy_line, fitted_line, 'k', linestyle='--')
orsp.format_scatter_plot('ABCD Reliability (ICC)',
                         'HCP Reliability (ICC)', ax)

# annotate with the Pearson correlation, rounded to 2 decimals
corr_val = np.corrcoef(abcd_behav, hcp_behav)
r_text = 'r = ' + str(np.round(corr_val[0][1], 2))
ax.text(0.8, 0.1, r_text, transform=ax.transAxes, size=12)
ax.set_yticks(np.arange(0.1, 0.65, step=0.1))
ax.set_xticks(np.arange(0.1, 0.65, step=0.1))

fig.savefig(os.path.join(img_dir, 'FigS42_ABCD_HCP_KRR_commonpts_haufe_correlation.svg'), bbox_inches='tight')

# Fig S43: Haufe scatter plots

In [None]:
#################################################
# plot scatter plot against total scan time
#################################################
limit=5
scan_xlabel = 'Total scan duration (# participants \nx scan time per participant)'


def _scatter_icc_split(ax, behav, scan_duration, colors, limit, n_rows=6):
    """Scatter the first `n_rows` rows of `behav` against `scan_duration` on `ax`.

    Columns before `limit` are drawn first (rows in reverse order, marker size 40);
    columns from `limit` onwards are drawn afterwards (rows in forward order,
    marker size 30 with a black outline). Row `i` uses `colors[i]`.
    """
    for row in reversed(range(n_rows)):
        sns.scatterplot(x=scan_duration[row, :limit].flatten(),
                        y=behav[row, :limit].flatten(),
                        ax=ax, color=colors[row], s=40)
    for row in range(n_rows):
        sns.scatterplot(x=scan_duration[row, limit:].flatten(),
                        y=behav[row, limit:].flatten(),
                        ax=ax, color=colors[row],
                        edgecolor="k", linewidth=0.75, s=30)


# load HCP data
HCP_img_dir, HCP_res, X, Y, HCP_extent, scan_duration = orsp.load_data('HCP', 'Haufe', rep_dir)
behav = np.flip(np.flip(HCP_res['fi_icc_landscape'][:, :, 59].T), 1)
# plot scatter plot
fig, ax = plt.subplots(figsize=(4, 3))
_scatter_icc_split(ax, behav, scan_duration, HCP_subcolors, limit)
orsp.format_scatter_plot(scan_xlabel, 'Reliability (ICC)', ax)
# legend entries pair the HCP marker handles with the labels in Y
lgd = ax.legend(HCP_lgd, Y, markerscale=2,
                ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
fig.savefig(os.path.join(img_dir, 'FigS43_HCP_Haufe_icc.svg'), bbox_inches='tight')

# load ABCD data
ABCD_img_dir, ABCD_res, X, Y, ABCD_extent, scan_duration = orsp.load_data('ABCD', 'Haufe', rep_dir)
behav = np.flip(np.flip(ABCD_res['fi_icc_landscape'][:, :, 36].T), 1)
# Plot scatter
fig, ax = plt.subplots(figsize=(4, 3))
_scatter_icc_split(ax, behav, scan_duration, ABCD_subcolors, limit)
orsp.format_scatter_plot(scan_xlabel, 'Reliability (ICC)', ax)
# legend entries pair the ABCD marker handles with the labels in Y
lgd = ax.legend(ABCD_lgd, Y, markerscale=2,
                ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
fig.savefig(os.path.join(img_dir, 'FigS43_ABCD_Haufe_icc.svg'), bbox_inches='tight')

In [None]:
#################################################
# plot individual scores
#################################################
# Overlays, on a single axis, the normalised ICC of every selected ABCD and HCP
# score against total scan duration, then adds the curve drawn by orsp.plot_curve.
fig,ax = plt.subplots(figsize=(8.5, 3.5))
HCP_cog_ind = [1,2,3,4,5,6,8,10,25,26,29,59]
HCP_emo_ind = [23]
HCP_pers_ind = [7,31,32,34]
HCP_phy_ind = [14]
HCP_wb_ind = [47]
ABCD_cog_ind = [8,10,11,13,14,15,16,17,30,31,32,33,36]
ABCD_mh_ind = [5,29,6,3]

# only the first `limit` columns of each landscape are plotted; the fit
# parameters are read from column `limit-3` of zk_all
limit = 5

# Load every dataset once. The landscape, scan durations and fit parameters do not
# change between score groups, so there is no need to re-read them per group.
haufe_data = {}
for dataset in ('ABCD', 'HCP'):
    (_img_dir, res, _x, _y, _extent, scan_duration) = orsp.load_data(dataset, 'Haufe', rep_dir)
    (_w_r, _w_pa, zk_all, _loss_r, _loss_pa, _loss_log) = orsp.load_fits(dataset, 'Haufe', rep_dir)
    haufe_data[dataset] = (res['fi_icc_landscape'], scan_duration, zk_all)

# One entry per score group, in drawing order:
# (dataset, score indices, seaborn palette, palette size, extra scatter kwargs)
score_groups = [
    ### cognition
    ('ABCD', ABCD_cog_ind, "blend:bisque,orangered", 13, {}),
    # HCP cognition deliberately reuses the 13-colour ABCD cognition palette
    # (only its first 12 colours are consumed)
    ('HCP', HCP_cog_ind, "blend:bisque,orangered", 13, {}),
    ### mental health (drawn behind the other groups)
    ('ABCD', ABCD_mh_ind, "blend:lightgrey,darkgrey", 4, {'zorder': -1}),
    ### personality
    ('HCP', HCP_pers_ind, "blend:powderblue,darkslateblue", 4, {}),
    ### physical
    ('HCP', HCP_phy_ind, "blend:gold,gold", 2, {}),
    ### emotion
    ('HCP', HCP_emo_ind, "blend:forestgreen,forestgreen", 2, {}),
    ### well being
    ('HCP', HCP_wb_ind, "blend:magenta,magenta", 2, {}),
]

for dataset, score_ind, palette, n_colors, scatter_kws in score_groups:
    landscape, scan_duration, zk_all = haufe_data[dataset]
    custom_colors = sns.color_palette(palette, n_colors=n_colors)
    curr_scan = scan_duration[:,:limit].flatten()
    for color, n in zip(custom_colors, score_ind):
        behav = np.flip(np.flip(landscape[:,:,n].T),1)[:,:limit]
        # normalise with the two fit parameters stored for this score
        z = zk_all[n,limit-3,0]
        k = zk_all[n,limit-3,1]
        norm_acc = (behav - k) / z
        sns.scatterplot(x=curr_scan, y=norm_acc.flatten(), color=color, ax=ax, **scatter_kws)

# plot fitted curve
orsp.plot_curve(200, 15000)

# figure parameters
ax.set_ylim([6, 16])
orsp.format_scatter_plot('Total scan duration (# participants x scan time per participant)',
                    'Norm. reliability', ax)

fig.savefig(os.path.join(img_dir, 'FigS43_ScanTime_AllBehavCurves_Haufe_10m.svg'), bbox_inches='tight')

In [None]:
#################################################
# plot individual scores
#################################################
# Same overlay of normalised ICC for every selected ABCD and HCP score, but with
# log2(total scan duration) on the x axis, a legend naming every score, and the
# line y = log2(x) drawn in black.
fig,ax = plt.subplots(figsize=(8.5, 2.5))
HCP_cog_ind = [1,2,3,4,5,6,8,10,25,26,29,59]
HCP_emo_ind = [23]
HCP_pers_ind = [7,31,32,34]
HCP_phy_ind = [14]
HCP_wb_ind = [47]
ABCD_cog_ind = [8,10,11,13,14,15,16,17,30,31,32,33,36]
ABCD_mh_ind = [5,29,6,3]

# only the first `limit` columns of each landscape are plotted; the fit
# parameters are read from column `limit-3` of zk_all
limit = 5

# Load every dataset once. The landscape, scan durations and fit parameters do not
# change between score groups, so there is no need to re-read them per group.
haufe_data = {}
for dataset in ('ABCD', 'HCP'):
    (_img_dir, res, _x, _y, _extent, scan_duration) = orsp.load_data(dataset, 'Haufe', rep_dir)
    (_w_r, _w_pa, zk_all, _loss_r, _loss_pa, _loss_log) = orsp.load_fits(dataset, 'Haufe', rep_dir)
    haufe_data[dataset] = (res['fi_icc_landscape'], scan_duration, zk_all)
short_names = {'ABCD': ABCD_scores_short, 'HCP': HCP_scores_short}

# One entry per score group, in drawing (and therefore legend) order:
# (dataset, score indices, seaborn palette, palette size, extra scatter kwargs,
#  legend label replacing the name of the LAST index, or None to use every name)
score_groups = [
    ### cognition
    ('ABCD', ABCD_cog_ind, "blend:bisque,orangered", 13, {}, 'Cog Factor (A)'),
    # HCP cognition deliberately reuses the 13-colour ABCD cognition palette
    # (only its first 12 colours are consumed)
    ('HCP', HCP_cog_ind, "blend:bisque,orangered", 13, {}, 'Cog Factor (H)'),
    ### mental health (drawn behind the other groups)
    ('ABCD', ABCD_mh_ind, "blend:lightgrey,darkgrey", 4, {'zorder': -1}, None),
    ### personality
    ('HCP', HCP_pers_ind, "blend:powderblue,darkslateblue", 4, {}, None),
    ### physical
    ('HCP', HCP_phy_ind, "blend:gold,gold", 2, {}, None),
    ### emotion
    ('HCP', HCP_emo_ind, "blend:forestgreen,forestgreen", 2, {}, None),
    ### well being
    ('HCP', HCP_wb_ind, "blend:magenta,magenta", 2, {}, None),
]

label_chunks = []
for dataset, score_ind, palette, n_colors, scatter_kws, factor_label in score_groups:
    landscape, scan_duration, zk_all = haufe_data[dataset]
    custom_colors = sns.color_palette(palette, n_colors=n_colors)
    # log2 of the total scan duration (same x values for every score of the group)
    log_scan = np.log(scan_duration[:,:limit].flatten())/ np.log(2)
    for color, n in zip(custom_colors, score_ind):
        behav = np.flip(np.flip(landscape[:,:,n].T),1)[:,:limit]
        # normalise with the two fit parameters stored for this score
        z = zk_all[n,limit-3,0]
        k = zk_all[n,limit-3,1]
        norm_acc = (behav - k) / z
        sns.scatterplot(x=log_scan, y=norm_acc.flatten(), color=color, ax=ax, **scatter_kws)
    # collect the legend labels of this group, in the same order as the scatter calls
    if factor_label is None:
        label_chunks.append(short_names[dataset][score_ind])
    else:
        label_chunks.append(short_names[dataset][score_ind[:-1]])
        label_chunks.append([factor_label])
all_scores = np.concatenate(label_chunks)

## Add legend
# Labels are matched to the scatter collections in drawing order, so the legend
# has to be created before the log curve is added to the axis.
lgd = ax.legend(all_scores, handletextpad=0.01, loc='lower center', bbox_to_anchor=[0.48, -0.95],
            fontsize=9, ncol=6, columnspacing=0.5, frameon=False)

# plot log curve
X_fit = np.linspace(200, 15000, num=100, dtype=int)
curve_val = np.log(X_fit) / np.log(2)
ax.plot(np.log(X_fit)/ np.log(2), curve_val, color='k')

# figure parameters
ax.set_ylim([6, 16])
orsp.format_scatter_plot('log\N{SUBSCRIPT TWO}(Total Scan Duration)',
                    'Norm. reliability', ax)

fig.savefig(os.path.join(img_dir, 'FigS43_ScanTime_Log_AllBehavCurves_Haufe_10m.svg'), bbox_inches='tight')

# Fig S44: Haufe scatter plot for ABCD

In [None]:
### settings for ABCD
ABCD_img_dir,ABCD_res,X,Y,ABCD_extent,scan_duration = orsp.load_data('ABCD','Haufe',rep_dir)

# initialize cognition scores
behav_ind = ABCD_rs_log_ind
scores_names = np.append(ABCD_scores, ['Cognition Factor Score',
                         'Mental Health Factor Score', 'Personality Factor Score'])

# plot scatter plots into 6 subplots
# Each page is a 3x2 grid: one row per behaviour, one column per value of `limit`.
# A page is saved as soon as it is full or once the last behaviour has been drawn.
# Deriving the row and page number from the behaviour position (instead of tracking
# them by hand) guarantees that no empty extra page is saved when the number of
# behaviours is an exact multiple of 3.
rows_per_page = 3
limits = [10, 5]
n_behav = len(behav_ind)
for behav_pos, n in enumerate(behav_ind):
    row = behav_pos % rows_per_page
    if row == 0:
        # first behaviour of a page: open a new figure
        fig,axs = plt.subplots(rows_per_page,2,figsize=(7.5,9.5))
        fig.tight_layout(h_pad=5, w_pad=7)
    behav = np.flip(np.flip(ABCD_res['fi_icc_landscape'][:,:,n].T),1)
    for col, limit in enumerate(limits):
        curr_ax = axs[row][col]
        orsp.plot_scatter(6,behav,scan_duration,ABCD_subcolors,limit,curr_ax)
        curr_ax.legend(ABCD_lgd, Y, markerscale=2, loc='lower right',
                 ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
        orsp.format_scatter_plot('Total scan duration (# participants \nx scan time per participant)',
                            'Reliability (ICC)', curr_ax)
        curr_ax.set_xlim(0,26000)
        # y range covers the whole landscape of this behaviour, padded by 0.05
        curr_ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
        curr_ax.set_title(scores_names[n] + ' (' + str(limit*2) + 'mins)')

    page_full = row == rows_per_page - 1
    last_behav = behav_pos == n_behav - 1
    if page_full or last_behav:
        # turn off remaining subplots (only possible on a partially filled last page)
        for unused_row in range(row + 1, rows_per_page):
            for unused_ax in axs[unused_row]:
                unused_ax.axis('off')
        plot_num = behav_pos // rows_per_page + 1
        fig.savefig(os.path.join(img_dir, 'FigS44.' + str(plot_num) +
                '_ABCD_KRR_full_Haufe_scatter.svg'), bbox_inches='tight')


# Fig S45: Haufe scatter plot for HCP

In [None]:
### settings for HCP
HCP_img_dir,HCP_res,X,Y,HCP_extent,scan_duration = orsp.load_data('HCP','Haufe',rep_dir)

# initialize cognition scores
behav_ind = HCP_rs_log_ind
scores_names = np.append(HCP_scores, ['Dissatisfaction Factor Score',
                         'Cognition Factor Score', 'Emotion Factor Score'])

# plot scatter plots into 6 subplots
# Each page is a 3x2 grid: one row per behaviour, one column per value of `limit`.
# A page is saved as soon as it is full or once the last behaviour has been drawn.
# Deriving the row and page number from the behaviour position (instead of tracking
# them by hand) guarantees that no empty extra page is saved when the number of
# behaviours is an exact multiple of 3.
rows_per_page = 3
limits = [29, 5]
n_behav = len(behav_ind)
for behav_pos, n in enumerate(behav_ind):
    row = behav_pos % rows_per_page
    if row == 0:
        # first behaviour of a page: open a new figure
        fig,axs = plt.subplots(rows_per_page,2,figsize=(7.5,9.5))
        fig.tight_layout(h_pad=5, w_pad=7)
    behav = np.flip(np.flip(HCP_res['fi_icc_landscape'][:,:,n].T),1)
    for col, limit in enumerate(limits):
        curr_ax = axs[row][col]
        orsp.plot_scatter(6,behav,scan_duration,HCP_subcolors,limit,curr_ax)
        curr_ax.legend(HCP_lgd, Y, markerscale=2, loc='lower right',
                 ncol=2, labelspacing=0.1, handletextpad=0.05, frameon=False, fontsize=10)
        orsp.format_scatter_plot('Total scan duration (# participants \nx scan time per participant)',
                            'Reliability (ICC)', curr_ax)
        curr_ax.set_xlim(0,26000)
        # y range covers the whole landscape of this behaviour, padded by 0.05
        curr_ax.set_ylim(np.min(behav)-0.05,np.max(behav)+0.05)
        curr_ax.set_title(scores_names[n] + ' (' + str(limit*2) + 'mins)')

    page_full = row == rows_per_page - 1
    last_behav = behav_pos == n_behav - 1
    if page_full or last_behav:
        # turn off remaining subplots (only possible on a partially filled last page)
        for unused_row in range(row + 1, rows_per_page):
            for unused_ax in axs[unused_row]:
                unused_ax.axis('off')
        plot_num = behav_pos // rows_per_page + 1
        fig.savefig(os.path.join(img_dir, 'FigS45.' + str(plot_num) +
                    '_HCP_KRR_full_Haufe_scatter.svg'), bbox_inches='tight')
