In [2]:
import pandas as pd
import numpy as np
import time
from scipy.stats import kurtosis, skew

In [6]:
# Names of the validation contrasts; each one is later used to build the
# paths of its dataset files, so the spelling must match the files on disk.
contrasts = [
    'depression_activation', 'depression_all', 'depression_cognition',
    'depression_deactivation', 'depression_emotion',
    'empathy_ae', 'empathy_ce', 'empathy_emotion', 'empathy_pain',
    'pain_henn', 'pain_kogler', 'taskcontrol_all',
    'stress_all', 'stress_physical', 'stress_social',
    'sus_attention', 'bvFTD_all', 'insomnia_all', 'narcolepsy_all',
    'LLD_all', 'time_all',
]

# One cutoff per contrast, in the same order as `contrasts`.
# Stored in the 'v_cutoff' column.
ale_cut = [
    0.022936608121560502, 0.028137361577983664, 0.01883347150349725,
    0.021977028117639352, 0.025307904045890048, 0.020011614261757408,
    0.022860821319980077, 0.022988287171221033, 0.019885988643657725,
    0.03368984139853556, 0.036382509156447404, 0.05426058568320616,
    0.03946194613078291, 0.03174504417513653, 0.028018379802801373,
    0.03206712393009183, 0.031677161712782, 0.023109154330954285,
    0.020118309329395012, 0.021453577415179895, 0.035100005067251513,
]

# Stored in the 'c_cutoff' column.
cluster_cut = [
    92.0, 100.0, 95.0, 95.0, 96.0, 86.0, 91.0,
    89.0, 91.0, 91.0, 110.0, 104.0, 110.0, 113.0,
    88.0, 102.0, 86.0, 81.0, 87.0, 87.0, 104.0,
]

# Stored in the 't_cutoff' column.
tfce_cut = [
    302.31045771756897, 325.42898401696885, 298.34826815388453,
    304.2685918496436, 312.08801857043676, 285.1721738933155,
    298.4163487148547, 296.93596828490035, 288.53390448324154,
    316.9848539336057, 359.55297178553224, 364.95767815266026,
    361.3121558875412, 357.23877623580375, 304.0434760678481,
    337.97621273805265, 315.34192966880386, 278.5675156480203,
    287.65243817711047, 288.2474752690594, 345.36578655943765,
]

# Assemble the frame in a single step: one row per contrast, columns in the
# order contrasts / v_cutoff / c_cutoff / t_cutoff.
X_real = pd.DataFrame({
    'contrasts': contrasts,
    'v_cutoff': ale_cut,
    'c_cutoff': cluster_cut,
    't_cutoff': tfce_cut,
})

# Studies reporting more foci than this are capped at this value (the
# contrast name and the uncapped count are printed when that happens).
MAX_FOCI_PER_STUDY = 100

# Number of summary features stored per contrast (columns of X_tmp):
# 1 study count + 8 subject stats + 8 foci stats + 4 ratio stats
# + 1 foci-per-study ratio + 4 sample-size bins.
N_FEATURES = 26


def _describe(values):
    """Return [sum, mean, median, std, max, min, skew, kurtosis] of `values`.

    `values` is a 1-D sequence of numbers. `std` is numpy's default
    (population) standard deviation; `skew` and `kurtosis` are the scipy.stats
    functions with their default arguments.
    """
    return [
        np.sum(values), np.mean(values), np.median(values), np.std(values),
        np.max(values), np.min(values), skew(values), kurtosis(values),
    ]


# One row of summary features per contrast, filled in by the loop below.
X_tmp = np.empty((len(contrasts), N_FEATURES))
for idx, contrast in enumerate(contrasts):
    dataset = pd.read_csv(f'input/validation_datasets/{contrast}.csv')
    # Drop the saved index column; tolerate files that were written without it.
    dataset = dataset.drop(columns='Unnamed: 0', errors='ignore')

    # Per-study subject counts.
    nsub_real = dataset.Subjects.values

    # NOTE(review): unpickling can execute arbitrary code - only load
    # coordinate files from a trusted source.
    foci = pd.read_pickle(f'input/validation_datasets/{contrast}_coords.pickle')

    # Per-study foci counts, taken from the second axis of each coordinate
    # array and capped at MAX_FOCI_PER_STUDY.
    nfoci_real = []
    for arr in foci.values:
        nfoci_tmp = arr.shape[1]
        if nfoci_tmp > MAX_FOCI_PER_STUDY:
            print(contrast, nfoci_tmp)
            nfoci_tmp = MAX_FOCI_PER_STUDY
        nfoci_real.append(nfoci_tmp)
    nfoci_real = np.asarray(nfoci_real)

    nstudies_real = dataset.shape[0]
    if len(nfoci_real) != nstudies_real:
        # Every per-study statistic below pairs foci with subjects by position.
        raise ValueError(
            f'{contrast}: {len(nfoci_real)} coordinate arrays '
            f'but {nstudies_real} dataset rows'
        )

    # Foci-per-subject ratio of each study, computed once and reused.
    ratio_real = nfoci_real / nsub_real

    nfoci_real_total = np.sum(nfoci_real)
    nstudies_foci_ratio_real = nfoci_real_total / nstudies_real

    # Total foci contributed by studies in each sample-size bin. The bins are
    # half-open so that every study falls into exactly one of them; with
    # strict inequalities on both sides, studies with exactly 10, 15 or 20
    # subjects were counted nowhere.
    # NOTE(review): which side of each boundary is inclusive is a choice made
    # here - keep it consistent with any code that computes the same features
    # for other (e.g. simulated) datasets.
    hi_foci_real = nfoci_real[nsub_real >= 20].sum()
    mi_foci_real = nfoci_real[(nsub_real >= 15) & (nsub_real < 20)].sum()
    li_foci_real = nfoci_real[(nsub_real >= 10) & (nsub_real < 15)].sum()
    vi_foci_real = nfoci_real[nsub_real < 10].sum()

    # Column order must stay in sync with the column names assigned to X_real.
    X_tmp[idx] = [
        nstudies_real,
        *_describe(nsub_real),
        *_describe(nfoci_real),
        np.mean(ratio_real), np.std(ratio_real),
        np.max(ratio_real), np.min(ratio_real),
        nstudies_foci_ratio_real,
        hi_foci_real, mi_foci_real, li_foci_real, vi_foci_real,
    ]

# Column names for the summary features, listed in the same order as the
# columns of X_tmp.
# NOTE(review): 'n_sub_media' looks like a typo for 'n_sub_median'; it is kept
# unchanged because it is part of the CSV header that is written below.
feature_columns = [
    'n_exp',
    'n_sub_total', 'n_sub_mean', 'n_sub_media', 'n_sub_std',
    'n_sub_max', 'n_sub_min', 'n_sub_skew', 'n_sub_kurtosis',
    'n_foci_total', 'n_foci_mean', 'n_foci_median', 'n_foci_std',
    'n_foci_max', 'n_foci_min', 'n_foci_skew', 'n_foci_kurtosis',
    'foci_sub_ratio_mean', 'foci_sub_ratio_std',
    'foci_sub_ratio_max', 'foci_sub_ratio_min',
    'foci_exp_ratio',
    'hi_foci', 'mi_foci', 'li_foci', 'vli_foci',
]

# Append each feature column to X_real, after the columns it already has.
for position, column in enumerate(feature_columns):
    X_real[column] = X_tmp[:, position]


# Persist the table without the row index.
X_real.to_csv('output/X_real.csv', index=False)

taskcontrol_all 104
bvFTD_all 175
bvFTD_all 634
