In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import sys
import argparse
import h5py
from scipy.stats import t as student_t
from statsmodels.stats import multitest as mt
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [2]:
# IDP_df = pd.read_pickle('male_IDP_prototype.pkl')
# vars_df = pd.read_pickle('male_vars_prototype.pkl')

IDP_df = pd.read_pickle('male_IDP_testA.pkl')
vars_df = pd.read_pickle('male_vars_testA.pkl')

IDP_names = np.loadtxt("IDP_names.txt", dtype=str, delimiter='\n')
IDP_categories = np.loadtxt("IDP_categories.txt", dtype=str, delimiter='\n')
with open('varsHeader.txt') as f:
    varsHeader = f.readlines()
    varsHeader = [l.strip('\n\r') for l in varsHeader]
    varsHeader = np.array(varsHeader)
vars_categories = np.loadtxt("vars_categories.txt", dtype=str, delimiter='\n')

In [3]:
vars_df.head()

Unnamed: 0,modality,bonf,fdr,dataframe
0,T1_nonlinear,5.444014,5.532281,idx ...
1,T1_linear,5.444014,4.889133,idx ...
2,jacobian,5.444014,,idx ...
3,vbm,5.444014,5.139161,idx ...
4,T2_nonlinear,5.444014,4.259926,idx ...


In [4]:
vars_df.iloc[0].dataframe.head()

Unnamed: 0,idx,names,Categories,pearson_r,t_test_statistic,p_values_corrected,p_values,abs_pearson_r,log_p_values,log_p_values_corrected
0,0,Ethnic background (0.0),Ethnic Background,-0.030425,1.418935,1.0,0.156061,0.030425,0.806705,4.8216370000000004e-17
1,1,Ethnic background (1.0),Ethnic Background,-0.060353,1.156738,1.0,0.248134,0.060353,0.605314,4.8216370000000004e-17
2,2,Ethnic background (2.0),Ethnic Background,-0.005362,0.139831,1.0,0.888835,0.005362,0.051179,4.8216370000000004e-17
3,3,Genotype measurement batch (0.0),Genetic Markers,0.006193,0.286155,1.0,0.774787,0.006193,0.110818,4.8216370000000004e-17
4,4,Heterozygosity (0.0),Genetic Markers,-0.016462,0.760739,1.0,0.446897,0.016462,0.349792,4.8216370000000004e-17


In [5]:
vars_over_bThr_list = []

for idx in range(len(vars_df)):
    df = vars_df.iloc[idx].dataframe
    bThr = vars_df.iloc[idx].bonf
    vars_over_bThr = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False).names.to_list()
    for var in vars_over_bThr:
        vars_over_bThr_list.append(var)

In [6]:
print("Number of unique vars across all modalities: ", len(set(vars_over_bThr_list)))

Number of unique vars across all modalities:  51


In [7]:
unique_vars_over_bTHr = list(set(vars_over_bThr_list))

In [8]:
unique_vars_over_bThr_categories = []

for var in unique_vars_over_bTHr:
    idx = list(varsHeader).index(var)
    category = vars_categories[idx]
    unique_vars_over_bThr_categories.append(category)

In [9]:
sorted(unique_vars_over_bTHr)

['Age first had sexual intercourse (0.0)',
 'Age first had sexual intercourse (2.0)',
 'Cardiac index during PWA (2.0)',
 'Cardiac index during PWA (2.1)',
 'Cardiac output during PWA (2.0)',
 'Cardiac output during PWA (2.1)',
 'Diabetes diagnosed by doctor (2.0)',
 'Diagnoses - ICD10 (G250 - G25.0 Essential tremor)',
 'Diagnoses - ICD10 (I10 - I10 Essential (primary) hypertension)',
 'Diagnoses - ICD10 (K624 - K62.4 Stenosis of anus and rectum)',
 'Diagnoses - ICD10 (M232 - M23.2 Derangement of meniscus due to old tear or injury)',
 'Diagnoses - ICD10 (Z864 - Z86.4 Personal history of psychoactive substance abuse)',
 'Diagnoses - secondary ICD10 (G250 - G25.0 Essential tremor)',
 'Diagnoses - secondary ICD10 (I10 - I10 Essential (primary) hypertension)',
 'Diagnoses - secondary ICD10 (K624 - K62.4 Stenosis of anus and rectum)',
 'Diagnoses - secondary ICD10 (Z864 - Z86.4 Personal history of psychoactive substance abuse)',
 'Duration to entering value (0.8)',
 'Femur neck BMD (bone mi

In [10]:
modalities = []
pearson_rs_list = []
for var in unique_vars_over_bTHr:
    modalities_with_var = []
    pearsonRs_with_var = []
    for idx in range(len(vars_df)):
        df = vars_df.iloc[idx].dataframe
        modality = vars_df.iloc[idx].modality
        bThr = vars_df.iloc[idx].bonf
        df = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False)
        vars_over_bThr = df.names.to_list()
        pearson_r_over_bThr = df.pearson_r.to_list()
        if var in vars_over_bThr:
            modalities_with_var.append(modality)
            pearsonRs_with_var.append(pearson_r_over_bThr[vars_over_bThr.index(var)])
    modalities.append(modalities_with_var)
    pearson_rs_list.append(pearsonRs_with_var)

In [11]:
len(modalities)

51

In [12]:
df_vars_over_bThr = pd.DataFrame.from_dict({
    'names': unique_vars_over_bTHr,
    'Categories': unique_vars_over_bThr_categories,
    'modalities': modalities,
    'pearson_r': pearson_rs_list
})

In [13]:
df_vars_over_bThr.head()

Unnamed: 0,names,Categories,modalities,pearson_r
0,"Medication for cholesterol, blood pressure or ...",Medical History,"[tracts, tbss_MO]","[0.10551107466624031, 0.11403668751570652]"
1,Diagnoses - secondary ICD10 (Z864 - Z86.4 Pers...,Medical History,[swi],[0.10904319839901166]
2,Femur neck BMD (bone mineral density) T-score ...,Skeletal Measurements,[rsfmri_24],[-0.10790483226383428]
3,Head BMD (bone mineral density) (2.0),Skeletal Measurements,"[rsfmri_13, rsfmri_20]","[-0.10706423157832252, -0.10436752527827277]"
4,Trunk BMC (bone mineral content) (2.0),Skeletal Measurements,[rsfmri_15],[-0.10690666162181486]


In [14]:
set(df_vars_over_bThr.Categories.to_list())

{'Cardiac & Circulartory Measurements',
 'Cognitive Tests',
 'Hearing Test',
 'Lifestyle',
 'Medical History',
 'Physical Measurements',
 'Skeletal Measurements'}

In [15]:
for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        print('Var: {}'.format(name))
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        print('   Modalities:')
        for i, mod in enumerate(mods[0]):
            print('   -> {} ({:.3f})'.format(mod, prs[0][i]))
        
    print('\n')

Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Cardiac index during PWA (2.0)
   Modalities:
   -> T2_lesions (0.111)
   -> tbss_L1_s (0.119)
   -> tbss_L2 (0.110)
   -> tbss_MD (0.109)
Var: Cardiac index during PWA (2.1)
   Modalities:
   -> tbss_L2 (0.129)
   -> tbss_MD (0.115)
Var: Cardiac output during PWA (2.0)
   Modalities:
   -> tbss_L1_s (0.112)
Var: Cardiac output during PWA (2.1)
   Modalities:
   -> tbss_L2 (0.122)
Var: Mean arterial pressure during PWA (2.1)
   Modalities:
   -> rsfmri_22 (0.108)
Var: Systolic brachial blood pressure (2.0)
   Modalities:
   -> rsfmri_11 (0.107)
   -> rsfmri_22 (0.111)
   -> tbss_L1_s (0.111)


Category: Cognitive Tests
------------------------------------------------
Var: Duration to entering value (0.8)
   Modalities:
   -> rsfmri_11 (0.146)
Var: Number of symbol digit matches attempted (2.0)
   Modalities:
   -> tfmri_1 (-0.132)
   -> tfmri_2 (-0.127)
   -> tbss_OD_s (-0.134)
Var: Numb

In [16]:
sorted(prs[0], reverse=True)

[-0.10690666162181486]

In [17]:
for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        print('Var: {}'.format(name))
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        print('   Modalities:')
        
        prs_abs = np.abs(prs).tolist()
        _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
        _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))
        
        for i, mod in enumerate(mods):
            print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
    print('\n')

Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Cardiac index during PWA (2.0)
   Modalities:
   -> tbss_L1_s (0.119)
   -> T2_lesions (0.111)
   -> tbss_L2 (0.110)
   -> tbss_MD (0.109)
Var: Cardiac index during PWA (2.1)
   Modalities:
   -> tbss_L2 (0.129)
   -> tbss_MD (0.115)
Var: Cardiac output during PWA (2.0)
   Modalities:
   -> tbss_L1_s (0.112)
Var: Cardiac output during PWA (2.1)
   Modalities:
   -> tbss_L2 (0.122)
Var: Mean arterial pressure during PWA (2.1)
   Modalities:
   -> rsfmri_22 (0.108)
Var: Systolic brachial blood pressure (2.0)
   Modalities:
   -> rsfmri_22 (0.111)
   -> tbss_L1_s (0.111)
   -> rsfmri_11 (0.107)


Category: Cognitive Tests
------------------------------------------------
Var: Duration to entering value (0.8)
   Modalities:
   -> rsfmri_11 (0.146)
Var: Number of symbol digit matches attempted (2.0)
   Modalities:
   -> tbss_OD_s (-0.134)
   -> tfmri_1 (-0.132)
   -> tfmri_2 (-0.127)
Var: Numb

In [18]:
variance_threshold = 0.1

for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        
        
        prs_abs = np.abs(prs).tolist()
        _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
        _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))
        
        if prs[0] >= variance_threshold:
            print('Var: {}'.format(name))
            print('   Modalities:')
            for i, mod in enumerate(mods):
                if prs[i] >= variance_threshold:
                    print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
    print('\n')

Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Cardiac index during PWA (2.0)
   Modalities:
   -> tbss_L1_s (0.119)
   -> T2_lesions (0.111)
   -> tbss_L2 (0.110)
   -> tbss_MD (0.109)
Var: Cardiac index during PWA (2.1)
   Modalities:
   -> tbss_L2 (0.129)
   -> tbss_MD (0.115)
Var: Cardiac output during PWA (2.0)
   Modalities:
   -> tbss_L1_s (0.112)
Var: Cardiac output during PWA (2.1)
   Modalities:
   -> tbss_L2 (0.122)
Var: Mean arterial pressure during PWA (2.1)
   Modalities:
   -> rsfmri_22 (0.108)
Var: Systolic brachial blood pressure (2.0)
   Modalities:
   -> rsfmri_22 (0.111)
   -> tbss_L1_s (0.111)
   -> rsfmri_11 (0.107)


Category: Cognitive Tests
------------------------------------------------
Var: Duration to entering value (0.8)
   Modalities:
   -> rsfmri_11 (0.146)
Var: Time last key touched (2.1)
   Modalities:
   -> tfmri_2 (0.124)


Category: Hearing Test
------------------------------------------------
Var

In [19]:
mods

('rsfmri_15',)

In [20]:
counter = 0
for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()[0]
        if 'ElasticNetFull' in mods or 'ElasticNetFull' in mods or 'ElasticNetFull' in mods:
            print('Var: {}'.format(name))
            counter += 1
        else:
            pass
    print('\n')
    
print("There are {} vars independent from the conventional modalities".format(counter))

Category: Cardiac & Circulartory Measurements
------------------------------------------------


Category: Cognitive Tests
------------------------------------------------


Category: Hearing Test
------------------------------------------------


Category: Lifestyle
------------------------------------------------


Category: Medical History
------------------------------------------------


Category: Physical Measurements
------------------------------------------------


Category: Skeletal Measurements
------------------------------------------------


There are 0 vars independent from the conventional modalities


In [21]:
# IDP_df1 = pd.read_pickle('male_IDP_prototype.pkl')
vars_df1 = pd.read_pickle('male_vars.pkl')

vars1_over_bThr_list = []

for idx in range(len(vars_df1)):
    df = vars_df1.iloc[idx].dataframe
    bThr = vars_df1.iloc[idx].bonf
    vars_over_bThr = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False).names.to_list()
    for var in vars_over_bThr:
        vars1_over_bThr_list.append(var)
        
print("Number of unique vars across all modalities: ", len(set(vars1_over_bThr_list)))

unique_vars1_over_bTHr = list(set(vars1_over_bThr_list))
unique_vars1_over_bThr_categories = []

for var in unique_vars1_over_bTHr:
    idx = list(varsHeader).index(var)
    category = vars_categories[idx]
    unique_vars1_over_bThr_categories.append(category)
    
modalities1 = []
pearson_rs_list1 = []
for var in unique_vars1_over_bTHr:
    modalities_with_var = []
    pearsonRs_with_var = []
    for idx in range(len(vars_df1)):
        df = vars_df1.iloc[idx].dataframe
        modality = vars_df1.iloc[idx].modality
        bThr = vars_df1.iloc[idx].bonf
        df = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False)
        vars_over_bThr = df.names.to_list()
        pearson_r_over_bThr = df.pearson_r.to_list()
        if var in vars_over_bThr:
            modalities_with_var.append(modality)
            pearsonRs_with_var.append(pearson_r_over_bThr[vars_over_bThr.index(var)])
    modalities1.append(modalities_with_var)
    pearson_rs_list1.append(pearsonRs_with_var)
    
df_vars1_over_bThr = pd.DataFrame.from_dict({
    'names': unique_vars1_over_bTHr,
    'Categories': unique_vars1_over_bThr_categories,
    'modalities': modalities1,
    'pearson_r': pearson_rs_list1
})

Number of unique vars across all modalities:  161


In [22]:
# IDP_df0 = pd.read_pickle('male_IDP_testB.pkl')
vars_df0 = pd.read_pickle('male_vars_testB.pkl')

In [23]:
vars0_over_bThr_list = []

for idx in range(len(vars_df0)):
    df = vars_df0.iloc[idx].dataframe
    bThr = vars_df0.iloc[idx].bonf
    vars_over_bThr = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False).names.to_list()
    for var in vars_over_bThr:
        vars0_over_bThr_list.append(var)

In [24]:
print("Number of unique vars across all modalities: ", len(set(vars0_over_bThr_list)))

Number of unique vars across all modalities:  42


In [25]:
unique_vars0_over_bTHr = list(set(vars0_over_bThr_list))
unique_vars0_over_bThr_categories = []

for var in unique_vars0_over_bTHr:
    idx = list(varsHeader).index(var)
    category = vars_categories[idx]
    unique_vars0_over_bThr_categories.append(category)
    
modalities0 = []
pearson_rs_list0 = []
for var in unique_vars0_over_bTHr:
    modalities_with_var = []
    pearsonRs_with_var = []
    for idx in range(len(vars_df0)):
        df = vars_df0.iloc[idx].dataframe
        modality = vars_df0.iloc[idx].modality
        bThr = vars_df0.iloc[idx].bonf
        df = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False)
        vars_over_bThr = df.names.to_list()
        pearson_r_over_bThr = df.pearson_r.to_list()
        if var in vars_over_bThr:
            modalities_with_var.append(modality)
            pearsonRs_with_var.append(pearson_r_over_bThr[vars_over_bThr.index(var)])
    modalities0.append(modalities_with_var)
    pearson_rs_list0.append(pearsonRs_with_var)

In [26]:
df_vars0_over_bThr = pd.DataFrame.from_dict({
    'names': unique_vars0_over_bTHr,
    'Categories': unique_vars0_over_bThr_categories,
    'modalities': modalities0,
    'pearson_r': pearson_rs_list0
})

In [27]:
df_vars0_over_bThr[df_vars0_over_bThr.names=='Alcohol intake frequency. (2.0)']

Unnamed: 0,names,Categories,modalities,pearson_r
37,Alcohol intake frequency. (2.0),Alcohol,"[tbss_FA_s, tbss_L1_s, tbss_OD_s, tbss_ICVF, t...","[0.1081203879296585, 0.10909079636749124, 0.10..."


In [28]:
for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        print('Var: {}'.format(name))
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        mods0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].modalities.to_list()
        prs0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].pearson_r.to_list()
        print('   Modalities:')
        
        prs_abs = np.abs(prs).tolist()
        _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
        _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))
        
        for i, mod in enumerate(mods):
            print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
        
        if len(mods0) == 0:
            print('======== ATTENTION! NEW VAR ! ==========')
        else:
            prs_abs0 = np.abs(prs0).tolist()
            _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
            _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        

#         for i, mod in enumerate(mods0):
#             print('   -> {} ({:.3f})'.format(mod, prs0[i]))
            print('   -> {} ({:.3f})'.format(mods0[0], prs0[0]))
        
    print('\n')

Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Cardiac index during PWA (2.0)
   Modalities:
   -> tbss_L1_s (0.119)
   -> T2_lesions (0.111)
   -> tbss_L2 (0.110)
   -> tbss_MD (0.109)
   -> tbss_ICVF_s (0.113)
Var: Cardiac index during PWA (2.1)
   Modalities:
   -> tbss_L2 (0.129)
   -> tbss_MD (0.115)
Var: Cardiac output during PWA (2.0)
   Modalities:
   -> tbss_L1_s (0.112)
   -> tbss_ICVF_s (0.110)
Var: Cardiac output during PWA (2.1)
   Modalities:
   -> tbss_L2 (0.122)
Var: Mean arterial pressure during PWA (2.1)
   Modalities:
   -> rsfmri_22 (0.108)
   -> T2_lesions (0.107)
Var: Systolic brachial blood pressure (2.0)
   Modalities:
   -> rsfmri_22 (0.111)
   -> tbss_L1_s (0.111)
   -> rsfmri_11 (0.107)
   -> T2_lesions (0.117)


Category: Cognitive Tests
------------------------------------------------
Var: Duration to entering value (0.8)
   Modalities:
   -> rsfmri_11 (0.146)
Var: Number of symbol digit matches attempted

In [29]:
for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        mods0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].modalities.to_list()
        prs0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].pearson_r.to_list()

        if len(mods)!=0 and len(mods0)!=0:
        
            prs_abs = np.abs(prs).tolist()
            _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
            _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))

            prs_abs0 = np.abs(prs0).tolist()
            _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
            _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        
        
            print('Var: {}'.format(name))
            print('   Modalities:')
            print('   -> {} ({:.3f})'.format(mods[0], prs[0]))
            print('   -> {} ({:.3f})'.format(mods0[0], prs0[0]))
        

        
#         for i, mod in enumerate(mods):
#             print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
        
        
        if len(mods0) == 0:
#             print('======== ATTENTION! NEW VAR ! ==========')
            pass
#         else:
#             prs_abs0 = np.abs(prs0).tolist()
#             _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
#             _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        

# #         for i, mod in enumerate(mods0):
# #             print('   -> {} ({:.3f})'.format(mod, prs0[i]))
#             
        
    print('\n')

Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Cardiac index during PWA (2.0)
   Modalities:
   -> tbss_L1_s (0.119)
   -> tbss_ICVF_s (0.113)
Var: Cardiac output during PWA (2.0)
   Modalities:
   -> tbss_L1_s (0.112)
   -> tbss_ICVF_s (0.110)
Var: Mean arterial pressure during PWA (2.1)
   Modalities:
   -> rsfmri_22 (0.108)
   -> T2_lesions (0.107)
Var: Systolic brachial blood pressure (2.0)
   Modalities:
   -> rsfmri_22 (0.111)
   -> T2_lesions (0.117)


Category: Cognitive Tests
------------------------------------------------


Category: Hearing Test
------------------------------------------------


Category: Lifestyle
------------------------------------------------


Category: Medical History
------------------------------------------------
Var: Diabetes diagnosed by doctor (2.0)
   Modalities:
   -> vbm (0.125)
   -> vbm (0.105)
Var: Diagnoses - secondary ICD10 (I10 - I10 Essential (primary) hypertension)
   Modalities:
  

In [30]:
counter = 0

for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        mods0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].modalities.to_list()
        prs0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].pearson_r.to_list()

        if len(mods)!=0 and len(mods0)!=0:
        
            prs_abs = np.abs(prs).tolist()
            _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
            _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))

            prs_abs0 = np.abs(prs0).tolist()
            _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
            _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        
            counter += 1
        
            print('Var: {}'.format(name))
#             print('   Modalities:')
#             print('   -> {} ({:.3f})'.format(mods[0], prs[0]))
#             print('   -> {} ({:.3f})'.format(mods0[0], prs0[0]))
        

        
#         for i, mod in enumerate(mods):
#             print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
        
        
#         if len(mods0) == 0:
# #             print('======== ATTENTION! NEW VAR ! ==========')
#             pass
#         else:
#             prs_abs0 = np.abs(prs0).tolist()
#             _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
#             _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        

# #         for i, mod in enumerate(mods0):
# #             print('   -> {} ({:.3f})'.format(mod, prs0[i]))
#             
        
    print('\n')
    
print('COUNTER=', counter)

Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Cardiac index during PWA (2.0)
Var: Cardiac output during PWA (2.0)
Var: Mean arterial pressure during PWA (2.1)
Var: Systolic brachial blood pressure (2.0)


Category: Cognitive Tests
------------------------------------------------


Category: Hearing Test
------------------------------------------------


Category: Lifestyle
------------------------------------------------


Category: Medical History
------------------------------------------------
Var: Diabetes diagnosed by doctor (2.0)
Var: Diagnoses - secondary ICD10 (I10 - I10 Essential (primary) hypertension)
Var: Non-cancer illness code, self-reported (1223 - type 2 diabetes)
Var: Vascular/heart problems diagnosed by doctor (4 - High blood pressure)


Category: Physical Measurements
------------------------------------------------


Category: Skeletal Measurements
------------------------------------------------
Var: Head BMC (b

In [31]:
counter = 0

for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        mods0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].modalities.to_list()
        prs0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].pearson_r.to_list()
        mods1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].modalities.to_list()
        prs1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].pearson_r.to_list()
        
        if len(mods)!=0 and len(mods0)!=0 and len(mods1)!=0:
        
            prs_abs = np.abs(prs).tolist()
            _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
            _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))

            prs_abs0 = np.abs(prs0).tolist()
            _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
            _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
            
            prs_abs1 = np.abs(prs1).tolist()
            _, mods1 = zip(*sorted(zip(prs_abs1[0], mods1[0]), reverse=True))
            _, prs1 = zip(*sorted(zip(prs_abs1[0], prs1[0]), reverse=True))
        
            counter += 1
        
            print('Var: {}'.format(name))
#             print('   Modalities:')
#             print('   -> {} ({:.3f})'.format(mods[0], prs[0]))
#             print('   -> {} ({:.3f})'.format(mods0[0], prs0[0]))
#             print('   -> {} ({:.3f})'.format(mods1[0], prs1[0]))
        

        
#         for i, mod in enumerate(mods):
#             print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
        
        
#         if len(mods0) == 0:
# #             print('======== ATTENTION! NEW VAR ! ==========')
#             pass
#         else:
#             prs_abs0 = np.abs(prs0).tolist()
#             _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
#             _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        

# #         for i, mod in enumerate(mods0):
# #             print('   -> {} ({:.3f})'.format(mod, prs0[i]))
#             
        
    print('\n')
    
print('COUNTER=', counter)

Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Cardiac index during PWA (2.0)
Var: Cardiac output during PWA (2.0)
Var: Mean arterial pressure during PWA (2.1)
Var: Systolic brachial blood pressure (2.0)


Category: Cognitive Tests
------------------------------------------------


Category: Hearing Test
------------------------------------------------


Category: Lifestyle
------------------------------------------------


Category: Medical History
------------------------------------------------
Var: Diabetes diagnosed by doctor (2.0)
Var: Diagnoses - secondary ICD10 (I10 - I10 Essential (primary) hypertension)
Var: Non-cancer illness code, self-reported (1223 - type 2 diabetes)
Var: Vascular/heart problems diagnosed by doctor (4 - High blood pressure)


Category: Physical Measurements
------------------------------------------------


Category: Skeletal Measurements
------------------------------------------------
Var: Head BMC (b

In [32]:
counter = 0

for cat in sorted(list(set(df_vars1_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars1_over_bThr[df_vars1_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        mods0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].modalities.to_list()
        prs0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].pearson_r.to_list()
        mods1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].modalities.to_list()
        prs1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].pearson_r.to_list()
        
        if len(mods0)!=0 and len(mods1)!=0:

            prs_abs0 = np.abs(prs0).tolist()
            _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
            _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
            
            prs_abs1 = np.abs(prs1).tolist()
            _, mods1 = zip(*sorted(zip(prs_abs1[0], mods1[0]), reverse=True))
            _, prs1 = zip(*sorted(zip(prs_abs1[0], prs1[0]), reverse=True))
        
            counter += 1
        
            print('Var: {}'.format(name))
#             print('   Modalities:')
#             print('   -> {} ({:.3f})'.format(mods[0], prs[0]))
#             print('   -> {} ({:.3f})'.format(mods0[0], prs0[0]))
#             print('   -> {} ({:.3f})'.format(mods1[0], prs1[0]))
        

        
#         for i, mod in enumerate(mods):
#             print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
        
        
#         if len(mods0) == 0:
# #             print('======== ATTENTION! NEW VAR ! ==========')
#             pass
#         else:
#             prs_abs0 = np.abs(prs0).tolist()
#             _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
#             _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        

# #         for i, mod in enumerate(mods0):
# #             print('   -> {} ({:.3f})'.format(mod, prs0[i]))
#             
        
    print('\n')
    
print('COUNTER=', counter)

Category: Alcohol
------------------------------------------------
Var: Alcohol intake frequency. (0.0)
Var: Alcohol intake frequency. (2.0)
Var: Frequency of drinking alcohol (0.0)


Category: Blood Assays
------------------------------------------------
Var: IGF-1 (0.0)


Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Cardiac index during PWA (2.0)
Var: Cardiac output during PWA (2.0)
Var: Central systolic blood pressure during PWA (2.0)
Var: End systolic pressure during PWA (2.0)
Var: End systolic pressure during PWA (2.1)
Var: Mean arterial pressure during PWA (2.0)
Var: Mean arterial pressure during PWA (2.1)
Var: Systolic blood pressure, automated reading (0.1)
Var: Systolic brachial blood pressure (2.0)


Category: Cognitive Tests
------------------------------------------------


Category: Diet
------------------------------------------------
Var: Alcohol (2.0)


Category: Medical History
-------------------------------------

In [33]:
counter = 0

for cat in sorted(list(set(df_vars1_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars1_over_bThr[df_vars1_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        mods0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].modalities.to_list()
        prs0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].pearson_r.to_list()
        mods1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].modalities.to_list()
        prs1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].pearson_r.to_list()
        
        if len(mods0)!=0 and len(mods1)!=0:
        
            prs_abs0 = np.abs(prs0).tolist()
            _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
            _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
            
            prs_abs1 = np.abs(prs1).tolist()
            _, mods1 = zip(*sorted(zip(prs_abs1[0], mods1[0]), reverse=True))
            _, prs1 = zip(*sorted(zip(prs_abs1[0], prs1[0]), reverse=True))
            
        else:
        
            counter += 1
        
            print('Var: {}'.format(name))
#             print('   Modalities:')
#             print('   -> {} ({:.3f})'.format(mods[0], prs[0]))
#             print('   -> {} ({:.3f})'.format(mods0[0], prs0[0]))
#             print('   -> {} ({:.3f})'.format(mods1[0], prs1[0]))
        

        
#         for i, mod in enumerate(mods):
#             print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
        
        
#         if len(mods0) == 0:
# #             print('======== ATTENTION! NEW VAR ! ==========')
#             pass
#         else:
#             prs_abs0 = np.abs(prs0).tolist()
#             _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
#             _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        

# #         for i, mod in enumerate(mods0):
# #             print('   -> {} ({:.3f})'.format(mod, prs0[i]))
#             
        
    print('\n')
    
print('COUNTER=', counter)

Category: Alcohol
------------------------------------------------
Var: Alcohol consumed (1.0)
Var: Amount of alcohol drunk on a typical drinking day (0.0)
Var: Average weekly beer plus cider intake (2.0)
Var: Frequency of consuming six or more units of alcohol (0.0)


Category: Blood Assays
------------------------------------------------
Var: Apolipoprotein A (0.0)
Var: Gamma glutamyltransferase (0.0)
Var: Glucose (0.0)
Var: Glycated haemoglobin (HbA1c) (0.0)
Var: HDL cholesterol (0.0)
Var: Mean corpuscular haemoglobin (0.0)
Var: Mean corpuscular volume (0.0)
Var: Mean sphered cell volume (0.0)
Var: Total protein (0.0)


Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Absence of notch position in the pulse waveform (2.0)
Var: Average heart rate (2.0)
Var: Cardiac index during PWA (2.1)
Var: Cardiac output during PWA (2.1)
Var: Central pulse pressure during PWA (2.0)
Var: Diastolic blood pressure, automated reading (0.0)
Var: Diastol

In [34]:
counter = 0

for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        mods1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].modalities.to_list()
        prs1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].pearson_r.to_list()
        
        if len(mods)!=0 and len(mods1)!=0:
        
            prs_abs = np.abs(prs).tolist()
            _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
            _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))
            
            prs_abs1 = np.abs(prs1).tolist()
            _, mods1 = zip(*sorted(zip(prs_abs1[0], mods1[0]), reverse=True))
            _, prs1 = zip(*sorted(zip(prs_abs1[0], prs1[0]), reverse=True))
            
        else:
        
            counter += 1
        
            print('Var: {}'.format(name))
#             print('   Modalities:')
#             print('   -> {} ({:.3f})'.format(mods[0], prs[0]))
#             print('   -> {} ({:.3f})'.format(mods0[0], prs0[0]))
#             print('   -> {} ({:.3f})'.format(mods1[0], prs1[0]))
        

        
#         for i, mod in enumerate(mods):
#             print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
        
        
#         if len(mods0) == 0:
# #             print('======== ATTENTION! NEW VAR ! ==========')
#             pass
#         else:
#             prs_abs0 = np.abs(prs0).tolist()
#             _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
#             _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        

# #         for i, mod in enumerate(mods0):
# #             print('   -> {} ({:.3f})'.format(mod, prs0[i]))
#             
        
    print('\n')
    
print('COUNTER=', counter)

Category: Cardiac & Circulartory Measurements
------------------------------------------------


Category: Cognitive Tests
------------------------------------------------
Var: Time last key touched (2.1)


Category: Hearing Test
------------------------------------------------
Var: Signal-to-noise-ratio (SNR) of triplet (left) (2.9)


Category: Lifestyle
------------------------------------------------
Var: Age first had sexual intercourse (0.0)
Var: Age first had sexual intercourse (2.0)


Category: Medical History
------------------------------------------------
Var: Diagnoses - ICD10 (G250 - G25.0 Essential tremor)
Var: Diagnoses - ICD10 (K624 - K62.4 Stenosis of anus and rectum)
Var: Diagnoses - ICD10 (M232 - M23.2 Derangement of meniscus due to old tear or injury)
Var: Diagnoses - secondary ICD10 (G250 - G25.0 Essential tremor)
Var: Diagnoses - secondary ICD10 (K624 - K62.4 Stenosis of anus and rectum)
Var: Operative procedures - OPCS4 (L714 - L71.4 Percutaneous transluminal cann