In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import sys
import argparse
import h5py
from scipy.stats import t as student_t
from statsmodels.stats import multitest as mt
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [2]:
# IDP_df = pd.read_pickle('male_IDP_prototype.pkl')
# vars_df = pd.read_pickle('male_vars_prototype.pkl')

IDP_df = pd.read_pickle('female_IDP_testA.pkl')
vars_df = pd.read_pickle('female_vars_testA.pkl')

IDP_names = np.loadtxt("IDP_names.txt", dtype=str, delimiter='\n')
IDP_categories = np.loadtxt("IDP_categories.txt", dtype=str, delimiter='\n')
with open('varsHeader.txt') as f:
    varsHeader = f.readlines()
    varsHeader = [l.strip('\n\r') for l in varsHeader]
    varsHeader = np.array(varsHeader)
vars_categories = np.loadtxt("vars_categories.txt", dtype=str, delimiter='\n')

In [3]:
vars_df.head()

Unnamed: 0,modality,bonf,fdr,dataframe
0,T1_nonlinear,5.440594,4.242652,idx ...
1,T1_linear,5.440594,3.935681,idx ...
2,jacobian,5.440594,3.809642,idx ...
3,vbm,5.440594,3.902949,idx ...
4,T2_nonlinear,5.440594,5.669645,idx ...


In [4]:
vars_df.iloc[0].dataframe.head()

Unnamed: 0,idx,names,Categories,pearson_r,t_test_statistic,p_values_corrected,p_values,abs_pearson_r,log_p_values,log_p_values_corrected
0,0,Ethnic background (0.0),Ethnic Background,-0.010066,0.503514,1.0,0.614647,0.010066,0.211374,-0.0
1,1,Ethnic background (1.0),Ethnic Background,-0.001731,0.034871,1.0,0.9722,0.001731,0.012244,-0.0
2,2,Ethnic background (2.0),Ethnic Background,-0.020829,0.551206,1.0,0.581669,0.020829,0.235324,-0.0
3,3,Genotype measurement batch (0.0),Genetic Markers,0.03093,1.524166,0.937694,0.127598,0.03093,0.894157,0.027939
4,4,Heterozygosity (0.0),Genetic Markers,-0.047418,2.338187,0.693755,0.019458,0.047418,1.710893,0.158794


In [5]:
vars_over_bThr_list = []

for idx in range(len(vars_df)):
    df = vars_df.iloc[idx].dataframe
    bThr = vars_df.iloc[idx].bonf
    vars_over_bThr = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False).names.to_list()
    for var in vars_over_bThr:
        vars_over_bThr_list.append(var)

In [6]:
print("Number of unique vars across all modalities: ", len(set(vars_over_bThr_list)))

Number of unique vars across all modalities:  123


In [7]:
unique_vars_over_bTHr = list(set(vars_over_bThr_list))

In [8]:
unique_vars_over_bThr_categories = []

for var in unique_vars_over_bTHr:
    idx = list(varsHeader).index(var)
    category = vars_categories[idx]
    unique_vars_over_bThr_categories.append(category)

In [9]:
sorted(unique_vars_over_bTHr)

['Alcohol intake frequency. (0.0)',
 'Alcohol intake frequency. (2.0)',
 'Alcohol intake versus 10 years previously (0.0)',
 'Android bone mass (2.0)',
 'Android total mass (2.0)',
 'Arm BMD (bone mineral density) (left) (2.0)',
 'Arm BMD (bone mineral density) (right) (2.0)',
 'Arm fat mass (right) (0.0)',
 'Arm fat mass (right) (2.0)',
 'Arm fat percentage (left) (0.0)',
 'Arm fat percentage (left) (2.0)',
 'Arm fat percentage (right) (2.0)',
 'Arms BMC (bone mineral content) (2.0)',
 'Arms BMD (bone mineral density) (2.0)',
 'Arms total mass (2.0)',
 'Average weekly spirits intake (2.0)',
 'Body fat percentage (0.0)',
 'Body fat percentage (2.0)',
 'Body mass index (BMI) (0.0)',
 'Body mass index (BMI) (2.0)',
 'Body surface area (2.0)',
 'C-reactive protein (0.0)',
 'Diagnoses - secondary ICD10 (C786 - C78.6 Secondary malignant neoplasm of retroperitoneum and peritoneum)',
 'Diagnoses - secondary ICD10 (D125 - D12.5 Sigmoid colon)',
 'Diastolic blood pressure, automated reading (0.

In [10]:
modalities = []
pearson_rs_list = []
for var in unique_vars_over_bTHr:
    modalities_with_var = []
    pearsonRs_with_var = []
    for idx in range(len(vars_df)):
        df = vars_df.iloc[idx].dataframe
        modality = vars_df.iloc[idx].modality
        bThr = vars_df.iloc[idx].bonf
        df = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False)
        vars_over_bThr = df.names.to_list()
        pearson_r_over_bThr = df.pearson_r.to_list()
        if var in vars_over_bThr:
            modalities_with_var.append(modality)
            pearsonRs_with_var.append(pearson_r_over_bThr[vars_over_bThr.index(var)])
    modalities.append(modalities_with_var)
    pearson_rs_list.append(pearsonRs_with_var)

In [11]:
len(modalities)

123

In [12]:
df_vars_over_bThr = pd.DataFrame.from_dict({
    'names': unique_vars_over_bTHr,
    'Categories': unique_vars_over_bThr_categories,
    'modalities': modalities,
    'pearson_r': pearson_rs_list
})

In [13]:
df_vars_over_bThr.head()

Unnamed: 0,names,Categories,modalities,pearson_r
0,"Heel quantitative ultrasound index (QUI), dire...",Skeletal Measurements,"[rsfmri_1, rsfmri_6, rsfmri_9, rsfmri_11, rsfm...","[-0.24113456221520155, -0.24750175793007453, -..."
1,Arm BMD (bone mineral density) (left) (2.0),Skeletal Measurements,"[rsfmri_0, rsfmri_1, rsfmri_2, rsfmri_4, rsfmr...","[-0.2535184470466089, -0.2065208136750318, -0...."
2,Arm fat percentage (right) (2.0),Physical Measurements,"[rsfmri_2, rsfmri_4, rsfmri_10, rsfmri_11]","[-0.10065899082357462, -0.09593380092587067, -..."
3,Diagnoses - secondary ICD10 (D125 - D12.5 Sigm...,Medical History,[tbss_OD],[0.09679790269459006]
4,"Heel quantitative ultrasound index (QUI), dire...",Skeletal Measurements,"[swi, rsfmri_0, rsfmri_1, rsfmri_2, rsfmri_4, ...","[-0.11491365977030002, -0.1989284962225435, -0..."


In [14]:
set(df_vars_over_bThr.Categories.to_list())

{'Alcohol',
 'Blood Assays',
 'Cardiac & Circulartory Measurements',
 'Medical History',
 'Physical Measurements',
 'Skeletal Measurements'}

In [15]:
for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        print('Var: {}'.format(name))
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        print('   Modalities:')
        for i, mod in enumerate(mods[0]):
            print('   -> {} ({:.3f})'.format(mod, prs[0][i]))
        
    print('\n')

Category: Alcohol
------------------------------------------------
Var: Alcohol intake frequency. (0.0)
   Modalities:
   -> tbss_ISOVF_s (0.097)
   -> tbss_L1_s (0.122)
   -> tbss_L2_s (0.094)
Var: Alcohol intake frequency. (2.0)
   Modalities:
   -> tbss_FA_s (0.095)
   -> tbss_ICVF_s (0.099)
   -> tbss_ISOVF_s (0.112)
   -> tbss_L1_s (0.118)
   -> tbss_L2_s (0.100)
   -> tbss_L3_s (0.099)
   -> tbss_MD_s (0.096)
   -> tbss_OD_s (0.094)
Var: Alcohol intake versus 10 years previously (0.0)
   Modalities:
   -> tbss_MD (-0.098)
Var: Average weekly spirits intake (2.0)
   Modalities:
   -> tbss_L2 (0.116)


Category: Blood Assays
------------------------------------------------
Var: C-reactive protein (0.0)
   Modalities:
   -> rsfmri_0 (-0.097)


Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Diastolic blood pressure, automated reading (0.0)
   Modalities:
   -> T2_lesions (0.109)
   -> tbss_L3_s (0.103)
   -> tbss_MD_s (0.098)
Var: 

   Modalities:
   -> T1_linear (-0.105)
   -> jacobian (-0.148)
   -> vbm (-0.120)
   -> swi (-0.154)
   -> rsfmri_0 (-0.241)
   -> rsfmri_1 (-0.190)
   -> rsfmri_2 (-0.225)
   -> rsfmri_3 (-0.138)
   -> rsfmri_4 (-0.205)
   -> rsfmri_5 (-0.236)
   -> rsfmri_6 (-0.225)
   -> rsfmri_7 (-0.213)
   -> rsfmri_8 (-0.190)
   -> rsfmri_9 (-0.216)
   -> rsfmri_10 (-0.226)
   -> rsfmri_11 (-0.238)
   -> rsfmri_12 (-0.210)
   -> rsfmri_13 (-0.215)
   -> rsfmri_14 (-0.169)
   -> rsfmri_15 (-0.194)
   -> rsfmri_16 (-0.217)
   -> rsfmri_17 (-0.208)
   -> rsfmri_18 (-0.185)
   -> rsfmri_19 (-0.162)
   -> rsfmri_20 (-0.215)
   -> rsfmri_21 (-0.241)
   -> rsfmri_22 (-0.192)
   -> rsfmri_23 (-0.153)
   -> rsfmri_24 (-0.202)
   -> tfmri_1 (-0.163)
   -> tfmri_2 (-0.174)
   -> tfmri_5 (-0.189)
   -> tfmri_c_1 (-0.152)
   -> tfmri_c_2 (-0.158)
   -> tfmri_c_5 (-0.169)
   -> tbss_ICVF_s (-0.103)
   -> tbss_ISOVF_s (-0.105)
   -> tbss_L1_s (-0.110)
   -> tbss_L2_s (-0.118)
   -> tbss_L3_s (-0.131)
   -> tbs

   -> rsfmri_11 (-0.221)
   -> rsfmri_12 (-0.203)
   -> rsfmri_13 (-0.216)
   -> rsfmri_14 (-0.173)
   -> rsfmri_15 (-0.180)
   -> rsfmri_16 (-0.215)
   -> rsfmri_17 (-0.195)
   -> rsfmri_18 (-0.187)
   -> rsfmri_19 (-0.169)
   -> rsfmri_20 (-0.183)
   -> rsfmri_21 (-0.229)
   -> rsfmri_22 (-0.170)
   -> rsfmri_23 (-0.155)
   -> rsfmri_24 (-0.188)
   -> tfmri_1 (-0.147)
   -> tfmri_2 (-0.152)
   -> tfmri_5 (-0.170)
   -> tfmri_c_1 (-0.136)
   -> tfmri_c_2 (-0.146)
   -> tfmri_c_5 (-0.150)
   -> tracts (-0.109)
   -> tbss_ISOVF_s (-0.106)
   -> tbss_L1_s (-0.105)
   -> tbss_L2_s (-0.107)
   -> tbss_L3_s (-0.101)
   -> tbss_OD_s (-0.103)
   -> tbss_FA (-0.127)
   -> tbss_ICVF (-0.101)
   -> tbss_ISOVF (-0.099)
   -> tbss_L1 (-0.129)
   -> tbss_L2 (-0.103)
   -> tbss_L3 (-0.101)
   -> tbss_MD (-0.121)
   -> tbss_MO (-0.120)
   -> tbss_OD (-0.123)
Var: Femur troch BMD (bone mineral density) T-score (right) (2.0)
   Modalities:
   -> T1_linear (-0.101)
   -> jacobian (-0.145)
   -> vbm (-0.

   -> rsfmri_7 (-0.178)
   -> rsfmri_8 (-0.176)
   -> rsfmri_9 (-0.223)
   -> rsfmri_10 (-0.195)
   -> rsfmri_11 (-0.219)
   -> rsfmri_12 (-0.194)
   -> rsfmri_13 (-0.208)
   -> rsfmri_14 (-0.169)
   -> rsfmri_15 (-0.177)
   -> rsfmri_16 (-0.235)
   -> rsfmri_17 (-0.198)
   -> rsfmri_18 (-0.179)
   -> rsfmri_19 (-0.161)
   -> rsfmri_20 (-0.199)
   -> rsfmri_21 (-0.232)
   -> rsfmri_22 (-0.194)
   -> rsfmri_23 (-0.153)
   -> rsfmri_24 (-0.162)
   -> tfmri_1 (-0.114)
   -> tfmri_2 (-0.118)
   -> tfmri_5 (-0.137)
   -> tfmri_c_1 (-0.114)
   -> tfmri_c_2 (-0.117)
   -> tfmri_c_5 (-0.117)
   -> tbss_MO (-0.102)
Var: L1-L4 BMD (bone mineral density) (2.0)
   Modalities:
   -> jacobian (-0.145)
   -> vbm (-0.107)
   -> swi (-0.157)
   -> rsfmri_0 (-0.257)
   -> rsfmri_1 (-0.215)
   -> rsfmri_2 (-0.253)
   -> rsfmri_3 (-0.185)
   -> rsfmri_4 (-0.221)
   -> rsfmri_5 (-0.264)
   -> rsfmri_6 (-0.248)
   -> rsfmri_7 (-0.215)
   -> rsfmri_8 (-0.209)
   -> rsfmri_9 (-0.253)
   -> rsfmri_10 (-0.229)


   -> rsfmri_20 (-0.201)
   -> rsfmri_21 (-0.240)
   -> rsfmri_22 (-0.191)
   -> rsfmri_23 (-0.167)
   -> rsfmri_24 (-0.162)
   -> tfmri_1 (-0.119)
   -> tfmri_2 (-0.125)
   -> tfmri_5 (-0.148)
   -> tfmri_c_1 (-0.134)
   -> tfmri_c_2 (-0.129)
   -> tfmri_c_5 (-0.129)
   -> tbss_L1 (-0.099)
   -> tbss_MO (-0.113)
Var: Spine BMD (bone mineral density) (2.0)
   Modalities:
   -> jacobian (-0.156)
   -> vbm (-0.117)
   -> swi (-0.164)
   -> rsfmri_0 (-0.266)
   -> rsfmri_1 (-0.210)
   -> rsfmri_2 (-0.255)
   -> rsfmri_3 (-0.196)
   -> rsfmri_4 (-0.224)
   -> rsfmri_5 (-0.263)
   -> rsfmri_6 (-0.248)
   -> rsfmri_7 (-0.228)
   -> rsfmri_8 (-0.219)
   -> rsfmri_9 (-0.254)
   -> rsfmri_10 (-0.232)
   -> rsfmri_11 (-0.262)
   -> rsfmri_12 (-0.231)
   -> rsfmri_13 (-0.258)
   -> rsfmri_14 (-0.211)
   -> rsfmri_15 (-0.218)
   -> rsfmri_16 (-0.270)
   -> rsfmri_17 (-0.247)
   -> rsfmri_18 (-0.225)
   -> rsfmri_19 (-0.207)
   -> rsfmri_20 (-0.226)
   -> rsfmri_21 (-0.264)
   -> rsfmri_22 (-0.225)

In [16]:
sorted(prs[0], reverse=True)

[-0.0980340174188354,
 -0.09857631376204357,
 -0.09880270835292913,
 -0.10030442172418366,
 -0.10699996934955343,
 -0.11782305517257526,
 -0.12229584400009604]

In [17]:
for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        print('Var: {}'.format(name))
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        print('   Modalities:')
        
        prs_abs = np.abs(prs).tolist()
        _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
        _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))
        
        for i, mod in enumerate(mods):
            print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
    print('\n')

Category: Alcohol
------------------------------------------------
Var: Alcohol intake frequency. (0.0)
   Modalities:
   -> tbss_L1_s (0.122)
   -> tbss_ISOVF_s (0.097)
   -> tbss_L2_s (0.094)
Var: Alcohol intake frequency. (2.0)
   Modalities:
   -> tbss_L1_s (0.118)
   -> tbss_ISOVF_s (0.112)
   -> tbss_L2_s (0.100)
   -> tbss_L3_s (0.099)
   -> tbss_ICVF_s (0.099)
   -> tbss_MD_s (0.096)
   -> tbss_FA_s (0.095)
   -> tbss_OD_s (0.094)
Var: Alcohol intake versus 10 years previously (0.0)
   Modalities:
   -> tbss_MD (-0.098)
Var: Average weekly spirits intake (2.0)
   Modalities:
   -> tbss_L2 (0.116)


Category: Blood Assays
------------------------------------------------
Var: C-reactive protein (0.0)
   Modalities:
   -> rsfmri_0 (-0.097)


Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Diastolic blood pressure, automated reading (0.0)
   Modalities:
   -> T2_lesions (0.109)
   -> tbss_L3_s (0.103)
   -> tbss_MD_s (0.098)
Var: 

   Modalities:
   -> rsfmri_11 (-0.135)
   -> rsfmri_2 (-0.122)
   -> rsfmri_21 (-0.120)
   -> rsfmri_13 (-0.115)
   -> rsfmri_10 (-0.111)
   -> rsfmri_17 (-0.100)
   -> rsfmri_0 (-0.098)


Category: Skeletal Measurements
------------------------------------------------
Var: Arm BMD (bone mineral density) (left) (2.0)
   Modalities:
   -> rsfmri_12 (-0.265)
   -> rsfmri_2 (-0.261)
   -> rsfmri_0 (-0.254)
   -> rsfmri_13 (-0.253)
   -> rsfmri_21 (-0.237)
   -> rsfmri_8 (-0.233)
   -> rsfmri_5 (-0.233)
   -> rsfmri_6 (-0.228)
   -> rsfmri_16 (-0.227)
   -> rsfmri_4 (-0.218)
   -> rsfmri_7 (-0.216)
   -> rsfmri_11 (-0.212)
   -> rsfmri_24 (-0.211)
   -> rsfmri_20 (-0.210)
   -> tfmri_5 (-0.209)
   -> rsfmri_9 (-0.209)
   -> rsfmri_14 (-0.207)
   -> rsfmri_1 (-0.207)
   -> rsfmri_17 (-0.204)
   -> rsfmri_22 (-0.188)
   -> rsfmri_19 (-0.184)
   -> rsfmri_10 (-0.173)
   -> tfmri_1 (-0.170)
   -> rsfmri_18 (-0.163)
Var: Arm BMD (bone mineral density) (right) (2.0)
   Modalities:
   -> rsfmri_

   -> rsfmri_4 (-0.228)
   -> rsfmri_17 (-0.227)
   -> rsfmri_24 (-0.222)
   -> rsfmri_18 (-0.218)
   -> rsfmri_1 (-0.215)
   -> rsfmri_20 (-0.214)
   -> rsfmri_8 (-0.211)
   -> rsfmri_22 (-0.210)
   -> rsfmri_15 (-0.210)
   -> tfmri_5 (-0.204)
   -> rsfmri_14 (-0.197)
   -> rsfmri_19 (-0.193)
   -> rsfmri_23 (-0.186)
   -> tfmri_2 (-0.180)
   -> tfmri_c_5 (-0.176)
   -> jacobian (-0.172)
   -> tfmri_c_2 (-0.172)
   -> rsfmri_3 (-0.171)
   -> tfmri_1 (-0.169)
   -> tfmri_c_1 (-0.168)
   -> swi (-0.161)
   -> tbss_L1 (-0.148)
   -> tbss_FA (-0.147)
   -> tbss_OD (-0.140)
   -> tbss_MD (-0.140)
   -> vbm (-0.133)
   -> tbss_MO (-0.131)
   -> tbss_L1_s (-0.127)
   -> tbss_L2_s (-0.125)
   -> tbss_L3_s (-0.125)
   -> tbss_ISOVF_s (-0.122)
   -> tbss_OD_s (-0.118)
   -> tbss_ISOVF (-0.118)
   -> tbss_MD_s (-0.116)
   -> tbss_L3 (-0.116)
   -> tbss_L2 (-0.116)
   -> tbss_ICVF (-0.113)
   -> tbss_MO_s (-0.113)
   -> T1_linear (-0.111)
   -> tracts (-0.110)
   -> tbss_FA_s (-0.104)
Var: Femur 

   -> rsfmri_4 (-0.233)
   -> rsfmri_10 (-0.233)
   -> rsfmri_2 (-0.232)
   -> rsfmri_16 (-0.232)
   -> rsfmri_11 (-0.230)
   -> rsfmri_7 (-0.223)
   -> rsfmri_24 (-0.221)
   -> rsfmri_20 (-0.219)
   -> rsfmri_12 (-0.218)
   -> rsfmri_17 (-0.216)
   -> rsfmri_8 (-0.215)
   -> rsfmri_1 (-0.210)
   -> tfmri_5 (-0.209)
   -> rsfmri_15 (-0.208)
   -> rsfmri_22 (-0.205)
   -> rsfmri_18 (-0.194)
   -> rsfmri_14 (-0.192)
   -> tfmri_2 (-0.189)
   -> rsfmri_19 (-0.180)
   -> tfmri_c_5 (-0.177)
   -> tfmri_1 (-0.174)
   -> rsfmri_23 (-0.168)
   -> swi (-0.167)
   -> tfmri_c_2 (-0.161)
   -> tfmri_c_1 (-0.156)
   -> jacobian (-0.153)
   -> rsfmri_3 (-0.152)
   -> tbss_FA (-0.143)
   -> tbss_MD (-0.141)
   -> tbss_L3_s (-0.134)
   -> tbss_L1 (-0.132)
   -> tbss_OD (-0.131)
   -> tbss_MO (-0.125)
   -> tbss_L2_s (-0.121)
   -> vbm (-0.120)
   -> tbss_L3 (-0.118)
   -> tbss_ICVF (-0.115)
   -> tbss_ISOVF (-0.114)
   -> tbss_L2 (-0.114)
   -> tbss_L1_s (-0.112)
   -> tbss_MD_s (-0.111)
   -> tbss_IS

   -> rsfmri_23 (-0.127)
   -> tbss_MD (-0.124)
   -> rsfmri_19 (-0.123)
   -> tfmri_5 (-0.122)
   -> tbss_L1 (-0.119)
   -> tbss_OD (-0.114)
   -> swi (-0.114)
   -> tfmri_2 (-0.114)
   -> tbss_L3 (-0.110)
   -> tbss_MO (-0.109)
Var: Speed of sound through heel (right) (0.0)
   Modalities:
   -> rsfmri_0 (-0.195)
   -> rsfmri_24 (-0.165)
   -> rsfmri_15 (-0.164)
Var: Speed of sound through heel (right) (1.0)
   Modalities:
   -> rsfmri_1 (-0.261)
   -> rsfmri_12 (-0.259)
   -> rsfmri_6 (-0.251)
   -> rsfmri_9 (-0.247)
   -> rsfmri_8 (-0.243)
   -> rsfmri_0 (-0.242)
Var: Speed of sound through heel (right) (2.0)
   Modalities:
   -> rsfmri_0 (-0.187)
   -> rsfmri_15 (-0.164)
   -> rsfmri_24 (-0.161)
   -> rsfmri_21 (-0.158)
   -> rsfmri_17 (-0.158)
   -> rsfmri_6 (-0.158)
   -> rsfmri_11 (-0.157)
   -> rsfmri_12 (-0.155)
   -> rsfmri_10 (-0.150)
   -> rsfmri_8 (-0.150)
   -> rsfmri_9 (-0.149)
   -> rsfmri_13 (-0.147)
   -> rsfmri_14 (-0.146)
   -> rsfmri_1 (-0.143)
   -> rsfmri_16 (-0.

In [18]:
variance_threshold = 0.1

for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        
        
        prs_abs = np.abs(prs).tolist()
        _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
        _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))
        
        if prs[0] >= variance_threshold:
            print('Var: {}'.format(name))
            print('   Modalities:')
            for i, mod in enumerate(mods):
                if prs[i] >= variance_threshold:
                    print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
    print('\n')

Category: Alcohol
------------------------------------------------
Var: Alcohol intake frequency. (0.0)
   Modalities:
   -> tbss_L1_s (0.122)
Var: Alcohol intake frequency. (2.0)
   Modalities:
   -> tbss_L1_s (0.118)
   -> tbss_ISOVF_s (0.112)
   -> tbss_L2_s (0.100)
Var: Average weekly spirits intake (2.0)
   Modalities:
   -> tbss_L2 (0.116)


Category: Blood Assays
------------------------------------------------


Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Diastolic blood pressure, automated reading (0.0)
   Modalities:
   -> T2_lesions (0.109)
   -> tbss_L3_s (0.103)
Var: Diastolic blood pressure, automated reading (0.1)
   Modalities:
   -> tbss_L3_s (0.117)
   -> T2_lesions (0.110)
Var: Systolic blood pressure, automated reading (0.1)
   Modalities:
   -> tbss_L1_s (0.113)
   -> tbss_ICVF (0.113)
   -> tbss_L3_s (0.112)
   -> tbss_MD_s (0.110)
   -> tbss_ICVF_s (0.105)
   -> tbss_ISOVF_s (0.103)
Var: Systolic blood press

In [19]:
mods

('rsfmri_21',
 'rsfmri_11',
 'rsfmri_16',
 'rsfmri_0',
 'rsfmri_2',
 'rsfmri_13',
 'rsfmri_14')

In [20]:
counter = 0
for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()[0]
        if 'ElasticNetFull' in mods or 'ElasticNetFull' in mods or 'ElasticNetFull' in mods:
            print('Var: {}'.format(name))
            counter += 1
        else:
            pass
    print('\n')
    
print("There are {} vars independent from the conventional modalities".format(counter))

Category: Alcohol
------------------------------------------------


Category: Blood Assays
------------------------------------------------


Category: Cardiac & Circulartory Measurements
------------------------------------------------


Category: Medical History
------------------------------------------------


Category: Physical Measurements
------------------------------------------------


Category: Skeletal Measurements
------------------------------------------------


There are 0 vars independent from the conventional modalities


In [21]:
# IDP_df1 = pd.read_pickle('male_IDP_prototype.pkl')
vars_df1 = pd.read_pickle('female_vars.pkl')

vars1_over_bThr_list = []

for idx in range(len(vars_df1)):
    df = vars_df1.iloc[idx].dataframe
    bThr = vars_df1.iloc[idx].bonf
    vars_over_bThr = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False).names.to_list()
    for var in vars_over_bThr:
        vars1_over_bThr_list.append(var)
        
print("Number of unique vars across all modalities: ", len(set(vars1_over_bThr_list)))

unique_vars1_over_bTHr = list(set(vars1_over_bThr_list))
unique_vars1_over_bThr_categories = []

for var in unique_vars1_over_bTHr:
    idx = list(varsHeader).index(var)
    category = vars_categories[idx]
    unique_vars1_over_bThr_categories.append(category)
    
modalities1 = []
pearson_rs_list1 = []
for var in unique_vars1_over_bTHr:
    modalities_with_var = []
    pearsonRs_with_var = []
    for idx in range(len(vars_df1)):
        df = vars_df1.iloc[idx].dataframe
        modality = vars_df1.iloc[idx].modality
        bThr = vars_df1.iloc[idx].bonf
        df = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False)
        vars_over_bThr = df.names.to_list()
        pearson_r_over_bThr = df.pearson_r.to_list()
        if var in vars_over_bThr:
            modalities_with_var.append(modality)
            pearsonRs_with_var.append(pearson_r_over_bThr[vars_over_bThr.index(var)])
    modalities1.append(modalities_with_var)
    pearson_rs_list1.append(pearsonRs_with_var)
    
df_vars1_over_bThr = pd.DataFrame.from_dict({
    'names': unique_vars1_over_bTHr,
    'Categories': unique_vars1_over_bThr_categories,
    'modalities': modalities1,
    'pearson_r': pearson_rs_list1
})

Number of unique vars across all modalities:  214


In [22]:
# IDP_df0 = pd.read_pickle('male_IDP_testB.pkl')
vars_df0 = pd.read_pickle('female_vars_testB.pkl')

In [23]:
vars0_over_bThr_list = []

for idx in range(len(vars_df0)):
    df = vars_df0.iloc[idx].dataframe
    bThr = vars_df0.iloc[idx].bonf
    vars_over_bThr = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False).names.to_list()
    for var in vars_over_bThr:
        vars0_over_bThr_list.append(var)

In [24]:
print("Number of unique vars across all modalities: ", len(set(vars0_over_bThr_list)))

Number of unique vars across all modalities:  149


In [25]:
unique_vars0_over_bTHr = list(set(vars0_over_bThr_list))
unique_vars0_over_bThr_categories = []

for var in unique_vars0_over_bTHr:
    idx = list(varsHeader).index(var)
    category = vars_categories[idx]
    unique_vars0_over_bThr_categories.append(category)
    
modalities0 = []
pearson_rs_list0 = []
for var in unique_vars0_over_bTHr:
    modalities_with_var = []
    pearsonRs_with_var = []
    for idx in range(len(vars_df0)):
        df = vars_df0.iloc[idx].dataframe
        modality = vars_df0.iloc[idx].modality
        bThr = vars_df0.iloc[idx].bonf
        df = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False)
        vars_over_bThr = df.names.to_list()
        pearson_r_over_bThr = df.pearson_r.to_list()
        if var in vars_over_bThr:
            modalities_with_var.append(modality)
            pearsonRs_with_var.append(pearson_r_over_bThr[vars_over_bThr.index(var)])
    modalities0.append(modalities_with_var)
    pearson_rs_list0.append(pearsonRs_with_var)

In [26]:
df_vars0_over_bThr = pd.DataFrame.from_dict({
    'names': unique_vars0_over_bTHr,
    'Categories': unique_vars0_over_bThr_categories,
    'modalities': modalities0,
    'pearson_r': pearson_rs_list0
})

In [27]:
df_vars0_over_bThr[df_vars0_over_bThr.names=='Alcohol intake frequency. (2.0)']

Unnamed: 0,names,Categories,modalities,pearson_r
53,Alcohol intake frequency. (2.0),Alcohol,"[tbss_FA, tbss_OD]","[0.09392468672552293, 0.09438980773468608]"


In [28]:
for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        print('Var: {}'.format(name))
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        mods0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].modalities.to_list()
        prs0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].pearson_r.to_list()
        print('   Modalities:')
        
        prs_abs = np.abs(prs).tolist()
        _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
        _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))
        
        for i, mod in enumerate(mods):
            print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
        
        if len(mods0) == 0:
            print('======== ATTENTION! NEW VAR ! ==========')
        else:
            prs_abs0 = np.abs(prs0).tolist()
            _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
            _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        

#         for i, mod in enumerate(mods0):
#             print('   -> {} ({:.3f})'.format(mod, prs0[i]))
            print('   -> {} ({:.3f})'.format(mods0[0], prs0[0]))
        
    print('\n')

Category: Alcohol
------------------------------------------------
Var: Alcohol intake frequency. (0.0)
   Modalities:
   -> tbss_L1_s (0.122)
   -> tbss_ISOVF_s (0.097)
   -> tbss_L2_s (0.094)
   -> tbss_FA (0.100)
Var: Alcohol intake frequency. (2.0)
   Modalities:
   -> tbss_L1_s (0.118)
   -> tbss_ISOVF_s (0.112)
   -> tbss_L2_s (0.100)
   -> tbss_L3_s (0.099)
   -> tbss_ICVF_s (0.099)
   -> tbss_MD_s (0.096)
   -> tbss_FA_s (0.095)
   -> tbss_OD_s (0.094)
   -> tbss_OD (0.094)
Var: Alcohol intake versus 10 years previously (0.0)
   Modalities:
   -> tbss_MD (-0.098)
Var: Average weekly spirits intake (2.0)
   Modalities:
   -> tbss_L2 (0.116)


Category: Blood Assays
------------------------------------------------
Var: C-reactive protein (0.0)
   Modalities:
   -> rsfmri_0 (-0.097)
   -> rsfmri_2 (-0.108)


Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Diastolic blood pressure, automated reading (0.0)
   Modalities:
   -> T2_l

   Modalities:
   -> rsfmri_11 (-0.114)
   -> rsfmri_2 (-0.109)
   -> rsfmri_20 (-0.101)
   -> rsfmri_21 (-0.100)
   -> rsfmri_0 (-0.094)
   -> rsfmri_10 (-0.094)
   -> rsfmri_11 (-0.095)
Var: Weight (pre-imaging) (2.0)
   Modalities:
   -> rsfmri_11 (-0.129)
   -> rsfmri_21 (-0.122)
   -> rsfmri_2 (-0.116)
   -> rsfmri_13 (-0.107)
   -> rsfmri_10 (-0.107)
   -> rsfmri_0 (-0.101)
   -> rsfmri_5 (-0.100)
   -> rsfmri_17 (-0.099)
   -> rsfmri_20 (-0.099)
   -> rsfmri_18 (-0.098)
   -> tbss_OD_s (-0.093)
   -> rsfmri_11 (-0.132)
Var: Whole body fat mass (0.0)
   Modalities:
   -> rsfmri_11 (-0.120)
   -> rsfmri_2 (-0.118)
   -> rsfmri_10 (-0.104)
   -> rsfmri_0 (-0.104)
   -> rsfmri_21 (-0.102)
   -> rsfmri_20 (-0.100)
   -> rsfmri_13 (-0.096)
   -> tbss_OD_s (-0.095)
   -> rsfmri_7 (-0.094)
   -> rsfmri_11 (-0.104)
Var: Whole body fat mass (2.0)
   Modalities:
   -> rsfmri_11 (-0.135)
   -> rsfmri_2 (-0.122)
   -> rsfmri_21 (-0.120)
   -> rsfmri_13 (-0.115)
   -> rsfmri_10 (-0.111)
   ->

   Modalities:
   -> rsfmri_21 (-0.220)
   -> rsfmri_0 (-0.218)
   -> rsfmri_11 (-0.211)
   -> rsfmri_9 (-0.207)
   -> rsfmri_5 (-0.206)
   -> rsfmri_13 (-0.199)
   -> rsfmri_2 (-0.196)
   -> rsfmri_16 (-0.196)
   -> rsfmri_12 (-0.194)
   -> rsfmri_10 (-0.194)
   -> rsfmri_6 (-0.189)
   -> rsfmri_17 (-0.186)
   -> rsfmri_7 (-0.186)
   -> rsfmri_1 (-0.179)
   -> rsfmri_20 (-0.176)
   -> rsfmri_4 (-0.176)
   -> rsfmri_8 (-0.176)
   -> rsfmri_24 (-0.176)
   -> rsfmri_18 (-0.175)
   -> rsfmri_22 (-0.164)
   -> tfmri_5 (-0.164)
   -> rsfmri_15 (-0.159)
   -> rsfmri_14 (-0.156)
   -> rsfmri_19 (-0.150)
   -> swi (-0.149)
   -> tfmri_2 (-0.148)
   -> tfmri_1 (-0.145)
   -> jacobian (-0.145)
   -> tfmri_c_5 (-0.142)
   -> tfmri_c_2 (-0.136)
   -> tbss_L1 (-0.128)
   -> rsfmri_23 (-0.127)
   -> tfmri_c_1 (-0.126)
   -> tbss_MD (-0.122)
   -> tbss_FA (-0.122)
   -> vbm (-0.121)
   -> rsfmri_3 (-0.117)
   -> tbss_ISOVF_s (-0.113)
   -> tbss_MO (-0.111)
   -> tbss_OD (-0.111)
   -> tbss_L3 (-0.108

   Modalities:
   -> rsfmri_0 (-0.272)
   -> rsfmri_21 (-0.268)
   -> rsfmri_5 (-0.253)
   -> rsfmri_13 (-0.246)
   -> rsfmri_11 (-0.240)
   -> rsfmri_6 (-0.240)
   -> rsfmri_16 (-0.240)
   -> rsfmri_2 (-0.235)
   -> rsfmri_4 (-0.232)
   -> rsfmri_9 (-0.231)
   -> rsfmri_10 (-0.230)
   -> rsfmri_7 (-0.228)
   -> rsfmri_12 (-0.227)
   -> rsfmri_20 (-0.227)
   -> rsfmri_17 (-0.222)
   -> rsfmri_24 (-0.220)
   -> rsfmri_1 (-0.219)
   -> tfmri_5 (-0.216)
   -> rsfmri_8 (-0.213)
   -> rsfmri_22 (-0.212)
   -> rsfmri_14 (-0.205)
   -> rsfmri_15 (-0.205)
   -> rsfmri_18 (-0.202)
   -> tfmri_2 (-0.199)
   -> rsfmri_19 (-0.193)
   -> rsfmri_23 (-0.184)
   -> tfmri_c_5 (-0.178)
   -> tfmri_c_2 (-0.169)
   -> tfmri_1 (-0.167)
   -> swi (-0.165)
   -> tfmri_c_1 (-0.165)
   -> jacobian (-0.164)
   -> rsfmri_3 (-0.164)
   -> tbss_FA (-0.159)
   -> tbss_OD (-0.154)
   -> tbss_L1 (-0.152)
   -> tbss_MD (-0.147)
   -> tbss_L3_s (-0.141)
   -> tbss_L2_s (-0.137)
   -> tbss_MO (-0.136)
   -> tbss_MO_s (-

   -> tbss_ICVF_s (-0.122)
   -> tbss_MO_s (-0.120)
   -> tbss_FA_s (-0.118)
   -> tbss_MD_s (-0.117)
   -> tracts (-0.117)
   -> tbss_L3 (-0.114)
   -> tbss_ISOVF (-0.113)
   -> tbss_ICVF (-0.112)
   -> T1_linear (-0.111)
   -> tbss_L2 (-0.110)
   -> rsfmri_2 (-0.267)
Var: Ribs BMC (bone mineral content) (2.0)
   Modalities:
   -> rsfmri_21 (-0.221)
   -> rsfmri_11 (-0.211)
   -> rsfmri_0 (-0.209)
   -> rsfmri_16 (-0.208)
   -> rsfmri_13 (-0.206)
   -> rsfmri_2 (-0.201)
   -> rsfmri_6 (-0.192)
   -> rsfmri_17 (-0.191)
   -> rsfmri_9 (-0.189)
   -> rsfmri_22 (-0.187)
   -> rsfmri_10 (-0.183)
   -> rsfmri_14 (-0.181)
   -> rsfmri_7 (-0.180)
   -> rsfmri_18 (-0.173)
   -> rsfmri_5 (-0.171)
   -> rsfmri_12 (-0.169)
   -> rsfmri_20 (-0.168)
   -> rsfmri_4 (-0.164)
   -> rsfmri_23 (-0.162)
   -> rsfmri_1 (-0.160)
   -> rsfmri_15 (-0.159)
   -> rsfmri_3 (-0.157)
   -> tfmri_5 (-0.151)
   -> rsfmri_8 (-0.147)
   -> tfmri_c_1 (-0.143)
   -> rsfmri_19 (-0.142)
   -> rsfmri_24 (-0.139)
   -> tfm

   -> rsfmri_13 (-0.236)
   -> rsfmri_2 (-0.235)
   -> rsfmri_5 (-0.231)
   -> rsfmri_6 (-0.223)
   -> rsfmri_9 (-0.221)
   -> rsfmri_17 (-0.215)
   -> rsfmri_20 (-0.212)
   -> rsfmri_22 (-0.209)
   -> rsfmri_10 (-0.208)
   -> rsfmri_12 (-0.207)
   -> rsfmri_7 (-0.203)
   -> rsfmri_4 (-0.200)
   -> rsfmri_18 (-0.199)
   -> rsfmri_14 (-0.197)
   -> rsfmri_15 (-0.191)
   -> rsfmri_8 (-0.187)
   -> rsfmri_1 (-0.186)
   -> rsfmri_19 (-0.181)
   -> rsfmri_3 (-0.178)
   -> rsfmri_24 (-0.177)
   -> rsfmri_23 (-0.175)
   -> tfmri_5 (-0.169)
   -> tfmri_c_1 (-0.146)
   -> tfmri_c_5 (-0.145)
   -> tfmri_2 (-0.145)
   -> swi (-0.143)
   -> tfmri_c_2 (-0.141)
   -> tfmri_1 (-0.141)
   -> jacobian (-0.129)
   -> tbss_MO (-0.126)
   -> tbss_FA (-0.117)
   -> tbss_OD_s (-0.116)
   -> tbss_L1 (-0.113)
   -> tbss_MO_s (-0.111)
   -> tbss_L1_s (-0.109)
   -> tbss_L3_s (-0.104)
   -> tbss_MD (-0.103)
   -> tbss_OD (-0.102)
   -> tbss_ISOVF_s (-0.102)
   -> vbm (-0.099)
   -> rsfmri_2 (-0.235)
Var: Trunk 

In [29]:
for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        mods0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].modalities.to_list()
        prs0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].pearson_r.to_list()

        if len(mods)!=0 and len(mods0)!=0:
        
            prs_abs = np.abs(prs).tolist()
            _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
            _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))

            prs_abs0 = np.abs(prs0).tolist()
            _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
            _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        
        
            print('Var: {}'.format(name))
            print('   Modalities:')
            print('   -> {} ({:.3f})'.format(mods[0], prs[0]))
            print('   -> {} ({:.3f})'.format(mods0[0], prs0[0]))
        

        
#         for i, mod in enumerate(mods):
#             print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
        
        
        if len(mods0) == 0:
#             print('======== ATTENTION! NEW VAR ! ==========')
            pass
#         else:
#             prs_abs0 = np.abs(prs0).tolist()
#             _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
#             _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        

# #         for i, mod in enumerate(mods0):
# #             print('   -> {} ({:.3f})'.format(mod, prs0[i]))
#             
        
    print('\n')

Category: Alcohol
------------------------------------------------
Var: Alcohol intake frequency. (0.0)
   Modalities:
   -> tbss_L1_s (0.122)
   -> tbss_FA (0.100)
Var: Alcohol intake frequency. (2.0)
   Modalities:
   -> tbss_L1_s (0.118)
   -> tbss_OD (0.094)


Category: Blood Assays
------------------------------------------------
Var: C-reactive protein (0.0)
   Modalities:
   -> rsfmri_0 (-0.097)
   -> rsfmri_2 (-0.108)


Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Systolic blood pressure, automated reading (0.1)
   Modalities:
   -> tbss_L1_s (0.113)
   -> tbss_FA_s (0.113)


Category: Medical History
------------------------------------------------


Category: Physical Measurements
------------------------------------------------
Var: Android total mass (2.0)
   Modalities:
   -> rsfmri_11 (-0.132)
   -> rsfmri_11 (-0.114)
Var: Arm fat mass (right) (0.0)
   Modalities:
   -> rsfmri_11 (-0.108)
   -> rsfmri_11 (-0.094)
Var:

Var: Legs BMC (bone mineral content) (2.0)
   Modalities:
   -> rsfmri_21 (-0.228)
   -> rsfmri_2 (-0.226)
Var: Legs BMD (bone mineral density) (2.0)
   Modalities:
   -> rsfmri_21 (-0.286)
   -> rsfmri_2 (-0.286)
Var: Pelvis BMC (bone mineral content) (2.0)
   Modalities:
   -> rsfmri_21 (-0.235)
   -> rsfmri_2 (-0.231)
Var: Pelvis BMD (bone mineral density) (2.0)
   Modalities:
   -> rsfmri_21 (-0.282)
   -> rsfmri_2 (-0.267)
Var: Ribs BMC (bone mineral content) (2.0)
   Modalities:
   -> rsfmri_21 (-0.221)
   -> rsfmri_8 (-0.180)
Var: Ribs BMD (bone mineral density) (2.0)
   Modalities:
   -> rsfmri_21 (-0.282)
   -> rsfmri_2 (-0.250)
Var: Speed of sound through heel (left) (0.0)
   Modalities:
   -> rsfmri_0 (-0.192)
   -> rsfmri_0 (-0.160)
Var: Speed of sound through heel (left) (2.0)
   Modalities:
   -> rsfmri_0 (-0.187)
   -> rsfmri_0 (-0.218)
Var: Speed of sound through heel (right) (1.0)
   Modalities:
   -> rsfmri_1 (-0.261)
   -> rsfmri_0 (-0.262)
Var: Speed of sound throug

In [30]:
counter = 0

for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        mods0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].modalities.to_list()
        prs0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].pearson_r.to_list()

        if len(mods)!=0 and len(mods0)!=0:
        
            prs_abs = np.abs(prs).tolist()
            _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
            _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))

            prs_abs0 = np.abs(prs0).tolist()
            _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
            _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        
            counter += 1
        
            print('Var: {}'.format(name))
#             print('   Modalities:')
#             print('   -> {} ({:.3f})'.format(mods[0], prs[0]))
#             print('   -> {} ({:.3f})'.format(mods0[0], prs0[0]))
        

        
#         for i, mod in enumerate(mods):
#             print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
        
        
#         if len(mods0) == 0:
# #             print('======== ATTENTION! NEW VAR ! ==========')
#             pass
#         else:
#             prs_abs0 = np.abs(prs0).tolist()
#             _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
#             _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        

# #         for i, mod in enumerate(mods0):
# #             print('   -> {} ({:.3f})'.format(mod, prs0[i]))
#             
        
    print('\n')
    
print('COUNTER=', counter)

Category: Alcohol
------------------------------------------------
Var: Alcohol intake frequency. (0.0)
Var: Alcohol intake frequency. (2.0)


Category: Blood Assays
------------------------------------------------
Var: C-reactive protein (0.0)


Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Systolic blood pressure, automated reading (0.1)


Category: Medical History
------------------------------------------------


Category: Physical Measurements
------------------------------------------------
Var: Android total mass (2.0)
Var: Arm fat mass (right) (0.0)
Var: Arm fat mass (right) (2.0)
Var: Arm fat percentage (left) (2.0)
Var: Arm fat percentage (right) (2.0)
Var: Arms total mass (2.0)
Var: Body fat percentage (0.0)
Var: Body fat percentage (2.0)
Var: Body mass index (BMI) (2.0)
Var: Body surface area (2.0)
Var: Gynoid bone mass (2.0)
Var: Gynoid total mass (2.0)
Var: Hip circumference (0.0)
Var: Leg fat mass (right) (0.0)
Var: L

In [31]:
counter = 0

for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        mods0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].modalities.to_list()
        prs0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].pearson_r.to_list()
        mods1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].modalities.to_list()
        prs1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].pearson_r.to_list()
        
        if len(mods)!=0 and len(mods0)!=0 and len(mods1)!=0:
        
            prs_abs = np.abs(prs).tolist()
            _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
            _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))

            prs_abs0 = np.abs(prs0).tolist()
            _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
            _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
            
            prs_abs1 = np.abs(prs1).tolist()
            _, mods1 = zip(*sorted(zip(prs_abs1[0], mods1[0]), reverse=True))
            _, prs1 = zip(*sorted(zip(prs_abs1[0], prs1[0]), reverse=True))
        
            counter += 1
        
            print('Var: {}'.format(name))
#             print('   Modalities:')
#             print('   -> {} ({:.3f})'.format(mods[0], prs[0]))
#             print('   -> {} ({:.3f})'.format(mods0[0], prs0[0]))
#             print('   -> {} ({:.3f})'.format(mods1[0], prs1[0]))
        

        
#         for i, mod in enumerate(mods):
#             print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
        
        
#         if len(mods0) == 0:
# #             print('======== ATTENTION! NEW VAR ! ==========')
#             pass
#         else:
#             prs_abs0 = np.abs(prs0).tolist()
#             _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
#             _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        

# #         for i, mod in enumerate(mods0):
# #             print('   -> {} ({:.3f})'.format(mod, prs0[i]))
#             
        
    print('\n')
    
print('COUNTER=', counter)

Category: Alcohol
------------------------------------------------
Var: Alcohol intake frequency. (0.0)
Var: Alcohol intake frequency. (2.0)


Category: Blood Assays
------------------------------------------------
Var: C-reactive protein (0.0)


Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Systolic blood pressure, automated reading (0.1)


Category: Medical History
------------------------------------------------


Category: Physical Measurements
------------------------------------------------
Var: Android total mass (2.0)
Var: Arm fat mass (right) (0.0)
Var: Arm fat mass (right) (2.0)
Var: Arm fat percentage (left) (2.0)
Var: Arm fat percentage (right) (2.0)
Var: Arms total mass (2.0)
Var: Body fat percentage (0.0)
Var: Body fat percentage (2.0)
Var: Body mass index (BMI) (2.0)
Var: Body surface area (2.0)
Var: Gynoid bone mass (2.0)
Var: Gynoid total mass (2.0)
Var: Hip circumference (0.0)
Var: Leg fat mass (right) (0.0)
Var: L

In [32]:
counter = 0

for cat in sorted(list(set(df_vars1_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars1_over_bThr[df_vars1_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        mods0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].modalities.to_list()
        prs0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].pearson_r.to_list()
        mods1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].modalities.to_list()
        prs1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].pearson_r.to_list()
        
        if len(mods0)!=0 and len(mods1)!=0:

            prs_abs0 = np.abs(prs0).tolist()
            _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
            _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
            
            prs_abs1 = np.abs(prs1).tolist()
            _, mods1 = zip(*sorted(zip(prs_abs1[0], mods1[0]), reverse=True))
            _, prs1 = zip(*sorted(zip(prs_abs1[0], prs1[0]), reverse=True))
        
            counter += 1
        
            print('Var: {}'.format(name))
#             print('   Modalities:')
#             print('   -> {} ({:.3f})'.format(mods[0], prs[0]))
#             print('   -> {} ({:.3f})'.format(mods0[0], prs0[0]))
#             print('   -> {} ({:.3f})'.format(mods1[0], prs1[0]))
        

        
#         for i, mod in enumerate(mods):
#             print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
        
        
#         if len(mods0) == 0:
# #             print('======== ATTENTION! NEW VAR ! ==========')
#             pass
#         else:
#             prs_abs0 = np.abs(prs0).tolist()
#             _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
#             _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        

# #         for i, mod in enumerate(mods0):
# #             print('   -> {} ({:.3f})'.format(mod, prs0[i]))
#             
        
    print('\n')
    
print('COUNTER=', counter)

Category: Alcohol
------------------------------------------------
Var: Alcohol intake frequency. (0.0)
Var: Alcohol intake frequency. (2.0)
Var: Amount of alcohol drunk on a typical drinking day (0.0)
Var: Frequency of consuming six or more units of alcohol (0.0)
Var: Frequency of drinking alcohol (0.0)


Category: Blood Assays
------------------------------------------------
Var: C-reactive protein (0.0)


Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Cardiac index during PWA (2.0)
Var: Systolic blood pressure, automated reading (0.0)
Var: Systolic blood pressure, automated reading (0.1)


Category: Cognitive Tests
------------------------------------------------


Category: Diet
------------------------------------------------


Category: Medical History
------------------------------------------------
Var: Diabetes diagnosed by doctor (0.0)
Var: Diagnoses - ICD10 (E109 - E10.9 Without complications)
Var: Diagnoses - ICD10 (E230 

In [33]:
counter = 0

for cat in sorted(list(set(df_vars1_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars1_over_bThr[df_vars1_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        mods0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].modalities.to_list()
        prs0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].pearson_r.to_list()
        mods1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].modalities.to_list()
        prs1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].pearson_r.to_list()
        
        if len(mods0)!=0 and len(mods1)!=0:
        
            prs_abs0 = np.abs(prs0).tolist()
            _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
            _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
            
            prs_abs1 = np.abs(prs1).tolist()
            _, mods1 = zip(*sorted(zip(prs_abs1[0], mods1[0]), reverse=True))
            _, prs1 = zip(*sorted(zip(prs_abs1[0], prs1[0]), reverse=True))
            
        else:
        
            counter += 1
        
            print('Var: {}'.format(name))
#             print('   Modalities:')
#             print('   -> {} ({:.3f})'.format(mods[0], prs[0]))
#             print('   -> {} ({:.3f})'.format(mods0[0], prs0[0]))
#             print('   -> {} ({:.3f})'.format(mods1[0], prs1[0]))
        

        
#         for i, mod in enumerate(mods):
#             print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
        
        
#         if len(mods0) == 0:
# #             print('======== ATTENTION! NEW VAR ! ==========')
#             pass
#         else:
#             prs_abs0 = np.abs(prs0).tolist()
#             _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
#             _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        

# #         for i, mod in enumerate(mods0):
# #             print('   -> {} ({:.3f})'.format(mod, prs0[i]))
#             
        
    print('\n')
    
print('COUNTER=', counter)

Category: Alcohol
------------------------------------------------
Var: Average weekly red wine intake (0.0)
Var: Average weekly spirits intake (2.0)


Category: Blood Assays
------------------------------------------------
Var: Mean corpuscular haemoglobin (0.0)
Var: Mean corpuscular volume (0.0)


Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Cardiac index during PWA (2.1)
Var: Cardiac output during PWA (2.0)
Var: Central pulse pressure during PWA (2.0)
Var: Central systolic blood pressure during PWA (2.0)
Var: Diastolic blood pressure, automated reading (0.0)
Var: Diastolic blood pressure, automated reading (0.1)
Var: Diastolic blood pressure, automated reading (2.0)
Var: Diastolic blood pressure, automated reading (2.1)
Var: Diastolic brachial blood pressure (2.0)
Var: End systolic pressure during PWA (2.0)
Var: End systolic pressure during PWA (2.1)
Var: Mean arterial pressure during PWA (2.0)
Var: Mean arterial pressure during

In [34]:
counter = 0

for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        mods1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].modalities.to_list()
        prs1 = df_vars1_over_bThr[(df_vars1_over_bThr.Categories == cat) & (df_vars1_over_bThr.names == name)].pearson_r.to_list()
        
        if len(mods)!=0 and len(mods1)!=0:
        
            prs_abs = np.abs(prs).tolist()
            _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
            _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))
            
            prs_abs1 = np.abs(prs1).tolist()
            _, mods1 = zip(*sorted(zip(prs_abs1[0], mods1[0]), reverse=True))
            _, prs1 = zip(*sorted(zip(prs_abs1[0], prs1[0]), reverse=True))
            
        else:
        
            counter += 1
        
            print('Var: {}'.format(name))
#             print('   Modalities:')
#             print('   -> {} ({:.3f})'.format(mods[0], prs[0]))
#             print('   -> {} ({:.3f})'.format(mods0[0], prs0[0]))
#             print('   -> {} ({:.3f})'.format(mods1[0], prs1[0]))
        

        
#         for i, mod in enumerate(mods):
#             print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
        
        
#         if len(mods0) == 0:
# #             print('======== ATTENTION! NEW VAR ! ==========')
#             pass
#         else:
#             prs_abs0 = np.abs(prs0).tolist()
#             _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
#             _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        

# #         for i, mod in enumerate(mods0):
# #             print('   -> {} ({:.3f})'.format(mod, prs0[i]))
#             
        
    print('\n')
    
print('COUNTER=', counter)

Category: Alcohol
------------------------------------------------
Var: Alcohol intake versus 10 years previously (0.0)


Category: Blood Assays
------------------------------------------------


Category: Cardiac & Circulartory Measurements
------------------------------------------------


Category: Medical History
------------------------------------------------
Var: Diagnoses - secondary ICD10 (C786 - C78.6 Secondary malignant neoplasm of retroperitoneum and peritoneum)
Var: Diagnoses - secondary ICD10 (D125 - D12.5 Sigmoid colon)
Var: Operative procedures - OPCS4 (K491 - K49.1 Percutaneous transluminal balloon angioplasty of one coronary artery)
Var: Operative procedures - main OPCS4 (K491 - K49.1 Percutaneous transluminal balloon angioplasty of one coronary artery)


Category: Physical Measurements
------------------------------------------------
Var: Leg fat percentage (right) (1.0)


Category: Skeletal Measurements
------------------------------------------------
Var: Heel quan