In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import sys
import argparse
import h5py
from scipy.stats import t as student_t
from statsmodels.stats import multitest as mt
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [2]:
# IDP_df = pd.read_pickle('male_IDP_prototype.pkl')
# vars_df = pd.read_pickle('male_vars_prototype.pkl')

IDP_df = pd.read_pickle('female_feature_direction_ICA_deltas_IDP_deconf.pkl')
vars_df = pd.read_pickle('female_feature_direction_ICA_deltas_nIDP_deconf.pkl')

IDP_names = np.loadtxt("IDP_names.txt", dtype=str, delimiter='\n')
IDP_categories = np.loadtxt("IDP_categories.txt", dtype=str, delimiter='\n')
with open('varsHeader.txt') as f:
    varsHeader = f.readlines()
    varsHeader = [l.strip('\n\r') for l in varsHeader]
    varsHeader = np.array(varsHeader)
vars_categories = np.loadtxt("vars_categories.txt", dtype=str, delimiter='\n')

In [3]:
vars_df.head()

Unnamed: 0,modality,bonf,fdr,dataframe
0,IC = 0,5.509794,3.969847,idx ...
1,IC = 1,5.509794,3.827137,idx ...
2,IC = 2,5.509794,3.934873,idx ...
3,IC = 3,5.509794,4.931917,idx ...
4,IC = 4,5.509794,,idx ...


In [4]:
vars_df.iloc[0].dataframe.head()

Unnamed: 0,idx,names,Categories,pearson_r,t_test_statistic,p_values_corrected,p_values,abs_pearson_r,log_p_values,log_p_values_corrected
0,0,Ethnic background (0.0),Ethnic Background,0.002512,0.177782,1.0,0.858902,0.002512,0.066057,-0.0
1,1,Ethnic background (1.0),Ethnic Background,0.067597,1.958962,0.795428,0.050449,0.067597,1.297145,0.099399
2,2,Ethnic background (2.0),Ethnic Background,0.036595,1.381387,0.953106,0.167377,0.036595,0.776304,0.020859
3,3,Genotype measurement batch (0.0),Genetic Markers,-0.025342,1.770507,0.852343,0.076705,0.025342,1.115176,0.069385
4,4,Heterozygosity (0.0),Genetic Markers,0.002527,0.176523,1.0,0.859891,0.002527,0.065557,-0.0


In [5]:
vars_over_bThr_list = []

for idx in range(len(vars_df)):
    df = vars_df.iloc[idx].dataframe
    bThr = vars_df.iloc[idx].bonf
    vars_over_bThr = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False).names.to_list()
    for var in vars_over_bThr:
        vars_over_bThr_list.append(var)

In [6]:
print("Number of unique vars across all modalities: ", len(set(vars_over_bThr_list)))

Number of unique vars across all modalities:  39


In [7]:
unique_vars_over_bTHr = list(set(vars_over_bThr_list))

In [8]:
unique_vars_over_bThr_categories = []

for var in unique_vars_over_bTHr:
    idx = list(varsHeader).index(var)
    category = vars_categories[idx]
    unique_vars_over_bThr_categories.append(category)

In [9]:
sorted(unique_vars_over_bTHr)

['Arms BMC (bone mineral content) (2.0)',
 'Arms BMD (bone mineral density) (2.0)',
 'Cardiac index during PWA (2.0)',
 'Cardiac output during PWA (2.0)',
 'Diabetes diagnosed by doctor (2.0)',
 'Femur neck BMD (bone mineral density) (left) (2.0)',
 'Femur neck BMD (bone mineral density) (right) (2.0)',
 'Femur neck BMD (bone mineral density) T-score (left) (2.0)',
 'Femur neck BMD (bone mineral density) T-score (right) (2.0)',
 'Femur shaft BMD (bone mineral density) (left) (2.0)',
 'Femur shaft BMD (bone mineral density) (right) (2.0)',
 'Femur total BMD (bone mineral density) (left) (2.0)',
 'Femur total BMD (bone mineral density) (right) (2.0)',
 'Femur total BMD (bone mineral density) T-score (left) (2.0)',
 'Femur total BMD (bone mineral density) T-score (right) (2.0)',
 'Femur troch BMD (bone mineral density) (left) (2.0)',
 'Femur troch BMD (bone mineral density) (right) (2.0)',
 'Femur troch BMD (bone mineral density) T-score (left) (2.0)',
 'Femur troch BMD (bone mineral dens

In [10]:
modalities = []
pearson_rs_list = []
for var in unique_vars_over_bTHr:
    modalities_with_var = []
    pearsonRs_with_var = []
    for idx in range(len(vars_df)):
        df = vars_df.iloc[idx].dataframe
        modality = vars_df.iloc[idx].modality
        bThr = vars_df.iloc[idx].bonf
        df = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False)
        vars_over_bThr = df.names.to_list()
        pearson_r_over_bThr = df.pearson_r.to_list()
        if var in vars_over_bThr:
            modalities_with_var.append(modality)
            pearsonRs_with_var.append(pearson_r_over_bThr[vars_over_bThr.index(var)])
    modalities.append(modalities_with_var)
    pearson_rs_list.append(pearsonRs_with_var)

In [11]:
len(modalities)

39

In [12]:
df_vars_over_bThr = pd.DataFrame.from_dict({
    'names': unique_vars_over_bTHr,
    'Categories': unique_vars_over_bThr_categories,
    'modalities': modalities,
    'pearson_r': pearson_rs_list
})

In [13]:
df_vars_over_bThr.head()

Unnamed: 0,names,Categories,modalities,pearson_r
0,L1-L4 BMC (bone mineral content) (2.0),Skeletal Measurements,"[IC = 0, IC = 1, IC = 2]","[0.0936307009229699, -0.09072726440664206, 0.0..."
1,Cardiac output during PWA (2.0),Cardiac & Circulartory Measurements,[IC = 1],[-0.07174121597437325]
2,Spine BMC (bone mineral content) (2.0),Skeletal Measurements,"[IC = 0, IC = 1, IC = 2]","[0.07874222081926234, -0.08806966752290066, 0...."
3,Femur total BMD (bone mineral density) T-score...,Skeletal Measurements,"[IC = 0, IC = 1]","[0.07912080839431165, -0.08105849550785356]"
4,Head BMC (bone mineral content) (2.0),Skeletal Measurements,"[IC = 0, IC = 1, IC = 2]","[0.12709496361993075, -0.12560390521316778, 0...."


In [14]:
set(df_vars_over_bThr.Categories.to_list())

{'Cardiac & Circulartory Measurements',
 'Medical History',
 'Skeletal Measurements'}

In [15]:
for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        print('Var: {}'.format(name))
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        print('   Modalities:')
        for i, mod in enumerate(mods[0]):
            print('   -> {} ({:.3f})'.format(mod, prs[0][i]))
        
    print('\n')

Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Cardiac index during PWA (2.0)
   Modalities:
   -> IC = 1 (-0.074)
Var: Cardiac output during PWA (2.0)
   Modalities:
   -> IC = 1 (-0.072)


Category: Medical History
------------------------------------------------
Var: Diabetes diagnosed by doctor (2.0)
   Modalities:
   -> IC = 3 (-0.068)


Category: Skeletal Measurements
------------------------------------------------
Var: Arms BMC (bone mineral content) (2.0)
   Modalities:
   -> IC = 0 (0.084)
   -> IC = 1 (-0.077)
   -> IC = 2 (0.070)
Var: Arms BMD (bone mineral density) (2.0)
   Modalities:
   -> IC = 0 (0.099)
   -> IC = 1 (-0.094)
   -> IC = 2 (0.079)
Var: Femur neck BMD (bone mineral density) (left) (2.0)
   Modalities:
   -> IC = 0 (0.084)
Var: Femur neck BMD (bone mineral density) (right) (2.0)
   Modalities:
   -> IC = 0 (0.080)
Var: Femur neck BMD (bone mineral density) T-score (left) (2.0)
   Modalities:
   -> IC = 0 

In [16]:
sorted(prs[0], reverse=True)

[0.11141622746904599, 0.09007906148135993, -0.10492006853245371]

In [17]:
for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        print('Var: {}'.format(name))
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        print('   Modalities:')
        
        prs_abs = np.abs(prs).tolist()
        _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
        _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))
        
        for i, mod in enumerate(mods):
            print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
    print('\n')

Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Cardiac index during PWA (2.0)
   Modalities:
   -> IC = 1 (-0.074)
Var: Cardiac output during PWA (2.0)
   Modalities:
   -> IC = 1 (-0.072)


Category: Medical History
------------------------------------------------
Var: Diabetes diagnosed by doctor (2.0)
   Modalities:
   -> IC = 3 (-0.068)


Category: Skeletal Measurements
------------------------------------------------
Var: Arms BMC (bone mineral content) (2.0)
   Modalities:
   -> IC = 0 (0.084)
   -> IC = 1 (-0.077)
   -> IC = 2 (0.070)
Var: Arms BMD (bone mineral density) (2.0)
   Modalities:
   -> IC = 0 (0.099)
   -> IC = 1 (-0.094)
   -> IC = 2 (0.079)
Var: Femur neck BMD (bone mineral density) (left) (2.0)
   Modalities:
   -> IC = 0 (0.084)
Var: Femur neck BMD (bone mineral density) (right) (2.0)
   Modalities:
   -> IC = 0 (0.080)
Var: Femur neck BMD (bone mineral density) T-score (left) (2.0)
   Modalities:
   -> IC = 0 

In [18]:
variance_threshold = 0.1

for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        
        
        prs_abs = np.abs(prs).tolist()
        _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
        _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))
        
        if prs[0] >= variance_threshold:
            print('Var: {}'.format(name))
            print('   Modalities:')
            for i, mod in enumerate(mods):
                if prs[i] >= variance_threshold:
                    print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
    print('\n')

Category: Cardiac & Circulartory Measurements
------------------------------------------------


Category: Medical History
------------------------------------------------


Category: Skeletal Measurements
------------------------------------------------
Var: Head BMC (bone mineral content) (2.0)
   Modalities:
   -> IC = 0 (0.127)
   -> IC = 2 (0.109)
Var: Head BMD (bone mineral density) (2.0)
   Modalities:
   -> IC = 0 (0.152)
   -> IC = 2 (0.113)
Var: Legs BMD (bone mineral density) (2.0)
   Modalities:
   -> IC = 0 (0.102)
Var: Pelvis BMD (bone mineral density) (2.0)
   Modalities:
   -> IC = 0 (0.102)
Var: Spine BMD (bone mineral density) (2.0)
   Modalities:
   -> IC = 0 (0.107)
Var: Total BMC (bone mineral content) (2.0)
   Modalities:
   -> IC = 0 (0.107)
Var: Total BMD (bone mineral density) (2.0)
   Modalities:
   -> IC = 0 (0.133)
   -> IC = 2 (0.105)
Var: Total BMD (bone mineral density) T-score (2.0)
   Modalities:
   -> IC = 0 (0.134)
   -> IC = 2 (0.106)
Var: Trunk BMD 

In [19]:
mods

('IC = 0', 'IC = 1', 'IC = 2')

In [20]:
counter = 0
for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()[0]
        if 'ElasticNetFull' in mods or 'ElasticNetFull' in mods or 'ElasticNetFull' in mods:
            print('Var: {}'.format(name))
            counter += 1
        else:
            pass
    print('\n')
    
print("There are {} vars independent from the conventional modalities".format(counter))

Category: Cardiac & Circulartory Measurements
------------------------------------------------


Category: Medical History
------------------------------------------------


Category: Skeletal Measurements
------------------------------------------------


There are 0 vars independent from the conventional modalities


In [25]:
IDP_df0 = pd.read_pickle('female_IDP.pkl')
vars_df0 = pd.read_pickle('female_vars.pkl')

In [26]:
vars0_over_bThr_list = []

for idx in range(len(vars_df0)):
    df = vars_df0.iloc[idx].dataframe
    bThr = vars_df0.iloc[idx].bonf
    vars_over_bThr = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False).names.to_list()
    for var in vars_over_bThr:
        vars0_over_bThr_list.append(var)

In [27]:
print("Number of unique vars across all modalities: ", len(set(vars0_over_bThr_list)))

Number of unique vars across all modalities:  214


In [28]:
unique_vars0_over_bTHr = list(set(vars0_over_bThr_list))
unique_vars0_over_bThr_categories = []

for var in unique_vars0_over_bTHr:
    idx = list(varsHeader).index(var)
    category = vars_categories[idx]
    unique_vars0_over_bThr_categories.append(category)
    
modalities0 = []
pearson_rs_list0 = []
for var in unique_vars0_over_bTHr:
    modalities_with_var = []
    pearsonRs_with_var = []
    for idx in range(len(vars_df0)):
        df = vars_df0.iloc[idx].dataframe
        modality = vars_df0.iloc[idx].modality
        bThr = vars_df0.iloc[idx].bonf
        df = df[df.log_p_values>=bThr].sort_values('log_p_values', ascending=False)
        vars_over_bThr = df.names.to_list()
        pearson_r_over_bThr = df.pearson_r.to_list()
        if var in vars_over_bThr:
            modalities_with_var.append(modality)
            pearsonRs_with_var.append(pearson_r_over_bThr[vars_over_bThr.index(var)])
    modalities0.append(modalities_with_var)
    pearson_rs_list0.append(pearsonRs_with_var)

In [29]:
df_vars0_over_bThr = pd.DataFrame.from_dict({
    'names': unique_vars0_over_bTHr,
    'Categories': unique_vars0_over_bThr_categories,
    'modalities': modalities0,
    'pearson_r': pearson_rs_list0
})

In [30]:
df_vars0_over_bThr[df_vars0_over_bThr.names=='Alcohol intake frequency. (2.0)']

Unnamed: 0,names,Categories,modalities,pearson_r
170,Alcohol intake frequency. (2.0),Alcohol,"[jacobian, vbm, swi, rsfmri_0, rsfmri_7, rsfmr...","[0.07935210185576791, 0.07653801567002164, 0.0..."


In [31]:
for cat in sorted(list(set(df_vars_over_bThr.Categories.to_list()))):
    print("Category: {}".format(cat))
    print('------------------------------------------------')
    names = df_vars_over_bThr[df_vars_over_bThr.Categories == cat].names.to_list()
    for name in sorted(names):
        print('Var: {}'.format(name))
        mods = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].modalities.to_list()
        prs = df_vars_over_bThr[(df_vars_over_bThr.Categories == cat) & (df_vars_over_bThr.names == name)].pearson_r.to_list()
        mods0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].modalities.to_list()
        prs0 = df_vars0_over_bThr[(df_vars0_over_bThr.Categories == cat) & (df_vars0_over_bThr.names == name)].pearson_r.to_list()
        print('   Modalities:')
        
        prs_abs = np.abs(prs).tolist()
        _, mods = zip(*sorted(zip(prs_abs[0], mods[0]), reverse=True))
        _, prs = zip(*sorted(zip(prs_abs[0], prs[0]), reverse=True))
        
        for i, mod in enumerate(mods):
            print('   -> {} ({:.3f})'.format(mod, prs[i]))
        
        
        if len(mods0) == 0:
            print('======== ATTENTION! NEW VAR ! ==========')
        else:
            prs_abs0 = np.abs(prs0).tolist()
            _, mods0 = zip(*sorted(zip(prs_abs0[0], mods0[0]), reverse=True))
            _, prs0 = zip(*sorted(zip(prs_abs0[0], prs0[0]), reverse=True))
        

#         for i, mod in enumerate(mods0):
#             print('   -> {} ({:.3f})'.format(mod, prs0[i]))
            print('   -> {} ({:.3f})'.format(mods0[0], prs0[0]))
        
    print('\n')

Category: Cardiac & Circulartory Measurements
------------------------------------------------
Var: Cardiac index during PWA (2.0)
   Modalities:
   -> IC = 1 (-0.074)
   -> tbss_L1_s (0.091)
Var: Cardiac output during PWA (2.0)
   Modalities:
   -> IC = 1 (-0.072)
   -> tbss_L1_s (0.073)


Category: Medical History
------------------------------------------------
Var: Diabetes diagnosed by doctor (2.0)
   Modalities:
   -> IC = 3 (-0.068)
   -> T2_nonlinear (0.067)


Category: Skeletal Measurements
------------------------------------------------
Var: Arms BMC (bone mineral content) (2.0)
   Modalities:
   -> IC = 0 (0.084)
   -> IC = 1 (-0.077)
   -> IC = 2 (0.070)
   -> rsfmri_2 (-0.252)
Var: Arms BMD (bone mineral density) (2.0)
   Modalities:
   -> IC = 0 (0.099)
   -> IC = 1 (-0.094)
   -> IC = 2 (0.079)
   -> rsfmri_2 (-0.299)
Var: Femur neck BMD (bone mineral density) (left) (2.0)
   Modalities:
   -> IC = 0 (0.084)
   -> rsfmri_5 (-0.248)
Var: Femur neck BMD (bone mineral dens