In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from os import makedirs
from os.path import join, exists
from nilearn.plotting import plot_connectome, plot_roi, find_parcellation_cut_coords
import bct
import datetime
from nilearn.mass_univariate import permuted_ols
from scipy.stats import pearsonr, spearmanr
from sklearn.impute import KNNImputer

sns.set(context='poster', style='ticks')

In [2]:
crayons_l = sns.crayon_palette(['Vivid Tangerine', 'Cornflower'])
crayons_d = sns.crayon_palette(['Brick Red', 'Midnight Blue'])
grays = sns.light_palette('#999999', n_colors=3, reverse=True)

f_2 = sns.crayon_palette(['Red Orange', 'Vivid Tangerine'])
m_2 = sns.crayon_palette(['Cornflower', 'Cerulean'])

In [3]:
def jili_sidak_mc(data, alpha):
    import math
    import numpy as np

    mc_corrmat = data.corr()
    mc_corrmat.fillna(0, inplace=True)
    eigvals, eigvecs = np.linalg.eig(mc_corrmat)

    M_eff = 0
    for eigval in eigvals:
        if abs(eigval) >= 0:
            if abs(eigval) >= 1:
                M_eff += 1
            else:
                M_eff += abs(eigval) - math.floor(abs(eigval))
        else:
            M_eff += 0
    print('Number of effective comparisons: {0}'.format(M_eff))

    # and now applying M_eff to the Sidak procedure
    sidak_p = 1 - (1 - alpha)**(1/M_eff)
    if sidak_p < 0.00001:
        print('Critical value of {:.3f}'.format(
            alpha), 'becomes {:2e} after corrections'.format(sidak_p))
    else:
        print('Critical value of {:.3f}'.format(
            alpha), 'becomes {:.6f} after corrections'.format(sidak_p))
    return sidak_p, M_eff

In [4]:
subjects = ['101', '102', '103', '104', '106', '107', '108', '110', '212', '213',
            '214', '215', '216', '217', '218', '219', '320', '321', '322', '323',
            '324', '325', '327', '328', '329', '330', '331', '332', '333', '334',
            '335', '336', '337', '338', '339', '340', '341', '342', '343', '344',
            '345', '346', '347', '348', '349', '350', '451', '452', '453', '455',
            '456', '457', '458', '459', '460', '462', '463', '464', '465', '467',
            '468', '469', '470', '502', '503', '571', '572', '573', '574', '575',
            '577', '578', '579', '580', '581', '582', '584', '585', '586', '587',
            '588', '589', '590', '591', '592', '593', '594', '595', '596', '597',
            '598', '604', '605', '606', '607', '608', '609', '610', '611', '612',
            '613', '614', '615', '616', '617', '618', '619', '620', '621', '622',
            '623', '624', '625', '626', '627', '628', '629', '630', '631', '633',
            '634']
#subjects = ['101', '102']

sink_dir = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output'
fig_dir = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/figures/'

shen = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/shen2015_2mm_268_parcellation.nii.gz'
craddock = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/craddock2012_tcorr05_2level_270_2mm.nii.gz'
masks = ['shen2015', 'craddock2012']

tasks = {'retr': [{'conditions': ['Physics', 'General']},
                  {'runs': [0, 1]}],
         'fci': [{'conditions': ['Physics', 'NonPhysics']},
                 {'runs': [0, 1, 2]}]}

sessions = [0, 1]
sesh = ['pre', 'post']
conds = ['high-level', 'lower-level']
iqs = ['VCI', 'WMI', 'PRI', 'PSI', 'FSIQ']

index = pd.MultiIndex.from_product([subjects, sessions, tasks, conds, masks],
                                   names=['subject', 'session', 'task', 'condition', 'mask'])

In [5]:
data_dir = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/data'
b_df = pd.read_csv(
    join(data_dir, 'rescored', 'physics_learning-nonbrain_OLS-imputed.csv'), index_col=0, header=0)

In [6]:
b_df['SexXClass'] = b_df['F'] * b_df['Mod']
for iq in iqs:
    b_df['{0}2XSex'.format(iq)] = b_df['F'] * b_df['{0}2'.format(iq)]
    b_df['{0}2XClass'.format(iq)] = b_df['Mod'] * b_df['{0}2'.format(iq)]
    b_df['{0}2XClassXSex'.format(iq)] = b_df['F'] * b_df['Mod'] * b_df['{0}2'.format(iq)]
    b_df['delta{0}XSex'.format(iq)] = b_df['F'] * b_df['delta{0}'.format(iq)]
    b_df['delta{0}XClass'.format(iq)] = b_df['Mod'] * b_df['delta{0}'.format(iq)]
    b_df['delta{0}XClassXSex'.format(iq)] = b_df['F'] * b_df['Mod'] * b_df['delta{0}'.format(iq)]


In [7]:
head_size = pd.read_csv(join(
    data_dir, 'head-size_2019-05-29 15:19:53.287525.csv'), index_col=0, header=0)
head_size['normalized head size'] = (head_size['average_head_size']-np.mean(
    head_size['average_head_size']))/np.std(head_size['average_head_size'])

In [8]:
fd = pd.read_csv(join(
    data_dir, 'avg-fd-per-condition-per-run_2019-05-29.csv'), index_col=0, header=0)
fd['normalized fd'] = (
    fd['average fd']-np.mean(fd['average fd']))/np.std(fd['average fd'])
retr_fd = fd[fd['task'] == 'retr']
fci_fd = fd[fd['task'] == 'fci']

df_pivot = retr_fd[retr_fd['condition'] == 'high-level'].reset_index()
retr_phys_fd = df_pivot.pivot(
    index='subject', columns='session', values='average fd')
retr_phys_fd.rename(
    {'pre': 'pre phys retr fd', 'post': 'post phys retr fd'}, axis=1, inplace=True)

df_pivot = retr_fd[retr_fd['condition'] == 'lower-level'].reset_index()
retr_genr_fd = df_pivot.pivot(
    index='subject', columns='session', values='average fd')
retr_genr_fd.rename(
    {'pre': 'pre gen retr fd', 'post': 'post gen retr fd'}, axis=1, inplace=True)

df_pivot = fci_fd[fci_fd['condition'] == 'high-level'].reset_index()
fci_phys_fd = df_pivot.pivot(
    index='subject', columns='session', values='average fd')
fci_phys_fd.rename(
    {'pre': 'pre phys fci fd', 'post': 'post phys fci fd'}, axis=1, inplace=True)

df_pivot = fci_fd[fci_fd['condition'] == 'lower-level'].reset_index()
fci_ctrl_fd = df_pivot.pivot(
    index='subject', columns='session', values='average fd')
fci_ctrl_fd.rename(
    {'pre': 'pre ctrl fci fd', 'post': 'post ctrl fci fd'}, axis=1, inplace=True)

rest_fd = pd.read_csv(
    join(data_dir, 'avg-fd-per-run-rest_2019-05-31.csv'), index_col=0, header=0)
rest_fd['normalized fd'] = (
    rest_fd['average fd']-np.mean(rest_fd['average fd']))/np.std(rest_fd['average fd'])

df_pivot = rest_fd.reset_index()
rest_fd = df_pivot.pivot(
    index='subject', columns='session', values='normalized fd')
rest_fd.rename({'pre': 'pre rest fd', 'post': 'post rest fd'},
               axis=1, inplace=True)

In [9]:
big_df = pd.concat([b_df, retr_phys_fd, retr_genr_fd,
                    fci_phys_fd, fci_ctrl_fd], axis=1)

## First, we'll test connectivity during the physics knowledge task
We'll run the permuted OLS regressions with few permutations for a first pass look at how brain connectivity explains variance in different subscores of the WAIS. Significant regressions at this step will be re-run later with more permutations, for more accurate <i>p</i>- and <i>t</i>-values. This is a more efficient use of computational resources than running all possible regressions with many permutations right off the bat.

In [10]:
# read in every person's connectivity matrix (yikes)
# one task & condition at a time, I think. otherwise it becomes a memory issue
post_retr_conn = pd.DataFrame(columns=np.arange(0, 268**2))

for subject in subjects:
    try:
        corrmat = np.genfromtxt(join(sink_dir, 'corrmats', '{0}-session-1_retr-Physics_shen2015-corrmat.csv'.format(subject)),
                                delimiter=' ')
        post_retr_conn.at[subject] = np.ravel(corrmat, order='F')
    except Exception as e:
        print(subject, e)

213 /Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/corrmats/213-session-1_retr-Physics_shen2015-corrmat.csv not found.
217 /Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/corrmats/217-session-1_retr-Physics_shen2015-corrmat.csv not found.
322 /Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/corrmats/322-session-1_retr-Physics_shen2015-corrmat.csv not found.
329 /Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/corrmats/329-session-1_retr-Physics_shen2015-corrmat.csv not found.
332 /Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/corrmats/332-session-1_retr-Physics_shen2015-corrmat.csv not found.
452 /Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/corrmats/452-session-1_retr-Physics_shen2015-corrmat.csv not found.
456 /Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/corrmats/456-session-1_retr-Physics_shen2015-corrmat.csv not found.
457 /Users/kbottenh/Dropbox/Projects/physics-retrieval/

In [11]:
brain_impute = KNNImputer(n_neighbors=5, weights='distance')
imp_conns = brain_impute.fit_transform(post_retr_conn)
imp_conn_df = pd.DataFrame(data=imp_conns, 
                           columns=post_retr_conn.columns, 
                           index=post_retr_conn.index)

In [12]:
for column in imp_conn_df.columns:
    num = np.nonzero(imp_conn_df[column].values)[0].shape
    if num[0] <= 5:
        imp_conn_df.drop(column, axis=1, inplace=True)

In [13]:
big_df.index = big_df.index.astype(int)
imp_conn_df.index = post_retr_conn.index.astype(int)
imp_conn_df = imp_conn_df.astype('float')

all_data = pd.concat([big_df, imp_conn_df], axis=1)
all_data.dropna(how='any', axis=0, inplace=True)
conns = list(set(imp_conn_df.columns))

In [14]:
sig = {}
n_perm = 10000
retr_iqs = ['VCI2', 'WMI2', 'FSIQ2', 'deltaWMI']

for iq in retr_iqs:
    p, t, _ = permuted_ols(all_data['{0}'.format(iq)].values,
                           all_data[conns].values,
                           all_data[['{0}XSex'.format(iq),
                                     '{0}XClass'.format(iq),
                                     '{0}XClassXSex'.format(iq),
                                     'F', 'StrtLvl', 'SexXClass',
                                     'Age', 'Mod', 'post phys retr fd']].values,
                           n_perm=n_perm)
    sig['post (IQ): {0}'.format(iq)] = np.max(p[0])
    # if np.max(p) > 1:
    #    nodaleff_sig['{0}2 {1} p'.format(iq, key)] = p.T
    #    nodaleff_sig['{0}2 {1} t'.format(iq, key)] = t.T

    p, t, _ = permuted_ols(all_data['{0}XSex'.format(iq)].values,
                           all_data[conns].values,
                           all_data[['{0}'.format(iq),
                                     '{0}XClass'.format(iq),
                                     '{0}XClassXSex'.format(iq),
                                     'F', 'StrtLvl', 'SexXClass',
                                     'Age', 'Mod', 'post phys retr fd']].values,
                           n_perm=n_perm)
    sig['post (IQXSex): {0}'.format(iq)] = np.max(p[0])

    p, t, _ = permuted_ols(all_data['{0}XClass'.format(iq)].values,
                           all_data[conns].values,
                           all_data[['{0}'.format(iq),
                                     '{0}XSex'.format(iq),
                                     '{0}XClassXSex'.format(iq),
                                     'F', 'StrtLvl', 'SexXClass',
                                     'Age', 'Mod', 'post phys retr fd']].values,
                           n_perm=n_perm)

    sig['post (IQXClass): {0}'.format(iq)] = np.max(p[0])
    p, t, _ = permuted_ols(all_data['{0}XClassXSex'.format(iq)].values,
                           all_data[conns].values,
                           all_data[['{0}'.format(iq),
                                     '{0}XSex'.format(iq),
                                     '{0}XClass'.format(iq),
                                     'F', 'StrtLvl', 'SexXClass',
                                     'Age', 'Mod', 'post phys retr fd']].values,
                           n_perm=n_perm)
    sig['post (IQXSexXClass): {0}'.format(iq)] = np.max(p[0])

In [15]:
sig

{'post (IQ): VCI2': 0.5807487055234022,
 'post (IQXSex): VCI2': 0.3483778233539271,
 'post (IQXClass): VCI2': 1.007489909444727,
 'post (IQXSexXClass): VCI2': 0.16371931157011096,
 'post (IQ): WMI2': 1.0128241973688579,
 'post (IQXSex): WMI2': 0.8532954136462229,
 'post (IQXClass): WMI2': 0.7994665005220145,
 'post (IQXSexXClass): WMI2': 0.9504378146818895,
 'post (IQ): FSIQ2': 0.6010623606187315,
 'post (IQXSex): FSIQ2': 0.6273154863912672,
 'post (IQXClass): FSIQ2': 0.4759070506842941,
 'post (IQXSexXClass): FSIQ2': 2.107948824586382,
 'post (IQ): deltaWMI': 0.9423773233670334,
 'post (IQXSex): deltaWMI': 0.5698070738653522,
 'post (IQXClass): deltaWMI': 1.0570438339108221,
 'post (IQXSexXClass): deltaWMI': 0.3052627912648011}

### For all significant OLS regressions, max p-value goes in a dataframe

In [16]:
sessions = ['post', 'delta']
variables = ['IQ', 'IQXSex', 'IQXClass', 'IQXClassXSex']
index = pd.MultiIndex.from_product([sessions, iqs])
significant = pd.DataFrame(index=index)
for key in sig.keys():
    if sig[key] >= 1.5:
        #print(key, sig[key])
        sig_keys = key.split(' ')
        sesh = sig_keys[0]
        iq = sig_keys[-1]
        variable = sig_keys[1].strip('():')
        significant.at[(sesh, iq), variable] = sig[key]

In [17]:
significant.to_csv(
    join(sink_dir, 'whole_brain-retr-permuted_ols-most_sig_pval.csv'))
sig_keys = significant.dropna(how='all').index
print(sig_keys)

MultiIndex([('post', 'FSIQ2')],
           )


In [18]:
keys = []
for i in np.arange(0, len(sig_keys)):
    if sig_keys[i][0] == 'post':
        keys.append(str(sig_keys[i][1] + '2'))
    if sig_keys[i][0] == 'delta':
        keys.append(str(sig_keys[i][0] + sig_keys[i][1]))

In [19]:
shen_nii = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/shen2015_2mm_268_parcellation.nii.gz'
coordinates = find_parcellation_cut_coords(labels_img=shen_nii)

In [20]:
datetime.datetime.now().strftime("%H:%M:%S")

'17:04:16'

In [21]:
post_retr_conn = None

## And now we do it all over again for FCI

In [36]:
# read in every person's connectivity matrix (yikes)
# one task & condition at a time, I think. otherwise it becomes a memory issue
post_fci_conn = pd.DataFrame(columns=np.arange(0, 268**2))

for subject in subjects:
    try:
        corrmat = np.genfromtxt(join(sink_dir, 'corrmats', '{0}-session-1_fci-Physics_shen2015-corrmat.csv'.format(subject)),
                                delimiter=' ')
        post_fci_conn.at[subject] = np.ravel(corrmat, order='F')
    except Exception as e:
        print(subject, e)

213 /Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/corrmats/213-session-1_fci-Physics_shen2015-corrmat.csv not found.
217 /Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/corrmats/217-session-1_fci-Physics_shen2015-corrmat.csv not found.
322 /Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/corrmats/322-session-1_fci-Physics_shen2015-corrmat.csv not found.
329 /Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/corrmats/329-session-1_fci-Physics_shen2015-corrmat.csv not found.
332 /Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/corrmats/332-session-1_fci-Physics_shen2015-corrmat.csv not found.
348 /Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/corrmats/348-session-1_fci-Physics_shen2015-corrmat.csv not found.
452 /Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output/corrmats/452-session-1_fci-Physics_shen2015-corrmat.csv not found.
456 /Users/kbottenh/Dropbox/Projects/physics-retrieval/data/ou

In [37]:
brain_impute = KNNImputer(n_neighbors=5, weights='distance')
imp_conns = brain_impute.fit_transform(post_fci_conn)
imp_conn_df = pd.DataFrame(data=imp_conns, 
                           columns=post_fci_conn.columns, 
                           index=post_fci_conn.index)

In [38]:
for column in imp_conn_df.columns:
    num = np.nonzero(imp_conn_df[column].values)[0].shape
    if num[0] <= 5:
        imp_conn_df.drop(column, axis=1, inplace=True)

In [30]:
big_df.index = big_df.index.astype(int)
imp_conn_df.index = post_fci_conn.index.astype(int)
imp_conn_df = imp_conn_df.astype('float')

all_data = pd.concat([big_df, imp_conn_df], axis=1)
all_data.dropna(how='any', axis=0, inplace=True)
conns = list(set(imp_conn_df.columns))

In [39]:
sig = {}
n_perm = 10000
fci_iqs = ['VCI2', 'deltaPRI', 'deltaFSIQ']

for iq in fci_iqs:
    p, t, _ = permuted_ols(all_data['{0}'.format(iq)].values,
                           all_data[conns].values,
                           all_data[['{0}XSex'.format(iq),
                                     '{0}XClass'.format(iq),
                                     '{0}XClassXSex'.format(iq),
                                     'F', 'StrtLvl', 'SexXClass',
                                     'Age', 'Mod', 'post phys fci fd']].values,
                           n_perm=n_perm, verbose=2, n_jobs=2)
    sig['post (IQ): {0}'.format(iq)] = np.max(p[0])
    # if np.max(p) > 1:
    #    nodaleff_sig['{0}2 {1} p'.format(iq, key)] = p.T
    #    nodaleff_sig['{0}2 {1} t'.format(iq, key)] = t.T

    p, t, _ = permuted_ols(all_data['{0}XSex'.format(iq)].values,
                           all_data[conns].values,
                           all_data[['{0}'.format(iq),
                                     '{0}XClass'.format(iq),
                                     '{0}XClassXSex'.format(iq),
                                     'F', 'StrtLvl', 'SexXClass',
                                     'Age', 'Mod', 'post phys fci fd']].values,
                           n_perm=n_perm, verbose=2)
    sig['post (IQXSex): {0}'.format(iq)] = np.max(p[0])

    p, t, _ = permuted_ols(all_data['{0}XClass'.format(iq)].values,
                           all_data[conns].values,
                           all_data[['{0}'.format(iq),
                                     '{0}XSex'.format(iq),
                                     '{0}XClassXSex'.format(iq),
                                     'F', 'StrtLvl', 'SexXClass',
                                     'Age', 'Mod', 'post phys fci fd']].values,
                           n_perm=n_perm, verbose=2)

    sig['post (IQXClass): {0}'.format(iq)] = np.max(p[0])
    p, t, _ = permuted_ols(all_data['{0}XClassXSex'.format(iq)].values,
                           all_data[conns].values,
                           all_data[['{0}'.format(iq),
                                     '{0}XSex'.format(iq),
                                     '{0}XClass'.format(iq),
                                     'F', 'StrtLvl', 'SexXClass',
                                     'Age', 'Mod', 'post phys fci fd']].values,
                           n_perm=n_perm, verbose=2)
    sig['post (IQXSexXClass): {0}'.format(iq)] = np.max(p[0])

[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   2 out of   2 | elapsed:  4.7min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.8min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.8min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.8min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.8min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.8min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.8min finished
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   2 out of   2 | elapsed:  5.5min finished
[Parallel(n_jobs=

KeyboardInterrupt: 

In [41]:
sig

{'post (IQ): VCI2': 0.5473372007658337,
 'post (IQXSex): VCI2': 0.2745401584175471,
 'post (IQXClass): VCI2': 0.03952508449615473,
 'post (IQXSexXClass): VCI2': 0.07118689369495199,
 'post (IQ): deltaPRI': 1.793217551245013}

In [None]:
sessions = ['post', 'delta']
variables = ['IQ', 'IQXSex', 'IQXClass', 'IQXClassXSex']
index = pd.MultiIndex.from_product([sessions, iqs])
significant = pd.DataFrame(index=index)
for key in sig.keys():
    if sig[key] >= 1.5:
        #print(key, sig[key])
        sig_keys = key.split(' ')
        sesh = sig_keys[0]
        iq = sig_keys[-1]
        variable = sig_keys[1].strip('():')
        significant.at[(sesh, iq), variable] = sig[key]

In [None]:
significant.to_csv(
    join(sink_dir, 'whole_brain-fci-permuted_ols-most_sig_pval.csv'))
sig_keys = significant.dropna(how='all').index
print(sig_keys)

In [None]:
keys = []
for i in np.arange(0, len(sig_keys)):
    if sig_keys[i][0] == 'post':
        keys.append(str(sig_keys[i][1] + '2'))
    if sig_keys[i][0] == 'delta':
        keys.append(str(sig_keys[i][0] + sig_keys[0][1]))