- check the direction of main effects by comparing mean differences
- repeat the GLM analysis and compare with the findings from the pipeline (p_value, T...)
- run a t-test between the two groups to determine the significance of the difference
- compute an effect size for the difference

In [31]:
import os
import numpy as np
import pandas as pd
import nibabel as nib
import nilearn as nil
from scipy import io as sio
from matplotlib import gridspec
from nilearn import plotting as nlp
from matplotlib import pyplot as plt
import statsmodels.formula.api as smf

In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
# Effects to re-check, one (network, subtype) pair per entry. The loops below
# unpack each pair as `network, subtype`, load `sbt_weights_net_<network>.csv`
# and read the column `sub<subtype>` from it.
sbt_effects = ((1,2),(1,5),(2,5),(3,1),(3,2),(6,1),(6,5))

In [9]:
# base_path: directory the per-network weight files (`sbt_weights_net_<network>.csv`)
#            are read from in the loops below.
# mod_path:  CSV that is loaded into `model` in the next cell.
# NOTE(review): both are hard-coded absolute paths, so the notebook only runs on
# a machine that has this directory layout.
base_path = '/data1/guilimin/abide/subtype/sc7/full_maybe/'
mod_path = '/data1/guilimin/abide/pheno/sc7/model_full_maybe_sc7.csv'

In [10]:
# Load the model table. Later cells use its SUB_ID (merge key), DX_GROUP and
# AGE_AT_SCAN columns.
model = pd.read_csv(mod_path)

In [36]:
# (rows, columns) of the model table.
model.shape

(568, 20)

In [37]:
# Mean of the AGE_AT_SCAN column over all rows of the model table.
np.mean(model.AGE_AT_SCAN)

16.512765845070422

In [38]:
# Standard deviation of the AGE_AT_SCAN column.
# NOTE(review): when given a pandas Series, np.std may delegate to Series.std,
# whose default is ddof=1 (sample SD), whereas np.std on a plain array uses
# ddof=0 (population SD). Confirm which one is intended and make ddof explicit.
np.std(model.AGE_AT_SCAN)

7.199480288893878

In [11]:
# Read the merged phenotype file and tally the rows per DX_GROUP code
# (the loops below relabel code 1 as 'patients' and code 2 as 'controls').
test = pd.read_csv('/data1/guilimin/abide/pheno/merged_abide_full_maybe.csv')
test['DX_GROUP'].value_counts()

2    284
1    284
Name: DX_GROUP, dtype: int64

In [28]:
# NOTE(review): out_path is assigned but not used anywhere in this cell.
out_path = '/data1/guilimin/abide/subtype/sc7/full_maybe/'

# For every (network, subtype) effect: load the subtype weights of that network,
# join them to the model table on SUB_ID and compare the mean weight of the
# patients with the mean weight of the controls.
for s_id, eff in enumerate(sbt_effects):
    network, subtype = eff
    sub_name = 'sub{}'.format(subtype)
    w_tmp = 'sbt_weights_net_{}.csv'
    weights = pd.read_csv(os.path.join(base_path, w_tmp.format(network)))

    # Name the ' ' column SUB_ID *before* stripping the headers: stripping first
    # would turn ' ' into '' and the rename would no longer match.
    weights = weights.rename(columns={' ': 'SUB_ID'})
    weights.columns = weights.columns.str.strip(' ')

    # Keep only the digits that follow 'sub_'. The pattern is a raw string so
    # that `\d` is not an invalid escape sequence, and expand=False makes
    # str.extract return a Series regardless of the pandas default.
    weights['SUB_ID'] = (
        weights['SUB_ID']
        .str.extract(r'(?<=sub_)(\d+)', expand=False)
        .astype(int)
    )
    merged = pd.merge(model, weights, on='SUB_ID', how='inner')

    # Relabel the controls and patients. Assign the result back instead of
    # calling replace(inplace=True) on `merged.DX_GROUP`: the chained in-place
    # form does not update `merged` when pandas Copy-on-Write is active, which
    # would leave both group masks below empty.
    merged['DX_GROUP'] = merged['DX_GROUP'].replace({1: 'patients', 2: 'controls'})

    # Test the direction of the effect: a = mean(patients) - mean(controls).
    patient_weights = merged.loc[merged['DX_GROUP'] == 'patients', sub_name].values
    control_weights = merged.loc[merged['DX_GROUP'] == 'controls', sub_name].values
    a = np.mean(patient_weights) - np.mean(control_weights)

    # NOTE(review): the printed label is the opposite of the sign of `a`, i.e. it
    # describes controls - patients. Presumably this follows the sign convention
    # of the reports being compared against -- confirm.
    if a > 0:
        print('net {} sbt {} negative ({:.4f})'.format(network, subtype, a))
    else:
        print('net {} sbt {} positive ({:.4f})'.format(network, subtype, a))

net 1 sbt 2 negative (0.0480)
net 1 sbt 5 positive (-0.0401)
net 2 sbt 5 negative (0.0603)
net 3 sbt 1 negative (0.0451)
net 3 sbt 2 positive (-0.0525)
net 6 sbt 1 positive (-0.0423)
net 6 sbt 5 negative (0.0518)




OK, the directions of the effects match the reports. Next, the significance of the findings.

In [35]:
# NOTE(review): out_path is assigned but not used anywhere in this cell.
out_path = '/data1/guilimin/abide/subtype/sc7/full_maybe/'

# For every (network, subtype) effect: fit an ordinary least squares model of
# the subtype weight on the diagnostic group and print the fit summary.
for s_id, eff in enumerate(sbt_effects):
    network, subtype = eff
    sub_name = 'sub{}'.format(subtype)
    w_tmp = 'sbt_weights_net_{}.csv'
    weights = pd.read_csv(os.path.join(base_path, w_tmp.format(network)))

    # Name the ' ' column SUB_ID *before* stripping the headers: stripping first
    # would turn ' ' into '' and the rename would no longer match.
    weights = weights.rename(columns={' ': 'SUB_ID'})
    weights.columns = weights.columns.str.strip(' ')

    # Keep only the digits that follow 'sub_'. The pattern is a raw string so
    # that `\d` is not an invalid escape sequence, and expand=False makes
    # str.extract return a Series regardless of the pandas default.
    weights['SUB_ID'] = (
        weights['SUB_ID']
        .str.extract(r'(?<=sub_)(\d+)', expand=False)
        .astype(int)
    )
    merged = pd.merge(model, weights, on='SUB_ID', how='inner')

    # Relabel the controls and patients. Assign the result back instead of
    # calling replace(inplace=True) on `merged.DX_GROUP`: the chained in-place
    # form does not update `merged` when pandas Copy-on-Write is active, and the
    # formula below would then be fitted on the numeric codes instead of the
    # string labels.
    merged['DX_GROUP'] = merged['DX_GROUP'].replace({1: 'patients', 2: 'controls'})

    # With string labels DX_GROUP enters the formula as a categorical term.
    smresults = smf.ols('{} ~ DX_GROUP'.format(sub_name), merged).fit()
    print('net {} sbt {}'.format(network, sub_name))
    print(smresults.summary2())



net 1 sbt sub2
                  Results: Ordinary least squares
Model:                OLS              Adj. R-squared:     0.020    
Dependent Variable:   sub2             AIC:                -460.0200
Date:                 2016-06-25 12:54 BIC:                -451.3358
No. Observations:     568              Log-Likelihood:     232.01   
Df Model:             1                F-statistic:        12.63    
Df Residuals:         566              Prob (F-statistic): 0.000412 
R-squared:            0.022            Scale:              0.025958 
--------------------------------------------------------------------
                      Coef.  Std.Err.    t    P>|t|   [0.025  0.975]
--------------------------------------------------------------------
Intercept            -0.0226   0.0096 -2.3660 0.0183 -0.0414 -0.0038
DX_GROUP[T.patients]  0.0480   0.0135  3.5536 0.0004  0.0215  0.0746
--------------------------------------------------------------------
Omnibus:               6.234         D

OK, same results here as well, apart from the sign: the `DX_GROUP[T.patients]` coefficient is the patients-minus-controls difference (controls are the reference level in this model), so the signs are inverted relative to the reported effects.

In [None]:
# NOTE(review): this cell has not been executed and currently repeats the OLS
# cell above line for line. The t-test and effect-size steps listed at the top
# of the notebook are not implemented yet -- TODO.
# NOTE(review): out_path is assigned but not used anywhere in this cell.
out_path = '/data1/guilimin/abide/subtype/sc7/full_maybe/'

# For every (network, subtype) effect: fit an ordinary least squares model of
# the subtype weight on the diagnostic group and print the fit summary.
for s_id, eff in enumerate(sbt_effects):
    network, subtype = eff
    sub_name = 'sub{}'.format(subtype)
    w_tmp = 'sbt_weights_net_{}.csv'
    weights = pd.read_csv(os.path.join(base_path, w_tmp.format(network)))

    # Name the ' ' column SUB_ID *before* stripping the headers: stripping first
    # would turn ' ' into '' and the rename would no longer match.
    weights = weights.rename(columns={' ': 'SUB_ID'})
    weights.columns = weights.columns.str.strip(' ')

    # Keep only the digits that follow 'sub_'. The pattern is a raw string so
    # that `\d` is not an invalid escape sequence, and expand=False makes
    # str.extract return a Series regardless of the pandas default.
    weights['SUB_ID'] = (
        weights['SUB_ID']
        .str.extract(r'(?<=sub_)(\d+)', expand=False)
        .astype(int)
    )
    merged = pd.merge(model, weights, on='SUB_ID', how='inner')

    # Relabel the controls and patients. Assign the result back instead of
    # calling replace(inplace=True) on `merged.DX_GROUP`: the chained in-place
    # form does not update `merged` when pandas Copy-on-Write is active, and the
    # formula below would then be fitted on the numeric codes instead of the
    # string labels.
    merged['DX_GROUP'] = merged['DX_GROUP'].replace({1: 'patients', 2: 'controls'})

    # With string labels DX_GROUP enters the formula as a categorical term.
    smresults = smf.ols('{} ~ DX_GROUP'.format(sub_name), merged).fit()
    print('net {} sbt {}'.format(network, sub_name))
    print(smresults.summary2())