In [65]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import pingouin as pg
from os.path import join
import seaborn as sns
from scipy.stats import ttest_ind, spearmanr, pearsonr, ttest_rel,  wilcoxon, mannwhitneyu, ttest_ind_from_stats, iqr

In [66]:
def liji_sidak_mc(data, alpha):
    import math
    import numpy as np

    mc_corrmat = data.corr()
    mc_corrmat.fillna(0, inplace=True)
    eigvals, eigvecs = np.linalg.eig(mc_corrmat)

    M_eff = 0
    for eigval in eigvals:
        if abs(eigval) >= 0:
            if abs(eigval) >= 1:
                M_eff += 1
            else:
                M_eff += abs(eigval) - math.floor(abs(eigval))
        else:
            M_eff += 0
    print('Number of effective comparisons: {0}'.format(M_eff))

    #and now applying M_eff to the Sidak procedure
    sidak_p = 1 - (1 - alpha)**(1/M_eff)
    if sidak_p < 0.00001:
        print('Critical value of {:.3f}'.format(alpha),'becomes {:2e} after corrections'.format(sidak_p))
    else:
        print('Critical value of {:.3f}'.format(alpha),'becomes {:.6f} after corrections'.format(sidak_p))
    return sidak_p, M_eff

In [67]:
subjects = ['101', '102', '103', '104', '106', '107', '108', '110', '212', '213',
            '214', '215', '216', '217', '218', '219', '320', '321', '322', '323',
            '324', '325', '327', '328', '329', '330', '331', '332', '333', '334',
            '335', '336', '337', '338', '339', '340', '341', '342', '343', '344',
            '345', '346', '347', '348', '349', '350', '451', '452', '453', '455',
            '456', '457', '458', '459', '460', '462', '463', '464', '465', '467',
            '468', '469', '470', '502', '503', '571', '572', '573', '574', '575',
            '577', '578', '579', '580', '581', '582', '584', '585', '586', '587',
            '588', '589', '590', '591', '592', '593', '594', '595', '596', '597',
            '598', '604', '605', '606', '607', '608', '609', '610', '611', '612',
            '613', '614', '615', '616', '617', '618', '619', '620', '621', '622',
            '623', '624', '625', '626', '627', '628', '629', '630', '631', '633',
            '634']
#subjects = ['101', '102']

sink_dir = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/data/output'
data_dir = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/data'
fig_dir = '/Users/kbottenh/Dropbox/Projects/physics-retrieval/figures/'

masks = ['shen2015', 'craddock2012']

tasks = {'retr': [{'conditions': ['Physics', 'General']},
                  {'runs': [0,1]}], 
         'fci': [{'conditions': ['Physics', 'NonPhysics']},
                  {'runs': [0,1,2]}]}

sessions = [0,1]
sesh = ['pre', 'post']
conds = ['high-level', 'lower-level']

pre_iq = ['VCI1', 'PRI1', 'WMI1', 'PSI1', 'FSIQ1']
post_iq = ['VCI2', 'PRI2', 'WMI2', 'PSI2', 'FSIQ2']
iq_vars = pre_iq + post_iq
iqs = ['VCI', 'PRI', 'WMI', 'PSI', 'FSIQ']

In [68]:
#load in all-data df
all_df = pd.read_csv(join(data_dir, 'rescored', 'physics_learning-nonbrain.csv'), index_col=0)
all_df['Subject'] = all_df.index

## Are the WAIS score changes in this sample different from previously reported retest changes?

Here, we compare with 3-month retest changes in the WAIS-IV, as reported by Estevis et al. 20??

In [69]:
mc_alpha,_ = liji_sidak_mc(all_df[['PRI1', 'WMI1',
                                   'PSI1', 'FSIQ1',
                                   'VCI2', 'PRI2',
                                   'WMI2', 'PSI2', 'FSIQ2']], 
                           0.05)

Number of effective comparisons: 5.602006828731573
Critical value of 0.050 becomes 0.009114 after corrections


In [70]:
deltas = {}
for iq in iqs:
    deltas['{0}mean'.format(iq)] = np.mean(all_df['delta{0}'.format(iq)])
    deltas['{0}sdev'.format(iq)] = np.std(all_df['delta{0}'.format(iq)])

In [71]:
#3-month changes in WAIS-III
#basso_deltas = {}
#basso_deltas['VCImean'] = 115.8 - 111.5
#basso_deltas['VCIsdev'] = np.sqrt(11.9**2 + 12.3**2 - 2*11.9*12.3*.85)

#basso_deltas['PSImean'] = 116.4 - 109.3
#basso_deltas['PSIsdev'] = np.sqrt(13.0**2 + 14.5**2 - 2*13.0*14.5*.80)

#basso_deltas['WMImean'] = 108.6 - 106.9
#basso_deltas['WMIsdev'] = np.sqrt(12.4**2 + 13.1**2 - 2*12.4*13.1*.84)

#basso_deltas['PRImean'] = 114.4 - 106.1
#basso_deltas['PRIsdev'] = np.sqrt(14.1**2 + 14.1**2 - 2*14.1*14.1*.86)

#basso_deltas['FSIQmean'] = 115.04 - 109.4
#basso_deltas['FSIQsdev'] = np.sqrt(11.6**2 + 12.1**2 - 2*11.6*12.1*.90)

In [72]:
#3-month changes in WAIS-IV
estevis_deltas = {}
estevis_deltas['VCImean'] = 118.4 - 114.2
estevis_deltas['VCIsdev'] = np.sqrt(12.8**2 + 14.4**2 - 2*12.8*14.4*.85)

estevis_deltas['PSImean'] = 123.4 - 112.9
estevis_deltas['PSIsdev'] = np.sqrt(10.7**2 + 13.6**2 - 2*10.7*13.6*.72)

estevis_deltas['WMImean'] = 108.9 - 105.7
estevis_deltas['WMIsdev'] = np.sqrt(11.4**2 + 12.2**2 - 2*11.4*12.2*.92)

estevis_deltas['PRImean'] = 111. - 107.4
estevis_deltas['PRIsdev'] = np.sqrt(10.**2 + 9.1**2 - 2*9.1*10.*.83)

estevis_deltas['FSIQmean'] = 119.1 - 112.4
estevis_deltas['FSIQsdev'] = np.sqrt(11.5**2 + 8.4**2 - 2*11.5*8.4*.91)

In [73]:
estevis_deltas

{'VCImean': 4.200000000000003,
 'VCIsdev': 7.606313167363016,
 'PSImean': 10.5,
 'PSIsdev': 9.481624333414606,
 'WMImean': 3.200000000000003,
 'WMIsdev': 4.784642097377814,
 'PRImean': 3.5999999999999943,
 'PRIsdev': 5.634713834792322,
 'FSIQmean': 6.699999999999989,
 'FSIQsdev': 5.195959969052878}

In [9]:
#are WAIS score changes in our sample different from previous research?
diff_from_basso = {}
diff_from_estevis = {}
for iq in iqs:
    #diff_from_basso[iq] = ttest_ind_from_stats(deltas['{0}mean'.format(iq)], 
    #                                           deltas['{0}sdev'.format(iq)], len(subjects),
    #                                           basso_deltas['{0}mean'.format(iq)], 
    #                                           basso_deltas['{0}sdev'.format(iq)], 51,
    #                                           equal_var=False)
    diff_from_estevis[iq] = ttest_ind_from_stats(deltas['{0}mean'.format(iq)], 
                                               deltas['{0}sdev'.format(iq)], len(subjects),
                                               estevis_deltas['{0}mean'.format(iq)], 
                                               estevis_deltas['{0}sdev'.format(iq)], 51,
                                               equal_var=False)
    if diff_from_estevis[iq][1] < mc_alpha:
        print(iq, 'significantly different from Estevis, p =', diff_from_estevis[iq][1])
    else:
        print(iq, 'not significantly different from Estevis')

VCI significantly different from Estevis, p = 0.1526131266748517
PRI significantly different from Estevis, p = 0.06569310755210452
WMI significantly different from Estevis, p = 0.29353320709443576
PSI significantly different from Estevis, p = 0.48449121827512365
FSIQ significantly different from Estevis, p = 0.9992618463311265


In [11]:
diff_from_estevis

{'VCI': Ttest_indResult(statistic=-1.4398010846130203, pvalue=0.1526131266748517),
 'PRI': Ttest_indResult(statistic=1.8535152635573084, pvalue=0.06569310755210452),
 'WMI': Ttest_indResult(statistic=-1.053762462613446, pvalue=0.29353320709443576),
 'PSI': Ttest_indResult(statistic=0.7007123926118671, pvalue=0.48449121827512365),
 'FSIQ': Ttest_indResult(statistic=0.0009268368479731841, pvalue=0.9992618463311265)}

## Are there significant changes in WAIS score from pre- to post-instruction?
Across the whole sample? Within classes? Within each sex?

In [16]:
mc_alpha,_ = liji_sidak_mc(all_df[['deltaVCI', 'deltaPRI',
                                   'deltaWMI', 'deltaPSI', 'deltaFSIQ']], 
                           0.05)

Number of effective comparisons: 3.6840250415842424
Critical value of 0.050 becomes 0.013827 after corrections


In [17]:
coursetype = ['All', 'Modeling', 'Lecture']

delta_tests = pd.DataFrame(index=pd.MultiIndex.from_product((iqs, coursetype)), 
                           columns=['delta', 'sdev(delta)', 't(delta)', 'p(t)', 'w+', 'p(w+)'])
for i in iqs:
    delta_iq = ttest_rel(all_df['{0}2'.format(i)], all_df['{0}1'.format(i)], nan_policy='omit')
    delta_tests.at[(i, 'All'), 'delta'] = np.average(all_df['{0}2'.format(i)].dropna())-np.average(all_df['{0}1'.format(i)].dropna())
    delta_tests.at[(i, 'All'), 't(delta)'] = delta_iq.statistic
    delta_tests.at[(i, 'All'), 'p(t)'] = delta_iq.pvalue

    delta_tests.at[(i, 'All'), 'sdev(delta)'] = np.std(all_df['delta{0}'.format(i)])
    delta_tests.at[(i, 'Modeling'), 'sdev(delta)'] = np.std(all_df[all_df['Class.Type'] == 'Mod']['delta{0}'.format(i)])
    delta_tests.at[(i, 'Lecture'), 'sdev(delta)'] = np.std(all_df[all_df['Class.Type'] == 'Lec']['delta{0}'.format(i)])

    
    delta_iq = wilcoxon(all_df['{0}2'.format(i)], all_df['{0}1'.format(i)])
    delta_tests.at[(i, 'All'), 'w+'] = delta_iq.statistic
    delta_tests.at[(i, 'All'), 'p(w+)'] = delta_iq.pvalue
    
        
delta_tests.to_csv(join(sink_dir, 'ttests', 'change-in-iq.csv'))

In [18]:
delta_tests

Unnamed: 0,Unnamed: 1,delta,sdev(delta),t(delta),p(t),w+,p(w+)
VCI,All,2.90331,9.48075,2.4527,0.015765,1564.5,1.71351e-07
VCI,Modeling,2.90267,8.12726,2.43923,0.0179128,411.0,0.000205396
VCI,Lecture,3.04087,10.737,1.04885,0.299192,370.5,0.000289663
PRI,All,6.90004,10.0171,6.02374,2.44702e-08,978.5,8.16995e-13
PRI,Modeling,6.92736,9.33906,4.90644,8.63444e-06,249.0,3.29841e-07
PRI,Lecture,6.66117,10.7234,3.49096,0.0010153,245.0,8.0447e-07
WMI,All,1.84063,9.53062,2.23084,0.0277583,1718.5,2.39679e-06
WMI,Modeling,1.35984,9.55272,1.34685,0.183455,415.5,0.00482869
WMI,Lecture,2.27045,9.55335,1.69803,0.0957184,460.5,0.000294521
PSI,All,12.2099,17.9223,6.9745,2.48637e-10,783.0,8.84455e-14


## Are those changes different based on class or sex? 
### It's ANOVA time!
Repeated measures ANOVAs for class & sex will be done in R, as there are currently no Python packages that will do a 3-way repeated-measures ANOVA. Setting up data in long form below.

In [61]:
iq = iqs[0]

iq_long = all_df.dropna(how='any').melt(id_vars=['Sex', 'Class.Type', 'Subject'],
                      value_vars=iq_vars,
                      value_name='IQ', var_name='Measure')
for subject in iq_long.index:
    iq_long.loc[subject, 'Time'] = iq_long.at[subject, 'Measure'][-1]
    iq_long.at[subject, 'Measure'] = iq_long.loc[subject, 'Measure'][:-1]

In [64]:
iq_long.to_csv(join(data_dir, 'rescored/all_iq_sex_class-long.csv'))