In [154]:
from pathlib import Path 
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd
from collections import OrderedDict
import sys
import os
import seaborn as sns
import researchpy as rp
import statsmodels.formula.api as smf
import scipy.stats as stats
from sklearn.preprocessing import StandardScaler
from numpy.linalg import LinAlgError

#sys.path.append('/Users/alina/Desktop/MIT/code/ADHD/MTA/helper')
from helper import rr, prep, var_dict

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [97]:
#%reload_ext autoreload

In [98]:
if Path('/Volumes/Samsung_T5/MIT/mta').exists():
    data_root =     '/Volumes/Samsung_T5/MIT/mta'
    derived_data  = '/Volumes/Samsung_T5/MIT/mta/output/derived_data'
else: 
    data_root = '/Users/alina/Desktop/MIT/code/data'
    derived_data  = '/Users/alina/Desktop/MIT/code/data/output/derived_data'
#os.listdir(data_root)


In [99]:
baseline_var = ['src_subject_id', 'interview_date', 'interview_age', 'sex', 'site', 'days_baseline']
dtypes_baseline = { 'src_subject_id' : 'str',
                    'interview_date': 'str' , 
                    'interview_age' : 'int64' ,
                    'sex' : 'str', 
                    'site' : 'int64' ,
                    'days_baseline':  'int64',
                    'version_form': 'str'}
version_form = ['version_form']

qsts = ['snap', 'ssrs',  'pc', 'wechsler'] #masc to many missing data 

In [100]:
#outcome variablles 
snap_vars = ['snainatx','snahix' , 'snaoddx'] #inattention_mean, hyperactie mean'snahypax'
ssrs_vars = ['sspintx', 'ssptossx']# social skills mean, internalizing mean 
#masc_vars = ['masc_masctotalt']
pc_vars = ['pcrcpax', 'pcrcprx'] # power assertion, personal closeness
wechsler_vars = ['w1readb','w2math','w3spell' ]
outcomes_dict  = {'snap' : snap_vars, 'ssrs' : ssrs_vars,  'pc': pc_vars, 'wechsler': wechsler_vars}


In [101]:
interaction_predictors = ['days_baseline', 'site', 'trtname'] #time, site, treatment group

# mediator variables
comorb_mediators  = ['cdorodd' , 'pso', 'psoi', 'pag', 'pagi', 'pga', 'pgai' ,'psa'] #ODD/CD or anx excluding specific phobia 
services_mediators =  ['demo61'] #reciept of public assistance 
prev_med_mediators = ['hi_24'] #medication intake prior to study 

#moderator variables 
accept_moderator = ['d2dresp'] # initail acceptance of treatment 

In [102]:
# load files, drop rows if missing date, drop duplicates 

snap_file = 'snap01.txt'
ssrs_file = 'ssrs01.txt'
#masc_file = 'masc_p01.txt'
parent_child_file = 'pcrc01.txt'
wechsler_file = 'wiat_iiip201.txt'
treat_group = 'treatment_groups.csv'

snap_ = pd.read_csv(Path(data_root, snap_file), delimiter="\t", usecols=np.concatenate((baseline_var, snap_vars, version_form)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates()
ssrs_ = pd.read_csv(Path(data_root, ssrs_file), delimiter="\t", usecols=np.concatenate((baseline_var, ssrs_vars, version_form)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates()
#masc_ = pd.read_csv(Path(data_root, masc_file), delimiter="\t", usecols=np.concatenate((baseline_var, masc_vars, version_form)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates()
pc_ = pd.read_csv(Path(data_root, parent_child_file), delimiter="\t", usecols=np.concatenate((baseline_var, pc_vars, version_form)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates()
wechsler_ = pd.read_csv(Path(data_root, wechsler_file), delimiter="\t", usecols=np.concatenate((baseline_var, wechsler_vars)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates()
treat_group = pd.read_csv(Path(derived_data, treat_group))
orig_rr_14 = pd.read_csv(Path(derived_data, 'original_rr_results_14.csv'))

  snap_ = pd.read_csv(Path(data_root, snap_file), delimiter="\t", usecols=np.concatenate((baseline_var, snap_vars, version_form)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates()
  ssrs_ = pd.read_csv(Path(data_root, ssrs_file), delimiter="\t", usecols=np.concatenate((baseline_var, ssrs_vars, version_form)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates()
  pc_ = pd.read_csv(Path(data_root, parent_child_file), delimiter="\t", usecols=np.concatenate((baseline_var, pc_vars, version_form)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates()
  wechsler_ = pd.read_csv(Path(data_root, wechsler_file), delimiter="\t", usecols=np.concatenate((baseline_var, wechsler_vars)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates()


In [103]:
# merge with treatment group info convert colum data to appropriate dtypes 

snap = prep.set_baseline_dtypes(pd.merge(snap_, treat_group, how='inner', on = 'src_subject_id')).dropna()#.table with relevant snap vales, rater, and treatment group 
ssrs = prep.set_baseline_dtypes(pd.merge(ssrs_, treat_group, how='inner', on = 'src_subject_id').dropna())#.dropna() #table with relevant snap vales, rater, and treatment group 
#masc = prep.set_baseline_dtypes(pd.merge(masc_, treat_group, how='inner', on = 'src_subject_id')).dropna()#.dropna() #table with relevant snap vales, rater, and treatment group 
pc = prep.set_baseline_dtypes(pd.merge(pc_, treat_group, how='inner', on = 'src_subject_id').dropna())#.dropna() #table with relevant snap vales, rater, and treatment group 
wechsler = prep.set_baseline_dtypes(pd.merge(wechsler_, treat_group, how='inner', on = 'src_subject_id')).dropna()#.dropna() #table with relevant snap vales, rater, and treatment group 

print(snap_.shape, ssrs.shape, pc.shape, wechsler.shape)

Success
Success
Success
Success
(14544, 10) (10735, 10) (8627, 10) (4571, 10)


In [104]:
ssrs.loc[ssrs['version_form'].str.startswith('Teacher'), 'version_form'] = 'Teacher'
ssrs.loc[ssrs['version_form'].str.startswith('Parent'), 'version_form'] = 'Parent'

In [105]:
comorb_file = 'diagpsx01.txt' # contains odd/cd and anx comorbid diagnoses 
demog_file = 'demgr01.txt' # contains recieot of public assistance 
health_qst_file  = 'health01.txt' # contains previous medication 
init_sat_file = 'debrief01.txt' # contains rating of initial acceptance of treatment goup

comorb = prep.set_baseline_dtypes(pd.read_csv(Path(data_root, comorb_file), delimiter= '\t', usecols = np.concatenate((baseline_var, comorb_mediators)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates())
demog = prep.set_baseline_dtypes(pd.read_csv(Path(data_root, demog_file), delimiter= '\t', usecols  =np.concatenate((baseline_var, services_mediators)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates())
health_qst = prep.set_baseline_dtypes(pd.read_csv(Path(data_root, health_qst_file), delimiter= '\t', usecols=np.concatenate((baseline_var, prev_med_mediators)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates())
init_sat_file = prep.set_baseline_dtypes(pd.read_csv(Path(data_root, init_sat_file), delimiter= '\t', usecols=np.concatenate((baseline_var,  accept_moderator)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates())



Success
Success
Success
Success


  comorb = prep.set_baseline_dtypes(pd.read_csv(Path(data_root, comorb_file), delimiter= '\t', usecols = np.concatenate((baseline_var, comorb_mediators)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates())
  demog = prep.set_baseline_dtypes(pd.read_csv(Path(data_root, demog_file), delimiter= '\t', usecols  =np.concatenate((baseline_var, services_mediators)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates())
  health_qst = prep.set_baseline_dtypes(pd.read_csv(Path(data_root, health_qst_file), delimiter= '\t', usecols=np.concatenate((baseline_var, prev_med_mediators)), skiprows=[1] , parse_dates=['days_baseline']).dropna(subset='days_baseline').drop_duplicates())


In [106]:
timepoints = [46 , 168, 319, 500]

In [109]:
snap_split_dict = prep.split_data_from_timepoints(snap)
ssrs_split_dict = prep.split_data_from_timepoints(ssrs)
pc_split_dict = prep.split_data_from_timepoints(pc)
wechsler_split_dict = prep.split_data_from_timepoints(wechsler)
data_dict = {'snap' : snap_split_dict, 'ssrs': ssrs_split_dict, 'pc': pc_split_dict, 'wechsler': wechsler_split_dict}

No timepoints specifed. Using (46,168,319,500) by default.
No timepoints specifed. Using (46,168,319,500) by default.
No timepoints specifed. Using (46,168,319,500) by default.
No timepoints specifed. Using (46,168,319,500) by default.


In [166]:
gen_interact_formula = 'C(trtname, Treatment(reference = "A"))'# * days_baseline'#+  C(trtname, Treatment(reference = "A")) * C(site)' # reapeat with log days 
formulas =  [[' ~ '.join((var, gen_interact_formula)) for var in values] for values in outcomes_dict.values()]
formulas_dict = dict(zip(outcomes_dict.keys(), formulas))
formulas_dict

{'snap': ['snainatx ~ C(trtname, Treatment(reference = "A"))',
  'snahix ~ C(trtname, Treatment(reference = "A"))',
  'snaoddx ~ C(trtname, Treatment(reference = "A"))'],
 'ssrs': ['sspintx ~ C(trtname, Treatment(reference = "A"))',
  'ssptossx ~ C(trtname, Treatment(reference = "A"))'],
 'pc': ['pcrcpax ~ C(trtname, Treatment(reference = "A"))',
  'pcrcprx ~ C(trtname, Treatment(reference = "A"))'],
 'wechsler': ['w1readb ~ C(trtname, Treatment(reference = "A"))',
  'w2math ~ C(trtname, Treatment(reference = "A"))',
  'w3spell ~ C(trtname, Treatment(reference = "A"))']}

In [167]:

groups = 'src_subject_id'
alpha = 0.05
hyps_interactions = var_dict.get_hyps_interactions()

In [168]:
hyps_interactions

{'time_treat': 'C(trtname, Treatment(reference="A"))[T.M]:days_baseline = C(trtname, Treatment(reference="A"))[T.P]:days_baseline  = C(trtname, Treatment(reference="A"))[T.C]:days_baseline = 0',
 'site_treat': 'C(trtname, Treatment(reference="A"))[T.M]:C(site)[T.2] = C(trtname, Treatment(reference="A"))[T.P]:C(site)[T.2] = C(trtname, Treatment(reference="A"))[T.C]:C(site)[T.2] = C(trtname, Treatment(reference="A"))[T.M]:C(site)[T.3] = C(trtname, Treatment(reference="A"))[T.P]:C(site)[T.3] = C(trtname, Treatment(reference="A"))[T.C]:C(site)[T.3] = C(trtname, Treatment(reference="A"))[T.M]:C(site)[T.4] = C(trtname, Treatment(reference="A"))[T.P]:C(site)[T.4] = C(trtname, Treatment(reference="A"))[T.C]:C(site)[T.4] = C(trtname, Treatment(reference="A"))[T.M]:C(site)[T.5] = C(trtname, Treatment(reference="A"))[T.P]:C(site)[T.5] = C(trtname, Treatment(reference="A"))[T.C]:C(site)[T.5] = C(trtname, Treatment(reference="A"))[T.M]:C(site)[T.6] = C(trtname, Treatment(reference="A"))[T.P]:C(site

In [169]:
# result, summ, hsmm = rr.get_RR_stats(formula, data, groups=groups, alpha= alpha)
# rr.f_test_interactions(result, hyps_interactions, alpha)
data = data_dict['ssrs']['14'][data_dict['ssrs']['14']['version_form'] == 'Teacher']
formula = formulas_dict['ssrs'][0]
raters = ['Teacher', 'Parent']


In [170]:
data_dict['snap']['14']['snahix'][data_dict['snap']['14']['version_form'] == 'Parent'].dropna().shape


(2608,)

In [171]:
formula

'sspintx ~ C(trtname, Treatment(reference = "A"))'

In [172]:
smf.mixedlm(formula, data, groups = groups).fit()

<statsmodels.regression.mixed_linear_model.MixedLMResultsWrapper at 0x7fdb630a4340>

In [173]:
formulas_dict

{'snap': ['snainatx ~ C(trtname, Treatment(reference = "A"))',
  'snahix ~ C(trtname, Treatment(reference = "A"))',
  'snaoddx ~ C(trtname, Treatment(reference = "A"))'],
 'ssrs': ['sspintx ~ C(trtname, Treatment(reference = "A"))',
  'ssptossx ~ C(trtname, Treatment(reference = "A"))'],
 'pc': ['pcrcpax ~ C(trtname, Treatment(reference = "A"))',
  'pcrcprx ~ C(trtname, Treatment(reference = "A"))'],
 'wechsler': ['w1readb ~ C(trtname, Treatment(reference = "A"))',
  'w2math ~ C(trtname, Treatment(reference = "A"))',
  'w3spell ~ C(trtname, Treatment(reference = "A"))']}

In [174]:
result_snap = []
for rater in raters:
    for i in range(len(snap_vars)):
        try : 
            print(rater, snap_vars[i])
            result_snap.append(smf.mixedlm(formulas_dict['snap'][i], data_dict['snap']['14'][data_dict['snap']['14']['version_form'] == rater], groups = groups).fit() )
        except LinAlgError:
            print('Error', rater, snap_vars[i])
            print(data_dict['snap']['14'][data_dict['snap']['14']['version_form'] == rater].shape)
            continue
        

Teacher snainatx
Teacher snahix
Teacher snaoddx
Parent snainatx
Parent snahix
Parent snaoddx


In [176]:
result_snap = [[ smf.mixedlm(formulas_dict['snap'][i], data_dict['snap']['14'][data_dict['snap']['14']['version_form'] == rater], groups = groups).fit() for rater in raters] for i in range(len(snap_vars))]

In [177]:

result_ssrs = []
for rater in raters:
    for i in range(len(ssrs_vars)):

        try : 
            print(rater, ssrs_vars[i])
            result_ssrs.append(smf.mixedlm(formulas_dict['ssrs'][i], data_dict['ssrs']['14'][data_dict['ssrs']['14']['version_form'] == rater], groups = groups).fit() )
        except LinAlgError:
            print('Error', rater, ssrs_vars[i])
            continue
            

Teacher sspintx
Teacher ssptossx
Parent sspintx
Parent ssptossx


In [148]:
result_ssrs = [[ smf.mixedlm(formulas_dict['ssrs'][i], data_dict['ssrs']['14'][data_dict['ssrs']['14']['version_form'] == rater], groups = groups).fit() for rater in raters] for i in range(len(ssrs_vars))]

LinAlgError: Singular matrix

In [50]:
#test = data['pc']['14'][data['pc']['14']['version_form'] == 'Parent'].copy()

In [51]:
#scaler = StandardScaler()

In [52]:
#test['days_baseline'] = scaler.fit_transform(np.array(test['days_baseline']).reshape(-1, 1))

In [53]:
#test_pc_formula = 'pcrcpax ~ C(trtname, Treatment(reference = "L"))* days_baseline  '

In [54]:
#result = smf.mixedlm(formulas_dict['pc'][0], data['pc']['14'][data['pc']['14']['version_form'] == 'Parent'], groups = groups).fit() 

In [55]:
#result.summary()

In [56]:
# from statsmodels.stats.outliers_influence import variance_inflation_factor
# X = result.model.exog
# vif = pd.DataFrame()
# vif["VIF Factor"] = [variance_inflation_factor(X, i) for i in range(X.shape[1])]
# vif["features"] = result.model.exog_names
# print(vif)

In [57]:
#result_pc2 =  smf.mixedlm(formulas_dict['pc'][1], data['pc']['14'][data['pc']['14']['version_form'] == 'Parent'], groups = groups).fit()  

In [58]:
# result_wechlser = [ smf.mixedlm(formulas_dict['wechsler'][i], data['wechsler']['14'], groups = groups).fit()  for i in range(len(wechsler_vars))]

In [59]:
## worked for snap, ssrs, wechsler

In [60]:
hyps_interactions = var_dict.get_hyps_interactions()
hyps_interactions

{'time_treat': 'C(trtname, Treatment(reference="L"))[T.M]:days_baseline = C(trtname, Treatment(reference="L"))[T.P]:days_baseline  = C(trtname, Treatment(reference="L"))[T.C]:days_baseline = 0',
 'site_treat': 'C(trtname, Treatment(reference="L"))[T.M]:C(site)[T.2] = C(trtname, Treatment(reference="L"))[T.P]:C(site)[T.2] = C(trtname, Treatment(reference="L"))[T.C]:C(site)[T.2] = C(trtname, Treatment(reference="L"))[T.M]:C(site)[T.3] = C(trtname, Treatment(reference="L"))[T.P]:C(site)[T.3] = C(trtname, Treatment(reference="L"))[T.C]:C(site)[T.3] = C(trtname, Treatment(reference="L"))[T.M]:C(site)[T.4] = C(trtname, Treatment(reference="L"))[T.P]:C(site)[T.4] = C(trtname, Treatment(reference="L"))[T.C]:C(site)[T.4] = C(trtname, Treatment(reference="L"))[T.M]:C(site)[T.5] = C(trtname, Treatment(reference="L"))[T.P]:C(site)[T.5] = C(trtname, Treatment(reference="L"))[T.C]:C(site)[T.5] = C(trtname, Treatment(reference="L"))[T.M]:C(site)[T.6] = C(trtname, Treatment(reference="L"))[T.P]:C(site

In [61]:
for i, var in enumerate(snap_vars):
    for res in result_snap[i]:
        print(var, rr.f_test_interactions(res, hyps_interactions, alpha))

snainatx   Description     Significance    F-Value       P-Value
0  time_treat    *Significant*  16.150493  2.064850e-10
1  site_treat  Not Significant   1.464445  1.095676e-01
2        site  Not Significant   0.286445  9.207133e-01
snainatx   Description     Significance    F-Value       P-Value
0  time_treat    *Significant*  11.426767  1.839055e-07
1  site_treat  Not Significant   0.785404  6.953991e-01
2        site  Not Significant   0.472572  7.969670e-01
snahix   Description     Significance    F-Value       P-Value
0  time_treat    *Significant*  20.265596  5.369400e-13
1  site_treat  Not Significant   1.416968  1.298028e-01
2        site  Not Significant   1.069276  3.753282e-01
snahix   Description     Significance    F-Value       P-Value
0  time_treat    *Significant*  13.696682  6.844294e-09
1  site_treat    *Significant*   1.796873  2.946871e-02
2        site  Not Significant   0.178910  9.705996e-01
snaoddx   Description     Significance   F-Value   P-Value
0  time_treat

In [62]:
for i, var in enumerate(ssrs_vars):
    for res in result_ssrs[i]:
        print(var, rr.f_test_interactions(res, hyps_interactions, alpha))

sspintx   Description     Significance   F-Value   P-Value
0  time_treat  Not Significant  0.100833  0.959549
1  site_treat  Not Significant  0.456635  0.961461
2        site  Not Significant  0.368243  0.870591
sspintx   Description     Significance   F-Value   P-Value
0  time_treat  Not Significant  1.776329  0.149427
1  site_treat  Not Significant  0.771165  0.711268
2        site  Not Significant  0.631833  0.675473
ssptossx   Description     Significance   F-Value   P-Value
0  time_treat    *Significant*  5.101886  0.001608
1  site_treat  Not Significant  0.496734  0.943616
2        site  Not Significant  0.485249  0.787516
ssptossx   Description     Significance   F-Value   P-Value
0  time_treat  Not Significant  0.861553  0.460281
1  site_treat  Not Significant  0.935707  0.522976
2        site  Not Significant  0.970836  0.434083


In [63]:
alpha

0.05

In [64]:
f_result_snap  =[]
for i, var in enumerate(snap_vars):
    for res in result_snap[i]:
        f_result_snap.append( rr.f_test_interactions(res, hyps_interactions, alpha))

In [65]:
for f in f_result_snap:
    f['F-Value'] = f['F-Value'].round(2)
    f['P-Value'] = f['P-Value'].round(2)

In [66]:
f_result_snap

[  Description     Significance  F-Value  P-Value
 0  time_treat    *Significant*    16.15     0.00
 1  site_treat  Not Significant     1.46     0.11
 2        site  Not Significant     0.29     0.92,
   Description     Significance  F-Value  P-Value
 0  time_treat    *Significant*    11.43      0.0
 1  site_treat  Not Significant     0.79      0.7
 2        site  Not Significant     0.47      0.8,
   Description     Significance  F-Value  P-Value
 0  time_treat    *Significant*    20.27     0.00
 1  site_treat  Not Significant     1.42     0.13
 2        site  Not Significant     1.07     0.38,
   Description     Significance  F-Value  P-Value
 0  time_treat    *Significant*    13.70     0.00
 1  site_treat    *Significant*     1.80     0.03
 2        site  Not Significant     0.18     0.97,
   Description     Significance  F-Value  P-Value
 0  time_treat    *Significant*     7.17     0.00
 1  site_treat  Not Significant     1.17     0.29
 2        site  Not Significant     0.72     0

In [67]:
f_result_ssrs  =[]
for i, var in enumerate(ssrs_vars):
    for res in result_ssrs[i]:
        f_result_ssrs.append( rr.f_test_interactions(res, hyps_interactions, alpha))

In [68]:
for f in f_result_ssrs:
    f['F-Value'] = f['F-Value'].round(2)
    f['P-Value'] = f['P-Value'].round(2)

In [69]:

f_results = np.concatenate((f_result_snap, f_result_ssrs))

In [70]:
orig_rr_14 = pd.read_csv(Path(derived_data,  'original_rr_results_14.csv'))

In [71]:
measures = ['Inattention', 'Hyperactive-impulsive',  'ODD = aggression',  'SSRS Internalizing', 'SSRS social skills']

In [72]:
outcomes_available = np.concatenate((outcomes_dict['snap'], outcomes_dict['ssrs']))
outcomes_available_dict = dict(zip(outcomes_available, measures))
outcomes_available_dict

{'snainatx': 'Inattention',
 'snahix': 'Hyperactive-impulsive',
 'snaoddx': 'ODD = aggression',
 'sspintx': 'SSRS Internalizing',
 'ssptossx': 'SSRS social skills'}

In [73]:
f_results[0][:,2]

array([16.15, 1.46, 0.29], dtype=object)

In [74]:
orig_rr_14['replicated F_value'] = pd.NA  
orig_rr_14['replicated P_value'] = pd.NA  

In [75]:
# j = 0  

# for i, (var, measure) in enumerate(zip(outcomes_available_dict.keys(), outcomes_available_dict.values())):
#     print(i, var, measure)

#     orig_rr_14.loc[orig_rr_14['Measure'] == measure, 'variable'] = var
#     first_index = orig_rr_14.index[orig_rr_14['Measure'] == measure].tolist()[0]
#     orig_rr_14.iloc[first_index:first_index+3, orig_rr_14.columns.get_loc('replicated F_value')] = f_results[i][:, 2]
#     orig_rr_14.iloc[first_index:first_index+3, orig_rr_14.columns.get_loc('replicated P_value')] = f_results[i][:, 3]

#     j = first_index + 3
#     print(j)

In [76]:
f_results

array([[['time_treat', '*Significant*', 16.15, 0.0],
        ['site_treat', 'Not Significant', 1.46, 0.11],
        ['site', 'Not Significant', 0.29, 0.92]],

       [['time_treat', '*Significant*', 11.43, 0.0],
        ['site_treat', 'Not Significant', 0.79, 0.7],
        ['site', 'Not Significant', 0.47, 0.8]],

       [['time_treat', '*Significant*', 20.27, 0.0],
        ['site_treat', 'Not Significant', 1.42, 0.13],
        ['site', 'Not Significant', 1.07, 0.38]],

       [['time_treat', '*Significant*', 13.7, 0.0],
        ['site_treat', '*Significant*', 1.8, 0.03],
        ['site', 'Not Significant', 0.18, 0.97]],

       [['time_treat', '*Significant*', 7.17, 0.0],
        ['site_treat', 'Not Significant', 1.17, 0.29],
        ['site', 'Not Significant', 0.72, 0.61]],

       [['time_treat', '*Significant*', 3.42, 0.02],
        ['site_treat', '*Significant*', 1.75, 0.04],
        ['site', 'Not Significant', 0.4, 0.85]],

       [['time_treat', 'Not Significant', 0.1, 0.96],
  

In [77]:
j = 0  

for i, (var, measure) in enumerate(zip(outcomes_available_dict.keys(), outcomes_available_dict.values())):
    print(i, var, measure)

    # Find the first index of the measure in the DataFrame
    orig_rr_14.loc[orig_rr_14['Measure'] == measure, 'variable'] = var
    first_index = orig_rr_14.index[orig_rr_14['Measure'] == measure].tolist()[0]
    
    # Populate first block of rows (first_index : first_index + 3)
    orig_rr_14.iloc[first_index:first_index+3, orig_rr_14.columns.get_loc('replicated F_value')] = f_results[i][:, 2]
    orig_rr_14.iloc[first_index:first_index+3, orig_rr_14.columns.get_loc('replicated P_value')] = f_results[i][:, 3]
    
    # Populate second block of rows (first_index + 3 : first_index + 6) with next element in f_results
    if i + 1 < len(f_results):  # Ensure that i+1 exists
        orig_rr_14.iloc[first_index+3:first_index+6, orig_rr_14.columns.get_loc('replicated F_value')] = f_results[i+1][:, 2]
        orig_rr_14.iloc[first_index+3:first_index+6, orig_rr_14.columns.get_loc('replicated P_value')] = f_results[i+1][:, 3]
    
    # Update j and print it for tracking
    j = first_index + 6
    print(j)

0 snainatx Inattention
6
1 snahix Hyperactive-impulsive
12
2 snaoddx ODD = aggression
21
3 sspintx SSRS Internalizing
30
4 ssptossx SSRS social skills
39


In [78]:
orig_rr_14

Unnamed: 0.1,Unnamed: 0,Outcome Domain,Measure,Rater,Test,F_value,P_value,replicated F_value,replicated P_value,variable
0,0,ADHD symptoms,Inattention,Teacher,Treatment x time,10.6,<.001,16.15,0.0,snainatx
1,1,ADHD symptoms,Inattention,Teacher,Treatment x site,0.9,.56,1.46,0.11,snainatx
2,2,ADHD symptoms,Inattention,Teacher,Site,2.7,.02,0.29,0.92,snainatx
3,3,ADHD symptoms,Inattention,Parent,Treatment x time,21.5,<.001,11.43,0.0,snainatx
4,4,ADHD symptoms,Inattention,Parent,Treatment x site,0.6,.88,0.79,0.7,snainatx
5,5,ADHD symptoms,Inattention,Parent,Site,3.0,.02,0.47,0.8,snainatx
6,6,ADHD symptoms,Hyperactive-impulsive,Teacher,Treatment x time,10.0,<.001,11.43,0.0,snahix
7,7,ADHD symptoms,Hyperactive-impulsive,Teacher,Treatment x site,1.3,.49,0.79,0.7,snahix
8,8,ADHD symptoms,Hyperactive-impulsive,Teacher,Site,3.0,.02,0.47,0.8,snahix
9,9,ADHD symptoms,Hyperactive-impulsive,Parent,Treatment x time,21.5,<.001,20.27,0.0,snahix


In [79]:
#orig_rr_14.to_csv(Path(derived_data, 'original_and_new_rr_14.csv'))