In [278]:
from pathlib import Path 
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd
from collections import OrderedDict
import sys
import os
import seaborn as sns
import researchpy as rp
import statsmodels.api as sm
import scipy.stats as stats

In [279]:
data_root = '/Volumes/Samsung_T5/MIT/mta'
#os.listdir(data_root)


#### Assessments 
 - ADHD symptoms: assessed by the inattentive and hyperactive subscale of the SNAP (parent and teacher rated
- ODD symptoms: assessed by the oppositional/aggressive subscale o the SNAP (parent and teacher rated) 
- 	Social skills: assessed by the Social Skill Rating System (SSRS), parent and teacher rated. 
- 	Internalizing: assessed with the internalizing subscale of the SSRS (parent and teacher rated) and the child-reported Multidimensional Anxiety scale for children (MASC) 
- 	Parent-child relations: assessed with a parent-child relationship questionnaire.
- 	Academic achievement: assessed with the reading, mathematics, and spelling subscale of the Wechsler Individual Achievement test.


In [280]:
os.listdir(data_root)

['Collection_Documents',
 'debrief1401.txt.partial',
 'loeber01.txt',
 'debrief1401.txt',
 'snap01.txt',
 'debrief01.txt',
 'demgr01.txt',
 'medse01.txt',
 'README.pdf',
 'scapijus01.txt',
 'side_effects01.txt',
 'scid01.txt',
 '._README.pdf',
 'cbcl01.txt',
 'casr01.txt',
 '._casr01.txt',
 'codd01.txt',
 '._codd01.txt',
 'codd01.txt.partial',
 '._codd01.txt.partial',
 'cohi01.txt',
 '._cohi01.txt',
 'cis_c01.txt',
 '._cis_c01.txt',
 'cohi01.txt.partial',
 '._cohi01.txt.partial',
 'cis_c01.txt.partial',
 '._cis_c01.txt.partial',
 'cope01.txt',
 '._cope01.txt',
 'cptc01.txt',
 '._cptc01.txt',
 'cptc01.txt.partial',
 '._cptc01.txt.partial',
 'ctrs_199702.txt',
 '._ctrs_199702.txt',
 'diagpsx_p201.txt',
 '._diagpsx_p201.txt',
 'diagpsx_p301.txt',
 '._diagpsx_p301.txt',
 'sipirs01.txt',
 'agg01.txt',
 'diagpsx_p301.txt.partial',
 '._diagpsx_p301.txt.partial',
 'cprs_200202.txt',
 '._cprs_200202.txt',
 'cisp_p01.txt',
 '._cisp_p01.txt',
 'diagpsx_p401.txt',
 '._diagpsx_p401.txt',
 'diagpsx_

In [281]:
snap_file = 'snap01.txt'
ssrs_file = 'ssrs01.txt'
masc_file = 'masc_p01.txt'
parent_child_file = 'pcrc01.txt'
wechsler_file = 'wiat_iiip201.txt'

snap = pd.read_csv(Path(data_root, snap_file), delimiter="\t")
ssrs = pd.read_csv(Path(data_root, ssrs_file), delimiter="\t")
masc = pd.read_csv(Path(data_root, masc_file), delimiter="\t")
pc = pd.read_csv(Path(data_root, parent_child_file), delimiter="\t")
wechsler = pd.read_csv(Path(data_root, wechsler_file), delimiter="\t")



  snap = pd.read_csv(Path(data_root, snap_file), delimiter="\t")
  ssrs = pd.read_csv(Path(data_root, ssrs_file), delimiter="\t")
  masc = pd.read_csv(Path(data_root, masc_file), delimiter="\t")
  pc = pd.read_csv(Path(data_root, parent_child_file), delimiter="\t")
  wechsler = pd.read_csv(Path(data_root, wechsler_file), delimiter="\t")


In [282]:
snap.keys()

Index(['collection_id', 'snap01_id', 'dataset_id', 'subjectkey',
       'src_subject_id', 'interview_date', 'interview_age', 'sex',
       'respondent', 'respondent_other_specify',
       ...
       'deportment_other', 'deportment_rules', 'snap_iv_pac_score_io',
       'snap_iv_pac_score_ad', 'cgi_tot', 'snap_iv_pac_score_academic',
       'academic_orienting', 'academic_maintaining', 'academic_directing',
       'collection_title'],
      dtype='object', length=160)

In [283]:
print(snap.iloc[0], ssrs.iloc[0], masc.iloc[0], pc.iloc[0], wechsler.iloc[0])

collection_id                                                     collection_id
snap01_id                                                             snap01_id
dataset_id                                                           dataset_id
subjectkey                    The NDAR Global Unique Identifier (GUID) for r...
src_subject_id                       Subject ID how it's defined in lab/project
                                                    ...                        
snap_iv_pac_score_academic                                    ACADEMIC subscore
academic_orienting                                           Orienting subscore
academic_maintaining                                       Maintaining subscore
academic_directing                                           Directing subscore
collection_title                                               collection_title
Name: 0, Length: 160, dtype: object collection_id                                           collection_id
ssrs01_id     

In [284]:
snap = snap.drop(0)
ssrs = ssrs.drop(0)
masc = masc.drop(0)
pc = pc.drop(0)
wechsler = wechsler.drop(0)

In [285]:
print(snap.iloc[0], ssrs.iloc[0], masc.iloc[0], pc.iloc[0], wechsler.iloc[0])

collection_id                                                             2155
snap01_id                                                                19789
dataset_id                                                               29641
subjectkey                                                    NDAR_INVXP413AM1
src_subject_id                                                           P1001
                                                    ...                       
snap_iv_pac_score_academic                                                 NaN
academic_orienting                                                         NaN
academic_maintaining                                                       NaN
academic_directing                                                         NaN
collection_title              Multimodal Treatment Study of Children With ADHD
Name: 1, Length: 160, dtype: object collection_id                                                   2155
ssrs01_id                 

## SSRS 


Relationship : 1 = Biological mom; 2 = Biological dad; 3 = Grandparent; 4 = Special education (sped) teacher; 5 = General education teacher; 6 = Occupational therapist; 7 = Speech and language therapist; 8 = Behavioral therapist; 9 = Paraprofessional; 10 = Aide; 11 = Principal; 12 = Administrator; 14 = Content teacher; 15 = Parent center director; 16 = Self; 17 = Adoptive mother; 18 = Adoptive father; 19 = Foster mother; 20 = Foster father; 21 = Grandmother; 22 = Grandfather; 23 = Step-mother; 24 = Step-father; 25 = Aunt; 26 = Uncle; 28 = Both parents; 31 = Grandmother from mother side; 32 = Grandfather from mother side; 33 = Grandmother from father side; 34 = Grandfather from father side; 36 = Brother; 37 = Sister; 38 = Cousin; 39 = Female caregiver; 40 = Male caregiver; 41 = Female child; 42 = Male child; 43 = Spouse/Mate; 44 = Friend; 45 = Parent; 46 = Significant other; 47 = Sibling; 48 = Son/Daughter; 49 = Son-in-law/Daughter-in law; 50 = Other Relative; 51 = Paid caregiver; 52 = Friends; 53 = Roommate; 54 = Supervisor; 55 = Mother's boyfriend; 56 = Other parental figure; 57 = Summary; 58 = Counselor; 59 = Other female relative; 60 = Other male relative; 61 = Non-relative ; 62 = Maternal Aunt; 63 = Maternal Uncle; 64 = Maternal Cousin; 65 = Paternal Aunt; 66 = Paternal Uncle; 67 = Paternal Cousin; 68 = Biological/Adoptive Mother and Grandmother; 69 = Biological/Adoptive Mother and Stepmother and Grandmother; 70 = Biological/Adoptive Mother and Grandmother and Foster Father; 71 = Biological/Adoptive Mother and Stepmother and Foster Mother; 72 = Biological/Adoptive Mother and Foster Mother; 73 = Biological/Adoptive Mother and Biological/Adoptive Father; 74 = Biological/Adoptive Mother and Stepmother and Biological/Adoptive Father; 75 = Biological/Adoptive Mother and Other; 76 = Biological/Adoptive Mother and Stepmother and Stepfather; 77 = Biological/Adoptive Mother and Stepfather; 78 = Biological/Adoptive Mother and Grandfather; 79 = Biological/Adoptive Mother and Stepmother and Foster Father; 80 = Biological/Adoptive Mother and Stepmother; 81 = Guardian, female; 82 = Other female; 83 = Guardian, male; 84 = Other male; 85 = Other/Grandparent/Nanny; 86 = Mother, Father, Guardian; 87 = Daughter, son, grandchild; 88 = Professional (e.g., social worker, nurse, therapist, psychiatrist, or group home staff); -999 = Missing; 89 = Biological parent; 90 = Other; 91 = Stepparent; 92 = Adoptive parent; 93 = Foster parent; 94 = Co-worker; 95 = Independent Evaluator

In [286]:
relationship = {
    'Biological mom': 1,
    'Biological dad': 2,
    'Grandparent': 3,
    'Special education (sped) teacher': 4,
    'General education teacher': 5,
    'Occupational therapist': 6,
    'Speech and language therapist': 7,
    'Behavioral therapist': 8,
    'Paraprofessional': 9,
    'Aide': 10,
    'Principal': 11,
    'Administrator': 12,
    'Content teacher': 14,
    'Parent center director': 15,
    'Self': 16,
    'Adoptive mother': 17,
    'Adoptive father': 18,
    'Foster mother': 19,
    'Foster father': 20,
    'Grandmother': 21,
    'Grandfather': 22,
    'Step-mother': 23,
    'Step-father': 24,
    'Aunt': 25,
    'Uncle': 26,
    'Both parents': 28,
    'Grandmother from mother side': 31,
    'Grandfather from mother side': 32,
    'Grandmother from father side': 33,
    'Grandfather from father side': 34,
    'Brother': 36,
    'Sister': 37,
    'Cousin': 38,
    'Female caregiver': 39,
    'Male caregiver': 40,
    'Female child': 41,
    'Male child': 42,
    'Spouse/Mate': 43,
    'Friend': 44,
    'Parent': 45,
    'Significant other': 46,
    'Sibling': 47,
    'Son/Daughter': 48,
    'Son-in-law/Daughter-in-law': 49,
    'Other Relative': 50,
    'Paid caregiver': 51,
    'Friends': 52,
    'Roommate': 53,
    'Supervisor': 54,
    "Mother's boyfriend": 55,
    'Other parental figure': 56,
    'Summary': 57,
    'Counselor': 58,
    'Other female relative': 59,
    'Other male relative': 60,
    'Non-relative': 61,
    'Maternal Aunt': 62,
    'Maternal Uncle': 63,
    'Maternal Cousin': 64,
    'Paternal Aunt': 65,
    'Paternal Uncle': 66,
    'Paternal Cousin': 67,
    'Biological/Adoptive Mother and Grandmother': 68,
    'Biological/Adoptive Mother and Stepmother and Grandmother': 69,
    'Biological/Adoptive Mother and Grandmother and Foster Father': 70,
    'Biological/Adoptive Mother and Stepmother and Foster Mother': 71,
    'Biological/Adoptive Mother and Foster Mother': 72,
    'Biological/Adoptive Mother and Biological/Adoptive Father': 73,
    'Biological/Adoptive Mother and Stepmother and Biological/Adoptive Father': 74,
    'Biological/Adoptive Mother and Other': 75,
    'Biological/Adoptive Mother and Stepmother and Stepfather': 76,
    'Biological/Adoptive Mother and Stepfather': 77,
    'Biological/Adoptive Mother and Grandfather': 78,
    'Biological/Adoptive Mother and Stepmother and Foster Father': 79,
    'Biological/Adoptive Mother and Stepmother': 80,
    'Guardian, female': 81,
    'Other female': 82,
    'Guardian, male': 83,
    'Other male': 84,
    'Other/Grandparent/Nanny': 85,
    'Mother, Father, Guardian': 86,
    'Daughter, son, grandchild': 87,
    'Professional (e.g., social worker, nurse, therapist, psychiatrist, or group home staff)': 88,
    'Missing': -999,
    'Biological parent': 89,
    'Other': 90,
    'Stepparent': 91,
    'Adoptive parent': 92,
    'Foster parent': 93,
    'Co-worker': 94,
    'Independent Evaluator': 95
}
rev_relationship = {v: k for k, v in relationship.items()}

In [287]:
teacher_dict = {
    'Special education (sped) teacher': 4,
    'General education teacher': 5,
    'Content teacher': 14,
}
rev_teacher_dict = {v: k for k, v in teacher_dict.items()}


In [288]:
parent_dict = {    'Biological mom': 1,
    'Biological dad': 2,
    'Grandparent': 3,
    'Adoptive mother': 17,
    'Adoptive father': 18,
    'Foster mother': 19,
    'Foster father': 20,
    'Grandmother': 21,
    'Grandfather': 22,
    'Step-mother': 23,
    'Step-father': 24,
    'Both parents': 28,
    "Mother's boyfriend": 55,
    'Other parental figure': 56,
    'Biological/Adoptive Mother and Grandmother': 68,
    'Biological/Adoptive Mother and Stepmother and Grandmother': 69,
    'Biological/Adoptive Mother and Grandmother and Foster Father': 70,
    'Biological/Adoptive Mother and Stepmother and Foster Mother': 71,
    'Biological/Adoptive Mother and Foster Mother': 72,
    'Biological/Adoptive Mother and Biological/Adoptive Father': 73,
    'Biological/Adoptive Mother and Stepmother and Biological/Adoptive Father': 74,
    'Biological/Adoptive Mother and Other': 75,
    'Biological/Adoptive Mother and Stepmother and Stepfather': 76,
    'Biological/Adoptive Mother and Stepfather': 77,
    'Biological/Adoptive Mother and Grandfather': 78,
    'Biological/Adoptive Mother and Stepmother and Foster Father': 79,
    'Biological/Adoptive Mother and Stepmother': 80,
    'Guardian, female': 81,    
    'Guardian, male': 83,
    'Mother, Father, Guardian': 86,
    'Biological parent': 89,
    'Stepparent': 91,
    'Adoptive parent': 92,
    'Foster parent': 93,
    }
rev_parent_dict = {v: k for k, v in parent_dict.items()}

In [289]:
baseline_var = ['src_subject_id', 'interview_date', 'interview_age', 'sex', 'site']

In [290]:
ssrs.keys()

Index(['collection_id', 'ssrs01_id', 'dataset_id', 'subjectkey',
       'src_subject_id', 'interview_date', 'interview_age', 'sex',
       'days_baseline', 'assbdic',
       ...
       'c_s_ssq_emp', 'c_s_ssq_sc', 'c_s_ssq_tot_rs', 'c_s_ssq_tot_ss',
       'socpart27', 'socpart28', 'socpart29', 'socpart30', 'timepoint_label',
       'collection_title'],
      dtype='object', length=304)

In [291]:
ssrs_dict = {
    'relationship' : 'relationship', # just use relationship directly
    'ssrs_ss_mean' :'ssptossx', #social skilla
    'ssrs_ss_std' : 'ssptosst',
    'ssrs_int_mean' : 'sspintx', #internalizing 
    'ssrs_int_std' : 'sspintt'
}

ssrs_col2extract_ = [val for val in ssrs_dict.values()] #valiable columns to extracts
ssrs_col2extract = np.concatenate((baseline_var, ssrs_col2extract_)) # add basline vars 
print(ssrs_col2extract)
#ssrs_t_ss_tot_raw = 't_c_ssq_tot_rs' #teacher # no data 
#ssrs_t_int_raw = 't_c_ssq_int' # teacher }

for var in ssrs_dict.values(): 
    print(var, ssrs[var].dropna().shape)
    
for var in relationship.values():
    count = ssrs['relationship'].value_counts().get(var)
    if  count is not None:
        print(rev_relationship[var], count)
    
#extract parent answers 
# ssrs_parent_mask = ssrs['relationship'].isin(rev_parent_dict)
# ssrs_teacher_mask = ssrs['relationship'].isin(rev_teacher_dict)
# ssrs_parent = ssrs[ssrs_parent_mask]
# ssrs_teacher = ssrs[ssrs_teacher_mask]
# print(ssrs.shape, ssrs_parent.shape, ssrs_teacher.shape)
# print(ssrs_parent['src_subject_id'].unique().shape, ssrs_teacher['src_subject_id'].unique().shape)

ssrs_p_t_mask = ssrs['relationship'].isin(rev_parent_dict ) | ssrs['relationship'].isin(rev_teacher_dict)
ssrs_p_t = ssrs[ssrs_p_t_mask][ssrs_col2extract]

ssrs_p_t['p_t_bool'] =  np.where(ssrs_p_t['relationship'].isin(rev_parent_dict), 1, # code all parent souce ratings as 1, teacher source ratings as 2 
                                 np.where(ssrs_p_t['relationship'].isin(rev_teacher_dict), 2, 0))

print(ssrs_p_t['relationship'].value_counts().get(0), ssrs_p_t['relationship'].value_counts().get(1), ssrs_p_t['relationship'].value_counts().get(2)) #check why let values than individualp parent + teacher 


['src_subject_id' 'interview_date' 'interview_age' 'sex' 'site'
 'relationship' 'ssptossx' 'ssptosst' 'sspintx' 'sspintt']
relationship (11038,)
ssptossx (10804,)
ssptosst (10804,)
sspintx (10955,)
sspintt (10955,)
Biological mom 3147
Biological dad 1635
Special education (sped) teacher 28
General education teacher 2097
Content teacher 1627
Foster mother 21
Foster father 8
Grandmother 97
Grandfather 54
Step-mother 42
Step-father 187
Aunt 31
Uncle 5
Other Relative 12
None 3147 1635


### set types

In [292]:
ssrs_p_t[['interview_age', 'relationship', 'p_t_bool', 'site']] = ssrs_p_t[['interview_age', 'relationship', 'p_t_bool', 'site']].astype(int)
ssrs_p_t[['sspintx', 'sspintt', 'ssptossx','ssptosst']]= ssrs_p_t[['sspintx', 'sspintt', 'ssptossx','ssptosst']].astype(float)
print(ssrs_p_t.dtypes)

src_subject_id     object
interview_date     object
interview_age       int64
sex                object
site                int64
relationship        int64
ssptossx          float64
ssptosst          float64
sspintx           float64
sspintt           float64
p_t_bool            int64
dtype: object


### SNAP 

In [293]:
snap_dict = {
    'relationship' : 'relationship',
    'snap_inatt_mean' : 'snainatx', #inattentuin 
    'snap_inatt_tot' :'snainatt', # hyperactie 
    'snap_hyp_mean' : 'snahypax',
    'snap_hyp_tot' : 'snahypat',
    'snap_imp_mean' : 'snaimpux', #impusive 
    'snap_imp_tot': 'snaimput',
    'snap_odd_mean' :'snaoddx', #oppositional defiant 
    'snap_odd_tot' :  'snaoddt'

}

snap_col2extract_ = [val for val in snap_dict.values()] #valiable columns to extracts
snap_col2extract = np.concatenate((baseline_var, snap_col2extract_)) # add basline vars 
print(snap_col2extract)

for var in snap_dict.values(): 
    print(var, snap[var].dropna().shape)

#snap_inatt_sum =  'snap_inattn_totalscore'  # no data 
#snap_inatt_av = 'snap_inattn_avg'
#snap_hyp_sum = 'snap_hyp_totalscore'
#snap_hyp_av = 'snap_hyp_avg'
for var in relationship.values():
    count = snap['relationship'].value_counts().get(var)
    if  count is not None:
        print(rev_relationship[var], count)
        
# snap_parent_mask = snap['relationship'].isin(rev_parent_dict) #extract parent figures 
# snap_teacher_mask = snap['relationship'].isin(rev_teacher_dict)
# snap_parent = snap[snap_parent_mask][snap_col2extract] # parent figures only relevant columns
# snap_teacher = snap[snap_teacher_mask][snap_col2extract]

snap_p_t_mask = snap['relationship'].isin(rev_parent_dict ) | snap['relationship'].isin(rev_teacher_dict)
snap_p_t = snap[snap_p_t_mask][snap_col2extract]

snap_p_t['p_t_bool'] =  np.where(snap_p_t['relationship'].isin(rev_parent_dict), 1, # code all parent souce ratings as 1, teacher source ratings as 2 
                                 np.where(snap_p_t['relationship'].isin(rev_teacher_dict), 2, 0))

# print(snap.shape, snap_parent.shape,  snap_teacher.shape, snap_p_t.shape)
# print(snap_parent['src_subject_id'].unique().shape, snap_teacher['src_subject_id'].unique().shape)

print(snap_p_t['relationship'].value_counts().get(0), snap_p_t['relationship'].value_counts().get(1), snap_p_t['relationship'].value_counts().get(2)) #check why let values than individualp parent + teacher 

['src_subject_id' 'interview_date' 'interview_age' 'sex' 'site'
 'relationship' 'snainatx' 'snainatt' 'snahypax' 'snahypat' 'snaimpux'
 'snaimput' 'snaoddx' 'snaoddt']
relationship (14580,)
snainatx (14530,)
snainatt (14530,)
snahypax (14488,)
snahypat (14488,)
snaimpux (14447,)
snaimput (14447,)
snaoddx (14489,)
snaoddt (14489,)
Biological mom 2585
Biological dad 1167
Special education (sped) teacher 47
General education teacher 2694
Content teacher 2849
Foster mother 20
Foster father 10
Grandmother 86
Grandfather 53
Step-mother 29
Step-father 117
Aunt 23
None 2585 1167


### set types

In [294]:
snap_p_t[['interview_age', 'relationship', 'p_t_bool', 'site']] = snap_p_t[['interview_age', 'relationship', 'p_t_bool', 'site']].astype(int)
snap_p_t[['snainatx', 'snainatt', 'snahypax', 'snahypat','snaimpux', 'snaimput', 'snaoddx', 'snaoddt']]= snap_p_t[['snainatx', 'snainatt', 'snahypax', 'snahypat','snaimpux', 'snaimput', 'snaoddx', 'snaoddt']].astype(float)
print(snap_p_t.dtypes)

src_subject_id     object
interview_date     object
interview_age       int64
sex                object
site                int64
relationship        int64
snainatx          float64
snainatt          float64
snahypax          float64
snahypat          float64
snaimpux          float64
snaimput          float64
snaoddx           float64
snaoddt           float64
p_t_bool            int64
dtype: object


### MASC

In [295]:
masc_dict = {
    'relationship' : 'relationship', 
    'masc_tot_T_score' : 'masc_masctotalt'}  #total score 
    #'masc10_tit_imputed_vals' : 'masctotv'}
    
masc_col2extract_ = [val for val in masc_dict.values()] #valiable columns to extracts
masc_col2extract = np.concatenate((baseline_var, masc_col2extract_)) # add basline vars 
print(masc_col2extract)

for var in masc_dict.values(): 
    print(var, masc[var].dropna().shape)
    
for var in relationship.values():
    count = masc['relationship'].value_counts().get(var)
    if  count is not None:
        print(rev_relationship[var], count)

masc_child = masc[masc_col2extract]
print(masc.shape, masc_child.shape)

['src_subject_id' 'interview_date' 'interview_age' 'sex' 'site'
 'relationship' 'masc_masctotalt']
relationship (4703,)
masc_masctotalt (4703,)
Self 608
(4703, 214) (4703, 7)


### set types

In [296]:
masc_child.dtypes
masc_child[['interview_age', 'relationship', 'site']] = masc_child[['interview_age', 'relationship', 'site']].astype(int)
masc_child['masc_masctotalt'] = masc_child['masc_masctotalt'].astype(float)
print(masc_child.dtypes)

src_subject_id      object
interview_date      object
interview_age        int64
sex                 object
site                 int64
relationship         int64
masc_masctotalt    float64
dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  masc_child[['interview_age', 'relationship', 'site']] = masc_child[['interview_age', 'relationship', 'site']].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  masc_child['masc_masctotalt'] = masc_child['masc_masctotalt'].astype(float)


### Parent-Child


In [297]:
pc_dict = {
    'pc_relationship' : 'relationship', 
    'pc_dominance_mean' : 'pcrcpax', #power assertion
    'pc_pro_social_mean' : 'pcrcprx' #personal closeness 
}
for var in pc_dict.values(): 
    print(var, pc[var].dropna().shape)

pc_col2extract_ = [val for val in pc_dict.values()] #valiable columns to extracts
pc_col2extract = np.concatenate((baseline_var, pc_col2extract_)) # add basline vars 
print(pc_col2extract)

for var in relationship.values():
    count = pc['relationship'].value_counts().get(var)
    if  count is not None:
        print(rev_relationship[var], count)

pc_parent_mask = pc['relationship'].isin(rev_parent_dict)
pc_teacher_mask = pc['relationship'].isin(rev_teacher_dict) # no data 
pc_parent = pc[pc_parent_mask][pc_col2extract]
pc_teacher = pc[pc_teacher_mask][pc_col2extract]
print(pc.shape, pc_parent.shape, pc_teacher.shape)
print(pc_parent['src_subject_id'].unique().shape, pc_teacher['src_subject_id'].unique().shape)


relationship (8712,)
pcrcpax (8667,)
pcrcprx (8673,)
['src_subject_id' 'interview_date' 'interview_age' 'sex' 'site'
 'relationship' 'pcrcpax' 'pcrcprx']
Biological mom 269
Biological dad 191
Grandmother 14
Grandfather 9
Step-mother 15
Step-father 23
(8712, 80) (521, 8) (0, 8)
(82,) (0,)


In [298]:
pc_parent.keys()

Index(['src_subject_id', 'interview_date', 'interview_age', 'sex', 'site',
       'relationship', 'pcrcpax', 'pcrcprx'],
      dtype='object')

### set types 

In [299]:
pc_parent[['interview_age', 'relationship', 'site']] = pc_parent[['interview_age', 'relationship', 'site']].astype(int)
pc_parent[['pcrcpax', 'pcrcprx']]= pc_parent[['pcrcpax', 'pcrcprx']].astype(float)
print(pc_parent.dtypes)

src_subject_id     object
interview_date     object
interview_age       int64
sex                object
site                int64
relationship        int64
pcrcpax           float64
pcrcprx           float64
dtype: object


### Wechsler

In [300]:
wechsler_dict = {
    'relationship' : 'relationship',
    'wiat_reading_sc': 'w1readb', # scaled scores 
    'wiat_math_sc' : 'w2math', 
    'wiat_read_sc' : 'w3spell'
}

wechsler_col2extract_ = [val for val in wechsler_dict.values()] #valiable columns to extracts
wechsler_col2extract = np.concatenate((baseline_var, wechsler_col2extract_)) # add basline vars 
print(wechsler_col2extract)

for var in wechsler_dict.values(): 
    print(var, wechsler[var].dropna().shape)

for var in relationship.values():
    count = wechsler['relationship'].value_counts().get(var)
    if  count is not None:
        print(rev_relationship[var], count)

wechsler_child = wechsler[wechsler_col2extract]
print(wechsler.shape, wechsler_child.shape)

['src_subject_id' 'interview_date' 'interview_age' 'sex' 'site'
 'relationship' 'w1readb' 'w2math' 'w3spell']
relationship (6243,)
w1readb (6242,)
w2math (6241,)
w3spell (4572,)
Self 4196
(6243, 406) (6243, 9)


In [301]:
wechsler_child.keys()

Index(['src_subject_id', 'interview_date', 'interview_age', 'sex', 'site',
       'relationship', 'w1readb', 'w2math', 'w3spell'],
      dtype='object')

### set types

In [302]:
wechsler_child[['interview_age', 'relationship', 'site']] = wechsler_child[['interview_age', 'relationship', 'site']].astype(int)
wechsler_child[['w1readb', 'w2math', 'w3spell']] = wechsler_child[['w1readb', 'w2math', 'w3spell']].astype(float)
print(wechsler_child.dtypes)

src_subject_id     object
interview_date     object
interview_age       int64
sex                object
site                int64
relationship        int64
w1readb           float64
w2math            float64
w3spell           float64
dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wechsler_child[['interview_age', 'relationship', 'site']] = wechsler_child[['interview_age', 'relationship', 'site']].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wechsler_child[['w1readb', 'w2math', 'w3spell']] = wechsler_child[['w1readb', 'w2math', 'w3spell']].astype(float)


In [303]:
(ssrs.sjtyp == 1).sum()

7398

#### Treatment groups 


In [304]:
end_treat_file = 'debrief1401.txt'
end_treat = pd.read_csv(Path(data_root, end_treat_file), delimiter='\t')

treat_group = end_treat[['src_subject_id', 'trtname']].astype(str)
treat_group['trtname'].unique()

sub_m = treat_group.query("trtname == 'M'") #medi
sub_c = treat_group.query("trtname == 'C'") #'combined'
sub_p = treat_group.query("trtname == 'P'") # psychosocial 
treat_group.dtypes

src_subject_id    object
trtname           object
dtype: object

In [305]:
treat_group.dropna().shape

(434, 2)

In [306]:
print((treat_group['src_subject_id'].isin(snap.src_subject_id)).sum())
print(snap.src_subject_id.isin(treat_group['src_subject_id']).sum())
print(snap.src_subject_id.unique().shape)

433
8590
(868,)


In [307]:
snap_p_t_treat = pd.merge(snap_p_t, treat_group, how='inner', on = 'src_subject_id') #table with relevant snap vales, rater, and treatment group 
ssrs_p_t_treat = pd.merge(ssrs_p_t, treat_group, how='inner', on = 'src_subject_id') #table with relevant snap vales, rater, and treatment group 
masc_c_treat = pd.merge(masc_child, treat_group, how='inner', on = 'src_subject_id') #table with relevant snap vales, rater, and treatment group 
pc_p_treat = pd.merge(pc_parent, treat_group, how='inner', on = 'src_subject_id') #table with relevant snap vales, rater, and treatment group 
wechsler_c_treat = pd.merge(wechsler_child, treat_group, how='inner', on = 'src_subject_id') #table with relevant snap vales, rater, and treatment group 

#print(snap_p_t_treat.info(), ssrs_p_t_treat.info(), masc_c_treat.info(), pc_p_treat.info(), wechsler_c_treat.info())

In [308]:
snap_p_t_treat

Unnamed: 0,src_subject_id,interview_date,interview_age,sex,site,relationship,snainatx,snainatt,snahypax,snahypat,snaimpux,snaimput,snaoddx,snaoddt,p_t_bool,trtname
0,P1434,12/26/2004,203,M,2,1,1.78,16.0,1.50,9.0,1.33,4.0,2.13,17.0,1,P
1,P1435,09/03/1995,77,F,6,1,3.00,27.0,3.00,18.0,3.00,9.0,2.75,22.0,1,M
2,P1435,07/21/1996,88,F,6,1,1.89,17.0,2.83,17.0,3.00,9.0,2.38,19.0,1,M
3,P1435,10/02/1996,90,F,6,1,1.78,16.0,1.83,11.0,1.67,5.0,2.38,19.0,1,M
4,P1435,04/30/1997,97,F,6,1,0.89,8.0,1.00,6.0,0.67,2.0,1.75,14.0,1,M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5472,P1842,08/21/1998,115,M,4,4,2.22,20.0,0.67,4.0,0.67,2.0,0.63,5.0,2,M
5473,P1842,10/05/2001,153,M,4,14,1.11,10.0,0.00,0.0,0.33,1.0,0.38,3.0,2,M
5474,P1842,10/09/2001,153,M,4,14,1.00,9.0,0.00,0.0,0.33,1.0,0.63,5.0,2,M
5475,P1842,02/23/2003,169,M,4,4,1.22,11.0,0.00,0.0,0.00,0.0,0.00,0.0,2,M


In [309]:
snap_p_t_treat.dtypes

src_subject_id     object
interview_date     object
interview_age       int64
sex                object
site                int64
relationship        int64
snainatx          float64
snainatt          float64
snahypax          float64
snahypat          float64
snaimpux          float64
snaimput          float64
snaoddx           float64
snaoddt           float64
p_t_bool            int64
trtname            object
dtype: object

In [310]:
rp.summary_cont(snap_p_t_treat.groupby('trtname')['snainatx'])





Unnamed: 0_level_0,N,Mean,SD,SE,95% Conf.,Interval
trtname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
C,1896,1.5033,0.8204,0.0188,1.4664,1.5403
M,1589,1.5238,0.857,0.0215,1.4817,1.566
P,1971,1.6849,0.8056,0.0181,1.6493,1.7205
