In [1]:
import numpy as np
import pandas as pd
# Will not submit each raw data file: just need one of each (Psychopy and Testable).
# Leave 'anx_theta' and 'anger_theta' blank for now.
# Send the version of the PROMIS anxiety scale actually used to Joel (version 8a), highlighting the relevant columns in the template file.
# Only the original 277 files that were initially read should be uploaded, included in these NDA submission files, and used in the RO1 analysis.

In [2]:
# ***** IMPORT DATA FILE *****
# Read the summary table and keep only the first 277 rows (the participants as reported).
summary_fall2020 = pd.read_csv('summary_fall2020.csv').head(277)
n = len(summary_fall2020) # Total number of participants in the data set.
print(summary_fall2020)

     Unnamed: 0                 ident schedule           age     sex latinx  \
0             0          NDARKM468VR6   design         18-24    Male     No   
1             1          NDARJJ637MVP   design         18-24  Female     No   
2             2          NDARTX594HBJ   design         18-24  Female     No   
3             3          NDARRR883WV4   design         18-24  Female     No   
4             4          NDARMA408TN4   design         18-24    Male     No   
..          ...                   ...      ...           ...     ...    ...   
272         272  666579_201203_005413   design  18 years old  Female     No   
273         273  666579_201202_014604   design  20 years old    Male     No   
274         274  666579_201130_223828   design  19 years old  Female     No   
275         275  666579_201203_235030   design  18 years old    Male     No   
276         276  666579_201203_010208   design  18 years old  Female     No   

       race             employment  tutorial_0a_las

In [3]:
# ***** CALCULATE AGE IN MONTHS *****
# In-lab participants reported an age bracket: take the middle of each bracket
# and multiply by 12 months/year.
age_code_inlab = pd.Series({'18-24': 12*21,
                            '25-34': 12*29.5,
                            '35-44': 12*39.5,
                            '45-54': 12*49.5, # Fixed: was 12*49.4, which broke the midpoint pattern and gave 592.8 months.
                            '55-64': 12*59.5,
                            '65-74': 12*69.5,
                            '75+': 12*75}) # Open-ended bracket: its lower bound is used.

# Participants from any other source reported an age in whole years.
age_code_online = pd.Series({'18 years old': 12*18,
                             '19 years old': 12*19,
                             '20 years old': 12*20,
                             '21 years old': 12*21,
                             '22 years old': 12*22,
                             '23 years old': 12*23,
                             '24 years or older': 12*24})

# Look every age label up in both tables at once, then pick the table that
# matches each participant's source ('inlab' -> bracket table, otherwise the
# whole-years table). Labels missing from a table map to NaN.
reported_age = summary_fall2020['age']
is_inlab = (summary_fall2020['source'] == 'inlab').values
age_months = np.where(is_inlab,
                      reported_age.map(age_code_inlab),
                      reported_age.map(age_code_online)).astype(float)

# Fail loudly on an age label the selected table does not know, rather than
# silently exporting a missing age.
unmapped = np.isnan(age_months)
if unmapped.any():
    raise KeyError('Unrecognised age label(s): ' + repr(sorted(set(reported_age[unmapped].astype(str)))))

# Every table entry is a whole number of months, so store integers
# (avoids writing values such as '252.0' to the exported files).
age_months = age_months.astype(int)

In [4]:
# ***** DEFINE RELEVANT DICTIONARIES *****
# Each lookup is a Series indexed by the raw survey response; the values are
# the codes written to the submission files.

# Reported sex -> single/double letter code.
sex_code = pd.Series(['F', 'M', 'O', 'NR'],
                     index=['Female', 'Male', 'Intersex', 'Prefer not to answer'])

# Data source -> software platform that ran the task.
platform_code = pd.Series(['Psychopy', 'Testable'], index=['inlab', 'online'])

# Employment response -> numeric employment status code. Code list:
# 1=Unemployed; 2=Unemployed Stay-at-home parent; 3=Part-Time Student; 4=Full-Time Student;
# 5=Disability/Unemployed; 6=Disability/Part-Time Employment; 7=Employed Part Time;
# 8=Employed Full Time; 9=Retired/Part-Time Employment; 10=Retired/Unemployed.;
# 11 = Student; 12 = Caring for children, elders and house; 13 = Unemployed, volunteer work
employment_responses = ['employed full-time',
                        'employed part-time',
                        'disabled (not working because of permanent or temporary disability)',
                        'homemaker',
                        'full-time student',
                        'retired',
                        'seeking opportunities',
                        'prefer not to answer']
# The last entry is blank because no numeric code is given for 'prefer not to answer'.
employment_values = [8, 7, 5, 12, 4, 9, 1, '']
employment_code = pd.Series(employment_values, index=employment_responses)

# Hispanic/Latinx response -> ethnic group label (blank when not answered).
ethnic_code = pd.Series(['Hispanic/Latinx', 'Not Hispanic/Latinx', ''],
                        index=['Yes', 'No', 'Prefer not to answer'])

In [5]:
# ***** CREATE DATAFRAMES FOR NDA SUBMISSION *****
# Builds the four submission tables (fast, ndar_subject01, prang01, preda01)
# from the summary table, the age-in-months array and the code lookups.

# Columns that appear in more than one table, extracted once.
subject_guids = summary_fall2020['guid'].values # GUID
lab_ids = summary_fall2020['ident'].values # Lab ID
interview_dates = summary_fall2020['date'].values # Date of test (intended format MM/DD/YYYY)
sex_letters = sex_code[summary_fall2020['sex']].values # M; F; O; NR
data_sources = summary_fall2020['source'].values
blank_column = n*[''] # Placeholder for fields that are left blank for now.

# --- fast: task-level record ---
fast = pd.DataFrame({
    'subjectkey': subject_guids,
    'src_subject_id': lab_ids,
    'interview_date': interview_dates,
    'interview_age': age_months, # Age (months)
    'sex': sex_letters,
    'taskname': n*['facial_affect_salience_task'],
    'platform': platform_code[summary_fall2020['source']].values, # Software platform used to display the task.
    'setting': data_sources, # Setting of the task, e.g. online at home, in lab, online in lab.
    'data_file': blank_column, # Output data file from task. LEAVE BLANK FOR NOW.
    'data_file_type': blank_column, # Type of data file. LEAVE BLANK FOR NOW.
})

# --- ndar_subject01: subject demographics ---
ndar_subject01 = pd.DataFrame({
    'subjectkey': subject_guids,
    'src_subject_id': lab_ids,
    'interview_date': interview_dates,
    'interview_age': age_months, # Age in months at the time of the interview/test/sampling/imaging.
    'sex': sex_letters, # M = Male; F = Female; O=Other; NR = Not reported
    'race': summary_fall2020['race'].values, # Passed through unchanged from the summary table.
    'ethnic_group': ethnic_code[summary_fall2020['latinx']].values,
    'phenotype': data_sources, # undergrad/online *** CHECK THIS ***
    'employ_status': employment_code[summary_fall2020['employment']].values,
})

# --- prang01: PROMIS anger short form ---
# Item codes in questionnaire order; item k is read from column 'promis_ang_resp<k>'.
anger_item_codes = [
    'edang03', # I was irritated more than people knew
    'edang09', # I felt angry
    'edang15', # I felt like I was ready to explode
    'edang30', # I was grouchy
    'edang35', # I felt annoyed
]
prang01_columns = {
    'subjectkey': subject_guids,
    'src_subject_id': lab_ids,
    'interview_age': age_months,
    'interview_date': interview_dates,
    'sex': sex_letters,
}
for item_number, item_code in enumerate(anger_item_codes, start=1):
    prang01_columns[item_code] = summary_fall2020[f'promis_ang_resp{item_number}'].values
prang01_columns['anger_rs'] = summary_fall2020['promis_ang_sum'].values
prang01_columns['anger_ts'] = summary_fall2020['promis_ang'].values
prang01_columns['anger_se'] = blank_column # LEAVE BLANK FOR NOW.
prang01_columns['anger_theta'] = blank_column # LEAVE BLANK FOR NOW.
prang01_columns['version_form'] = n*['PROMIS Item Bank v. 1.1 – Emotional Distress - Anger - Short Form 5a'] # Form used/assessment name
prang01 = pd.DataFrame(prang01_columns)

# --- preda01: PROMIS anxiety short form ---
# Item codes in questionnaire order; item k is read from column 'promis_anx_resp<k>'.
anxiety_item_codes = [
    'edanx01', # I felt fearful
    'edanx40', # I found it hard to focus on anything other than my anxiety
    'edanx41', # My worries overwhelmed me
    'edanx53', # I felt uneasy
    'edanx46', # I felt nervous
    'edanx07', # I felt like I needed help for my anxiety
    'edanx05', # I felt anxious
    'edanx54', # I felt tense
]
preda01_columns = {
    'subjectkey': subject_guids,
    'src_subject_id': lab_ids,
    'interview_age': age_months, # Age in months at the time of the interview/test/sampling/imaging.
    'interview_date': interview_dates, # Date on which the interview was completed.
    'sex': sex_letters,
}
for item_number, item_code in enumerate(anxiety_item_codes, start=1):
    preda01_columns[item_code] = summary_fall2020[f'promis_anx_resp{item_number}'].values
preda01_columns['anx_rs'] = summary_fall2020['promis_anx_sum'].values # PROMIS Anxiety raw score
preda01_columns['anx_ts'] = summary_fall2020['promis_anx'].values # PROMIS Anxiety T score
preda01_columns['anx_se'] = blank_column # LEAVE BLANK FOR NOW.
preda01_columns['anx_theta'] = blank_column # LEAVE BLANK FOR NOW.
preda01_columns['version_form'] = n*['PROMIS Item Bank v1.0 – Emotional Distress – Anxiety – Short Form 8a'] # Form used/assessment name
preda01 = pd.DataFrame(preda01_columns)

# Preview the first rows of each table.
for preview_frame in (fast, ndar_subject01, prang01, preda01):
    print(preview_frame.head())

     subjectkey src_subject_id interview_date  interview_age sex  \
0  NDARKM468VR6   NDARKM468VR6      9/18/2020          252.0   M   
1  NDARJJ637MVP   NDARJJ637MVP     10/29/2020          252.0   F   
2  NDARTX594HBJ   NDARTX594HBJ     10/07/2020          252.0   F   
3  NDARRR883WV4   NDARRR883WV4     10/20/2020          252.0   F   
4  NDARMA408TN4   NDARMA408TN4     10/07/2020          252.0   M   

                      taskname  platform setting data_file data_file_type  
0  facial_affect_salience_task  Psychopy   inlab                           
1  facial_affect_salience_task  Psychopy   inlab                           
2  facial_affect_salience_task  Psychopy   inlab                           
3  facial_affect_salience_task  Psychopy   inlab                           
4  facial_affect_salience_task  Psychopy   inlab                           
     subjectkey src_subject_id interview_date  interview_age sex    race  \
0  NDARKM468VR6   NDARKM468VR6      9/18/2020          252.

In [6]:
# ***** EXPORT FILES *****
# Write each submission table to its own CSV file, omitting the row index.
for csv_name, table in (('fast.csv', fast),
                        ('ndar_subject01.csv', ndar_subject01),
                        ('prang01.csv', prang01),
                        ('preda01.csv', preda01)):
    table.to_csv(csv_name, index=False)