In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [None]:
# Widen the notebook container to the full browser width.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing `display` from the internal `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

#### Read in first dataset - Form I dataset includes baseline demographic and clinical information of persons who met eligibility criteria.

In [None]:
# Load the Form I CSV into `f1`.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact location exists.
f1 = pd.read_csv(
    filepath_or_buffer='C:/Users/dmora/Documents/NSS/Capstone/data/f1_public_2021.csv'
)
f1

#### Create df of base variables

In [None]:
# Columns of `f1` to carry forward into the working frame `f1_base`.
f1_base_columns = [
    'UniID', 'AAdmDt', 'ARbAdmDt', 'ADisDt',
    'AI2ADays', 'AI2RhADa', 'AInjAge', 'ASex',
    'APResInj', 'APResDis',
    'AMarStIj', 'AEducLvl', 'ABPHQ1',
    'ABPHQ2', 'ABPHQ9', 'ABPHQ6', 'ABPHQMDS',
    'ABPHQSDS', 'APrLvlSt', 'AFmIncLv',
]

# Selecting with a list of labels keeps the columns in the listed order.
f1_base = f1[f1_base_columns]

f1_base

#### Rename columns for easier understanding.

In [None]:
# Original column name -> descriptive snake_case name.
f1_column_names = {
    'UniID': 'unique_id',
    'AAdmDt': 'sys_adm_dt',
    'ARbAdmDt': 'reh_adm_dt',
    'ADisDt': 'discharge_dt',
    'AI2ADays': 'days_inj_to_sys',
    'AI2RhADa': 'days_inj_to_reh',
    'AInjAge': 'age_at_injury',
    'ASex': 'sex',
    'APResInj': 'res_at_inj',
    'APResDis': 'res_at_dis',
    'AMarStIj': 'marital_stat_ai',
    'AEducLvl': 'educ_lvl_ai',
    'ABPHQ1': 'interest_or_pleasure',
    'ABPHQ2': 'depressed',
    'ABPHQ9': 'self_harm',
    'ABPHQ6': 'feel_bad',
    'ABPHQMDS': 'depressive_syndrome',
    'ABPHQSDS': 'severity_of_depression',
    'APrLvlSt': 'occup_status',
    'AFmIncLv': 'income_level',
}
f1_base = f1_base.rename(columns=f1_column_names)

f1_base

#### Fill missing values in the reh_adm_dt column with 0 so the column can then be cast to int, keeping year values consistent across the df.

In [None]:
# Replace missing values in 'reh_adm_dt' with 0; other columns are untouched
# because the fill value is given per column.
f1_base = f1_base.fillna({'reh_adm_dt': 0})
f1_base

#### Change dtype of reh_adm_dt to integer

In [None]:
# Cast only 'reh_adm_dt' to integer; all other columns keep their dtype.
f1_base = f1_base.astype({'reh_adm_dt': 'int'})

#### Recode the numeric sex column into a new gender column of letter codes (m, f, t, u)

In [None]:
# Recode the numeric 'sex' codes into one-letter labels in a new 'gender' column.
# Series.map does the lookup in one vectorized pass instead of iterating rows
# and assigning cell by cell with .loc. Codes that are not in the mapping
# (including missing values) become NaN, which is what the row loop left behind.
gender_by_code = {1: 'm', 2: 'f', 3: 't', 9: 'u'}
f1_base['gender'] = f1_base['sex'].map(gender_by_code)

#### Drop sex column

In [None]:
# Remove the 'sex' column.
f1_base = f1_base.drop(columns=['sex'])
f1_base

#### Recode the numeric res_at_inj column into a new res_at_injb column that holds the type of residence as text instead of a number

In [None]:
# Recode the numeric 'res_at_inj' codes into text labels in a new
# 'res_at_injb' column. Series.map replaces the row-by-row iterrows()/.loc
# loop with a single vectorized lookup; codes not listed here (and missing
# values) become NaN, as before.
res_at_inj_labels = {
    1: 'pri_res',
    2: 'hospital',
    3: 'nurs_home',
    4: 'group',
    5: 'correctional',
    6: 'motel',
    7: 'deceased',
    8: 'other',
    9: 'homeless',
    10: 'assisted',
    99: 'unknown',
}
f1_base['res_at_injb'] = f1_base['res_at_inj'].map(res_at_inj_labels)

#### Drop original numerical res_at_inj column

In [None]:
# Remove the numeric 'res_at_inj' column.
f1_base = f1_base.drop(columns=['res_at_inj'])
f1_base

#### Recode the numeric res_at_dis column into a new res_at_disb column that holds the type of residence as text instead of a number

In [None]:
# Recode the numeric 'res_at_dis' codes into text labels in a new
# 'res_at_disb' column. Series.map replaces the row-by-row iterrows()/.loc
# loop with a single vectorized lookup; codes not listed here (and missing
# values) become NaN, as before.
res_at_dis_labels = {
    1: 'pri_res',
    2: 'hospital',
    3: 'nurs_home',
    4: 'group',
    5: 'correctional',
    6: 'motel',
    7: 'deceased',
    8: 'other',
    9: 'homeless',
    10: 'assisted',
    99: 'unknown',
}
f1_base['res_at_disb'] = f1_base['res_at_dis'].map(res_at_dis_labels)

#### Drop original res_at_dis column

In [None]:
# Remove the numeric 'res_at_dis' column.
f1_base = f1_base.drop(columns=['res_at_dis'])
f1_base

In [None]:
# Give the text residence columns the names 'res_at_inj' and 'res_at_dis'.
res_column_names = {
    'res_at_injb': 'res_at_inj',
    'res_at_disb': 'res_at_dis',
}
f1_base = f1_base.rename(columns=res_column_names)
f1_base

In [None]:
# Recode the numeric 'marital_stat_ai' codes into text labels in a new
# 'marital_stat_aib' column. Series.map replaces the row-by-row
# iterrows()/.loc loop with a single vectorized lookup; codes not listed here
# (and missing values) become NaN, as before.
marital_labels = {
    1: 'single',
    2: 'married',
    3: 'divorced',
    4: 'separated',
    5: 'widowed',
    6: 'other',
    7: 'living_with',
    9: 'unknown',
}
f1_base['marital_stat_aib'] = f1_base['marital_stat_ai'].map(marital_labels)

In [None]:
# Display f1_base as this cell's output.
f1_base

In [None]:
# Remove the numeric 'marital_stat_ai' column.
f1_base = f1_base.drop(columns=['marital_stat_ai'])
f1_base

In [None]:
# Recode the numeric 'educ_lvl_ai' codes into text labels in a new
# 'educ_lvl_aib' column. Series.map replaces the row-by-row iterrows()/.loc
# loop with a single vectorized lookup; codes not listed here (and missing
# values) become NaN, as before.
educ_labels = {
    1: '8th_or_less',
    2: '9th_11th',
    3: 'hsd',
    4: 'ad',
    5: 'bd',
    6: 'md',
    7: 'doc',
    8: 'other',
    9: 'unknown',
}
f1_base['educ_lvl_aib'] = f1_base['educ_lvl_ai'].map(educ_labels)

In [None]:
# Remove the numeric 'educ_lvl_ai' column.
f1_base = f1_base.drop(columns=['educ_lvl_ai'])
f1_base

In [None]:
# Recode the numeric 'interest_or_pleasure' codes into text labels in a new
# 'int_or_pleas' column. Series.map replaces the row-by-row iterrows()/.loc
# loop with a single vectorized lookup; codes not listed here (and missing
# values) become NaN, as before.
interest_labels = {
    0: 'none',
    1: 'several_days',
    2: 'more_than_half',
    3: 'nearly_every_day',
    7: 'declined',
    9: 'unknown',
}
f1_base['int_or_pleas'] = f1_base['interest_or_pleasure'].map(interest_labels)

In [None]:
# Remove the numeric 'interest_or_pleasure' column.
f1_base = f1_base.drop(columns=['interest_or_pleasure'])
f1_base

In [None]:
# Display f1_base as this cell's output.
f1_base

In [None]:
# Recode the numeric 'depressed' codes into text labels in a new 'depressedb'
# column. Series.map replaces the row-by-row iterrows()/.loc loop with a
# single vectorized lookup; codes not listed here (and missing values) become
# NaN, as before.
depressed_labels = {
    0: 'none',
    1: 'several_days',
    2: 'more_than_half',
    3: 'nearly_every_day',
    7: 'declined',
    9: 'unknown',
}
f1_base['depressedb'] = f1_base['depressed'].map(depressed_labels)

In [None]:
# Remove the numeric 'depressed' column.
f1_base = f1_base.drop(columns=['depressed'])
f1_base

In [None]:
# Recode the numeric 'self_harm' codes into text labels in a new 'self_harmb'
# column. Series.map replaces the row-by-row iterrows()/.loc loop with a
# single vectorized lookup; codes not listed here (and missing values) become
# NaN, as before.
self_harm_labels = {
    0: 'none',
    1: 'several_days',
    2: 'more_than_half',
    3: 'nearly_every_day',
    7: 'declined',
    9: 'unknown',
}
f1_base['self_harmb'] = f1_base['self_harm'].map(self_harm_labels)

In [None]:
# Remove the numeric 'self_harm' column.
f1_base = f1_base.drop(columns=['self_harm'])
f1_base

In [None]:
# Recode the numeric 'feel_bad' codes into text labels in a new 'feel_badb'
# column. Series.map replaces the row-by-row iterrows()/.loc loop with a
# single vectorized lookup; codes not listed here (and missing values) become
# NaN, as before.
feel_bad_labels = {
    0: 'none',
    1: 'several_days',
    2: 'more_than_half',
    3: 'nearly_every_day',
    7: 'declined',
    9: 'unknown',
}
f1_base['feel_badb'] = f1_base['feel_bad'].map(feel_bad_labels)

In [None]:
# Remove the numeric 'feel_bad' column.
f1_base = f1_base.drop(columns=['feel_bad'])
f1_base

In [None]:
# Recode the numeric 'depressive_syndrome' codes into text labels in a new
# 'dep_syn' column. Series.map replaces the row-by-row iterrows()/.loc loop
# with a single vectorized lookup; codes not listed here (and missing values)
# become NaN, as before.
dep_syn_labels = {
    0: 'none',
    1: 'major',
    2: 'other',
    9: 'unknown',
}
f1_base['dep_syn'] = f1_base['depressive_syndrome'].map(dep_syn_labels)

In [None]:
# Remove the numeric 'depressive_syndrome' column.
f1_base = f1_base.drop(columns=['depressive_syndrome'])
f1_base

In [None]:
# Recode the numeric 'occup_status' codes into text labels in a new
# 'occ_stat' column. Series.map replaces the row-by-row iterrows()/.loc loop
# with a single vectorized lookup; codes not listed here (and missing values)
# become NaN, as before.
occ_stat_labels = {
    1: 'working',
    2: 'homemaker',
    3: 'otj_train',
    4: 'shel_work',
    5: 'retired',
    6: 'student',
    7: 'unemployed',
    8: 'retired_dis',
    9: 'retired_nondis',
    10: 'other',
    99: 'unknown',
}
f1_base['occ_stat'] = f1_base['occup_status'].map(occ_stat_labels)

In [None]:
# Remove the numeric 'occup_status' column.
f1_base = f1_base.drop(columns=['occup_status'])
f1_base

In [None]:
# Recode the numeric 'income_level' codes into text labels in a new
# 'inc_lvl1' column. Series.map replaces the row-by-row iterrows()/.loc loop
# with a single vectorized lookup; codes not listed here (and missing values)
# become NaN, as before.
income_labels = {
    1: 'lt25k',
    2: '25k_49999',
    3: '50k_74999',
    4: '75pl',
    6: 'not_sure',
    7: 'declined',
    9: 'unknown',
}
f1_base['inc_lvl1'] = f1_base['income_level'].map(income_labels)

In [None]:
# Remove the numeric 'income_level' column.
f1_base = f1_base.drop(columns=['income_level'])
f1_base

In [None]:
# Shorten the recoded column names; most of the new names end in '1'.
f1_short_names = {
    'marital_stat_aib': 'mar_stat1',
    'educ_lvl_aib': 'educ_lvl1',
    'int_or_pleas': 'int_pleas1',
    'depressedb': 'dep1',
    'self_harmb': 'self_harm1',
    'feel_badb': 'feel_bad1',
    'dep_syn': 'dep_syn1',
    'occ_stat': 'occ_stat1',
    'severity_of_depression': 'sev_dep',
    'age_at_injury': 'age1',
    'res_at_inj': 'res1',
}
f1_base = f1_base.rename(columns=f1_short_names)

f1_base

In [None]:
# Frequency of each distinct value in the 'sev_dep' column.
f1_base['sev_dep'].value_counts()

In [None]:
# Load the second CSV into `f2`.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact location exists.
f2 = pd.read_csv(
    filepath_or_buffer='C:/Users/dmora/Documents/NSS/Capstone/data/f2_public_2021.csv'
)
f2

In [None]:
# Columns of `f2` to carry forward into the working frame `f2_base`.
f2_base_columns = [
    'UniID', 'BYear', 'BPlcRes', 'BMarStat',
    'BMarStCh', 'BEducLvl', 'BBPHQ1', 'BBPHQ2',
    'BBPHQ9', 'BBPHQ6', 'BBPHQMDS', 'BBPHQSDS',
    'BPrLvlSt', 'BFmIncLv',
]

# Selecting with a list of labels keeps the columns in the listed order.
f2_base = f2[f2_base_columns]

f2_base

In [None]:
# Original column name -> descriptive snake_case name.
# NOTE(review): 'interest_or_pleaseure_pi' looks like a typo of
# 'interest_or_pleasure_pi', and 'income_level' has no '_pi' suffix unlike the
# other columns. Both are kept as-is because cells not visible here may
# reference these exact names.
f2_column_names = {
    'UniID': 'unique_id',
    'BYear': 'post_inj_yr',
    'BPlcRes': 'res_pi',
    'BMarStat': 'marital_stat_pi',
    'BMarStCh': 'marital_stat_change',
    'BEducLvl': 'educ_lvl_pi',
    'BBPHQ1': 'interest_or_pleaseure_pi',
    'BBPHQ2': 'depressed_pi',
    'BBPHQ9': 'self_harm_pi',
    'BBPHQ6': 'feel_bad_pi',
    'BBPHQMDS': 'depressive_syndrome_pi',
    'BBPHQSDS': 'severity_of_depression_pi',
    'BPrLvlSt': 'occup_status_pi',
    'BFmIncLv': 'income_level',
}
f2_base = f2_base.rename(columns=f2_column_names)

f2_base

In [None]:
# Replace missing values in 'res_pi' with 0; other columns are untouched
# because the fill value is given per column.
f2_base = f2_base.fillna({'res_pi': 0})
f2_base

In [None]:
# Cast only 'res_pi' to integer; all other columns keep their dtype.
f2_base = f2_base.astype({'res_pi': 'int'})

In [None]:
# Display f2_base as this cell's output.
f2_base

In [None]:
# Recode the numeric 'res_pi' codes into text labels in a new 'res_postb'
# column. Series.map replaces the row-by-row iterrows()/.loc loop with a
# single vectorized lookup. Codes not listed here become NaN, as before;
# that includes 0 and 7, which have no entry in this mapping.
res_pi_labels = {
    1: 'pri_res',
    2: 'hospital',
    3: 'nurs_home',
    4: 'group',
    5: 'correctional',
    6: 'motel',
    8: 'other',
    9: 'homeless',
    10: 'assisted',
    99: 'unknown',
}
f2_base['res_postb'] = f2_base['res_pi'].map(res_pi_labels)

In [None]:
# Display f2_base as this cell's output.
f2_base

In [None]:
# Remove the numeric 'res_pi' column.
f2_base = f2_base.drop(columns=['res_pi'])
f2_base

In [None]:
# Give the text residence column the name 'res_pi'.
res_pi_column_name = {'res_postb': 'res_pi'}
f2_base = f2_base.rename(columns=res_pi_column_name)

f2_base