### Data source: National Survey on Drug Use and Health (NSDUH)
https://www.datafiles.samhsa.gov/dataset/nsduh-2002-2019-ds0001-nsduh-2002-2019-ds0001

In [2]:
import pyreadstat
import pandas as pd

In [3]:
df, meta = pyreadstat.read_dta('NSDUH_2002_2019.dta')

In [6]:
cols = ['questid2', # id
        'year', # date
        'age2', # age
        'irsex', # sex
        'newrace2', # race
        'irfamin3', # level of income
        
        # covered by medicare, medicaid, champus, private, or group health insurance
        'medicare',
        'caidchip',
        'champus',
        'prvhltin',
        'grphltin',
        
        # type of mental health treatment received in past year (utilization)
        'amhinp2', #inpatient
        'amhoutp3', #outpatient
        'amhrx2', # prescription
        
        'hltinmnt', # health insurance covers mental or emotional difficulties
        
        'auunmtyr', # unmet need for MH care
        'auunrim2', # reasons for not getting MH care
        
        # how much paid out of pocket
        'aupinfm2', # inpatient
        'aupopamt', # outpatient
        
        # who paid for MH care
        'aupopmos', # outpatient
        # inpatient
        'aupinslf', # self or family who lives with you
        'aupinofm', # family who does not live with you
        'aupinphi', # private insurance
        'aupinmcr', # medicare
        'aupinmcd', # medicaid
        'aupinemp', # employer
        'aupinmil', # military
        'aupinpub', # other public source
        'aupinprv', # other private source
        'aupinfre' # care was free
        
        
        
       ]

In [7]:
df_MH = df[cols]

In [12]:
# rename columns
df_MH = df_MH.rename(columns = {'questid2':'ID', 
                     'age2':'age',
                     'irsex':'sex', 
                     'newrace2':'race', 
                     'irfamin3':'fam_income', 
                     'amhinp2':'rcvd_inpat',
                     'amhoutp3':'rcvd_outpat', 
                     'amhrx2':'rcvd_rx', 
                     'hltinmnt':'MH_covered', 
                     'auunmtyr':'unmet_need', 
                     'auunrim2':'no_care_reasons',
                     'aupinfm2':'amt_paid_inpat',
                     'aupopamt':'amt_paid_outpat', 
                     'aupopmos':'who_paid_outpat', 
                     'aupinslf':'fam_paid_inpat', 
                     'aupinofm':'fam_paid_inpat_away', 
                     'aupinphi':'private_ins_paid_inpat', 
                     'aupinmcr':'medicare_paid_inpat',
                     'aupinmcd':'medicaid_paid_inpat', 
                     'aupinemp':'employer_paid_inpat', 
                     'aupinmil':'military_paid_inpat',
                     'aupinpub':'other_pub_inpat',
                     'aupinprv':'other_private_inpat', 
                     'aupinfre':'care_free'})

In [13]:
df_MH

Unnamed: 0,ID,year,age,sex,race,fam_income,medicare,caidchip,champus,prvhltin,...,fam_paid_inpat,fam_paid_inpat_away,private_ins_paid_inpat,medicare_paid_inpat,medicaid_paid_inpat,employer_paid_inpat,military_paid_inpat,other_pub_inpat,other_private_inpat,care_free
0,02000024,2002,13,1,1,6,2,2,2,1,...,99,99,99,99,99,99,99,99,99,99
1,02000239,2002,12,2,5,7,2,2,2,1,...,99,99,99,99,99,99,99,99,99,99
2,02000248,2002,9,1,1,1,94,94,94,94,...,99,99,99,99,99,99,99,99,99,99
3,02000471,2002,12,1,6,1,2,2,2,1,...,99,99,99,99,99,99,99,99,99,99
4,02000630,2002,16,2,1,5,1,2,2,1,...,99,99,99,99,99,99,99,99,99,99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1005416,99985080,2019,16,1,1,4,2,2,2,2,...,99,99,99,99,99,99,99,99,99,99
1005417,99988589,2019,16,2,1,2,2,2,2,1,...,99,99,99,99,99,99,99,99,99,99
1005418,99994519,2019,14,2,1,2,2,2,2,1,...,99,99,99,99,99,99,99,99,99,99
1005419,99996339,2019,15,1,7,6,2,2,2,1,...,99,99,99,99,99,99,99,99,99,99


In [14]:
# read new dataframe to csv
df_MH.to_csv('newdata', sep = ',')

In [15]:
# check to make sure previous step worked
pd.read_csv('newdata')

Unnamed: 0.1,Unnamed: 0,ID,year,age,sex,race,fam_income,medicare,caidchip,champus,...,fam_paid_inpat,fam_paid_inpat_away,private_ins_paid_inpat,medicare_paid_inpat,medicaid_paid_inpat,employer_paid_inpat,military_paid_inpat,other_pub_inpat,other_private_inpat,care_free
0,0,2000024,2002,13,1,1,6,2,2,2,...,99,99,99,99,99,99,99,99,99,99
1,1,2000239,2002,12,2,5,7,2,2,2,...,99,99,99,99,99,99,99,99,99,99
2,2,2000248,2002,9,1,1,1,94,94,94,...,99,99,99,99,99,99,99,99,99,99
3,3,2000471,2002,12,1,6,1,2,2,2,...,99,99,99,99,99,99,99,99,99,99
4,4,2000630,2002,16,2,1,5,1,2,2,...,99,99,99,99,99,99,99,99,99,99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1005416,1005416,99985080,2019,16,1,1,4,2,2,2,...,99,99,99,99,99,99,99,99,99,99
1005417,1005417,99988589,2019,16,2,1,2,2,2,2,...,99,99,99,99,99,99,99,99,99,99
1005418,1005418,99994519,2019,14,2,1,2,2,2,2,...,99,99,99,99,99,99,99,99,99,99
1005419,1005419,99996339,2019,15,1,7,6,2,2,2,...,99,99,99,99,99,99,99,99,99,99
