In [2]:
import pandas as pd, numpy as np

#### Link to Online Codebook
https://www.census.gov/data-tools/demo/uccb/sippdict?sortby=topic

#### Load Data Dictionary Files & Format df Column Names

In [3]:
# Load the two halves of the SIPP data dictionary export and stack them into
# a single data dictionary DataFrame.
sipp_dict_1 = pd.read_csv('../data/raw/sipp_2018/sippdict_1_of_2.csv')
sipp_dict_2 = pd.read_csv('../data/raw/sipp_2018/sippdict_2_of_2.csv')

# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it,
# each file contributes its own 0-based labels, so the same row label would
# appear twice in the combined dictionary.
sipp_dict = pd.concat([sipp_dict_1, sipp_dict_2], ignore_index=True)

# Normalize column names to snake_case (lowercase, spaces -> underscores) so
# they can be used with attribute access, e.g. sipp_dict.survey_years.
sipp_dict.columns = [name.lower().replace(' ', '_') for name in sipp_dict.columns]

# Preview the first rows of the combined data dictionary
sipp_dict.head()

Unnamed: 0,variable,topic,subtopic,survey_years,response_code,description,question,data_type,universe,universe_description,user_notes,record_level
0,EAWBCRACK,Adult and Child Well Being,Adult Well-Being,2018,1. Yes||2. No,Are there cracks in the ceiling or walls?,Are there cracks in the ceiling or walls?,Numeric,"THHLDSTATUS in (1,2,3,4)",All interviewed households (asked of reference...,"Descriptions, response codes, and universes ma...",Household
1,EAWBCRACK,Adult and Child Well Being,Adult Well-Being,"2014 Wave 4, 2014 Wave 3, 2014 Wave 2, 2014 Wa...",1. Yes||2. No,Are there cracks in the ceiling or walls?,Are there cracks in the ceiling or walls?,Numeric,All interviewed households.,All interviewed households (asked of reference...,"Descriptions, response codes, and universes ma...",Household
2,EAWBGAS,Adult and Child Well Being,Adult Well-Being,2018,1. Yes||2. No,Was ... unable to pay the utility bills?,Was ... unable to pay the utility bills?,Numeric,"THHLDSTATUS in (1,2,3,4)",All interviewed households (asked of reference...,"Descriptions, response codes, and universes ma...",Household
3,EAWBGAS,Adult and Child Well Being,Adult Well-Being,"2014 Wave 4, 2014 Wave 3, 2014 Wave 2, 2014 Wa...",1. Yes||2. No,Was ... unable to pay the utility bills?,Was ... unable to pay the utility bills?,Numeric,All interviewed households.,All interviewed households (asked of reference...,"Descriptions, response codes, and universes ma...",Household
4,EAWBHOLES,Adult and Child Well Being,Adult Well-Being,2018,1. Yes||2. No,Are there holes in the floor?,Are there holes in the floor?,Numeric,"THHLDSTATUS in (1,2,3,4)",All interviewed households (asked of reference...,"Descriptions, response codes, and universes ma...",Household


#### Filtering Cell

In [10]:
# Build boolean masks used to drill down to the data dictionary variables of
# interest. Every `str.contains` call passes na=False so that rows with a
# missing value produce False instead of NaN; this keeps each mask strictly
# boolean and therefore safe to negate with `~`, combine with `&`, and use for
# row selection.

# Survey year filter: keep entries whose survey_years text mentions 2018
mask_2018 = sipp_dict['survey_years'].str.contains('2018', na=False)

# Topic filter
topic = 'Demographics'                        # << Type topic you want to filter by here
list_topic = sipp_dict['topic'].unique()      # Print this variable to view list of topics
mask_topic = sipp_dict['topic'] == topic

# Subtopic filter
subtopic = 'Age'                              # << Type subtopic you want to filter by here
list_subtopic = sipp_dict['subtopic'].unique()  # Print this variable to view list of all subtopics
mask_subtopic = sipp_dict['subtopic'] == subtopic

# Exclusion masks: drop entries whose description mentions 'Status Flag' or
# 'Suppressed'. Rows with a missing description cannot match either phrase,
# so they are kept.
mask_flag = ~sipp_dict['description'].str.contains('Status Flag', na=False)
mask_suppressed = ~sipp_dict['description'].str.contains('Suppressed', na=False)
mask_exclusions = mask_flag & mask_suppressed

# Combines all filter groups. Exclude a filter group by commenting out its line
mask_final = (mask_2018
              & mask_topic
              #& mask_subtopic
              & mask_exclusions
             )

#### Create & Display Filtered Data Dictionary

In [11]:
# Show full cell text instead of truncating long descriptions / response codes
pd.set_option('display.max_colwidth', None)

# Columns to keep in the filtered view of the data dictionary
display_columns = ['variable', 'description', 'response_code', 'data_type', 'topic']

# Select the rows that pass the combined filter mask and only the columns
# above in a single .loc lookup, then preview the result.
filtered_data_dict = sipp_dict.loc[mask_final, display_columns]
filtered_data_dict.head()

Unnamed: 0,variable,description,response_code,data_type,topic
1709,EDOB_BMONTH,Month of birth,1. January||2. February||3. March||4. April||5. May||6. June||7. July||8. August||9. September||10. October||11. November||12. December,Numeric,Demographics
1711,TAGE,Age as of last birthday,0:87,Numeric,Demographics
1713,TAGE_EHC,Monthly age during the reference year.,0:87,Numeric,Demographics
1715,TDOB_BYEAR,Year of birth,1931:2017,Numeric,Demographics
1723,EAF_HHLD,Did anyone in this household ever serve on active duty in the U.S. Armed Forces?,1. Yes||2. No,Numeric,Demographics


#### Save Filtered Data Dictionary to csv

In [None]:
# Optional: export the filtered data dictionary to a csv file, which can be
# easier to browse in a spreadsheet application than inside the notebook.
# The path below targets the desktop folder on a mac; edit it for other setups.
filepath = '~/desktop/filtered_data_dict.csv'

# index=False leaves the DataFrame's row labels out of the written file
filtered_data_dict.to_csv(path_or_buf=filepath, index=False)