In [1]:
import pandas as pd, numpy as np

#### Link to Online Codebook
https://www.census.gov/data-tools/demo/uccb/sippdict?sortby=topic

#### Load Data Dictionary Files & Format df Column Names

In [129]:
# Read both halves of the SIPP data dictionary and stack them into one frame.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# each half keeps the row labels read_csv assigned to it, so the same labels
# would appear twice in the combined frame.
sipp_dict_1 = pd.read_csv('../data/raw/sipp_2018/sippdict_1_of_2.csv')
sipp_dict_2 = pd.read_csv('../data/raw/sipp_2018/sippdict_2_of_2.csv')
sipp_dict = pd.concat([sipp_dict_1, sipp_dict_2], ignore_index=True)

# Normalize column names (lower case, spaces -> underscores) so later cells can
# use attribute access such as sipp_dict.survey_years
sipp_dict.columns = [name.lower().replace(' ', '_') for name in sipp_dict.columns]
sipp_dict.head()

Unnamed: 0,variable,topic,subtopic,survey_years,response_code,description,question,data_type,universe,universe_description,user_notes,record_level
0,EAWBCRACK,Adult and Child Well Being,Adult Well-Being,2018,1. Yes||2. No,Are there cracks in the ceiling or walls?,Are there cracks in the ceiling or walls?,Numeric,"THHLDSTATUS in (1,2,3,4)",All interviewed households (asked of reference...,"Descriptions, response codes, and universes ma...",Household
1,EAWBCRACK,Adult and Child Well Being,Adult Well-Being,"2014 Wave 4, 2014 Wave 3, 2014 Wave 2, 2014 Wa...",1. Yes||2. No,Are there cracks in the ceiling or walls?,Are there cracks in the ceiling or walls?,Numeric,All interviewed households.,All interviewed households (asked of reference...,"Descriptions, response codes, and universes ma...",Household
2,EAWBGAS,Adult and Child Well Being,Adult Well-Being,2018,1. Yes||2. No,Was ... unable to pay the utility bills?,Was ... unable to pay the utility bills?,Numeric,"THHLDSTATUS in (1,2,3,4)",All interviewed households (asked of reference...,"Descriptions, response codes, and universes ma...",Household
3,EAWBGAS,Adult and Child Well Being,Adult Well-Being,"2014 Wave 4, 2014 Wave 3, 2014 Wave 2, 2014 Wa...",1. Yes||2. No,Was ... unable to pay the utility bills?,Was ... unable to pay the utility bills?,Numeric,All interviewed households.,All interviewed households (asked of reference...,"Descriptions, response codes, and universes ma...",Household
4,EAWBHOLES,Adult and Child Well Being,Adult Well-Being,2018,1. Yes||2. No,Are there holes in the floor?,Are there holes in the floor?,Numeric,"THHLDSTATUS in (1,2,3,4)",All interviewed households (asked of reference...,"Descriptions, response codes, and universes ma...",Household


#### Topics & Subtopics

In [141]:
# List the distinct topic and subtopic labels that the filtering cell can use
for label_column in ('topic', 'subtopic'):
    print(sipp_dict[label_column].unique())

['Adult and Child Well Being' 'Assets' 'Child and Dependent Care'
 'Coverage' 'Demographics' 'Disability' 'Education Enrollment'
 'Fertility History' 'Health Care' 'Health Insurance' 'ID Variables'
 'Residences' 'Labor Force' 'Marital History'
 "Parents' Natality and Mortality" 'Poverty and Income' 'Programs'
 'Type 2 People']
['Adult Well-Being' 'Child Well-Being' 'Food Security'
 'Well-Being Recodes' 'Annuities and Trusts' 'Asset Type' 'Assets'
 'Assets Recodes' 'Business Value or Debt'
 'Business Value or Debt Details' 'Cars and Trucks'
 'Cars, Trucks, and Vans' 'Credit Card Debt - Self'
 'Educational Debt - Self' 'Educational Savings Accounts'
 'Joint Bank Accounts' 'Joint Credit Card Debt' 'Joint Educational Debt'
 'Joint Mutual Funds and Stocks' 'Joint Other Debt' 'Joint Real Estate'
 'Joint Rental Property' 'Life Insurance' 'Mobile Home Loan'
 'Mobile Homes' 'Other Debt - Self' 'Property' 'Property Loan'
 'Recreational Vehicles' 'Stocks and Mutual Funds' 'Unsecured Liabilities'


#### Filtering Cell

In [147]:
# Create boolean masks to drill down to dictionary variables.
# Every str.contains call passes na=False so a row with a missing value yields
# False instead of NaN: a NaN inside a mask makes `~` raise and makes boolean
# indexing reject the mask. regex=False keeps the search a literal substring match.

# Survey year filter: keep rows whose survey_years text mentions 2018
mask_2018 = sipp_dict.survey_years.str.contains('2018', regex=False, na=False)

# Topic filter
topic = 'Assets'                              # << Type topic you want to filter by here
list_topic = sipp_dict.topic.unique()         # Print this variable to view list of topics
mask_topic = sipp_dict.topic == topic

# Topic filter 2 (optional)
topic2 = 'Demographics'
mask_topic2 = sipp_dict.topic == topic2

# Subtopic filter
subtopic = 'Commuting and Work Schedule'      # << Type subtopic you want to filter by here
list_subtopic = sipp_dict.subtopic.unique()   # Print this variable to view list of all subtopics
mask_subtopic = sipp_dict.subtopic == subtopic

# Exclusion masks: drop rows whose description mentions a status flag or a
# suppressed variable. Rows with a missing description are kept.
mask_flag = ~sipp_dict.description.str.contains('Status Flag', regex=False, na=False)
mask_suppressed = ~sipp_dict.description.str.contains('Suppressed', regex=False, na=False)
mask_exclusions = mask_flag & mask_suppressed

# Combines all filter groups. Exclude a filter group by commenting out its line;
# include one by uncommenting it.
mask_final = (mask_2018
              & (mask_topic | mask_topic2)
              # & mask_subtopic               # off by default: uncomment to also filter by subtopic
              & mask_exclusions
             )

#### Create & Display Filtered Data Dictionary

In [148]:
# Show the filtered data dictionary without truncating long cell text
pd.set_option('display.max_colwidth', None)

# Columns kept in the browsable view of the dictionary
display_columns = ['variable', 'data_type', 'description', 'topic', 'subtopic', 'record_level']

# Apply the combined row mask and the column selection in a single step
filtered_data_dict = sipp_dict.loc[mask_final, display_columns]
filtered_data_dict.head()

Unnamed: 0,variable,data_type,description,topic,subtopic,record_level
159,EOWN_ANN,Numeric,Owned any annuities as of the last day of the reference period.,Assets,Annuities and Trusts,Person
160,EOWN_ANNEQ,Numeric,Whether respondent owned any equity in annuities.,Assets,Annuities and Trusts,Person
161,EOWN_TR,Numeric,Owned any trusts as of the last day of the reference period.,Assets,Annuities and Trusts,Person
162,EOWN_TREQ,Numeric,Whether respondent owned any equity in trusts.,Assets,Annuities and Trusts,Person
163,TANNINC,Numeric,Amount of income received from annuities during the reference period.,Assets,Annuities and Trusts,Person


#### Save Filtered Data Dictionary to csv

In [149]:
# Optional export: large tables are often easier to browse in a spreadsheet
# application than inside the notebook.
# The destination below points at the desktop folder on a Mac.
filepath = '~/desktop/filtered_data_dict.csv'

# index=False leaves the DataFrame row labels out of the written file
filtered_data_dict.to_csv(path_or_buf=filepath, index=False)