Filter the public use datasets for support providers

In [1]:
import pandas as pd

Set some options

In [2]:
# Show up to 150 columns when a frame is displayed.
pd.options.display.max_columns = 150
# Render floats with thousands separators.
pd.set_option("display.float_format", '{:,}'.format)

Load the public use data

In [3]:
def load_wave(filename):
    """Load one wave of the public use data and derive the support columns.

    Parameters
    ----------
    filename : str or path-like
        CSV file to read. The ``uid`` column is read with object dtype, so
        it is kept as text rather than parsed as a number.

    Returns
    -------
    pandas.DataFrame
        The contents of ``filename`` with these columns added or replaced:

        - ``supports_parents``: True where ``eothsuprt1yn`` equals 1.
        - ``supports_children``: True where ``eothsuprt2yn`` equals 1.
        - ``tpartotamt`` / ``tkidtotamt``: missing values replaced by 0.
    """
    wave = (
        # Read the file the caller asked for. The path used to be hard-coded
        # to the wave 1 file, so every wave loaded identical data.
        pd.read_csv(filename, dtype = {"uid": "object"})
        .assign(
            supports_parents = lambda x: x['eothsuprt1yn'] == 1,
            supports_children = lambda x: x['eothsuprt2yn'] == 1,
            tpartotamt = lambda x: x['tpartotamt'].fillna(0),
            tkidtotamt = lambda x: x['tkidtotamt'].fillna(0)
        )
    )

    return wave

In [4]:
# Load waves 1 and 4 of the public use data through the shared loader.
wave1 = load_wave("../output/wave1.csv")
wave4 = load_wave("../output/wave4.csv")

## People who support parents

Label each person with an age group and a generation, then keep only the `monthcode` 12 rows for people who support their parents

In [5]:
def group_age(age):
    """Return the age-bracket label for a numeric age.

    Brackets are checked from youngest to oldest, and the first one whose
    exclusive upper bound is above ``age`` wins. Anything that matches no
    bracket is labelled "60 years and over"; that includes NaN, because
    every comparison against NaN is False.
    """
    brackets = (
        (30, "under 30"),
        (40, "30-39"),
        (50, "40-49"),
        (60, "50-59"),
    )
    for upper_bound, label in brackets:
        if age < upper_bound:
            return label
    return "60 years and over"

In [6]:
def group_generation(year):
    """Return the generation label for a year of birth.

    Each generation is defined by its last (inclusive) birth year, and the
    earliest generation whose cutoff is not below ``year`` is returned.
    Years after 2019, and NaN, match no cutoff and yield None.
    """
    last_birth_years = (
        (1945, "Silent"),
        (1964, "Boomer"),
        (1980, "Generation x"),
        (1996, "Millennials"),
        (2019, "Gen Z"),
    )
    matches = (label for cutoff, label in last_birth_years if year <= cutoff)
    return next(matches, None)

In [7]:
def filter_parents(wave):
    """Select the month-12 rows of people who support their parents.

    Keeps rows of ``wave`` where ``monthcode`` equals 12.0 and
    ``supports_parents`` is True, then adds two label columns:

    - ``age_group``: ``group_age`` applied to ``tage`` (age as of last
      birthday).
    - ``generation``: ``group_generation`` applied to ``tdob_byear`` (year
      of birth).

    Every missing value left in the result is replaced by 0, including any
    label for which a helper returned None. Returns a new DataFrame.
    """
    in_month_12 = wave['monthcode'].eq(12.0)
    gives_support = wave['supports_parents'].eq(True)
    supporters = wave.loc[in_month_12 & gives_support]

    labelled = supporters.assign(
        age_group = supporters['tage'].apply(group_age),
        generation = supporters['tdob_byear'].apply(group_generation),
    )

    return labelled.fillna(0)

In [8]:
# Build the parent-support subset for each wave.
parents1 = filter_parents(wave1)
parents4 = filter_parents(wave4)

In [9]:
# Save each parent-support subset as CSV; index = False keeps the row index out of the file.
parents1.to_csv("../output/w1_supports_parents.csv", index = False)
parents4.to_csv("../output/w4_supports_parents.csv", index = False)

## People who support children

In [10]:
def filter_children(wave):
    """Select the month-12 rows of people who support their children.

    Keeps rows of ``wave`` where ``monthcode`` equals 12.0 and
    ``supports_children`` is True, then adds two label columns:

    - ``age_group``: ``group_age`` applied to ``tage`` (age as of last
      birthday).
    - ``generation``: ``group_generation`` applied to ``tdob_byear`` (year
      of birth).

    Every missing value left in the result is replaced by 0, including any
    label for which a helper returned None. Returns a new DataFrame.
    """
    keep = wave['monthcode'].eq(12.0) & wave['supports_children'].eq(True)
    result = wave.loc[keep]

    # Derive both labels from the filtered rows only.
    age_labels = result['tage'].apply(group_age)
    generation_labels = result['tdob_byear'].apply(group_generation)

    result = result.assign(age_group = age_labels, generation = generation_labels)
    result = result.fillna(0)

    return result

In [11]:
# Build the child-support subset for each wave.
children1 = filter_children(wave1)
children4 = filter_children(wave4)

Write the filtered frames out to CSV

In [12]:
# Save each child-support subset as CSV; index = False keeps the row index out of the file.
children1.to_csv("../output/w1_supports_children.csv", index = False)
children4.to_csv("../output/w4_supports_children.csv", index = False)

---
---
---