This script imputes Temporary Assistance for Needy Families (TANF) recipients dollar benefit amount to match the aggregates with United States Department of Health & Human Services (HHS) statistics for TANF. In this current version, we used 2015 March CPS data and HHS FY2014 caseload and expenditures data on TANF. Please refer to the documentation in the same folder for more details on methodology and assumptions. The output this script is an individual level dataset that contains CPS personal level ID (PERIDNUM), individual participation indicator (tanf_participation, 0 - not a recipient, 1 - current recipient on file, 2 - imputed recipient), and benefit amount.

Input: 2015 CPS (asec2015_pubuse.csv), number of recipients and their benefits amount by state in 2014 (TANF_administrative.csv)

Output: TANF_Imputation.csv

Additional Source links: https://www.acf.hhs.gov/ofa/programs/tanf/data-reports 

In [1]:
import pandas as pd
from pandas import DataFrame
import numpy as np
import random
import statsmodels.formula.api as sm
import matplotlib.pyplot as plt

## Variables used in TANF

In [None]:
CPS_dataset = pd.read_csv('asec2015_pubuse.csv')
columns_to_keep = ['paw_val','paw_typ','paw_mon','fpawval','marsupwt','a_age','a_sex','wsal_val','semp_val','frse_val',
                  'ss_val','rtm_val','oi_val','oi_off','int_yn','uc_yn', 'uc_val','int_val','ssi_yn','ssikidyn',
                  'hfoodsp','a_famnum','a_maritl','fownu6','fownu18','gestfips','peridnum', 'h_seq', 'ffpos', 'i_pawtyp',
                 'fpersons']
CPS_dataset = CPS_dataset[columns_to_keep]
CPS_dataset.to_csv('TANF.csv', columns=columns_to_keep, index=False)

In [27]:
CPS_dataset = pd.read_csv('TANF.csv')

In [28]:
mapping = {'None or not in universe': 0, 'Not in universe': 0, 'None': 0}
CPS_dataset.replace(mapping, inplace=True)
income_var = ['paw_val', 'fpawval', 'wsal_val', 'semp_val', 'frse_val']
CPS_dataset[income_var] = CPS_dataset[income_var].astype(int).values

In [31]:
tanf_fam = CPS_dataset[['h_seq', 'ffpos', 'paw_mon']][(CPS_dataset.paw_typ== 'TANF/AFDC')&(CPS_dataset.a_age>'18')]
tanf_fam.rename(columns={'paw_mon':'paw_mon_child'}, inplace=True)
tanf_fam['tanf_fam'] = np.ones(len(tanf_fam))

In [32]:
CPS_dataset = CPS_dataset.merge(tanf_fam, on=['h_seq', 'ffpos'], how='left')

In [33]:
CPS_dataset.loc[(CPS_dataset.tanf_fam==1)&(CPS_dataset.a_age<='18'), 'paw_typ'] = 'TANF/AFDC'
CPS_dataset.paw_mon = np.where(CPS_dataset.paw_typ=='TANF/AFDC', CPS_dataset.paw_mon_child, CPS_dataset.paw_mon)

In [36]:
CPS_dataset['temp_indicator'] = np.where(CPS_dataset.paw_typ=='TANF/AFDC', 1, 0)
num_per_fam = CPS_dataset[['h_seq', 'ffpos', 'temp_indicator']].groupby(['h_seq', 'ffpos'], as_index=False).sum()

In [39]:
num_per_fam.rename(columns={'temp_indicator':'num_per_fam'}, inplace=True)

In [41]:
CPS_dataset = CPS_dataset.merge(num_per_fam, on=['h_seq', 'ffpos'], how='left')

In [43]:
CPS_dataset['paw_avg'] = np.divide(CPS_dataset.fpawval, CPS_dataset.num_per_fam + 0.0000001)
CPS_dataset.paw_val = np.where(CPS_dataset.paw_typ=='TANF/AFDC', CPS_dataset.paw_avg, CPS_dataset.paw_val)

In [44]:
# TANF value & indicator
tanfvalue = pd.to_numeric(np.where(CPS_dataset.paw_typ== 'TANF/AFDC', CPS_dataset.paw_val, 0))
indicator = pd.to_numeric(np.where(CPS_dataset.paw_typ== 'TANF/AFDC', 1, 0))

In [45]:
TANF = DataFrame(tanfvalue.transpose())
TANF.columns = ['tanfvalue']

In [46]:
TANF['indicator'] = indicator
TANF['marsupwt'] = CPS_dataset.marsupwt
TANF['gestfips'] = CPS_dataset.gestfips
TANF['peridnum'] = CPS_dataset.peridnum

In [47]:
# Number of month receiving TANF
month = np.where(CPS_dataset.paw_mon == 'Twelve', 12, CPS_dataset.paw_mon)
month = np.where(CPS_dataset.paw_mon == 'One', 1, month)
month = pd.to_numeric(month)
TANF['month'] = month

In [48]:
#individual earned income
TANF['earned'] = CPS_dataset.wsal_val + CPS_dataset.semp_val + CPS_dataset.frse_val

In [49]:
# Unearned income
ss = pd.to_numeric(np.where(CPS_dataset.ss_val!='None or not in universe', CPS_dataset.ss_val, 0))
pension = pd.to_numeric(np.where(CPS_dataset.rtm_val!='None or not in universe', CPS_dataset.rtm_val, 0))
disability = pd.to_numeric(np.where(CPS_dataset.oi_off=='State disability payments', CPS_dataset.oi_val, 0))
unemploy = pd.to_numeric(np.where(CPS_dataset.uc_yn=='Yes', CPS_dataset.uc_val, 0))
interest = pd.to_numeric(np.where(CPS_dataset.int_yn=='Yes', CPS_dataset.int_val, 0))
unearned = ss + pension + disability + unemploy + interest #individual unearned income
TANF['unearned'] = unearned

In [50]:
# Net Income
TANF['net_income'] = TANF.earned + unearned

In [51]:
# Family TANF value
TANF['familyvalue'] = CPS_dataset.fpawval
familyindicator = np.where(TANF['familyvalue'] != 0, 1,0)
familyotherTANFindicator = familyindicator - indicator
TANF['indicatorOther'] = familyotherTANFindicator                               

In [52]:
# Prepare age information
age = np.where(CPS_dataset.a_age == "80-84 years of age",
                             random.randrange(80, 84),
                             CPS_dataset.a_age)
age = np.where(CPS_dataset.a_age == "85+ years of age",
                             random.randrange(85, 95),
                             age)
TANF['a_age'] = pd.to_numeric(age)

In [53]:
# Prepare gender inforamtion, 0 for male, 1 for female
TANF['sex'] = pd.to_numeric(np.where(CPS_dataset.a_sex == 'Male', 0, 1))

In [54]:
# Number of children
childunder6 = np.where(CPS_dataset.fownu6 == 'None, not in universe', 0, CPS_dataset.fownu6)
TANF['childunder6'] = pd.to_numeric(childunder6)
childunder18 = np.where(CPS_dataset.fownu18 == 'None, not in universe', 0, CPS_dataset.fownu18)
childunder18 = np.where(CPS_dataset.fownu18 == '9 or more', 9, childunder18)
TANF['childunder18'] = pd.to_numeric(childunder18)
TANF['child6to18'] = TANF['childunder18'] - TANF['childunder6']

In [55]:
# Status of Marriage
marriage = np.where(CPS_dataset.a_maritl == 'Married - civilian spouse', 1, 0)
marriage = np.where(CPS_dataset.a_maritl == 'Married - AF spouse present', 1, marriage)
marriage = np.where(CPS_dataset.a_maritl == 'Married - spouse absent (exc.', 1, marriage)
marriage = np.where(CPS_dataset.a_maritl == 'Separated', 1, marriage)
TANF['marriage'] = pd.to_numeric(marriage)

In [56]:
TANF['unemploy_indicator'] = np.where(CPS_dataset.uc_yn=='Yes', 1, 0)
TANF['ssi_indicator'] = np.where((CPS_dataset.ssi_yn=='Yes')|(CPS_dataset.ssikidyn=='Received SSI'), 1, 0)
TANF['snap_indicator'] = np.where(CPS_dataset.hfoodsp == "Yes",1,0)

## Regression Model

In [57]:
TANF['intercept'] = np.ones(len(TANF))
model = sm.Logit(endog=TANF.indicator, exog=TANF[['intercept','a_age', 'sex', 
                                                  'childunder6','child6to18', 'earned', 'unearned', 
                                                  'unemploy_indicator','ssi_indicator','snap_indicator',
                                                  'marriage']]).fit()
print model.summary()

Optimization terminated successfully.
         Current function value: 0.031598
         Iterations 12
                           Logit Regression Results                           
Dep. Variable:              indicator   No. Observations:               199099
Model:                          Logit   Df Residuals:                   199088
Method:                           MLE   Df Model:                           10
Date:                Fri, 08 Sep 2017   Pseudo R-squ.:                  0.2488
Time:                        15:15:05   Log-Likelihood:                -6291.1
converged:                       True   LL-Null:                       -8374.9
                                        LLR p-value:                     0.000
                         coef    std err          z      P>|z|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
intercept             -6.6683      0.102    -65.229      0.000        -6.869    -6.468
a_ag

In [58]:
probs = model.fittedvalues
TANF['probs'] = probs

## Import administrative data

In [59]:
admin = pd.read_csv('TANF_Administrative_1.csv',
                    dtype = {'Total Annual Benefits': np.float, 'Total Annual Recipient': np.float, 'Average Annual Benefits' : np.float})
admin.index = admin.Fips

In [60]:
# CPS total benefits and Administrative total benefits, annual basis
state_benefit = {}
state_recipients = {}
for state in admin.Fips:
    this_state = (TANF.gestfips==state)
    CPS_totalb = (TANF.tanfvalue * TANF.marsupwt)[this_state].sum()
    admin_totalb =  admin['Total Annual Benefits'][state] 
    CPS_totaln = (TANF.marsupwt[this_state & TANF.indicator==1] * TANF.month / 12).sum()
    admin_totaln =  admin['Total Annual Recipient'][state]

    temp = [admin.State[state], CPS_totalb, admin_totalb, CPS_totaln, admin_totaln]
    state_benefit[state] = temp
    
pre_augment_benefit = DataFrame(state_benefit).transpose()
pre_augment_benefit.columns = ['State', 'CPS total benefits','Admin total benefits',
                               'CPS total recipients','Admin total recipients']

In [61]:
pre_augment_benefit.to_csv('pre-blow-up.csv')

## Imputation

In [62]:
# caculate difference of SNAP stats and CPS aggregates on recipients number
# by state
diff = {'Fips':[],'Difference in Population':[],'Mean Benefit':[],'CPS Population':[],'TANF Population':[]}
diff['Fips'] = admin.Fips
current = (TANF.indicator==1)
for FIPS in admin.Fips:
        this_state = (TANF.gestfips==FIPS)
        current_tots = (TANF.marsupwt[current&this_state]*TANF.month).sum()/12
        valid_num = (TANF.marsupwt[current&this_state]*TANF.month).sum()/12 + 0.0000001
        current_mean = ((TANF.tanfvalue * TANF.marsupwt)[current&this_state].sum())/valid_num
        diff['CPS Population'].append(current_tots)
        diff['TANF Population'].append(float(admin['Total Annual Recipient'][admin.Fips == FIPS]))
        diff['Difference in Population'].append(float(admin['Total Annual Recipient'][admin.Fips == FIPS])- current_tots)
        diff['Mean Benefit'].append(current_mean)

In [63]:
d = DataFrame(diff)
d = d[['Fips', 'Mean Benefit', 'Difference in Population', 'CPS Population', 'TANF Population']]
d.index = d.Fips
d.to_csv('recipients.csv', index=False)

In [64]:
TANF['impute'] = np.zeros(len(TANF))
TANF['tanf_impute'] = np.zeros(len(TANF))

non_current = (TANF.indicator==0)
current = (TANF.indicator==1)
random.seed()

for FIPS in admin.Fips:
    
        print ('we need to impute', d['Difference in Population'][FIPS], 'for state', FIPS)
        
        if d['Difference in Population'][FIPS] < 0:
            continue
        else:
            this_state = (TANF.gestfips==FIPS)
            not_imputed = (TANF.impute==0)
            children = (TANF.a_age<=18)
            pool_index = TANF[this_state&not_imputed&non_current].index
            pool = DataFrame({'weight': TANF.marsupwt[pool_index], 'prob': probs[pool_index]},
                            index=pool_index)
            pool = pool.sort_values(by='prob', ascending=False)
            pool['cumsum_weight'] = pool['weight'].cumsum()
            pool['distance'] = abs(pool.cumsum_weight-d['Difference in Population'][FIPS])
            min_index = pool.sort_values(by='distance')[:1].index
            min_weight = int(pool.loc[min_index].cumsum_weight)
            pool['impute'] = np.where(pool.cumsum_weight<=min_weight+10 , 1, 0)
            TANF.impute[pool.index[pool['impute']==1]] = 1
            TANF.tanf_impute[pool.index[pool['impute']==1]] = admin['Average Annual Benefits'][FIPS]
           
        print ('Method1: regression gives', 
                TANF.marsupwt[(TANF.impute==1)&this_state].sum())

('we need to impute', 13800.659170000003, 'for state', 1)
('Method1: regression gives', 14270.02)
('we need to impute', 2623.2450003333333, 'for state', 2)
('Method1: regression gives', 2739.67)
('we need to impute', 20803.223336666666, 'for state', 4)
('Method1: regression gives', 20425.72)
('we need to impute', 6891.2933366666666, 'for state', 5)
('Method1: regression gives', 6209.34)
('we need to impute', 852548.30450000009, 'for state', 6)
('Method1: regression gives', 852447.8)
('we need to impute', 32943.81583, 'for state', 8)
('Method1: regression gives', 32840.06999999999)
('we need to impute', 3989.2816700000003, 'for state', 9)
('Method1: regression gives', 4468.6)
('we need to impute', 8661.8408299999992, 'for state', 10)
('Method1: regression gives', 8632.78)
('we need to impute', 6259.1741699999966, 'for state', 11)
('Method1: regression gives', 6260.3099999999995)
('we need to impute', 15007.385836666683, 'for state', 12)
('Method1: regression gives', 14590.65)
('we need 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [65]:
#Adjustment ratio
results = {}

imputed = (TANF.impute == 1)
has_val = (TANF.tanfvalue != 0)
no_val = (TANF.tanfvalue == 0)

for FIPS in admin.Fips:
    this_state = (TANF.gestfips==FIPS)
    
    current_total = (TANF.tanfvalue * TANF.marsupwt)[this_state].sum() 
    imputed_total = (TANF.tanf_impute * TANF.marsupwt)[this_state&imputed].sum()
    on_file = current_total + imputed_total

    admin_total = admin['Total Annual Benefits'][FIPS]
    
    adjust_ratio = admin_total / on_file
    this_state_num = [admin['State'][FIPS], on_file, admin_total, adjust_ratio]
    results[FIPS] = this_state_num
    

    TANF.tanf_impute = np.where(has_val&this_state, TANF.tanfvalue * adjust_ratio, TANF.tanf_impute)
    TANF.tanf_impute = np.where(no_val&this_state, TANF.tanf_impute * adjust_ratio, TANF.tanf_impute)

TANF["tanf_participation"] = np.zeros(len(TANF))
TANF["tanf_participation"] = np.where(TANF.impute==1, 2, 0)#Augmented
TANF["tanf_participation"] = np.where(has_val, 1, TANF.tanf_participation)#CPS 


r = DataFrame(results).transpose()
r.columns=['State', 'Imputed', 'Admin', 'adjust ratio']
r.to_csv('amount.csv', index=False)

In [69]:
TANF.to_csv('TANF_Imputation.csv', 
                   columns=['peridnum', 'tanf_participation','tanf_impute', 'tanfvalue'],
                   index=False)