Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Medicare and Medicaid values in cps.csv.gz file #185

Merged
10 commits merged on Aug 10, 2018
Binary file modified cps_data/cps.csv.gz
Binary file not shown.
19 changes: 18 additions & 1 deletion cps_data/finalprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,25 @@ def adjust(data, targets):
def benefits(data, other_ben):
"""
Distribute benefits from non-models benefit programs and create total
benefits variable
benefits variable.
Replaces Medicare and Medicaid values with set amounts
"""
# replace medicare and medicaid
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code should run faster than lines 348-354:

medicare_cols = 'MCARE_VAL' + pd.Series((np.arange(16) + 1).astype(str))
medicaid_cols = 'MCAID_VAL' + pd.Series((np.arange(16) + 1).astype(str))

count_medicare = data[medicare_cols].astype(bool).sum(axis=1)
count_medicaid = data[medicaid_cols].astype(bool).sum(axis=1)

See https://drive.google.com/file/d/1Fw8rcvcERKs9llMf6dfOVAPuqwuqUUah for an example.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the tip!

medicare_cols = 'MCARE_VAL' + pd.Series((np.arange(15) + 1).astype(str))
medicaid_cols = 'MCAID_VAL' + pd.Series((np.arange(15) + 1).astype(str))
count_medicare = data[medicare_cols].astype(bool).sum(axis=1)
count_medicaid = data[medicaid_cols].astype(bool).sum(axis=1)
weighted_count_mcare = (count_medicare * data['s006']).sum()
weighted_count_mcaid = (count_medicaid * data['s006']).sum()
weighted_mcare = (data['mcare_ben'] * data['s006']).sum()
weighted_mcaid = (data['mcaid_ben'] * data['s006']).sum()
mcare_amt = weighted_mcare / weighted_count_mcare
mcaid_amt = weighted_mcaid / weighted_count_mcaid
data[medicare_cols] = data[medicare_cols].astype(bool) * mcare_amt
data[medicaid_cols] = data[medicaid_cols].astype(bool) * mcaid_amt
data['mcare_ben'] = data[medicare_cols].sum(axis=1)
data['mcaid_ben'] = data[medicaid_cols].sum(axis=1)

other_ben['2014_cost'] *= 1e6

# Distribute other benefits
Expand Down
6 changes: 3 additions & 3 deletions tests/cps_agg_expected.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@ filer 437439 0 1
fips 12469516 1 56
h_seq 22850538907 1 99461
housing_ben 60379435 0 53253
mcaid_ben 904042846 0 692753
mcare_ben 1700697749 0 691961
mcaid_ben 888211102 0 98440
mcare_ben 1778073024 0 92976
n1820 36555 0 6
n21 711316 0 11
n24 226097 0 10
nu05 45400 0 4
nu13 163487 0 10
nu18 281400 0 12
other_ben 512349609 0 232456
other_ben 507328438 0 53790
s006 16296813000 100 561900
snap_ben 141713659 0 26569
ssi_ben 103019670 0 64378
Expand Down
18 changes: 9 additions & 9 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,27 +181,27 @@ def check_cps_benefits(data):
expect_ben_stat = dict()
# .. maximum value per filing unit for benefit
expect_ben_stat['max'] = {
'mcare': 691961, # <--- will be fixed after switch to actuarial value
'mcaid': 692753, # <--- will be fixed after switch to actuarial value
'mcare': 92976,
'mcaid': 98440,
'ssi': 64378,
'snap': 26569,
'wic': 4972,
'tanf': 159407, # <--- SEEMS ABSURD ($13,284/month)
'housing': 53253,
'vet': 169920, # <--- HIGH ($14,160/month)VA hospital costs or what?
'other': 232456 # <--- SEEMS ABSURD ($19,371/month)
'other': 53790
}
# .. minimum value per filing unit for positive benefit
expect_ben_stat['min'] = {
'mcare': 1, # <--- will be fixed after switch to actuarial value
'mcaid': 2, # <--- will be fixed after switch to actuarial value
'mcare': 11622,
'mcaid': 7031,
'ssi': 1, # <--- SEEMS LOW
'snap': 9, # <--- SEEMS LOW
'wic': 241,
'tanf': 1, # <--- SEEMS LOW
'housing': 1265,
'vet': 9890, # <--- is this actuarial value of VA hospital costs?
'other': 1 # <--- SEEMS LOW
'other': 3
}
# .. mean value per filing unit of positive benefit
expect_ben_stat['avg'] = {
Expand All @@ -213,7 +213,7 @@ def check_cps_benefits(data):
'tanf': 9117,
'housing': 7048,
'vet': 29912,
'other': 4323
'other': 4321
}
# compare actual and expected benefit statistics
error_msg = ''
Expand All @@ -237,9 +237,9 @@ def check_cps_benefits(data):
exp_maxben = expect_ben_stat['max'][bname]
if not np.allclose([maxben], [exp_maxben], rtol=0, atol=0.1):
msg = '\nCPS {}_ben maxben={} != {}'
error_msg += msg.format(bname, minben, exp_maxben)
error_msg += msg.format(bname, maxben, exp_maxben)
expect_avgben = expect_ben_stat['avg'][bname]
if not np.allclose([avgben], [expect_avgben], rtol=0, atol=0.5):
if not np.allclose([avgben], [expect_avgben], rtol=0, atol=0.6):
msg = '\nCPS {}_ben avgben={:.2f} != {:.2f}'
error_msg += msg.format(bname, avgben, expect_avgben)
if error_msg:
Expand Down