# Medicaid spending on prescription drugs

In [1]:
import pandas as pd
import numpy as np
# Show floats as fixed-point numbers with two decimals instead of scientific notation.
pd.set_option("display.float_format", lambda x: "%.2f" % x)
# Load the jupyternotify extension to enable browser notifications.
# NOTE(review): no %notify / %%notify call is visible in this notebook — confirm the extension is still needed.
%load_ext jupyternotify

<IPython.core.display.Javascript object>

## Import [data](https://www.nasbo.org/mainsite/reports-data/state-expenditure-report) on state expenditures from NASBO

In [2]:
# Sheet columns to keep: year, state, then two runs of five funding columns
# (positions 22-26 for Medicaid and 72-76 for all spending).
nasbo_column_positions = [0, 1] + list(range(22, 27)) + list(range(72, 77))
funding_sources = ["general", "federal", "other", "bonds", "total"]
# Friendly names, in the same order as the positions above.
nasbo_column_names = ["year", "state"] + [
    category + "_" + source
    for category in ("medicaid", "all_spending")
    for source in funding_sources
]
budgets = pd.read_excel(
    "data/medicaid_spending/1991-2017 Exp Report Data - 031518.xlsx",
    sheet_name="State Exp Report Data",
    usecols=nasbo_column_positions,
    names=nasbo_column_names,
)
budgets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1376 entries, 0 to 1375
Data columns (total 12 columns):
year                    1376 non-null int64
state                   1376 non-null object
medicaid_general        1355 non-null float64
medicaid_federal        1354 non-null float64
medicaid_other          1355 non-null float64
medicaid_bonds          1354 non-null float64
medicaid_total          1376 non-null float64
all_spending_general    1376 non-null float64
all_spending_federal    1376 non-null float64
all_spending_other      1376 non-null float64
all_spending_bonds      1376 non-null float64
all_spending_total      1376 non-null float64
dtypes: float64(10), int64(1), object(1)
memory usage: 129.1+ KB


Filter the data to the years 2008 and later.

In [3]:
# Keep rows whose year is 2008 or later and renumber the index from zero.
is_2008_or_later = budgets["year"] >= 2008
budgets = budgets.loc[is_2008_or_later].reset_index(drop=True)
budgets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 510 entries, 0 to 509
Data columns (total 12 columns):
year                    510 non-null int64
state                   510 non-null object
medicaid_general        500 non-null float64
medicaid_federal        500 non-null float64
medicaid_other          500 non-null float64
medicaid_bonds          500 non-null float64
medicaid_total          510 non-null float64
all_spending_general    510 non-null float64
all_spending_federal    510 non-null float64
all_spending_other      510 non-null float64
all_spending_bonds      510 non-null float64
all_spending_total      510 non-null float64
dtypes: float64(10), int64(1), object(1)
memory usage: 47.9+ KB


In [4]:
# Preview the first row to eyeball the imported columns and values.
budgets.head(1)

Unnamed: 0,year,state,medicaid_general,medicaid_federal,medicaid_other,medicaid_bonds,medicaid_total,all_spending_general,all_spending_federal,all_spending_other,all_spending_bonds,all_spending_total
0,2008,Alabama,471.0,2899.0,1030.0,0.0,4400.0,8460.0,6291.0,4537.0,552.0,19840.0


Create a column that combines all state funding sources (general, other and bonds).

In [5]:
# State-funded spending = general + other + bonds, computed for both spending groups.
# A missing value in any component leaves that row's sum missing.
for category in ("medicaid", "all_spending"):
    budgets[category + "_state"] = (
        budgets[category + "_general"]
        .add(budgets[category + "_other"])
        .add(budgets[category + "_bonds"])
    )
# The component columns are no longer needed once they are rolled up.
component_columns = [
    "medicaid_general", "medicaid_other", "medicaid_bonds",
    "all_spending_general", "all_spending_other", "all_spending_bonds",
]
budgets.drop(component_columns, axis=1, inplace=True)
budgets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 510 entries, 0 to 509
Data columns (total 8 columns):
year                    510 non-null int64
state                   510 non-null object
medicaid_federal        500 non-null float64
medicaid_total          510 non-null float64
all_spending_federal    510 non-null float64
all_spending_total      510 non-null float64
medicaid_state          500 non-null float64
all_spending_state      510 non-null float64
dtypes: float64(6), int64(1), object(1)
memory usage: 32.0+ KB


Spot check the post-calculation numbers.

In [6]:
# Alabama 2008, checked against the component values previewed earlier:
# 471 + 1030 + 0 = 1501 (medicaid_state) and 8460 + 4537 + 552 = 13549 (all_spending_state).
budgets.head(1)

Unnamed: 0,year,state,medicaid_federal,medicaid_total,all_spending_federal,all_spending_total,medicaid_state,all_spending_state
0,2008,Alabama,2899.0,4400.0,6291.0,19840.0,1501.0,13549.0


Reorder the columns.

In [7]:
# Order both spending groups the same way: state, federal, total.
column_order = ["year", "state"] + [
    group + "_" + source
    for group in ("medicaid", "all_spending")
    for source in ("state", "federal", "total")
]
budgets = budgets[column_order]
budgets.head(1)

Unnamed: 0,year,state,medicaid_state,medicaid_federal,medicaid_total,all_spending_state,all_spending_federal,all_spending_total
0,2008,Alabama,1501.0,2899.0,4400.0,13549.0,6291.0,19840.0


## Calculate the proportion of state budgets dedicated to Medicaid spending

In [8]:
# Medicaid's share of total spending, scaled to a 0-100 percentage.
medicaid_share = budgets["medicaid_total"].div(budgets["all_spending_total"])
budgets["medicaid_percentage_of_all_spending_total"] = medicaid_share.mul(100)
budgets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 510 entries, 0 to 509
Data columns (total 9 columns):
year                                         510 non-null int64
state                                        510 non-null object
medicaid_state                               500 non-null float64
medicaid_federal                             500 non-null float64
medicaid_total                               510 non-null float64
all_spending_state                           510 non-null float64
all_spending_federal                         510 non-null float64
all_spending_total                           510 non-null float64
medicaid_percentage_of_all_spending_total    500 non-null float64
dtypes: float64(7), int64(1), object(1)
memory usage: 35.9+ KB


What was the proportion of each state's 2016 budget dedicated to Medicaid spending?

In [9]:
# Rank the 2016 rows from the largest Medicaid share of total spending to the smallest.
budgets_2016 = budgets.loc[budgets["year"] == 2016]
budgets_2016.sort_values(by="medicaid_percentage_of_all_spending_total", ascending=False)

Unnamed: 0,year,state,medicaid_state,medicaid_federal,medicaid_total,all_spending_state,all_spending_federal,all_spending_total,medicaid_percentage_of_all_spending_total
442,2016,Ohio,19423.6,6016.9,25440.5,55000.7,12449.9,67450.6,37.72
432,2016,Missouri,4423.0,4730.0,9153.0,16951.58,7676.78,24628.36,37.16
445,2016,Pennsylvania,11947.0,15961.0,27908.0,49281.0,27073.0,76354.0,36.55
421,2016,Indiana,2718.0,8553.0,11271.0,18958.0,12448.0,31406.0,35.89
450,2016,Tennessee,4119.0,6859.0,10978.0,19366.0,12567.0,31933.0,34.38
451,2016,Texas,17778.0,23696.0,41474.0,76841.43,45913.0,122754.43,33.79
436,2016,New Hampshire,761.0,1197.0,1958.0,3676.0,2158.0,5834.0,33.56
426,2016,Maine,1050.0,1605.0,2655.0,5504.0,2536.0,8040.0,33.02
412,2016,California,28323.0,53337.0,81660.0,160209.0,90690.0,250899.0,32.55
416,2016,Florida,9734.0,13539.0,23273.0,47013.0,25306.0,72319.0,32.18


In 2016, Medicaid spending accounted for at least a quarter of 26 states' budgets.

## Import the CMS [Medicaid drug utilization data](https://www.medicaid.gov/medicaid/prescription-drugs/state-drug-utilization-data/index.html)

Query the Medicaid drug utilization API for each year and calculate the Medicaid amount paid.

In [10]:
# data.medicaid.gov resource id of the drug utilization dataset queried for each year.
DRUG_UTILIZATION_RESOURCES = {
    2016: "4kp3-zsqr",
    2015: "h5ii-2ar3",
    2014: "hz5c-g52b",
    2013: "kmyc-3n7k",
    2012: "mkpf-yey3",
    2011: "de79-47w8",
    2010: "dgte-3gu8",
    2009: "6tcm-8x84",
    2008: "bzcm-rrj6",
}

# Query string shared by every year: select the period and the summed reimbursement.
# NOTE(review): the query groups by state_code as well as period_covered, which would
# normally return one row per state code per year, yet the captured output below shows
# a single row per year. Confirm the intended grouping/filter before re-running.
DRUG_UTILIZATION_QUERY = "?$select=period_covered,sum(medicaid_amount_reimbursed)&$group=state_code,period_covered"


def fetch_drug_spending(year):
    """Query the drug utilization dataset for ``year`` and return the response as a DataFrame.

    Parameters
    ----------
    year : int
        A key of ``DRUG_UTILIZATION_RESOURCES``.

    Returns
    -------
    pandas.DataFrame
        The parsed JSON response of the aggregated query.

    Raises
    ------
    KeyError
        If no resource id is registered for ``year``.
    """
    url = (
        "https://data.medicaid.gov/resource/"
        + DRUG_UTILIZATION_RESOURCES[year]
        + ".json"
        + DRUG_UTILIZATION_QUERY
    )
    return pd.read_json(url)


# Keep one named frame per year; the concatenation cell refers to these names.
drug_spending_16 = fetch_drug_spending(2016)
drug_spending_15 = fetch_drug_spending(2015)
drug_spending_14 = fetch_drug_spending(2014)
drug_spending_13 = fetch_drug_spending(2013)
drug_spending_12 = fetch_drug_spending(2012)
drug_spending_11 = fetch_drug_spending(2011)
drug_spending_10 = fetch_drug_spending(2010)
drug_spending_09 = fetch_drug_spending(2009)
drug_spending_08 = fetch_drug_spending(2008)

Concatenate the dataframes into a single dataframe of spending in all years.

In [11]:
# Stack the yearly results (2016 first, 2008 last) under a fresh 0..n-1 index.
yearly_spending_frames = [
    drug_spending_16,
    drug_spending_15,
    drug_spending_14,
    drug_spending_13,
    drug_spending_12,
    drug_spending_11,
    drug_spending_10,
    drug_spending_09,
    drug_spending_08,
]
drug_spending = pd.concat(yearly_spending_frames, ignore_index=True)
drug_spending.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 2 columns):
period_covered                    9 non-null int64
sum_medicaid_amount_reimbursed    9 non-null float64
dtypes: float64(1), int64(1)
memory usage: 224.0 bytes


In [12]:
# Rename the API columns; "year" is the key used for the joins further down.
spending_column_names = {
    "period_covered": "year",
    "sum_medicaid_amount_reimbursed": "drug_spending",
}
drug_spending.rename(columns=spending_column_names, inplace=True)
drug_spending

Unnamed: 0,year,drug_spending
0,2016,61953536340.72
1,2015,55546271833.27
2,2014,46991952750.34
3,2013,37940450109.63
4,2012,37976860399.61
5,2011,37783429301.78
6,2010,32989906941.56
7,2009,26014836021.22
8,2008,24642404374.76


## Import the [Medicaid drug rebates data](https://www.medicaid.gov/medicaid/finance/state-expenditure-reporting/expenditure-reports/index.html) from the CMS MBES/CBES reports

In [13]:
rebates_dir = "data/medicaid_spending/"
rebate_columns = ["service_category", "drug_rebates"]
# Single workbook holding one sheet per year for the 2008-2011 imports.
multi_year_workbook = rebates_dir + "NetExpenditure02through11.xlsx"


def read_national_totals(path, sheet_name, header, skiprows=None):
    """Read the first two columns of an Excel sheet as service_category / drug_rebates.

    ``header`` and ``skiprows`` are passed straight through to ``pd.read_excel``.
    """
    return pd.read_excel(
        path,
        sheet_name=sheet_name,
        skiprows=skiprows,
        header=header,
        usecols=[0, 1],
        names=rebate_columns,
    )


# The 2016 file is a CSV that already carries a year column.
drug_rebates_16 = pd.read_csv(
    rebates_dir + "FY_2016_Financial_Management_Data_-_National_Totals.csv",
    header=0,
    usecols=[0, 3, 4],
    names=["year"] + rebate_columns,
)
drug_rebates_15 = read_national_totals(rebates_dir + "FY 2015 NET EXPENDITURES.xlsx", "MAP - National Totals", header=6)
drug_rebates_14 = read_national_totals(rebates_dir + "FMR Net Expenditures FY14.xlsx", "MAP - National Totals", header=6)
drug_rebates_13 = read_national_totals(rebates_dir + "FMR Net Expenditures FY13.xlsx", "MAP - National Totals", header=6)
drug_rebates_12 = read_national_totals(rebates_dir + "FMR Net Expenditures FY12.xlsx", "National Totals", header=6)
# For the following imports, skip all the rows until the "National Totals" data begins
drug_rebates_11 = read_national_totals(multi_year_workbook, "2011", header=7, skiprows=10605)
drug_rebates_10 = read_national_totals(multi_year_workbook, "2010", header=7, skiprows=10031)
drug_rebates_09 = read_national_totals(multi_year_workbook, "2009", header=7, skiprows=7303)
drug_rebates_08 = read_national_totals(multi_year_workbook, "2008", header=7, skiprows=7175)

Add a year column to the 2008 through 2015 dataframes.

In [14]:
# Only the 2016 import brings its own year column; tag every other frame explicitly.
frames_needing_year = [
    (2015, drug_rebates_15),
    (2014, drug_rebates_14),
    (2013, drug_rebates_13),
    (2012, drug_rebates_12),
    (2011, drug_rebates_11),
    (2010, drug_rebates_10),
    (2009, drug_rebates_09),
    (2008, drug_rebates_08),
]
for fiscal_year, rebates_frame in frames_needing_year:
    rebates_frame["year"] = fiscal_year

Concatenate the dataframes into a single dataframe of rebates in all years.

In [15]:
# Stack the yearly rebate tables into one frame with a fresh 0..n-1 index.
# The 2016 frame has its year column first while the others have it last, so the
# columns are not aligned and pandas emits a FutureWarning about its default sorting.
# sort=True pins the current (alphabetically sorted) column order and silences the warning.
yearly_rebate_frames = [
    drug_rebates_16,
    drug_rebates_15,
    drug_rebates_14,
    drug_rebates_13,
    drug_rebates_12,
    drug_rebates_11,
    drug_rebates_10,
    drug_rebates_09,
    drug_rebates_08,
]
drug_rebates = pd.concat(yearly_rebate_frames, sort=True).reset_index(drop=True)
drug_rebates.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1739 entries, 0 to 1738
Data columns (total 3 columns):
drug_rebates        1710 non-null float64
service_category    1724 non-null object
year                1739 non-null int64
dtypes: float64(1), int64(1), object(1)
memory usage: 40.8+ KB


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Filter the data to just the drug rebate line items (six categories in 2016).

In [16]:
# Service-category prefixes that identify the drug rebate line items.
rebate_prefixes = ["Drug Rebate Offset - ", "MCO - ", "Increased ACA OFFSET - "]

# service_category contains missing values, for which str.startswith would return NaN;
# na=False treats those rows as non-matches so the mask is strictly boolean.
service_category = drug_rebates["service_category"]
is_rebate_row = pd.Series(False, index=drug_rebates.index)
for prefix in rebate_prefixes:
    is_rebate_row = is_rebate_row | service_category.str.startswith(prefix, na=False)

drug_rebates = drug_rebates[is_rebate_row]
drug_rebates

Unnamed: 0,drug_rebates,service_category,year
75,-12885689016.0,Drug Rebate Offset - National,2016
76,-864806347.0,Drug Rebate Offset - State Sidebar Agreement,2016
77,-15859240388.0,MCO - National Agreement,2016
78,-230435858.0,MCO - State Sidebar Agreement,2016
79,-576527028.0,Increased ACA OFFSET - Fee for Service,2016
80,-776169673.0,Increased ACA OFFSET - MCO,2016
295,-10547657885.0,Drug Rebate Offset - National,2015
296,-842686786.0,Drug Rebate Offset - State Sidebar Agreement,2015
297,-10990020714.0,MCO - National Agreement,2015
298,-207156105.0,MCO - State Sidebar Agreement,2015


Calculate the total drug rebates by year.

In [17]:
# Add up all rebate line items within each year, then turn the year index back into a column.
rebates_by_year = drug_rebates.groupby("year")["drug_rebates"].sum()
drug_rebates = rebates_by_year.reset_index()
drug_rebates

Unnamed: 0,year,drug_rebates
0,2008,-8393320183.0
1,2009,-9721492081.0
2,2010,-11505122505.0
3,2011,-16117488352.0
4,2012,-17549478009.0
5,2013,-18274357490.0
6,2014,-19878420141.0
7,2015,-24030295066.0
8,2016,-31192868310.0


## Calculate drug spending less rebates

Join the drug spending and drug rebates dataframes.

In [18]:
# Pair each year's gross spending with that year's rebate total.
drugs = pd.merge(drug_spending, drug_rebates, on="year")
# Rebates are stored as negative amounts, so adding them subtracts them from spending.
drugs["spending_less_rebates"] = drugs["drug_spending"].add(drugs["drug_rebates"])
drugs

Unnamed: 0,year,drug_spending,drug_rebates,spending_less_rebates
0,2016,61953536340.72,-31192868310.0,30760668030.72
1,2015,55546271833.27,-24030295066.0,31515976767.27
2,2014,46991952750.34,-19878420141.0,27113532609.34
3,2013,37940450109.63,-18274357490.0,19666092619.63
4,2012,37976860399.61,-17549478009.0,20427382390.61
5,2011,37783429301.78,-16117488352.0,21665940949.78
6,2010,32989906941.56,-11505122505.0,21484784436.56
7,2009,26014836021.22,-9721492081.0,16293343940.22
8,2008,24642404374.76,-8393320183.0,16249084191.76


## Import data on Medicaid enrollment from [KFF](https://www.kff.org/)

In [19]:
# Read the enrollment workbook, naming its columns year and enrollment.
enrollment_columns = ["year", "enrollment"]
enrollment = pd.read_excel(
    "data/medicaid_spending/enrollment.xlsx",
    names=enrollment_columns,
)
enrollment

Unnamed: 0,year,enrollment
0,2008,59523604
1,2009,62692693
2,2010,66586779
3,2011,68039582
4,2012,71410849
5,2013,72332368
6,2014,80694100
7,2015,76041000
8,2016,76131500


Join the drugs data to the enrollment data.

In [20]:
# Attach the drug figures to enrollment by year, then keep only the net spending column.
spending_per_enrollee = (
    enrollment
    .merge(drugs, on="year")
    .drop(["drug_spending", "drug_rebates"], axis=1)
)
spending_per_enrollee

Unnamed: 0,year,enrollment,spending_less_rebates
0,2008,59523604,16249084191.76
1,2009,62692693,16293343940.22
2,2010,66586779,21484784436.56
3,2011,68039582,21665940949.78
4,2012,71410849,20427382390.61
5,2013,72332368,19666092619.63
6,2014,80694100,27113532609.34
7,2015,76041000,31515976767.27
8,2016,76131500,30760668030.72


## Calculate drug spending overall and per enrollee

In [21]:
# Divide each year's net drug spending by that year's enrollment.
net_spending = spending_per_enrollee["spending_less_rebates"]
enrollees = spending_per_enrollee["enrollment"]
spending_per_enrollee["spending_per_enrollee"] = net_spending / enrollees
spending_per_enrollee

Unnamed: 0,year,enrollment,spending_less_rebates,spending_per_enrollee
0,2008,59523604,16249084191.76,272.99
1,2009,62692693,16293343940.22,259.89
2,2010,66586779,21484784436.56,322.66
3,2011,68039582,21665940949.78,318.43
4,2012,71410849,20427382390.61,286.05
5,2013,72332368,19666092619.63,271.89
6,2014,80694100,27113532609.34,336.0
7,2015,76041000,31515976767.27,414.46
8,2016,76131500,30760668030.72,404.05


Overall drug spending increased nearly 90 percent from 2008 through 2016 while drug spending per enrollee increased nearly 50 percent over that time.

## Export the data for visualization and further analysis

In [22]:
# Write the three result tables to separate sheets of one workbook.
# The context manager saves and closes the file even if a to_excel call raises;
# it replaces the explicit writer.save(), which newer pandas releases no longer provide.
with pd.ExcelWriter("data/medicaid_spending.xlsx") as writer:
    budgets.to_excel(writer, sheet_name="Budgets", startcol=0, index=False)
    drugs.to_excel(writer, sheet_name="Drugs", startcol=0, index=False)
    spending_per_enrollee.to_excel(writer, sheet_name="Drug Spending Per Enrollee", startcol=0, index=False)