In [8]:
import pandas as pd
import numpy as np

In [9]:
# Load the merged county-level population / drug-death table and preview the first rows
df = pd.read_csv(filepath_or_buffer='../00_Source/merged_pop_drug_death.csv')
df.head(n=5)

Unnamed: 0,County,Year,State,Deaths,Population,death_rate
0,Adams County,2009,PA,12.0,101252.0,0.000119
1,Adams County,2010,PA,10.0,101407.0,9.9e-05
2,Adams County,2012,OH,10.0,28288.0,0.000354
3,Adams County,2014,OH,10.0,28060.0,0.000356
4,Adams County,2015,IL,14.0,66850.0,0.000209


In [10]:
# Split df into three comparison groups, one per list of states.
# .copy() gives each group its own frame so columns can be added to it later.
flor_cont = df.loc[df['State'].isin(['FL', 'PA', 'OH', 'GA'])].copy()
wash_cont = df.loc[df['State'].isin(['WA', 'AZ', 'MD', 'WI'])].copy()
texas_cont = df.loc[df['State'].isin(['TX', 'IL', 'NY', 'VA'])].copy()

# Sanity check: the three groups together must account for every row of df
group_sizes = [len(flor_cont), len(wash_cont), len(texas_cont)]
assert sum(group_sizes) == len(df)


In [11]:
# Flag the treatment arm within each group: rows from the group's policy state
# (FL, WA or TX) are labelled 'Policy'; rows from every other state 'No policy'.
for group, policy_state in ((flor_cont, 'FL'), (wash_cont, 'WA'), (texas_cont, 'TX')):
    is_policy_state = group['State'] == policy_state
    group['case'] = np.where(is_policy_state, 'Policy', 'No policy')
flor_cont.head()

Unnamed: 0,County,Year,State,Deaths,Population,death_rate,case
0,Adams County,2009,PA,12.0,101252.0,0.000119,No policy
1,Adams County,2010,PA,10.0,101407.0,9.9e-05,No policy
2,Adams County,2012,OH,10.0,28288.0,0.000354,No policy
3,Adams County,2014,OH,10.0,28060.0,0.000356,No policy
5,Adams County,2015,OH,13.0,27919.0,0.000466,No policy


In [12]:

# Flag the period within each group: rows whose Year is at or after the group's
# cutoff year are labelled 'post'; earlier rows are labelled 'pre'.
# NOTE(review): the cutoff years (2010 / 2011 / 2007) are hard-coded -- confirm
# they match the year each policy actually took effect.
for group, cutoff_year in ((flor_cont, 2010), (wash_cont, 2011), (texas_cont, 2007)):
    group['policy'] = np.where(group['Year'] >= cutoff_year, 'post', 'pre')

In [13]:
# Preview the first rows of the Florida comparison group
flor_cont.head(n=5)

Unnamed: 0,County,Year,State,Deaths,Population,death_rate,case,policy
0,Adams County,2009,PA,12.0,101252.0,0.000119,No policy,pre
1,Adams County,2010,PA,10.0,101407.0,9.9e-05,No policy,post
2,Adams County,2012,OH,10.0,28288.0,0.000354,No policy,post
3,Adams County,2014,OH,10.0,28060.0,0.000356,No policy,post
5,Adams County,2015,OH,13.0,27919.0,0.000466,No policy,post


In [17]:
# Mean of the county-year death_rate values for each (case, policy) combination
# in the Florida comparison group.
# death_rate appears to be a raw fraction (Deaths / Population), not a
# per-100,000 figure; multiply by 100_000 to read it as deaths per 100,000.
#
# TODO: numbers seem to have changed for Florida and are not the pattern
# I was expecting -- need to check the data.
# (This note is a comment on purpose: a trailing string literal would become
# the cell's last expression and be displayed instead of the table below.)
flor_cont.groupby(['case','policy'])['death_rate'].mean()



case       policy
No policy  post      0.000186
           pre       0.000123
Policy     post      0.000149
           pre       0.000143
Name: death_rate, dtype: float64

In [18]:
# Seeing the same trend for Washington
(
    wash_cont
    .groupby(['case', 'policy'])['death_rate']
    .mean()
)

case       policy
No policy  post      0.000157
           pre       0.000117
Policy     post      0.000139
           pre       0.000132
Name: death_rate, dtype: float64

In [19]:
# Mean death_rate for each (case, policy) combination in the Texas comparison group
(
    texas_cont
    .groupby(by=['case', 'policy'])['death_rate']
    .mean()
)

case       policy
No policy  post      0.000131
           pre       0.000107
Policy     post      0.000103
           pre       0.000100
Name: death_rate, dtype: float64