In [15]:
import numpy as np
import pandas as pd
import altair as alt

In [16]:
# load in data
opioid_deaths = pd.read_parquet("C:\MIDS\IDS720\opioid\\vital_pop.parquet", engine='fastparquet')
opioid_buyers = pd.read_parquet("C:\MIDS\IDS720\opioid\opi_pop.parquet", engine='fastparquet')

In [17]:
opioid_buyers=opioid_buyers.rename(columns={"MME per cap":"MME_per_cap"})
opioid_deaths=opioid_deaths.rename(columns={"vital deaths per cap":"deaths_per_cap"})
opioid_buyers.sample(10)

Unnamed: 0_level_0,BUYER_STATE,BUYER_COUNTY,fips,year,MME,Population,MME_per_cap,treatment
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3783,MS,JEFFERSON DAVIS,28065,2006,2735519.0,12928.0,211.596426,0
6183,SC,KERSHAW,45055,2012,19459500.0,62293.0,312.386558,0
4016,MS,PONTOTOC,28115,2014,8006018.0,30783.0,260.079215,0
7612,TX,MAVERICK,48323,2011,2927411.0,55260.0,52.975232,0
8651,WA,SKAMANIA,53059,2011,2855337.0,11154.0,255.9922,0
5280,OK,LE FLORE,40079,2006,16837130.0,49209.0,342.155429,0
4225,MS,YAZOO,28163,2007,3313749.0,28641.0,115.699499,0
7591,TX,MASON,48319,2008,396212.9,3876.0,102.222109,0
5604,OK,WOODS,40151,2006,2401776.0,8550.0,280.909461,0
3098,LA,EAST FELICIANA,22037,2014,3241498.0,19677.0,164.735358,0


In [18]:
opioid_buyers['year'].unique()

array([2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014], dtype=int64)

In [19]:
opioid_deaths['Year'].unique()

array([2003, 2005, 2006, 2007, 2009, 2010, 2011, 2004, 2008, 2012, 2013,
       2014, 2015], dtype=int64)

In [20]:
## Nick's code for confidence bands 
def get_reg_fit(data, yvar, xvar, alpha=0.05,col="blue"):
    import statsmodels.formula.api as smf

    # Grid for predicted values
    x = data.loc[pd.notnull(data[yvar]), xvar]
    xmin = x.min()
    xmax = x.max()
    step = (xmax - xmin) / 100
    grid = np.arange(xmin, xmax + step, step)
    predictions = pd.DataFrame({xvar: grid})

    # Fit model, get predictions
    model = smf.ols(f"{yvar} ~ {xvar}", data=data).fit()
    model_predict = model.get_prediction(predictions[xvar])
    predictions[yvar] = model_predict.summary_frame()["mean"]
    predictions[["ci_low", "ci_high"]] = model_predict.conf_int(alpha=alpha)

    # Build chart
    reg = alt.Chart(predictions).mark_line(color=col).encode(
        x=alt.X(xvar, axis=alt.Axis(title='Years from Policy Change')),
        y=alt.X(yvar, axis=alt.Axis(title='')))
    ci = (
        alt.Chart(predictions)
        .mark_errorband(color=col)
        .encode(
            x=xvar,
            y=alt.Y("ci_low", title=""),
            y2="ci_high",
        )
    )
    chart = ci + reg
    return predictions, chart

# Florida plots

In [21]:
# Creating florida buyers df
states = ["SC","AL","GA","MS", "FL"]
opioid_buyers_fl=opioid_buyers[opioid_buyers["BUYER_STATE"].isin(states)]

#create column, to indicate whether control or not (1 for control, 0 for florida)
opioid_buyers_fl["is_control"]=np.where(opioid_buyers_fl["BUYER_STATE"]== 'FL', 0, 1)

#create policy years 
opioid_buyers_fl["policy_years"]= opioid_buyers_fl["year"]-2010
fl_opi_before=opioid_buyers_fl[opioid_buyers_fl["policy_years"]<0]
fl_opi_after=opioid_buyers_fl[opioid_buyers_fl["policy_years"]>=0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_buyers_fl["is_control"]=np.where(opioid_buyers_fl["BUYER_STATE"]== 'FL', 0, 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_buyers_fl["policy_years"]= opioid_buyers_fl["year"]-2010


In [22]:
#create vertical line for plots
data = pd.DataFrame({"a": [0]})
sep_line = (alt.Chart(data).mark_rule(color="black", strokeDash=[10, 10]).encode(x=alt.X("a:Q", title="")))

In [23]:
# florida buyers pre/post plot
# Stuffs for making legends
legend = alt.Chart(fl_opi_before).transform_calculate(fl= "'Florida'", control="'control'")
scale = alt.Scale(domain=["Florida", "Control"], range=['red', 'blue'])

before = alt.Chart(
    fl_opi_before, title="Pre-Post Policy: Florida, Opioids per cap"
).encode(x="policy_years", y=alt.Y("MME_per_cap", title="Opioids per Cap"), color=alt.Color('fl:N', scale=scale, title=''))

base = before.transform_regression("policy_years", "MME_per_cap").mark_line()


after = alt.Chart(
    fl_opi_after, title="Pre-Post Policy: Florida, Opioids per cap"
).encode(x="policy_years", y="MME_per_cap", color=alt.Color('control:N', scale=scale, title=''))

base2 = after.transform_regression("policy_years", "MME_per_cap").mark_line()

fit, before_line = get_reg_fit(
    fl_opi_before[fl_opi_before["is_control"] == 1],
    yvar="MME_per_cap",
    xvar="policy_years",
    alpha=0.05,
    col="blue",
)
fit, before_line2 = get_reg_fit(
    fl_opi_before[fl_opi_before["is_control"] == 0],
    yvar="MME_per_cap",
    xvar="policy_years",
    alpha=0.05,
    col="red"
)
fit, after_line = get_reg_fit(
    fl_opi_after[fl_opi_after["is_control"] == 1],
    yvar="MME_per_cap",
    xvar="policy_years",
    alpha=0.05,
    col="blue",
)
fit, after_line2 = get_reg_fit(
    fl_opi_after[fl_opi_after["is_control"] == 0],
    yvar="MME_per_cap",
    xvar="policy_years",
    alpha=0.05,
    col="red"
)

fl_pre_post = base + before_line2 + after_line2 +sep_line
fl_pre_post

In [24]:
# diff-in-diff
before = alt.Chart(
    fl_opi_before, title="Difference in Difference: Florida vs. Control, Opioids per cap"
).encode(x="policy_years", y=alt.Y("MME_per_cap", title="Opioid per Cap"), color=alt.Color('fl:N', scale=scale, title=''))

base = before.transform_regression("policy_years", "MME_per_cap").mark_line()

after = alt.Chart(
    fl_opi_after, title="Difference in Difference: Florida vs. Control, Opioids per cap"
).encode(x="policy_years", y="MME_per_cap", color=alt.Color('control:N', scale=scale, title=''))

fl_chart_opioid = (
    base + base2 + before_line + before_line2 + after_line + after_line2 + sep_line
)

fl_chart_opioid

In [25]:
# Creating florida deathd df
opioid_deaths_fl=opioid_deaths[opioid_deaths["STNAME"]=="Florida"]
opioid_deaths_fl["policy_years"]= opioid_deaths_fl["Year"]-2010

fl_death_before=opioid_deaths_fl[opioid_deaths_fl["policy_years"]<0]
fl_death_after=opioid_deaths_fl[opioid_deaths_fl["policy_years"]>=0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_deaths_fl["policy_years"]= opioid_deaths_fl["Year"]-2010


In [26]:
# florida deaths pre/post plot
before = alt.Chart(
    fl_death_before, title="Pre-Post Policy: Florida, Overdose Deaths per cap"
).encode(x="policy_years", y=alt.Y("deaths_per_cap", title="Deaths per Cap"), color=alt.Color('fl:N', scale=scale, title=''))

base = before.transform_regression("policy_years", "deaths_per_cap").mark_line()

fit, before_line = get_reg_fit(
    fl_death_before, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="red"
)
fit, after_line = get_reg_fit(
    fl_death_after, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="red"
)

fl_chart_death = base + before_line + after_line + sep_line
fl_chart_death

In [27]:
# Creating control dataframes
states = ["SC","AL","GA","MS"]
states2 = ["South Carolina","Alabama","Georgia","Mississippi"]

opioid_buyers_con=opioid_buyers[opioid_buyers["BUYER_STATE"].isin(states)]
opioid_buyers_con.sample(10)
opioid_buyers_con["policy_years"]= opioid_buyers_con["year"]-2010

con_opi_before=opioid_buyers_con[opioid_buyers_con["policy_years"]<0]
con_opi_after=opioid_buyers_con[opioid_buyers_con["policy_years"]>=0]


opioid_deaths_con=opioid_deaths[opioid_deaths["STNAME"].isin(states2)]
opioid_deaths_con["policy_years"]= opioid_deaths_con["Year"]-2010


con_death_before=opioid_deaths_con[opioid_deaths_con["policy_years"]<0]
con_death_after=opioid_deaths_con[opioid_deaths_con["policy_years"]>=0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_buyers_con["policy_years"]= opioid_buyers_con["year"]-2010
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_deaths_con["policy_years"]= opioid_deaths_con["Year"]-2010


In [28]:
#Difference in difference plot, deaths, FL vs control
fit, before_line = get_reg_fit(
    con_death_before, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="blue"
)

fit, after_line = get_reg_fit(
    con_death_after, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="blue"
)

before = alt.Chart(
    con_death_before,
    title="Difference in Difference: Florida vs. Control, Overdose Deaths per cap",
).encode(x="policy_years", y=alt.Y("deaths_per_cap", title="Deaths per Cap"), color=alt.Color('fl:N', scale=scale, title=''))

base = before.transform_regression("policy_years", "deaths_per_cap").mark_line(
    color="#FFAA00"
)

con_chart_death = base + before_line + after_line + sep_line
diff_fl_death=con_chart_death+fl_chart_death+sep_line
diff_fl_death

# Texas Plots

In [29]:
# creating texas dataframes
opioid_deaths_tx=opioid_deaths[opioid_deaths["STNAME"]=="Texas"]
opioid_deaths_tx["policy_years"]= opioid_deaths_tx["Year"]-2007

tx_death_before=opioid_deaths_tx[opioid_deaths_tx["policy_years"]<0]
tx_death_after=opioid_deaths_tx[opioid_deaths_tx["policy_years"]>=0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_deaths_tx["policy_years"]= opioid_deaths_tx["Year"]-2007


In [30]:
# Creating texas control dataframes
states = ["Oklahoma","Lousiana","New Mexico"]

opioid_deaths_con=opioid_deaths[opioid_deaths["STNAME"].isin(states)]
opioid_deaths_con["policy_years"]= opioid_deaths_con["Year"]-2007

con_death_before=opioid_deaths_con[opioid_deaths_con["policy_years"]<0]
con_death_after=opioid_deaths_con[opioid_deaths_con["policy_years"]>=0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_deaths_con["policy_years"]= opioid_deaths_con["Year"]-2007


In [31]:
# texas deaths pre/post plot
# Stuffs for making legend
legend = alt.Chart(fl_opi_before).transform_calculate(
    tx="'Texas'", control="'control'"
)

scale = alt.Scale(domain=["Texas", "Control"], range=["red", "blue"])
before = alt.Chart(
    tx_death_before, title="Pre-Post Policy: Texas, Overdose Deaths per cap"
).encode(x="policy_years", y=alt.Y("deaths_per_cap", title="Deaths per Cap"), color=alt.Color('tx:N', scale=scale, title=''))
base = before.transform_regression("policy_years", "deaths_per_cap", ).mark_line()

fit, before_line = get_reg_fit(
    tx_death_before, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="red"
)
fit, after_line = get_reg_fit(
    tx_death_after, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="red"
)

tx_chart_death = base + before_line + after_line + sep_line
tx_chart_death

In [32]:
# diff-in-diff
fit, before_line = get_reg_fit(
    con_death_before, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05
)
fit, after_line = get_reg_fit(
    con_death_after, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05
)

before = alt.Chart(
    con_death_before,
    title="Difference in Difference: Texas vs. Control, Overdose Deaths per cap",
).encode(x="policy_years", y=alt.Y("deaths_per_cap", title="Deaths per Cap"), color=alt.Color('tx:N', scale=scale, title=''))

base = before.transform_regression("policy_years", "deaths_per_cap").mark_line(
    color="#FFAA00"
)
con_chart_death = base + before_line + after_line + sep_line
diff_tx_death=con_chart_death+tx_chart_death+sep_line
diff_tx_death

# Washington Plots

In [33]:
# Creating Washington buyers df
states = ["ID","OR","MT", "WA"]
opioid_buyers_wa=opioid_buyers[opioid_buyers["BUYER_STATE"].isin(states)]

#create column, to indicate whether control or not (1 for control, 0 for Washington)
opioid_buyers_wa["is_control"]=np.where(opioid_buyers_wa["BUYER_STATE"]== 'WA', 0, 1)

#create policy years 
opioid_buyers_wa["policy_years"]= opioid_buyers_wa["year"]-2012
wa_opi_before=opioid_buyers_wa[opioid_buyers_wa["policy_years"]<0]
wa_opi_after=opioid_buyers_wa[opioid_buyers_wa["policy_years"]>=0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_buyers_wa["is_control"]=np.where(opioid_buyers_wa["BUYER_STATE"]== 'WA', 0, 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_buyers_wa["policy_years"]= opioid_buyers_wa["year"]-2012


In [34]:
# Washington buyers pre/post plot
# Stuffs for making legends
legend = alt.Chart(wa_opi_before).transform_calculate(fl= "'Washington'", control="'control'")
scale = alt.Scale(domain=["Washington", "Control"], range=['red', 'blue'])

before = alt.Chart(
    wa_opi_before, title="Pre-Post Policy: Washington, Opioids per cap"
).encode(x="policy_years", y=alt.Y("MME_per_cap", title="Opioids per Cap"), color=alt.Color('fl:N', scale=scale, title=''))
base = before.transform_regression("policy_years", "MME_per_cap").mark_line()
after = alt.Chart(
    wa_opi_after, title="Pre-Post Policy: Washington, Opioids per cap"
).encode(x="policy_years", y="MME_per_cap", color=alt.Color('control:N', scale=scale, title=''))
base2 = after.transform_regression("policy_years", "MME_per_cap").mark_line()

fit, before_line = get_reg_fit(
    wa_opi_before[wa_opi_before["is_control"] == 1],
    yvar="MME_per_cap",
    xvar="policy_years",
    alpha=0.05,
    col="blue",
)
fit, before_line2 = get_reg_fit(
    wa_opi_before[wa_opi_before["is_control"] == 0],
    yvar="MME_per_cap",
    xvar="policy_years",
    alpha=0.05,
    col="red"
)
fit, after_line = get_reg_fit(
    wa_opi_after[wa_opi_after["is_control"] == 1],
    yvar="MME_per_cap",
    xvar="policy_years",
    alpha=0.05,
    col="blue",
)
fit, after_line2 = get_reg_fit(
    wa_opi_after[wa_opi_after["is_control"] == 0],
    yvar="MME_per_cap",
    xvar="policy_years",
    alpha=0.05,
    col="red"
)

wa_pre_post = base + before_line2 + after_line2 +sep_line
wa_pre_post

In [35]:
# diff-in-diff
before = alt.Chart(
    wa_opi_before, title="Difference in Difference: Washington vs. Control, Opioids per cap"
).encode(x="policy_years", y=alt.Y("MME_per_cap", title="Opioid per Cap"), color=alt.Color('fl:N', scale=scale, title=''))

base = before.transform_regression("policy_years", "MME_per_cap").mark_line()

after = alt.Chart(
    fl_opi_after, title="Difference in Difference: Washington vs. Control, Opioids per cap"
).encode(x="policy_years", y="MME_per_cap", color=alt.Color('control:N', scale=scale, title=''))

wa_chart_opioid = (
    base + base2 + before_line + before_line2 + after_line + after_line2 + sep_line
)

wa_chart_opioid

In [36]:
# creating Washington dataframes
opioid_deaths_wa=opioid_deaths[opioid_deaths["STNAME"]=="Washington"]
opioid_deaths_wa["policy_years"]= opioid_deaths_wa["Year"]-2012

wa_death_before=opioid_deaths_wa[opioid_deaths_wa["policy_years"]<0]
wa_death_after=opioid_deaths_wa[opioid_deaths_wa["policy_years"]>=0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_deaths_wa["policy_years"]= opioid_deaths_wa["Year"]-2012


In [37]:
# Creating wa control dataframes
states = ["Idaho","Oregon","Montana"]

opioid_deaths_con=opioid_deaths[opioid_deaths["STNAME"].isin(states)]
opioid_deaths_con["policy_years"]= opioid_deaths_con["Year"]-2012

con_death_before=opioid_deaths_con[opioid_deaths_con["policy_years"]<0]
con_death_after=opioid_deaths_con[opioid_deaths_con["policy_years"]>=0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_deaths_con["policy_years"]= opioid_deaths_con["Year"]-2012


In [38]:
# washington deaths pre/post plot
# Stuffs for making legend
legend = alt.Chart(fl_opi_before).transform_calculate(
    wa="'Washington'", control="'control'"
)

scale = alt.Scale(domain=["Washington", "Control"], range=["red", "blue"])
before = alt.Chart(
    wa_death_before, title="Pre-Post Policy: Washington, Overdose Deaths per cap"
).encode(
    x="policy_years", y=alt.Y("deaths_per_cap", title="Deaths per Cap"), color=alt.Color("wa:N", scale=scale, title="")
)
base = before.transform_regression("policy_years", "deaths_per_cap").mark_line()

fit, before_line = get_reg_fit(
    wa_death_before, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="red"
)
fit, after_line = get_reg_fit(
    wa_death_after, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="red"
)

wa_chart_death = base + before_line + after_line + sep_line
wa_chart_death

In [39]:
# diff-in-diff
fit, before_line = get_reg_fit(
    con_death_before, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05
)
fit, after_line = get_reg_fit(
    con_death_after, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05
)

before = alt.Chart(
    con_death_before,
    title="Difference in Difference: Washington vs. Control, Overdose Deaths per cap",
).encode(x="policy_years", y=alt.Y("deaths_per_cap", title="Deaths per Cap"), color=alt.Color('wa:N', scale=scale, title=''))

base = before.transform_regression("policy_years", "deaths_per_cap").mark_line(
    color="#FFAA00"
)

con_chart_death = base + before_line + after_line + sep_line
diff_wa_death = con_chart_death + wa_chart_death + sep_line
diff_wa_death