In [268]:
import altair as alt
import numpy as np
import pandas as pd

In [269]:
# Load the finalized per-state subsets (Florida, Texas, Washington), in that order.
# NOTE(review): these are absolute paths on one machine; they must be adjusted
# before the notebook can be run anywhere else.
fl, tx, wa = map(
    pd.read_csv,
    (
        "/Users/emma/Downloads/Finality Subsets/FL subsetfinalized.csv",
        "/Users/emma/Downloads/Finality Subsets/TX subsetfinalized.csv",
        "/Users/emma/Downloads/Finality Subsets/WA subsetfinalized.csv",
    ),
)

In [291]:
def dataset(df, year, state):
    """Build the state-year panel used by the pre-post and diff-in-diff charts.

    Parameters
    ----------
    df : pandas.DataFrame
        Input rows. Must contain ``BUYER_STATE_x``, ``TRANSACTION_YEAR``,
        ``Deaths`` and ``population``. The frame is left unmodified.
    year : int
        Policy cutoff. Rows with ``TRANSACTION_YEAR`` strictly greater than
        this value get ``policy == 1``; all others get ``policy == 0``.
    state : str
        Value of ``BUYER_STATE_x`` that marks the treated state
        (``state == 1``); every other state gets ``state == 0``.

    Returns
    -------
    pandas.DataFrame
        One row per (``BUYER_STATE_x``, ``TRANSACTION_YEAR``) with the
        remaining columns aggregated by ``sum``, plus the ``policy`` and
        ``state`` indicators and ``death_rate`` (summed ``Deaths`` divided by
        summed ``population``).
    """
    # Cast the year on a copy so the caller's frame is not changed and the
    # function gives the same result every time the cell is re-run.
    typed = df.assign(TRANSACTION_YEAR=df["TRANSACTION_YEAR"].astype("int"))
    opioids_data = typed.groupby(
        ["BUYER_STATE_x", "TRANSACTION_YEAR"], as_index=False
    ).sum()

    opioids_data["policy"] = (opioids_data["TRANSACTION_YEAR"] > year).astype("int64")
    opioids_data["state"] = (opioids_data["BUYER_STATE_x"] == state).astype("int64")

    # The rate has to live on the aggregated frame that is returned; writing it
    # to the input frame left the result without a ``death_rate`` column.
    opioids_data["death_rate"] = opioids_data["Deaths"] / opioids_data["population"]
    return opioids_data


In [293]:
# Florida panel: policy = 1 for years after 2010, state = 1 for rows whose BUYER_STATE_x is "FL".
FL = dataset(fl, 2010, "FL")

In [294]:
# Texas panel: policy = 1 for years after 2007, state = 1 for rows whose BUYER_STATE_x is "TX".
TX = dataset(tx, 2007, "TX")

In [295]:
# Washington panel: policy = 1 for years after 2012, state = 1 for rows whose BUYER_STATE_x is "WA".
WA = dataset(wa, 2012, "WA")

In [298]:
def pre_post(data, yvar, xvar, year, alpha=0.05):
    """Chart separate pre- and post-policy linear trends for the treated state.

    Only rows with ``state == 1`` are used. They are split on the ``policy``
    column (0 = before, 1 = after), an OLS line ``yvar ~ xvar`` is fitted to
    each part, and both fitted lines are drawn with their confidence bands.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``policy``, ``state``, ``xvar`` and ``yvar`` columns.
    yvar, xvar : str
        Names of the outcome and predictor columns (used in the formula).
    year : number
        x position of the red vertical rule. It does not affect the split,
        which is taken from the ``policy`` column.
    alpha : float, default 0.05
        Significance level of the confidence band.

    Returns
    -------
    (pandas.DataFrame, altair chart)
        The prediction grid of the *pre-policy* fit (columns ``xvar``,
        ``yvar``, ``ci_low``, ``ci_high``) and the layered chart.

    Raises
    ------
    ValueError
        If either period has fewer than two distinct ``xvar`` values with a
        non-null ``yvar``; a line cannot be fitted to such a subset.
    """
    import statsmodels.formula.api as smf

    n_grid = 101  # number of x positions at which each fitted line is evaluated

    def _trend_layers(policy_flag):
        """Fit one period of the treated state; return (predictions, line, band)."""
        subset = data.loc[(data["policy"] == policy_flag) & (data["state"] == 1), :]
        x = subset.loc[pd.notnull(subset[yvar]), xvar]
        n_distinct = x.nunique()
        if n_distinct < 2:
            raise ValueError(
                f"need at least two distinct {xvar} values with non-null {yvar} "
                f"where policy == {policy_flag} and state == 1; found {n_distinct}"
            )

        # linspace fixes both end points and the point count; arange with a
        # fractional step can yield one point more or less and overshoot xmax.
        predictions = pd.DataFrame({xvar: np.linspace(x.min(), x.max(), n_grid)})

        fitted = smf.ols(f"{yvar} ~ {xvar}", data=subset).fit()
        predicted = fitted.get_prediction(predictions[xvar])
        predictions[yvar] = predicted.summary_frame()["mean"]
        predictions[["ci_low", "ci_high"]] = predicted.conf_int(alpha=alpha)

        line = alt.Chart(predictions).mark_line().encode(x=xvar, y=yvar)
        band = (
            alt.Chart(predictions)
            .mark_errorband()
            .encode(
                x=xvar,
                y=alt.Y("ci_low", title=yvar),
                y2="ci_high",
            )
        )
        return predictions, line, band

    predictions1, reg1, ci1 = _trend_layers(0)  # before the policy
    _, reg2, ci2 = _trend_layers(1)  # after the policy

    # Red vertical rule marking the policy year.
    overlay = pd.DataFrame({'x': [year]})
    vline = alt.Chart(overlay).mark_rule(color='red', strokeWidth=3).encode(x='x:Q')

    # Bands first so the fitted lines and the rule are drawn on top of them.
    chart = ci1 + ci2 + reg1 + reg2 + vline
    return predictions1, chart


In [299]:
def diff_diff(data, yvar, xvar, year, alpha=0.05):
    """Chart pre- and post-policy linear trends for treated and comparison rows.

    The data are split into four groups by the ``policy`` (0 = before,
    1 = after) and ``state`` (1 = treated, 0 = comparison) columns. An OLS
    line ``yvar ~ xvar`` is fitted to each group and all four fitted lines are
    drawn with their confidence bands.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``policy``, ``state``, ``xvar`` and ``yvar`` columns.
    yvar, xvar : str
        Names of the outcome and predictor columns (used in the formula).
    year : number
        x position of the red vertical rule. It does not affect the split,
        which is taken from the ``policy`` column.
    alpha : float, default 0.05
        Significance level of the confidence bands.

    Returns
    -------
    altair chart
        Layered chart: four bands, four fitted lines and the policy rule.
        NOTE: no colour encoding is applied, so the treated and comparison
        lines are not colour-coded by group.

    Raises
    ------
    ValueError
        If any of the four groups has fewer than two distinct ``xvar`` values
        with a non-null ``yvar``; a line cannot be fitted to such a subset.
    """
    import statsmodels.formula.api as smf

    n_grid = 101  # number of x positions at which each fitted line is evaluated

    def _trend_layers(policy_flag, state_flag):
        """Fit one (policy, state) group; return its (line, band) chart layers."""
        subset = data.loc[
            (data["policy"] == policy_flag) & (data["state"] == state_flag), :
        ]
        x = subset.loc[pd.notnull(subset[yvar]), xvar]
        n_distinct = x.nunique()
        if n_distinct < 2:
            raise ValueError(
                f"need at least two distinct {xvar} values with non-null {yvar} "
                f"where policy == {policy_flag} and state == {state_flag}; "
                f"found {n_distinct}"
            )

        # linspace fixes both end points and the point count; arange with a
        # fractional step can yield one point more or less and overshoot xmax.
        predictions = pd.DataFrame({xvar: np.linspace(x.min(), x.max(), n_grid)})

        fitted = smf.ols(f"{yvar} ~ {xvar}", data=subset).fit()
        predicted = fitted.get_prediction(predictions[xvar])
        predictions[yvar] = predicted.summary_frame()["mean"]
        predictions[["ci_low", "ci_high"]] = predicted.conf_int(alpha=alpha)

        line = alt.Chart(predictions).mark_line().encode(x=xvar, y=yvar)
        band = (
            alt.Chart(predictions)
            .mark_errorband()
            .encode(
                x=xvar,
                y=alt.Y("ci_low", title=yvar),
                y2="ci_high",
            )
        )
        return line, band

    reg1, ci1 = _trend_layers(0, 0)  # comparison states, before the policy
    reg11, ci11 = _trend_layers(0, 1)  # treated state, before the policy
    reg2, ci2 = _trend_layers(1, 0)  # comparison states, after the policy
    reg21, ci21 = _trend_layers(1, 1)  # treated state, after the policy

    # Red vertical rule marking the policy year.
    overlay = pd.DataFrame({'x': [year]})
    vline = alt.Chart(overlay).mark_rule(color='red', strokeWidth=3).encode(x='x:Q')

    # Bands first so the fitted lines and the rule are drawn on top of them.
    chart = ci1 + ci2 + ci11 + ci21 + reg1 + reg11 + reg2 + reg21 + vline
    return chart


## Florida

> pre-post opioids

In [301]:
# Florida rows only (state == 1): separate linear trends of the opioid
# shipment/population ratio over the years before and after the policy, with a
# red rule at 2010. `fit` receives the pre-policy prediction grid.
xvar= "TRANSACTION_YEAR"
yvar="opioid_shipment_population_ratio"
fit, chart = pre_post(FL, yvar, xvar, 2010, alpha=0.05)
chart

> Difference-in-differences for opioids

In [302]:
# Florida (state == 1) against the other states in the panel (state == 0):
# pre- and post-policy linear trends of the opioid shipment/population ratio,
# with a red rule at 2010. alpha is left at the function default (0.05).
xvar= "TRANSACTION_YEAR"
yvar="opioid_shipment_population_ratio"
chart = diff_diff(FL, yvar, xvar, 2010)
chart

> pre-post death rate

In [313]:
# Florida rows only (state == 1): separate linear trends of death_rate over the
# years before and after the policy, with a red rule at 2010.
# `fit` receives the pre-policy prediction grid.
xvar= "TRANSACTION_YEAR"
yvar="death_rate"
fit, chart = pre_post(FL, yvar, xvar, 2010, alpha=0.05)
chart

> Difference-in-differences for death rate

In [315]:
# Florida (state == 1) against the other states in the panel (state == 0):
# pre- and post-policy linear trends of death_rate, with a red rule at 2010.
xvar= "TRANSACTION_YEAR"
yvar="death_rate"
chart = diff_diff(FL, yvar, xvar, 2010, alpha=0.05)
chart

## Washington

> pre-post opioids

In [303]:
# Washington rows only (state == 1): separate linear trends of the opioid
# shipment/population ratio before and after the policy, with a red rule at 2012.
# NOTE(review): the captured output shows a statsmodels line dividing by
# df_resid — presumably a warning from a fit with very few years; confirm how
# many years fall in each period.
xvar= "TRANSACTION_YEAR"
yvar="opioid_shipment_population_ratio"
fit, chart = pre_post(WA, yvar, xvar, 2012, alpha=0.05)
chart

  return np.dot(wresid, wresid) / self.df_resid


> Difference-in-differences for opioids

In [304]:
# Washington (state == 1) against the other states in the panel (state == 0):
# pre- and post-policy linear trends of the opioid shipment/population ratio,
# with a red rule at 2012. alpha is left at the function default (0.05).
# NOTE(review): the captured output shows a statsmodels line dividing by
# df_resid — presumably a warning from a fit with very few years; confirm.
xvar= "TRANSACTION_YEAR"
yvar="opioid_shipment_population_ratio"
chart = diff_diff(WA, yvar, xvar, 2012)
chart

  return np.dot(wresid, wresid) / self.df_resid


> pre-post for death rate

In [316]:
# Washington rows only (state == 1): separate linear trends of death_rate
# before and after the policy, with a red rule at 2012.
# NOTE(review): the captured output shows a statsmodels line dividing by
# df_resid — presumably a warning from a fit with very few years; confirm.
xvar= "TRANSACTION_YEAR"
yvar="death_rate"
fit, chart = pre_post(WA, yvar, xvar, 2012, alpha=0.05)
chart

  return np.dot(wresid, wresid) / self.df_resid


> Difference-in-differences for death rate

In [317]:
# Washington (state == 1) against the other states in the panel (state == 0):
# pre- and post-policy linear trends of death_rate, with a red rule at 2012.
# alpha is left at the function default (0.05).
# NOTE(review): the captured output shows a statsmodels line dividing by
# df_resid — presumably a warning from a fit with very few years; confirm.
xvar= "TRANSACTION_YEAR"
yvar="death_rate"
chart = diff_diff(WA, yvar, xvar, 2012)
chart

  return np.dot(wresid, wresid) / self.df_resid


In [321]:
# Spot-check ten rows of the Texas panel. The seed is fixed so the displayed
# rows are the same on every Restart & Run All.
TX.sample(10, random_state=0)

Unnamed: 0,BUYER_STATE_x,TRANSACTION_YEAR,Opioids_Shipment_IN_GM,fips,population,Deaths,opioid_shipment_population_ratio,policy,state,death_rate
2,CA,2008,9178293.0,11451000000.0,5750294000000.0,491087889.0,22.478334,1,0,8.5e-05
63,TX,2006,3995719.0,67638740000.0,1650067000000.0,154715467.0,34.782531,0,1,9.4e-05
46,ND,2007,97772.42,1414860000.0,2153756000.0,0.0,5.842829,0,0,0.0
40,MD,2010,2647307.0,11272270000.0,227256000000.0,23478221.0,13.200959,1,0,0.000103
74,UT,2008,1082797.0,13166110000.0,143826600000.0,26804902.0,11.128598,1,0,0.000186
66,TX,2009,5123285.0,84870880000.0,2125745000000.0,209301964.0,44.913991,1,1,9.8e-05
12,CO,2009,1454814.0,3372206000.0,145719600000.0,20486536.0,15.105026,1,0,0.000141
20,GA,2008,2587313.0,11281020000.0,220024100000.0,17652269.0,44.744939,1,0,8e-05
17,CO,2014,1709159.0,3500307000.0,168268200000.0,28281644.0,18.020627,1,0,0.000168
76,UT,2010,1203765.0,14843820000.0,160550000000.0,25255225.0,12.941672,1,0,0.000157


## Texas

In [322]:
# Texas rows only (state == 1): separate linear trends of death_rate before and
# after the policy, with a red rule at 2007.
# NOTE(review): the captured output shows a statsmodels line dividing by
# df_resid — presumably a warning from a fit with very few years; confirm how
# many years fall at or before 2007.
xvar= "TRANSACTION_YEAR"
yvar="death_rate"
fit, chart = pre_post(TX, yvar, xvar, 2007, alpha=0.05)
chart

  return np.dot(wresid, wresid) / self.df_resid


In [323]:
# Texas (state == 1) against the other states in the panel (state == 0):
# pre- and post-policy linear trends of death_rate, with a red rule at 2007.
# NOTE(review): the captured output shows a statsmodels line dividing by
# df_resid — presumably a warning from a fit with very few years; confirm.
xvar= "TRANSACTION_YEAR"
yvar="death_rate"
chart = diff_diff(TX, yvar, xvar, 2007, alpha=0.05)
chart

  return np.dot(wresid, wresid) / self.df_resid
