In [211]:
import statsmodels.formula.api as smf
import pandas as pd
import altair as alt
import numpy as np

In [212]:
# Load the merged population / drug-death table straight from the team's GitHub repository.
# NOTE(review): the frame displayed below carries an "Unnamed: 0" column, which looks like
# a saved row index -- consider reading with index_col=0 if it is not needed; confirm.
df = pd.read_csv('https://raw.githubusercontent.com/MIDS-at-Duke/pds-2022-grey-team/main/00_Source/merged_pop_drug_death.csv')

In [213]:
# Show the loaded frame; the notebook renders a truncated head/tail preview.
df

Unnamed: 0,County,Year,State,Deaths,Population,death_rate
0,Adams County,2009,PA,12.0,101252.0,0.000119
1,Adams County,2010,PA,10.0,101407.0,0.000099
2,Adams County,2012,OH,10.0,28288.0,0.000354
3,Adams County,2014,OH,10.0,28060.0,0.000356
4,Adams County,2015,IL,14.0,66850.0,0.000209
...,...,...,...,...,...,...
3315,Yuma County,2011,AZ,20.0,202881.0,0.000099
3316,Yuma County,2012,AZ,32.0,202520.0,0.000158
3317,Yuma County,2013,AZ,22.0,202447.0,0.000109
3318,Yuma County,2014,AZ,35.0,204064.0,0.000172


In [214]:
# Sanity check: report whether any death_rate value is missing.
print(df["death_rate"].isnull().any())

False


In [215]:
def reg_fit(data, color, yvar, xvar, legend, alpha=0.05):
    """Fit an OLS line of ``yvar`` on ``xvar`` and chart it with a confidence band.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the columns named by ``yvar`` and ``xvar``.
    color : str
        Colour applied to both the fitted line and the confidence band.
    yvar, xvar : str
        Column names; they are interpolated into the formula ``"yvar ~ xvar"``.
    legend : str
        Label written to the ``"Treat"`` column of the prediction table and
        shown through the line layer's opacity legend (titled "Legend").
    alpha : float, default 0.05
        Significance level passed to ``conf_int`` for the confidence band.

    Returns
    -------
    tuple
        ``(predictions, chart)`` where ``predictions`` is a DataFrame with the
        columns ``xvar``, ``yvar`` (fitted mean), ``ci_low``, ``ci_high`` and
        ``Treat``, and ``chart`` is the error-band layer plus the line layer.

    Raises
    ------
    ValueError
        If no row has both ``yvar`` and ``xvar`` present.
    """
    # Tick positions used on the x axis of the band layer.
    years = list(range(2003, 2016))

    # Span the prediction grid over the x values that have an observed response.
    observed_x = data.loc[data[yvar].notna(), xvar].dropna()
    if observed_x.empty:
        raise ValueError(
            f"no rows with both {yvar!r} and {xvar!r} present; cannot fit a regression"
        )
    # linspace always yields exactly 101 points that end on the maximum.
    # np.arange with a float step can overshoot the maximum by one step and
    # fails when every x value is identical (step of zero).
    grid = np.linspace(observed_x.min(), observed_x.max(), 101)
    predictions = pd.DataFrame({xvar: grid})

    # Fit model
    model = smf.ols(f"{yvar} ~ {xvar}", data=data).fit()
    # Hand over a DataFrame so the formula looks the regressor up by column
    # name instead of statsmodels guessing the orientation of a Series.
    model_predict = model.get_prediction(predictions[[xvar]])
    predictions[yvar] = model_predict.summary_frame()["mean"].to_numpy()
    ci_bounds = np.asarray(model_predict.conf_int(alpha=alpha))
    predictions["ci_low"] = ci_bounds[:, 0]
    predictions["ci_high"] = ci_bounds[:, 1]

    # Build chart
    predictions["Treat"] = f"{legend}"
    reg = (
        alt.Chart(predictions)
        .mark_line()
        .encode(
            x=xvar,
            y=alt.Y(yvar, axis=alt.Axis(format="%")),
            color=alt.value(f"{color}"),
            # The colour is a fixed value, so the legend entry is produced by
            # encoding the constant "Treat" label on opacity instead.
            opacity=alt.Opacity("Treat", legend=alt.Legend(title="Legend")),
        )
    )
    ci = (
        alt.Chart(predictions)
        .mark_errorband()
        .encode(
            alt.X(f"{xvar}:Q", axis=alt.Axis(format=".0f", values=years)),
            y=alt.Y(
                "ci_low",
                title="Mortality Rate due to Opioid Overdose (by County)",
                scale=alt.Scale(zero=False),
            ),
            y2="ci_high",
            color=alt.value(f"{color}"),
        )
    )
    # Band first so the fitted line is drawn on top of it.
    chart = ci + reg
    return predictions, chart


In [216]:
def plotting_chart(policy_year, color, data, yvar, xvar, legend, alpha=0.05):
    """Return the ``reg_fit`` chart for ``data`` with a vertical rule at the policy year.

    Parameters
    ----------
    policy_year : int-like
        Year at which the black vertical rule is drawn (converted with ``int``).
    color, data, yvar, xvar, legend, alpha
        Forwarded unchanged to ``reg_fit``.

    Returns
    -------
    The layered chart (regression layers plus rule), sized 500 x 500.
    """
    rule_years = [int(policy_year)]
    tick_values = list(np.arange(2003, 2016, 1))

    # Only the chart is used here; the prediction table from reg_fit is discarded.
    _, fitted_layers = reg_fit(
        data=data, color=color, yvar=yvar, xvar=xvar, legend=legend, alpha=alpha
    )

    # One-row frame so the rule mark is drawn at exactly the policy year.
    rule_frame = pd.DataFrame({"Year": rule_years})
    year_axis = alt.X("Year:Q", title="Year", axis=alt.Axis(values=tick_values))
    policy_rule = alt.Chart(rule_frame).mark_rule(color="black").encode(year_axis)

    layered = fitted_layers + policy_rule
    return layered.properties(width=500, height=500)

In [217]:
# Restrict to Florida and partition its rows around the 2010 policy year:
# FL_pre holds years before 2010, FL_post holds 2010 onward.
data_FL = df.loc[df["State"].eq("FL"), ["Year", "County", "death_rate"]]
FL_pre = data_FL.query("Year < 2010")
FL_post = data_FL.query("Year >= 2010")

In [218]:
# Florida: one fit on each side of the 2010 policy year, layered into one chart.
pre_fl_fit = plotting_chart(
    policy_year=2010,
    color="blue",
    data=FL_pre,
    yvar="death_rate",
    xvar="Year",
    legend="Florida",
    alpha=0.05,
)
post_fl_fit = plotting_chart(
    policy_year=2010,
    color="blue",
    data=FL_post,
    yvar="death_rate",
    xvar="Year",
    legend="Florida",
    alpha=0.05,
)
final = pre_fl_fit + post_fl_fit

# Last expression of the cell, so the titled chart is what gets displayed.
final.properties(
    title="Pre-Policy VS. Post-Policy of Opioid Regulations on Mortality Rate for Florida"
)

In [219]:
# Florida (Treat = 1) together with its comparison states PA, OH and GA
# (Treat = 0); each group is then split into pre-2010 and 2010-onward rows.
diff_FL = df.loc[df["State"].isin(["FL", "PA", "OH", "GA"])].assign(
    Treat=lambda frame: (~frame["State"].isin(["PA", "OH", "GA"])).astype("int64")
)
diff_FL_treat = diff_FL.query("Treat == 1")
diff_FL_control = diff_FL.query("Treat == 0")
diff_FL_treat_pre = diff_FL_treat.query("Year < 2010")
diff_FL_treat_post = diff_FL_treat.query("Year >= 2010")
diff_FL_control_pre = diff_FL_control.query("Year < 2010")
diff_FL_control_post = diff_FL_control.query("Year >= 2010")

In [220]:
# Treated state (Florida): fits before and after the 2010 policy year.
pre_FL = plotting_chart(
    policy_year=2010,
    color="blue",
    data=diff_FL_treat_pre,
    yvar="death_rate",
    xvar="Year",
    legend="Florida",
    alpha=0.05,
)
post_FL = plotting_chart(
    policy_year=2010,
    color="blue",
    data=diff_FL_treat_post,
    yvar="death_rate",
    xvar="Year",
    legend="Florida",
    alpha=0.05,
)

# Comparison states, drawn in a lighter shade of the same hue.
pre_control = plotting_chart(
    policy_year=2010,
    color="#456bd6",
    data=diff_FL_control_pre,
    yvar="death_rate",
    xvar="Year",
    legend="Comparison States - PA, OH, GA",
    alpha=0.05,
)
post_control = plotting_chart(
    policy_year=2010,
    color="#456bd6",
    data=diff_FL_control_post,
    yvar="death_rate",
    xvar="Year",
    legend="Comparison States - PA, OH, GA",
    alpha=0.05,
)

# FL vs PA, OH, GA -- the titled chart is the cell's displayed output.
final = pre_FL + post_FL + pre_control + post_control
final.properties(
    title="Difference in Difference Analysis of Opioid Regulations on Mortality Rate for Florida vs Comparison States"
)

In [221]:
# Restrict to Texas and partition its rows around the 2007 policy year:
# TX_pre holds years before 2007, TX_post holds 2007 onward.
data_TX = df.loc[df["State"].eq("TX"), ["Year", "County", "death_rate"]]
TX_pre = data_TX.query("Year < 2007")
TX_post = data_TX.query("Year >= 2007")

In [222]:
# Texas: one fit on each side of the 2007 policy year, layered into one chart.
pre_tx_fit = plotting_chart(
    policy_year=2007,
    color="green",
    data=TX_pre,
    yvar="death_rate",
    xvar="Year",
    legend="Texas",
    alpha=0.05,
)
post_tx_fit = plotting_chart(
    policy_year=2007,
    color="green",
    data=TX_post,
    yvar="death_rate",
    xvar="Year",
    legend="Texas",
    alpha=0.05,
)
final = pre_tx_fit + post_tx_fit

# Last expression of the cell, so the titled chart is what gets displayed.
final.properties(
    title="Pre-Policy VS. Post-Policy of Opioid Regulations on Mortality Rate for Texas"
)

In [223]:
# Texas (Treat = 1) together with its comparison states IL, NY and VA
# (Treat = 0); each group is then split into pre-2007 and 2007-onward rows.
diff_TX = df.loc[df["State"].isin(["TX", "IL", "NY", "VA"])].assign(
    Treat=lambda frame: (~frame["State"].isin(["IL", "NY", "VA"])).astype("int64")
)
diff_TX_treat = diff_TX.query("Treat == 1")
diff_TX_control = diff_TX.query("Treat == 0")
diff_TX_treat_pre = diff_TX_treat.query("Year < 2007")
diff_TX_treat_post = diff_TX_treat.query("Year >= 2007")
diff_TX_control_pre = diff_TX_control.query("Year < 2007")
diff_TX_control_post = diff_TX_control.query("Year >= 2007")

In [224]:
# Treated state (Texas): fits before and after the 2007 policy year.
pre_TX = plotting_chart(
    policy_year=2007,
    color="green",
    data=diff_TX_treat_pre,
    yvar="death_rate",
    xvar="Year",
    legend="Texas",
    alpha=0.05,
)
post_TX = plotting_chart(
    policy_year=2007,
    color="green",
    data=diff_TX_treat_post,
    yvar="death_rate",
    xvar="Year",
    legend="Texas",
    alpha=0.05,
)

# Comparison states, drawn in a lighter shade of the same hue.
pre_control = plotting_chart(
    policy_year=2007,
    color="#6FCC49",
    data=diff_TX_control_pre,
    yvar="death_rate",
    xvar="Year",
    legend="Comparison States - IL, NY, VA",
    alpha=0.05,
)
post_control = plotting_chart(
    policy_year=2007,
    color="#6FCC49",
    data=diff_TX_control_post,
    yvar="death_rate",
    xvar="Year",
    legend="Comparison States - IL, NY, VA",
    alpha=0.05,
)

# TX vs IL, NY, VA -- the titled chart is the cell's displayed output.
final = pre_TX + post_TX + pre_control + post_control
final.properties(
    title="Difference in Difference Analysis of Opioid Regulations on Mortality Rate for Texas vs Comparison States"
)

In [225]:
# Restrict to Washington and partition its rows around the 2012 policy year:
# WA_pre holds years before 2012, WA_post holds 2012 onward.
data_WA = df.loc[df["State"].eq("WA"), ["Year", "County", "death_rate"]]
WA_pre = data_WA.query("Year < 2012")
WA_post = data_WA.query("Year >= 2012")

In [226]:
# Washington: one fit on each side of the 2012 policy year, layered into one chart.
pre_wa_fit = plotting_chart(
    policy_year=2012,
    color="brown",
    data=WA_pre,
    yvar="death_rate",
    xvar="Year",
    legend="Washington",
    alpha=0.05,
)
post_wa_fit = plotting_chart(
    policy_year=2012,
    color="brown",
    data=WA_post,
    yvar="death_rate",
    xvar="Year",
    legend="Washington",
    alpha=0.05,
)
final = pre_wa_fit + post_wa_fit

# Last expression of the cell, so the titled chart is what gets displayed.
final.properties(
    title="Pre-Policy VS. Post-Policy of Opioid Regulations on Mortality Rate for Washington"
)

In [227]:
# Washington (Treat = 1) together with its comparison states AZ, MD and WI
# (Treat = 0); each group is then split into pre-2012 and 2012-onward rows.
diff_WA = df.loc[df["State"].isin(["WA", "AZ", "MD", "WI"])].assign(
    Treat=lambda frame: (~frame["State"].isin(["AZ", "MD", "WI"])).astype("int64")
)
diff_WA_treat = diff_WA.query("Treat == 1")
diff_WA_control = diff_WA.query("Treat == 0")
diff_WA_treat_pre = diff_WA_treat.query("Year < 2012")
diff_WA_treat_post = diff_WA_treat.query("Year >= 2012")
diff_WA_control_pre = diff_WA_control.query("Year < 2012")
diff_WA_control_post = diff_WA_control.query("Year >= 2012")

In [228]:
# Treated state (Washington): fits before and after the 2012 policy year.
pre_WA = plotting_chart(
    policy_year=2012,
    color="brown",
    data=diff_WA_treat_pre,
    yvar="death_rate",
    xvar="Year",
    legend="Washington",
    alpha=0.05,
)
post_WA = plotting_chart(
    policy_year=2012,
    color="brown",
    data=diff_WA_treat_post,
    yvar="death_rate",
    xvar="Year",
    legend="Washington",
    alpha=0.05,
)

# Comparison states, drawn in a lighter shade of the same hue.
pre_control = plotting_chart(
    policy_year=2012,
    color="#D4879E",
    data=diff_WA_control_pre,
    yvar="death_rate",
    xvar="Year",
    legend="Comparison States - AZ, MD, WI",
    alpha=0.05,
)
post_control = plotting_chart(
    policy_year=2012,
    color="#D4879E",
    data=diff_WA_control_post,
    yvar="death_rate",
    xvar="Year",
    legend="Comparison States - AZ, MD, WI",
    alpha=0.05,
)

# WA vs AZ, MD, WI -- the titled chart is the cell's displayed output.
final = pre_WA + post_WA + pre_control + post_control
final.properties(
    title="Difference in Difference Analysis of Opioid Regulations on Mortality Rate for Washington vs Comparison States"
)