In [1]:
import numpy as np
import pandas as pd
import altair as alt

In [2]:
# load in data
opioid_deaths = pd.read_parquet("/Users/yangshining/Desktop/pds2021-opioids-pds6/10_modified_data/vital_pop.parquet", engine='fastparquet')
opioid_buyers = pd.read_parquet("/Users/yangshining/Desktop/pds2021-opioids-pds6/10_modified_data/opi_pop.parquet", engine='fastparquet')

In [3]:

opioid_buyers=opioid_buyers.rename(columns={"MME per cap":"MME_per_cap"})
opioid_deaths=opioid_deaths.rename(columns={"vital deaths per cap":"deaths_per_cap"})
opioid_buyers["year"].value_counts()


2012    974
2006    973
2011    971
2013    970
2014    970
2007    969
2009    968
2010    967
2008    965
Name: year, dtype: int64

In [4]:
opioid_deaths["Year"].value_counts()

2015    794
2014    749
2013    704
2011    693
2012    680
2010    652
2009    596
2008    596
2007    571
2006    528
2005    480
2004    452
2003    408
Name: Year, dtype: int64

In [5]:
## Nick's code for confidence bands 

def get_reg_fit(data, yvar, xvar, alpha=0.05,col="blue"):
    import statsmodels.formula.api as smf

    # Grid for predicted values
    x = data.loc[pd.notnull(data[yvar]), xvar]
    xmin = x.min()
    xmax = x.max()
    step = (xmax - xmin) / 100
    grid = np.arange(xmin, xmax + step, step)
    predictions = pd.DataFrame({xvar: grid})

    # Fit model, get predictions
    model = smf.ols(f"{yvar} ~ {xvar}", data=data).fit()
    model_predict = model.get_prediction(predictions[xvar])
    predictions[yvar] = model_predict.summary_frame()["mean"]
    predictions[["ci_low", "ci_high"]] = model_predict.conf_int(alpha=alpha)

    # Build chart
    reg = alt.Chart(predictions).mark_line(color=col).encode(
        x=alt.X(xvar, axis=alt.Axis(title='Years from Policy Change')),
        y=alt.X(yvar, axis=alt.Axis(title='')))
    ci = (
        alt.Chart(predictions)
        .mark_errorband(color=col)
        .encode(
            x=xvar,
            y=alt.Y("ci_low", title=yvar),
            y2="ci_high",
        )
    )
    chart = ci + reg
    return predictions, chart

# Florida plots

In [6]:
# Creating florida buyers df


states = ["SC","AL","GA","MS", "FL"]

opioid_buyers_fl=opioid_buyers[opioid_buyers["BUYER_STATE"].isin(states)]

#create column, to indicate whether control or not (1 for control, 0 for florida)

opioid_buyers_fl["is_control"]=np.where(opioid_buyers_fl["BUYER_STATE"]== 'FL', 0, 1)

#create policy years 

opioid_buyers_fl["policy_years"]= opioid_buyers_fl["year"]-2010
fl_opi_before=opioid_buyers_fl[opioid_buyers_fl["policy_years"]<0]
fl_opi_after=opioid_buyers_fl[opioid_buyers_fl["policy_years"]>=0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_buyers_fl["is_control"]=np.where(opioid_buyers_fl["BUYER_STATE"]== 'FL', 0, 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_buyers_fl["policy_years"]= opioid_buyers_fl["year"]-2010


In [7]:
#create vertical line for plots
data = pd.DataFrame({"a": [0]})
sep_line = (alt.Chart(data).mark_rule(color="black", strokeDash=[10, 10]).encode(x=alt.X("a:Q", title="")))

In [8]:
# florida buyers pre/post plot

# Stuffs for making legends

legend = alt.Chart(fl_opi_before).transform_calculate(fl= "'Florida'", control="'control'")

scale = alt.Scale(domain=["Florida", "Control"], range=['red', 'blue'])


before = alt.Chart(
    fl_opi_before, title="Pre-Post Policy: Florida, Opioids per cap"
).encode(x="policy_years", y="MME_per_cap", color=alt.Color('fl:N', scale=scale, title=''))

base = before.transform_regression("policy_years", "MME_per_cap").mark_line()


after = alt.Chart(
    fl_opi_after, title="Pre-Post Policy: Florida, Opioids per cap"
).encode(x="policy_years", y="MME_per_cap", color=alt.Color('control:N', scale=scale, title=''))

base2 = after.transform_regression("policy_years", "MME_per_cap").mark_line()

fit, before_line = get_reg_fit(
    fl_opi_before[fl_opi_before["is_control"] == 1],
    yvar="MME_per_cap",
    xvar="policy_years",
    alpha=0.05,
    col="blue",
)
fit, before_line2 = get_reg_fit(
    fl_opi_before[fl_opi_before["is_control"] == 0],
    yvar="MME_per_cap",
    xvar="policy_years",
    alpha=0.05,
    col="red"
)
fit, after_line = get_reg_fit(
    fl_opi_after[fl_opi_after["is_control"] == 1],
    yvar="MME_per_cap",
    xvar="policy_years",
    alpha=0.05,
    col="blue",
)
fit, after_line2 = get_reg_fit(
    fl_opi_after[fl_opi_after["is_control"] == 0],
    yvar="MME_per_cap",
    xvar="policy_years",
    alpha=0.05,
    col="red"
)

fl_pre_post = base + before_line2 + after_line2 +sep_line

fl_pre_post


In [9]:
# control buyers pre/post plot

control_pre_post = base + before_line + after_line +sep_line

control_pre_post 

In [10]:
# diff-in-diff

fl_chart_opioid = (
    base + base2 + before_line + before_line2 + after_line + after_line2 + sep_line
)

fl_chart_opioid


In [12]:
# Creating florida deathd df

opioid_deaths_fl=opioid_deaths[opioid_deaths["STNAME"]=="Florida"]

opioid_deaths_fl["policy_years"]= opioid_deaths_fl["Year"]-2010


fl_death_before=opioid_deaths_fl[opioid_deaths_fl["policy_years"]<0]
fl_death_after=opioid_deaths_fl[opioid_deaths_fl["policy_years"]>=0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_deaths_fl["policy_years"]= opioid_deaths_fl["Year"]-2010


In [13]:
# florida deaths pre/post plot
before = alt.Chart(
    fl_death_before, title="Pre-Post Policy: Florida, Overdose Deaths per cap"
).encode(x="policy_years", y="deaths_per_cap", color=alt.Color('fl:N', scale=scale, title=''))

base = before.transform_regression("policy_years", "deaths_per_cap").mark_line()

fit, before_line = get_reg_fit(
    fl_death_before, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="red"
)
fit, after_line = get_reg_fit(
    fl_death_after, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="red"
)

fl_chart_death = base + before_line + after_line + sep_line

fl_chart_death


In [14]:
# Creating control dataframes
states = ["SC","AL","GA","MS"]
states2 = ["South Carolina","Alabama","Georgia","Mississippi"]

opioid_buyers_con=opioid_buyers[opioid_buyers["BUYER_STATE"].isin(states)]
opioid_buyers_con.sample(10)
opioid_buyers_con["policy_years"]= opioid_buyers_con["year"]-2010

con_opi_before=opioid_buyers_con[opioid_buyers_con["policy_years"]<0]
con_opi_after=opioid_buyers_con[opioid_buyers_con["policy_years"]>=0]


opioid_deaths_con=opioid_deaths[opioid_deaths["STNAME"].isin(states2)]
opioid_deaths_con["policy_years"]= opioid_deaths_con["Year"]-2010


con_death_before=opioid_deaths_con[opioid_deaths_con["policy_years"]<0]
con_death_after=opioid_deaths_con[opioid_deaths_con["policy_years"]>=0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_buyers_con["policy_years"]= opioid_buyers_con["year"]-2010
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_deaths_con["policy_years"]= opioid_deaths_con["Year"]-2010


In [15]:
# For test (don't run)
# opioid_deaths_con["Year"].value_counts()

In [16]:
# For test (don't run)
# opioid_deaths_con["policy_years"].value_counts()

In [17]:
# control deaths pre/post plot
fit, before_line = get_reg_fit(
    con_death_before, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="blue"
)

fit, after_line = get_reg_fit(
    con_death_after, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="blue"
)

before = alt.Chart(
    con_death_before,
    title="Difference in Difference: Florida vs. Control, Overdose Deaths per cap",
).encode(x="policy_years", y="deaths_per_cap", color=alt.Color('fl:N', scale=scale, title=''))

base = before.transform_regression("policy_years", "deaths_per_cap").mark_line(
    color="#FFAA00"
)

con_chart_death = base + before_line + after_line + sep_line

con_chart_death


In [18]:
#Difference in difference plot, deaths, FL vs control
diff_fl_death=con_chart_death+fl_chart_death+sep_line
diff_fl_death

# Texas Plots

In [19]:
# creating texas dataframes

opioid_deaths_tx=opioid_deaths[opioid_deaths["STNAME"]=="Texas"]

opioid_deaths_tx["policy_years"]= opioid_deaths_tx["Year"]-2007


tx_death_before=opioid_deaths_tx[opioid_deaths_tx["policy_years"]<0]
tx_death_after=opioid_deaths_tx[opioid_deaths_tx["policy_years"]>=0]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_deaths_tx["policy_years"]= opioid_deaths_tx["Year"]-2007


In [20]:
# Creating texas control dataframes
states = ["Oklahoma","Lousiana","New Mexico"]

opioid_deaths_con=opioid_deaths[opioid_deaths["STNAME"].isin(states)]
opioid_deaths_con["policy_years"]= opioid_deaths_con["Year"]-2007


con_death_before=opioid_deaths_con[opioid_deaths_con["policy_years"]<0]
con_death_after=opioid_deaths_con[opioid_deaths_con["policy_years"]>=0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_deaths_con["policy_years"]= opioid_deaths_con["Year"]-2007


In [21]:
# texas deaths pre/post plot

# Stuffs for making legend

legend = alt.Chart(fl_opi_before).transform_calculate(
    tx="'Texas'", control="'control'"
)

scale = alt.Scale(domain=["Texas", "Control"], range=["red", "blue"])


before = alt.Chart(
    tx_death_before, title="Pre-Post Policy: Texas, Overdose Deaths per cap"
).encode(x="policy_years", y="deaths_per_cap", color=alt.Color('tx:N', scale=scale, title=''))

base = before.transform_regression("policy_years", "deaths_per_cap", ).mark_line()

fit, before_line = get_reg_fit(
    tx_death_before, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="red"
)
fit, after_line = get_reg_fit(
    tx_death_after, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="red"
)

tx_chart_death = base + before_line + after_line + sep_line
tx_chart_death


In [22]:
# control deaths pre/post plot
fit, before_line = get_reg_fit(
    con_death_before, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05
)
fit, after_line = get_reg_fit(
    con_death_after, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05
)

before = alt.Chart(
    con_death_before,
    title="Difference in Difference: Texas vs. Control, Overdose Deaths per cap",
).encode(x="policy_years", y="deaths_per_cap", color=alt.Color('tx:N', scale=scale, title=''))

base = before.transform_regression("policy_years", "deaths_per_cap").mark_line(
    color="#FFAA00"
)


con_chart_death = base + before_line + after_line + sep_line
con_chart_death


In [23]:
# diff-in-diff
diff_tx_death=con_chart_death+tx_chart_death+sep_line
diff_tx_death

# Washington Plots

In [24]:
# creating Washington dataframes

opioid_deaths_wa=opioid_deaths[opioid_deaths["STNAME"]=="Washington"]

opioid_deaths_wa["policy_years"]= opioid_deaths_wa["Year"]-2012


wa_death_before=opioid_deaths_wa[opioid_deaths_wa["policy_years"]<0]
wa_death_after=opioid_deaths_wa[opioid_deaths_wa["policy_years"]>=0]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_deaths_wa["policy_years"]= opioid_deaths_wa["Year"]-2012


In [25]:
# Creating wa control dataframes
states = ["Idaho","Oregon","Montana"]

opioid_deaths_con=opioid_deaths[opioid_deaths["STNAME"].isin(states)]
opioid_deaths_con["policy_years"]= opioid_deaths_con["Year"]-2012


con_death_before=opioid_deaths_con[opioid_deaths_con["policy_years"]<0]
con_death_after=opioid_deaths_con[opioid_deaths_con["policy_years"]>=0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  opioid_deaths_con["policy_years"]= opioid_deaths_con["Year"]-2012


In [26]:
# washington deaths pre/post plot

# Stuffs for making legend

legend = alt.Chart(fl_opi_before).transform_calculate(
    wa="'Washington'", control="'control'"
)

scale = alt.Scale(domain=["Washington", "Control"], range=["red", "blue"])

before = alt.Chart(
    wa_death_before, title="Pre-Post Policy: Washington, Overdose Deaths per cap"
).encode(
    x="policy_years", y="deaths_per_cap", color=alt.Color("wa:N", scale=scale, title="")
)

base = before.transform_regression("policy_years", "deaths_per_cap").mark_line()

fit, before_line = get_reg_fit(
    wa_death_before, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="red"
)
fit, after_line = get_reg_fit(
    wa_death_after, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05, col="red"
)

wa_chart_death = base + before_line + after_line + sep_line
wa_chart_death


In [27]:
# control deaths pre/post plot
fit, before_line = get_reg_fit(
    con_death_before, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05
)
fit, after_line = get_reg_fit(
    con_death_after, yvar="deaths_per_cap", xvar="policy_years", alpha=0.05
)

before = alt.Chart(
    con_death_before,
    title="Difference in Difference: Washington vs. Control, Overdose Deaths per cap",
).encode(x="policy_years", y="deaths_per_cap", color=alt.Color('wa:N', scale=scale, title=''))

base = before.transform_regression("policy_years", "deaths_per_cap").mark_line(
    color="#FFAA00"
)

con_chart_death = base + before_line + after_line + sep_line

con_chart_death


In [28]:
# diff-in-diff
diff_wa_death = con_chart_death + wa_chart_death + sep_line
diff_wa_death
