In [42]:
import pandas as pd

population = pd.read_parquet("population.parquet")

shipment = pd.read_parquet("shipment_eda.parquet")

vitalstats_temp = pd.read_csv(
    "https://raw.githubusercontent.com/MIDS-at-Duke/Opioid-Team-Project-Group-9/main/Data/processed/vitalstats_temp.csv?token=GHSAT0AAAAAACHBMRNBETGNCJRKCPZ6EKBUZLEGHHA"
)

In [43]:
required_cases = [
    "Drug poisonings (overdose) Unintentional (X40-X44)",
    "All other drug-induced causes",
    "Drug poisonings (overdose) Suicide (X60-X64)",
    "Drug poisonings (overdose) Undetermined (Y10-Y14)",
    "Drug poisonings (overdose) Homicide (X85)",
]
vitalstats_temp["if_drugdose"] = vitalstats_temp["Drug/Alcohol Induced Cause"].isin(
    required_cases
)

In [44]:
# drop NA for county code for now, will adjust as needs be
vitalstats_temp = vitalstats_temp.dropna(subset=["County Code"])

In [45]:
vitalstats_temp["County Code"] = vitalstats_temp["County Code"].astype(int)
vitalstats_temp["County Code"] = vitalstats_temp["County Code"].astype(str).str.zfill(5)

In [46]:
merged_vital_pop = pd.merge(
    vitalstats_temp,
    population,
    left_on=["County Code", "Year"],
    right_on=["County_Code", "Year"],
    how="left",
)
full_table = pd.merge(
    merged_vital_pop,
    shipment,
    left_on=["County_y", "Year"],
    right_on=["BUYER_COUNTY", "YEAR"],
    how="left",
)

In [52]:
# getting "Georgia", "Louisiana", "North Carolina", "Florida"
required_states = ["Georgia", "Louisiana", "North Carolina", "Florida"]
full_table["if_required_states"] = full_table["State_y"].isin(required_states)
# getting year of interest from 2007 to 2013
required_year = [2007, 2008, 2009, 2010, 2011, 2012, 2013]
full_table["if_year"] = full_table["Year"].isin(required_year)
# create sub table for Florida and control states from year 2007 to 2013
Florida_others = full_table[
    (full_table["if_required_states"] == True)
    & (full_table["if_year"] == True)
    & (full_table["if_drugdose"] == True)
]

Florida_others["Deaths"] = pd.to_numeric(Florida_others["Deaths"], errors="coerce")
Florida_others = Florida_others[
    ["State_y", "County_y", "County_Code", "Year", "MME", "Population", "Deaths"]
]
# data_types = Florida_others.dtypes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Florida_others["Deaths"] = pd.to_numeric(Florida_others["Deaths"], errors="coerce")


In [53]:
Florida_others.head()

Unnamed: 0,State_y,County_y,County_Code,Year,MME,Population,Deaths
33841,Florida,ALACHUA,12001,2007,95264080.0,242685.0,17.0
33846,Florida,BAY,12005,2007,61276520.0,165345.0,30.0
33847,Florida,BAY,12005,2007,31585930.0,165345.0,30.0
33854,Florida,BREVARD,12009,2007,246517100.0,539719.0,84.0
33855,Florida,BREVARD,12009,2007,246517100.0,539719.0,20.0


In [63]:
Florida_others["Opioid Prescriptions Per Capita"] = (
    Florida_others["MME"] / Florida_others["Population"]
)
Florida_others["Mortality rate from Drug Overdoses Per Capita"] = (
    Florida_others["Deaths"] / Florida_others["Population"]
)
Florida_others

Unnamed: 0,State_y,County_y,County_Code,Year,MME,Population,Deaths,is_control,Opioid Prescriptions Per Capita,Mortality rate from Drug Overdoses Per Capita
33841,Florida,ALACHUA,12001,2007,9.526408e+07,242685.0,17.0,False,392.542092,0.000070
33846,Florida,BAY,12005,2007,6.127652e+07,165345.0,30.0,False,370.597945,0.000181
33847,Florida,BAY,12005,2007,3.158593e+07,165345.0,30.0,False,191.030479,0.000181
33854,Florida,BREVARD,12009,2007,2.465171e+08,539719.0,84.0,False,456.750876,0.000156
33855,Florida,BREVARD,12009,2007,2.465171e+08,539719.0,20.0,False,456.750876,0.000037
...,...,...,...,...,...,...,...,...,...,...
166195,North Carolina,WAYNE,37191,2013,1.051307e+07,124625.0,11.0,True,84.357630,0.000088
166196,North Carolina,WAYNE,37191,2013,1.258363e+06,124625.0,11.0,True,10.097192,0.000088
166197,North Carolina,WAYNE,37191,2013,1.799060e+07,124625.0,11.0,True,144.357866,0.000088
166230,North Carolina,WILKES,37193,2013,5.072383e+06,68610.0,19.0,True,73.930661,0.000277


In [96]:
control_list = ["Georgia", "Louisiana", "North Carolina"]
control_states = Florida_others[Florida_others["State_y"].isin(control_list) == True]
Florida = Florida_others[Florida_others["State_y"] == "Florida"]
group_florida = (
    Florida.groupby("Year")[
        [
            "Opioid Prescriptions Per Capita",
            "Mortality rate from Drug Overdoses Per Capita",
        ]
    ]
    .mean()
    .reset_index()
)

group_florida

group_control = (
    control_states.groupby("Year")[
        [
            "Opioid Prescriptions Per Capita",
            "Mortality rate from Drug Overdoses Per Capita",
        ]
    ]
    .mean()
    .reset_index()
)

group_control

Unnamed: 0,Year,Opioid Prescriptions Per Capita,Mortality rate from Drug Overdoses Per Capita
0,2007,277.733431,0.000162
1,2008,212.273336,0.000124
2,2009,275.75504,0.000138
3,2010,312.662675,0.000129
4,2011,409.141559,0.000151
5,2012,364.138316,0.000137
6,2013,295.073999,0.000147


In [108]:
result_df = pd.concat([group_florida, group_control], ignore_index=True)
result_df["if_treat"] = result_df.index.map(lambda x: 0 if x <= 6 else 1)
result_df["if_effect"] = result_df["Year"].apply(lambda x: 0 if x < 2010 else 1)
result_df.columns = [
    "year",
    "Opioid_Prescriptions_Per_Capita",
    "Mortality_rate_from_Drug_Overdoses_Per_Capita",
    "if_treat",
    "if_effect",
]

result_df

Unnamed: 0,year,Opioid_Prescriptions_Per_Capita,Mortality_rate_from_Drug_Overdoses_Per_Capita,if_treat,if_effect
0,2007,231.470535,0.000106,0,0
1,2008,287.029437,0.00011,0,0
2,2009,334.198363,0.000104,0,0
3,2010,434.685699,0.000132,0,1
4,2011,373.263691,0.000123,0,1
5,2012,266.689503,9.9e-05,0,1
6,2013,232.993127,9.7e-05,0,1
7,2007,277.733431,0.000162,1,0
8,2008,212.273336,0.000124,1,0
9,2009,275.75504,0.000138,1,0


In [111]:
from statsmodels.formula.api import ols
import seaborn.objects as so
import matplotlib.pyplot as plt
from matplotlib import style

model_presciptions = ols(
    "Opioid_Prescriptions_Per_Capita ~ if_effect + if_treat + if_effect:if_treat",
    result_df,
).fit()
print(model_presciptions.summary())

                                   OLS Regression Results                                  
Dep. Variable:     Opioid_Prescriptions_Per_Capita   R-squared:                       0.287
Model:                                         OLS   Adj. R-squared:                  0.073
Method:                              Least Squares   F-statistic:                     1.342
Date:                             Mon, 27 Nov 2023   Prob (F-statistic):              0.316
Time:                                     01:27:47   Log-Likelihood:                -75.958
No. Observations:                               14   AIC:                             159.9
Df Residuals:                                   10   BIC:                             162.5
Df Model:                                        3                                         
Covariance Type:                         nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      



In [112]:
model_motality = ols(
    "Mortality_rate_from_Drug_Overdoses_Per_Capita ~ if_effect + if_treat + if_effect:if_treat",
    result_df,
).fit()
print(model_motality.summary())

                                          OLS Regression Results                                         
Dep. Variable:     Mortality_rate_from_Drug_Overdoses_Per_Capita   R-squared:                       0.630
Model:                                                       OLS   Adj. R-squared:                  0.519
Method:                                            Least Squares   F-statistic:                     5.678
Date:                                           Mon, 27 Nov 2023   Prob (F-statistic):             0.0156
Time:                                                   01:27:52   Log-Likelihood:                 138.82
No. Observations:                                             14   AIC:                            -269.6
Df Residuals:                                                 10   BIC:                            -267.1
Df Model:                                                      3                                         
Covariance Type:                              

