In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.preprocessing import PolynomialFeatures

from pathlib import Path

from vou.batch import load_json
from experiment.run import get_scenario_dirs

In [2]:
def read_scenario_results(scenario_dir: Path):
    """Load one scenario's results and tag its rows with the scenario parameters.

    Reads ``results.csv`` and ``params.json`` from ``scenario_dir``. Each
    entry of the params mapping is written onto the results frame as a column
    named after the entry's key.

    Returns the results DataFrame with the parameter columns added.
    """
    frame = pd.read_csv(scenario_dir / 'results.csv')
    scenario_params = load_json(scenario_dir / 'params.json')

    # One column per parameter, named after the parameter.
    for key, value in scenario_params.items():
        frame[key] = value

    return frame

In [3]:
def read_all_results():
    """Load every scenario's results and stack them into one DataFrame.

    Scenario directories come from ``get_scenario_dirs()``; each one is read
    with ``read_scenario_results``.

    Returns:
        A single DataFrame holding the rows of all scenarios, with a fresh
        0..n-1 row index.

    Raises:
        ValueError: if no scenario directories were found.
    """
    frames = [read_scenario_results(Path(d)) for d in get_scenario_dirs()]

    if not frames:
        raise ValueError("No scenario directories found; nothing to load.")

    # Each per-scenario frame is read from CSV with its own default row index,
    # so a plain concat would repeat the same labels once per scenario.
    # Renumber so that row labels are unique in the combined frame.
    return pd.concat(frames, ignore_index=True)

In [4]:
# Load the results of all scenarios into one frame.
df = read_all_results()

In [5]:
# 0/1 indicator columns for starting doses of 120 ('high') and 80 ('med').
df['high'] = df['starting_dose'].eq(120).astype(int)
df['med'] = df['starting_dose'].eq(80).astype(int)

In [6]:
# Response variable for the OLS fit further down.
Y = df['overdose_count']

In [7]:
# Columns of df used as predictors. The labels are passed to .loc as a list:
# a tuple is the key form for MultiIndex labels and only selects several
# columns through a fallback. .copy() makes X an independent frame, so adding
# columns to it later cannot touch df or trigger a chained-assignment warning.
# Candidate predictors currently left out: starting_dose, dose_increase,
# behavioral_variability, availability, internal_risk, external_risk.
X = df.loc[
    :,
    [
        "dose_variability",
        "fentanyl_prob",
        "counterfeit_prob",
        "high",
        "med",
    ],
].copy()

In [8]:
# Interaction terms: counterfeit_prob multiplied by dose_variability and by
# fentanyl_prob, appended as two new predictor columns.
X = X.assign(
    counterfeit_prob_dose_variability=X['counterfeit_prob'].mul(X['dose_variability']),
    counterfeit_prob_fentanyl_prob=X['counterfeit_prob'].mul(X['fentanyl_prob']),
)

In [9]:
# Add the intercept column (reported as `const` in the model summaries).
X = sm.add_constant(X)

In [10]:
# Ordinary least squares fit of Y (overdose_count) on the predictors in X.
model = sm.OLS(Y, X).fit()

In [11]:
# Display the regression table for the OLS fit.
model.summary()

0,1,2,3
Dep. Variable:,overdose_count,R-squared:,0.587
Model:,OLS,Adj. R-squared:,0.587
Method:,Least Squares,F-statistic:,10660.0
Date:,"Wed, 09 Feb 2022",Prob (F-statistic):,0.0
Time:,10:06:15,Log-Likelihood:,-129390.0
No. Observations:,52500,AIC:,258800.0
Df Residuals:,52492,BIC:,258900.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.1570,0.090,-1.754,0.079,-0.333,0.018
dose_variability,0.2598,0.196,1.323,0.186,-0.125,0.645
fentanyl_prob,0.0438,0.393,0.112,0.911,-0.726,0.814
counterfeit_prob,0.2664,0.302,0.882,0.378,-0.326,0.859
high,7.1981,0.030,236.626,0.000,7.138,7.258
med,0.0097,0.030,0.317,0.751,-0.050,0.069
counterfeit_prob_dose_variability,0.5132,0.675,0.760,0.447,-0.811,1.837
counterfeit_prob_fentanyl_prob,-0.9414,1.351,-0.697,0.486,-3.589,1.707

0,1,2,3
Omnibus:,12526.441,Durbin-Watson:,2.003
Prob(Omnibus):,0.0,Jarque-Bera (JB):,57692.656
Skew:,1.094,Prob(JB):,0.0
Kurtosis:,7.646,Cond. No.,136.0


In [159]:
def logistic_model(target: str, exog=None, data=None):
    """Fit a logistic regression of one outcome column on a design matrix.

    Args:
        target: name of the outcome column to model.
        exog: design matrix to regress on. Defaults to the notebook-level
            ``X`` as it exists at the moment of the call.
        data: frame holding the ``target`` column. Defaults to the
            notebook-level ``df``.

    Returns:
        The fitted statsmodels results object.
    """
    # Fall back to the notebook globals so existing one-argument calls keep
    # working. Pass exog/data explicitly to make a fit independent of whatever
    # state X and df are in when the cell is run.
    design = X if exog is None else exog
    source = df if data is None else data
    return sm.Logit(source[target], design).fit()

In [160]:
# Fit the logistic model for the fatal_overdose outcome and print its summary table.
m = logistic_model('fatal_overdose')
print(m.summary())

Optimization terminated successfully.
         Current function value: 0.587698
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:         fatal_overdose   No. Observations:                52500
Model:                          Logit   Df Residuals:                    52494
Method:                           MLE   Df Model:                            5
Date:                Sun, 06 Feb 2022   Pseudo R-squ.:               0.0003770
Time:                        17:14:54   Log-Likelihood:                -30854.
converged:                       True   LL-Null:                       -30866.
Covariance Type:            nonrobust   LLR p-value:                 0.0002996
                                        coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
const                                -1.1066      0.070    -15

In [152]:
# OR for 10% increase in counterfeit pill prevalence and dose variability
# NOTE(review): the three constants are presumably the two main-effect
# coefficients plus their interaction, copied by hand from a Logit fit whose
# coefficient table is not shown in full in this notebook — confirm the source.
# NOTE(review): this divides the excess odds ratio (OR - 1) by 10, i.e. scales
# it linearly. Scaling the coefficients instead would be
# np.exp(0.1 * (sum of coefficients)) — confirm which is intended.

((np.exp(0.1074 + 0.1689 + 0.4445) - 1) / 10) + 1

1.1056077414806378

In [161]:
# Column totals for each counterfeit_prob level. The grouped .sum() method is
# used instead of .agg(sum): passing the Python builtin to agg is deprecated
# in recent pandas releases, and .sum() gives the same result.
df.groupby('counterfeit_prob').sum()

Unnamed: 0_level_0,Unnamed: 0,dose_increase,overdose_count,overdose_any,fatal_overdose,dose_variability,fentanyl_prob,starting_dose,behavioral_variability,availability,internal_risk,external_risk,high,med,low
counterfeit_prob,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0.065,371250,137500,17622,2500,1970,2250.0,1125.0,625000,875.0,5375.0,3750.0,3750.0,2500,2500,2500
0.13,371250,137500,17459,2500,2030,2250.0,1125.0,625000,875.0,5375.0,3750.0,3750.0,2500,2500,2500
0.195,371250,137500,17950,2504,2054,2250.0,1125.0,625000,875.0,5375.0,3750.0,3750.0,2500,2500,2500
0.26,371250,137500,18228,2508,2060,2250.0,1125.0,625000,875.0,5375.0,3750.0,3750.0,2500,2500,2500
0.325,371250,137500,18287,2542,2080,2250.0,1125.0,625000,875.0,5375.0,3750.0,3750.0,2500,2500,2500
0.39,371250,137500,18521,2538,2081,2250.0,1125.0,625000,875.0,5375.0,3750.0,3750.0,2500,2500,2500
0.455,371250,137500,18072,2544,2149,2250.0,1125.0,625000,875.0,5375.0,3750.0,3750.0,2500,2500,2500


In [162]:
# Relative change in total overdose_count between counterfeit_prob 0.26 (18228)
# and 0.39 (18521); both values are read off the grouped table above.
(18521 / 18228) - 1

0.016074171604125498

In [51]:
# NOTE(review): 18228 is the overdose_count total at counterfeit_prob 0.26 in
# the grouped table shown in this notebook, but 17495 does not appear in that
# table (the 0.13 row is 17459). Possibly transposed digits or a value from an
# earlier run — confirm before citing this figure.
(17495 / 18228) - 1

-0.04021285933728325