This code cleans all analytical model outputs and produces all summary statistics and tables reported in the main paper and supplement. It has to be run after the analysis code `OIC-OO v7`, using the same ControlFile and from the same folder; this code will read the output .tab files from the analysis, and will fail if they are not available. See main `Analysis & Graphing` folder for `ReadMe` with explanation of ControlFile format and fields.



In [1]:
import os
import json
import regex
import pandas as pd
import numpy as np

from shutil import copy
from distutils.dir_util import copy_tree


def clean_final(baserunname, scen, name='final', subset=['2003', '2004', '2005']):
    """Clean and process main run output tabfile, yielding tabfile with 
    only variables (no params) and larger timestep; `subset` is used to 
    identify params in the output tabfile, and should *not* include the 
    initial time"""
    table = pd.read_csv(f'{baserunname}_{name}_{scen}.tab', sep='\t', 
                        index_col=0, error_bad_lines=False)
    display(f"Processing {baserunname}_{name}_{scen}.tab...")
    
    # Split table along secondary time row
    table1 = table.iloc[:table.index.get_loc('Time'), :]
    table2 = table.iloc[table.index.get_loc('Time'):, :].dropna(axis=1, how='all')
    
    # Convert secondary time row to str, with g-format to drop float decimals from whole years
    table2.columns = table2.iloc[0].apply('{:g}'.format).astype('str')
    table2 = table2[1:] # Drop secondary time row
    table = pd.concat([table1, table2]) # Concat with joint time str axis to align values by time
    table.dropna(how='all', subset=subset, inplace=True) # Drop constants
    table = table[table.columns[::4]] # Reduce density of time axis to 0.25 years
    table.to_csv(f'{baserunname}_{name}_{scen}_vars.tab', sep='\t')
    
    
def clean_sens(baserunname, scen, fitlist, name='sens', repvars=['SimVar'], dropvars=['RepErrRaw']):
    """Clean and process sensitivity output tabfile, yielding cleaned 
    tabfile '_clean' and `repvars`-only tabfile '_fits' from `fitlist`, 
    removing `dropvars`"""
    
    # Clean sensitivity data
    senstable = pd.read_csv(f'{baserunname}_{name}_{scen}_clean.tab', sep='\t', index_col=[0,1])
    senstable = senstable.reorder_levels(['Var', 'Perc']).sort_index()
    
    # Filter out data for specified variables, by default RepErrRaw
    for var in dropvars:
        filt = pd.Series(~senstable.index.levels[0].str.startswith(var), 
                         index=senstable.index.levels[0])
        senstable = senstable[filt[senstable.index.get_level_values('Var')].values]
    senstable.to_csv(f'{baserunname}_{name}_{scen}_clean.tab', sep='\t')
    
    # Extract sensitivity projection fit-to-data
    fits_sens = senstable.loc[[f'{repvar}[{var[0]}]' for repvar in repvars for var in fitlist]]
    fits_sens.to_csv(f'{baserunname}_{name}_{scen}_fits.tab', sep='\t')


def insert_sums(tablename, sumlist, sumvars=['SimVar', 'DataVar'], index_col=0):
    """Calculate summed variables and add to tabfile; sums each var in 
    `sumvars` for elements specified in `sumlist`"""
    t = pd.read_csv(tablename, sep='\t', index_col=index_col)
    
    t_dict = {}
    # For each triplet in sumlist, set first elm as sum of other two
    for a, b, c in sumlist:
        for var in sumvars:
            t_dict[f'{var}[{a}]'] = t.loc[f'{var}[{b}]'] + t.loc[f'{var}[{c}]']
    
    # Compile summed variables
    if index_col==0:
        t_sums = pd.concat(t_dict, axis=1).T
    else:
        t_sums = pd.concat(t_dict)
    
    # Merge back in to main dataframe and export to tabfile
    t = pd.concat([t, t_sums]).sort_index()
    t.to_csv(tablename, sep='\t')


def calc_gof(resdf, simvar, datavar):
    """Calculate goodness-of-fit measures for given sim & data vars"""
    # IMPORTANT: cross-screen for missing sim or data values
    sim = resdf.loc[simvar].where(resdf.loc[datavar].notna())
    dat = resdf.loc[datavar].where(resdf.loc[simvar].notna())
    
    # Calculate various GOF stats & return each one
    error = abs(sim - dat)
    maen = error.mean()/dat.mean()
    mape = (error/dat).mean()
    simstd = np.sqrt((sim ** 2).mean() - sim.mean() ** 2)
    datastd = np.sqrt((dat ** 2).mean() - dat.mean() ** 2)
    r2 = (sim.corr(dat)) ** 2
    mse = (error ** 2).mean()
    um = ((sim.mean() - dat.mean()) ** 2/ mse)
    us = ((simstd - datastd) ** 2/ mse)
    uc = (2 * (1 - sim.corr(dat)) * simstd * datastd / mse)
    return maen, mape, r2, mse, um, us, uc
    
    
def get_year_values(table, senstable, var, years, percs, name):
    """Get value and bounds of specified `var` in `years` as text"""
    vartext = [name + '\n'] # Initialise with specified name, varname by default

    # Iterate through years specified and pull values for each
    for year in years:
        val = table.loc[var, str(year)]
        lower = senstable.loc[var, percs[0]].loc[str(float(year))]
        upper = senstable.loc[var, percs[1]].loc[str(float(year))]

        vartext.append(f"{year}\t{val}\t{lower}\t{upper}\n")

    return vartext


def compare_vals(first, second, projvars, projyear, compperc=50.0):
    """Calculate differences between specified `projvars` in `projyear` 
    for `first` and `second`, using `first` as reference values"""
    vals = []
    for file in first, second:
        senstable = pd.read_csv(file, sep='\t', index_col=[0,1])
        senstable = senstable[senstable.columns[::4]]
        senstable.columns = senstable.columns.astype(float).astype(int)
        vals.append([senstable.loc[var, compperc][projyear] for var in projvars])
        del senstable

    # Calculate change in values using first as reference point
    vals_chg = [(var1-var0)/var0 for var0, var1 in zip(vals[0], vals[1])]
    return vals_chg


def compile_sens_panel(baserunname, name, key, scen, outvars, projvars, endyear, 
                       projyear, params=True, dropvars=None):
    """Compile key outcomes panel for sensitivity analysis, specifying 
    run for comparison with `name`, `key` and `scen`, key outcome vars 
    with `outvars` at `endyear` and `projvars` at `projyear`, including 
    parametric sensitivity if `params` is True (and excluding params 
    e.g. from loop knockout with `dropvars`)"""
    
    # Read in base run for comparison and subset key outcome values
    b = pd.read_csv(f'{baserunname}_final_{scen}_vars.tab', sep='\t', index_col=0)
    b = b.loc[outvars + projvars][[endyear, projyear]]

    sensoutdict = {}
    
    # Read in sensitivity run and calculate change in key outcome values vs. base
    t = pd.read_csv(f'{baserunname}_{name}_{key}_{scen}_vars.tab', sep='\t', index_col=0)
    for var in outvars:
        sensoutdict[var] = (t.loc[var, endyear] - b.loc[var, endyear]) / b.loc[var, endyear]
    for var in projvars:
        sensoutdict[var] = (t.loc[var, projyear] - b.loc[var, projyear]) / b.loc[var, projyear]

    # Read in parameter values and calculate sensitivity
    if params:
        paramdf = pd.read_csv(f'{baserunname}_{name}_params.tab', sep='\t', index_col=0)

        pt = paramdf[['Value', key]] # Select values for relevant run
        if dropvars: # Drop specified params (e.g. knocked-out loops)
            pt = pt.drop(dropvars)
        
        pt = pt[pt['Value'] > 0.0001] # Screen out values below 1e-04
        pt['Chg'] = (pt[key] - pt['Value']) / pt['Value']
        sensoutdict['Med elasticity'] = abs(pt['Chg']).median()
        sensoutdict['Max elasticity'] = max(pt['Chg'].min(), pt['Chg'].max(), key=abs)
    else: # Or specify zero elasticity
        sensoutdict['Med elasticity'] = 0
        sensoutdict['Max elasticity'] = 0

    # Subset sensitivity run results
    t = t.loc[:, :endyear]

    # Calculate goodness-of-fit statistics
    t = t[t.columns[::4]] # Subset to each year instead of 0.25 years
    gofs = [[*calc_gof(t, f'SimVar[{elm[0]}]', f'DataVar[{elm[0]}]')] for elm in fitlist]

    gofdf = pd.DataFrame(gofs, index=[elm[0] for elm in fitlist], 
                         columns=['MAEN', 'MAPE', 'R2', 'MSE', 'Um', 'Us', 'Uc'])
    gofdf.loc['Avg'] = gofdf.mean()
    gofdf.to_csv(f'{baserunname}_{name}_{key}_GOF.tab', sep='\t')

    # Pull relevant GOF statistics from GOF stats dataframe
    sensoutdict['Avg MAEN'] = gofdf.loc['Avg', 'MAEN']
    sensoutdict['Max MAEN'] = gofdf['MAEN'].max()
    
    # Return series with each key outcome for the panel
    return pd.Series(sensoutdict)


def strbds_from_perc(perc):
    """Return lower & upper bounds that define `perc` CrI as strings"""
    if perc > 1: # If perc specified as percentage (not decimal)
        return [str(round((0.5 - perc/200), 3)), str(round((0.5 + perc/200), 3))]
    else: # If perc specified as decimal, not 100%
        return [str(round((0.5 - perc/2), 3)), str(round((0.5 + perc/2), 3))]


def get_value(file, varname):
    """General purpose function for reading values from .mdl, .out, etc. 
    files; returns value matching `varname` in a 'var = val' syntax"""
    varregex = regex.compile(r'(?<=([^\w ]|\n)\s?' + regex.escape(varname)
                             + r'\s*=)\s*-?(?:\d*)(\.\d*)?([eE][+\-]?\d+)?')

    with open(file, 'r') as f:
        filetext = f.read()
        value = float((regex.search(varregex, filetext))[0])

    return value

In [2]:
# Read specified controlfile and unpack into variables
controlfilename = input("Enter control file name (with extension):")
cf = json.load(open(controlfilename, 'r'))

for k,v in cf.items():
    exec(k + '=v')

for setting in [analysissettings]:
    for k, v in setting.items():
        exec(k + '=v')

# Initialise base working directory
os.chdir(f"{baserunname}_IterCal")
basedir = os.getcwd()

Enter control file name (with extension):OICC220127.txt


In [3]:
##### DATA FILE PREPARATION AND CLEANING #####

os.chdir(basedir)
os.makedirs('./Results', exist_ok=True)
os.chdir('./Results')

# Copy over all necessary files from other directories
copy(f'../{baserunname}_main_fits.tab', './')
copy(f'../{baserunname}_params.tab', './')
for cin in (basescens + scenariolist):
    copy(f'../Scenarios/{baserunname}_final_{cin[:-4]}.tab', './')
    copy(f'../Scenarios/{baserunname}_sens_{cin[:-4]}_clean.tab', './')
for cin in basescens[0:2]:
    for proj in proj_subs:
        copy(f'../Scenarios/{baserunname}_final_{cin[:-4]}{proj}.tab', './')
        copy(f'../Scenarios/{baserunname}_sens_{cin[:-4]}{proj}_clean.tab', './')

# Clean & process projection & policy scenario results
for scen in [cin[:-4] for cin in (basescens + scenariolist)]:
    clean_final(baserunname, scen)
    clean_sens(baserunname, scen, fitlist)
    
for scen in [(cin[:-4] + pol[:-4]) for cin in basescens for pol in policylist]:
    clean_final(baserunname, scen)
    
# Clean & process alternative data condition analysis results
for scen in [f'{cin[:-4]}_{basescens[0][:-4]}' for cin in testlist]:
    copy(f'../Sensitivity/{baserunname}_test_{scen}.tab', './')
    copy(f'../Sensitivity/{baserunname}_test_params.tab', './')
    clean_final(baserunname, scen, name='test')

# Clean & process loop knockout sensitivity results
copy(f'../Sensitivity/{baserunname}_lk_params.tab', './')
for key in lkdict.keys():
    for name in ['lk', 'lk_run']:
        copy(f'../Sensitivity/{baserunname}_{name}_{key}_{basescens[0][:-4]}.tab', './')
        clean_final(baserunname, f'{key}_{basescens[0][:-4]}', name=name)

# Clean & process parametric assumptions sensitivity results
sensdict = dict([[''.join([w[0] for w in regex.findall(r"[\w']+", var)]), var] 
                 for var in sensvars])
copy(f'../Sensitivity/{baserunname}_assm_params.tab', './')
for key in sensdict.keys():
    for sfx in ['_L', '_H']:
        copy(f'../Sensitivity/{baserunname}_assm_{key}{sfx}_{basescens[0][:-4]}.tab', './')
        clean_final(baserunname, f'{key}{sfx}_{basescens[0][:-4]}', name='assm')

'Processing X8_final_Base.tab...'

'Processing X8_final_Cnst.tab...'

'Processing X8_final_FentH.tab...'

'Processing X8_final_FentL.tab...'

'Processing X8_final_Pos.tab...'

'Processing X8_final_Neg.tab...'

'Processing X8_final_NoFentHist.tab...'

'Processing X8_final_BaseNone.tab...'

'Processing X8_final_BaseBupProvBarriers.tab...'

'Processing X8_final_BaseBupProv.tab...'

'Processing X8_final_BaseDevOUD.tab...'

'Processing X8_final_BaseDivRxInit.tab...'

'Processing X8_final_BaseFentODRisk.tab...'

'Processing X8_final_BaseHInit.tab...'

'Processing X8_final_BaseNxKits.tab...'

'Processing X8_final_BaseOwnRxInit.tab...'

'Processing X8_final_BasePackage.tab...'

'Processing X8_final_BasePackage_noFent.tab...'

'Processing X8_final_BasePeerRecovery.tab...'

'Processing X8_final_BaseRxRate.tab...'

'Processing X8_final_BaseReturntoOUD.tab...'

'Processing X8_final_BaseBupProvBarriers10.tab...'

'Processing X8_final_BaseBupProv10.tab...'

'Processing X8_final_BaseDevOUD10.tab...'

'Processing X8_final_BaseDivRxInit10.tab...'

'Processing X8_final_BaseFentODRisk10.tab...'

'Processing X8_final_BaseHInit10.tab...'

'Processing X8_final_BaseNxKits10.tab...'

'Processing X8_final_BaseOwnRxInit10.tab...'

'Processing X8_final_BasePackage10.tab...'

'Processing X8_final_BasePackage_noFent10.tab...'

'Processing X8_final_BasePeerRecovery10.tab...'

'Processing X8_final_BaseRxRate10.tab...'

'Processing X8_final_BaseReturntoOUD10.tab...'

'Processing X8_final_BaseBupProvBarriers50.tab...'

'Processing X8_final_BaseBupProv50.tab...'

'Processing X8_final_BaseDevOUD50.tab...'

'Processing X8_final_BaseDivRxInit50.tab...'

'Processing X8_final_BaseFentODRisk50.tab...'

'Processing X8_final_BaseHInit50.tab...'

'Processing X8_final_BaseNxKits50.tab...'

'Processing X8_final_BaseOwnRxInit50.tab...'

'Processing X8_final_BasePackage50.tab...'

'Processing X8_final_BasePackage_noFent50.tab...'

'Processing X8_final_BasePeerRecovery50.tab...'

'Processing X8_final_BaseRxRate50.tab...'

'Processing X8_final_BaseReturntoOUD50.tab...'

'Processing X8_final_BaseDivRxInit65.tab...'

'Processing X8_final_BaseNxPeerRecovery.tab...'

'Processing X8_final_BasePackage_RR.tab...'

'Processing X8_final_CnstNone.tab...'

'Processing X8_final_CnstBupProvBarriers.tab...'

'Processing X8_final_CnstBupProv.tab...'

'Processing X8_final_CnstDevOUD.tab...'

'Processing X8_final_CnstDivRxInit.tab...'

'Processing X8_final_CnstFentODRisk.tab...'

'Processing X8_final_CnstHInit.tab...'

'Processing X8_final_CnstNxKits.tab...'

'Processing X8_final_CnstOwnRxInit.tab...'

'Processing X8_final_CnstPackage.tab...'

'Processing X8_final_CnstPackage_noFent.tab...'

'Processing X8_final_CnstPeerRecovery.tab...'

'Processing X8_final_CnstRxRate.tab...'

'Processing X8_final_CnstReturntoOUD.tab...'

'Processing X8_final_CnstBupProvBarriers10.tab...'

'Processing X8_final_CnstBupProv10.tab...'

'Processing X8_final_CnstDevOUD10.tab...'

'Processing X8_final_CnstDivRxInit10.tab...'

'Processing X8_final_CnstFentODRisk10.tab...'

'Processing X8_final_CnstHInit10.tab...'

'Processing X8_final_CnstNxKits10.tab...'

'Processing X8_final_CnstOwnRxInit10.tab...'

'Processing X8_final_CnstPackage10.tab...'

'Processing X8_final_CnstPackage_noFent10.tab...'

'Processing X8_final_CnstPeerRecovery10.tab...'

'Processing X8_final_CnstRxRate10.tab...'

'Processing X8_final_CnstReturntoOUD10.tab...'

'Processing X8_final_CnstBupProvBarriers50.tab...'

'Processing X8_final_CnstBupProv50.tab...'

'Processing X8_final_CnstDevOUD50.tab...'

'Processing X8_final_CnstDivRxInit50.tab...'

'Processing X8_final_CnstFentODRisk50.tab...'

'Processing X8_final_CnstHInit50.tab...'

'Processing X8_final_CnstNxKits50.tab...'

'Processing X8_final_CnstOwnRxInit50.tab...'

'Processing X8_final_CnstPackage50.tab...'

'Processing X8_final_CnstPackage_noFent50.tab...'

'Processing X8_final_CnstPeerRecovery50.tab...'

'Processing X8_final_CnstRxRate50.tab...'

'Processing X8_final_CnstReturntoOUD50.tab...'

'Processing X8_final_CnstDivRxInit65.tab...'

'Processing X8_final_CnstNxPeerRecovery.tab...'

'Processing X8_final_CnstPackage_RR.tab...'

'Processing X8_final_FentHNone.tab...'

'Processing X8_final_FentHBupProvBarriers.tab...'

'Processing X8_final_FentHBupProv.tab...'

'Processing X8_final_FentHDevOUD.tab...'

'Processing X8_final_FentHDivRxInit.tab...'

'Processing X8_final_FentHFentODRisk.tab...'

'Processing X8_final_FentHHInit.tab...'

'Processing X8_final_FentHNxKits.tab...'

'Processing X8_final_FentHOwnRxInit.tab...'

'Processing X8_final_FentHPackage.tab...'

'Processing X8_final_FentHPackage_noFent.tab...'

'Processing X8_final_FentHPeerRecovery.tab...'

'Processing X8_final_FentHRxRate.tab...'

'Processing X8_final_FentHReturntoOUD.tab...'

'Processing X8_final_FentHBupProvBarriers10.tab...'

'Processing X8_final_FentHBupProv10.tab...'

'Processing X8_final_FentHDevOUD10.tab...'

'Processing X8_final_FentHDivRxInit10.tab...'

'Processing X8_final_FentHFentODRisk10.tab...'

'Processing X8_final_FentHHInit10.tab...'

'Processing X8_final_FentHNxKits10.tab...'

'Processing X8_final_FentHOwnRxInit10.tab...'

'Processing X8_final_FentHPackage10.tab...'

'Processing X8_final_FentHPackage_noFent10.tab...'

'Processing X8_final_FentHPeerRecovery10.tab...'

'Processing X8_final_FentHRxRate10.tab...'

'Processing X8_final_FentHReturntoOUD10.tab...'

'Processing X8_final_FentHBupProvBarriers50.tab...'

'Processing X8_final_FentHBupProv50.tab...'

'Processing X8_final_FentHDevOUD50.tab...'

'Processing X8_final_FentHDivRxInit50.tab...'

'Processing X8_final_FentHFentODRisk50.tab...'

'Processing X8_final_FentHHInit50.tab...'

'Processing X8_final_FentHNxKits50.tab...'

'Processing X8_final_FentHOwnRxInit50.tab...'

'Processing X8_final_FentHPackage50.tab...'

'Processing X8_final_FentHPackage_noFent50.tab...'

'Processing X8_final_FentHPeerRecovery50.tab...'

'Processing X8_final_FentHRxRate50.tab...'

'Processing X8_final_FentHReturntoOUD50.tab...'

'Processing X8_final_FentHDivRxInit65.tab...'

'Processing X8_final_FentHNxPeerRecovery.tab...'

'Processing X8_final_FentHPackage_RR.tab...'

'Processing X8_final_FentLNone.tab...'

'Processing X8_final_FentLBupProvBarriers.tab...'

'Processing X8_final_FentLBupProv.tab...'

'Processing X8_final_FentLDevOUD.tab...'

'Processing X8_final_FentLDivRxInit.tab...'

'Processing X8_final_FentLFentODRisk.tab...'

'Processing X8_final_FentLHInit.tab...'

'Processing X8_final_FentLNxKits.tab...'

'Processing X8_final_FentLOwnRxInit.tab...'

'Processing X8_final_FentLPackage.tab...'

'Processing X8_final_FentLPackage_noFent.tab...'

'Processing X8_final_FentLPeerRecovery.tab...'

'Processing X8_final_FentLRxRate.tab...'

'Processing X8_final_FentLReturntoOUD.tab...'

'Processing X8_final_FentLBupProvBarriers10.tab...'

'Processing X8_final_FentLBupProv10.tab...'

'Processing X8_final_FentLDevOUD10.tab...'

'Processing X8_final_FentLDivRxInit10.tab...'

'Processing X8_final_FentLFentODRisk10.tab...'

'Processing X8_final_FentLHInit10.tab...'

'Processing X8_final_FentLNxKits10.tab...'

'Processing X8_final_FentLOwnRxInit10.tab...'

'Processing X8_final_FentLPackage10.tab...'

'Processing X8_final_FentLPackage_noFent10.tab...'

'Processing X8_final_FentLPeerRecovery10.tab...'

'Processing X8_final_FentLRxRate10.tab...'

'Processing X8_final_FentLReturntoOUD10.tab...'

'Processing X8_final_FentLBupProvBarriers50.tab...'

'Processing X8_final_FentLBupProv50.tab...'

'Processing X8_final_FentLDevOUD50.tab...'

'Processing X8_final_FentLDivRxInit50.tab...'

'Processing X8_final_FentLFentODRisk50.tab...'

'Processing X8_final_FentLHInit50.tab...'

'Processing X8_final_FentLNxKits50.tab...'

'Processing X8_final_FentLOwnRxInit50.tab...'

'Processing X8_final_FentLPackage50.tab...'

'Processing X8_final_FentLPackage_noFent50.tab...'

'Processing X8_final_FentLPeerRecovery50.tab...'

'Processing X8_final_FentLRxRate50.tab...'

'Processing X8_final_FentLReturntoOUD50.tab...'

'Processing X8_final_FentLDivRxInit65.tab...'

'Processing X8_final_FentLNxPeerRecovery.tab...'

'Processing X8_final_FentLPackage_RR.tab...'

'Processing X8_test_Data2019_Base.tab...'

'Processing X8_test_FentMax_Base.tab...'

'Processing X8_lk_av_Base.tab...'

'Processing X8_lk_run_av_Base.tab...'

'Processing X8_lk_pr_Base.tab...'

'Processing X8_lk_run_pr_Base.tab...'

'Processing X8_lk_si_Base.tab...'

'Processing X8_lk_run_si_Base.tab...'

'Processing X8_assm_Asf_L_Base.tab...'

'Processing X8_assm_Asf_H_Base.tab...'

'Processing X8_assm_EoMToOdrB_L_Base.tab...'

'Processing X8_assm_EoMToOdrB_H_Base.tab...'

'Processing X8_assm_EoMToOdrM_L_Base.tab...'

'Processing X8_assm_EoMToOdrM_H_Base.tab...'

'Processing X8_assm_EoMToOdrV_L_Base.tab...'

'Processing X8_assm_EoMToOdrV_H_Base.tab...'

'Processing X8_assm_Owm_L_Base.tab...'

'Processing X8_assm_Owm_H_Base.tab...'

'Processing X8_assm_Prdt_L_Base.tab...'

'Processing X8_assm_Prdt_H_Base.tab...'

'Processing X8_assm_Prit_L_Base.tab...'

'Processing X8_assm_Prit_H_Base.tab...'

'Processing X8_assm_PrwN_L_Base.tab...'

'Processing X8_assm_PrwN_H_Base.tab...'

'Processing X8_assm_POw_L_Base.tab...'

'Processing X8_assm_POw_H_Base.tab...'

'Processing X8_assm_Poces_L_Base.tab...'

'Processing X8_assm_Poces_H_Base.tab...'

'Processing X8_assm_RrHnMT_L_Base.tab...'

'Processing X8_assm_RrHnMT_H_Base.tab...'

'Processing X8_assm_RdHwROom_L_Base.tab...'

'Processing X8_assm_RdHwROom_H_Base.tab...'

'Processing X8_assm_RdROnH_L_Base.tab...'

'Processing X8_assm_RdROnH_H_Base.tab...'

'Processing X8_assm_SoRstprp_L_Base.tab...'

'Processing X8_assm_SoRstprp_H_Base.tab...'

'Processing X8_assm_SoRstRpp_L_Base.tab...'

'Processing X8_assm_SoRstRpp_H_Base.tab...'

'Processing X8_assm_SoRstdpp_L_Base.tab...'

'Processing X8_assm_SoRstdpp_H_Base.tab...'

'Processing X8_assm_SoRstMpd_L_Base.tab...'

'Processing X8_assm_SoRstMpd_H_Base.tab...'

'Processing X8_assm_TtrRss_L_Base.tab...'

'Processing X8_assm_TtrRss_H_Base.tab...'

'Processing X8_assm_TadB_L_Base.tab...'

'Processing X8_assm_TadB_H_Base.tab...'

'Processing X8_assm_TadM_L_Base.tab...'

'Processing X8_assm_TadM_H_Base.tab...'

'Processing X8_assm_TadV_L_Base.tab...'

'Processing X8_assm_TadV_H_Base.tab...'

'Processing X8_assm_TsfBH_L_Base.tab...'

'Processing X8_assm_TsfBH_H_Base.tab...'

'Processing X8_assm_TsfBRO_L_Base.tab...'

'Processing X8_assm_TsfBRO_H_Base.tab...'

'Processing X8_assm_TsfMHr_L_Base.tab...'

'Processing X8_assm_TsfMHr_H_Base.tab...'

'Processing X8_assm_TsfMROr_L_Base.tab...'

'Processing X8_assm_TsfMROr_H_Base.tab...'

'Processing X8_assm_TsrHrtROnH_L_Base.tab...'

'Processing X8_assm_TsrHrtROnH_H_Base.tab...'

'Processing X8_assm_Tsfi_L_Base.tab...'

'Processing X8_assm_Tsfi_H_Base.tab...'

In [4]:
##### CALCULATE AGGREGATED VARIABLES, STD ERRS & GOF STATISICS #####

fitdict = dict(fitlist)

# Specify aggregate variables to calculate w/ labels
sumlist = [('ROUT', 'ROUD', 'ROUH'), ('InRT', 'InRM', 'InRD'), ('ODRT', 'ODRB', 'ODSB')]
fitdict['ROUT'] = 'Total Rx opioid use disorder'
fitdict['InRT'] = 'Total Rx misuse initiation'
fitdict['ODRT'] = 'Overdose deaths (Rx + Rx synthetics)'

# Calculate aggregates for various results tabfiles
insert_sums(f'{baserunname}_main_fits.tab', sumlist)
for cin in basescens + scenariolist:
    insert_sums(f'{baserunname}_final_{cin[:-4]}_vars.tab', sumlist, 
                sumvars=['SimVar', 'DataVar', 'RepVar'])
    insert_sums(f'{baserunname}_sens_{cin[:-4]}_clean.tab', sumlist, 
                sumvars=['SimVar', 'RepVar'], index_col=[0,1])

# Read and append standard error terms where available
tssd = pd.read_excel('../../Time series standard deviations.xlsx', 
                     sheet_name='Summary', index_col=[0,1], header=1)

stderrdict = {} # Initialise container for stderrs
stderrdict['DataErr[InRT]'] = tssd.loc[
    ('Total Rx misuse initiation SAMHSA', 'Standard Error of Weighted Mean')]
stderrdict['DataErr[InHT]'] = tssd.loc[('Total heroin initiation SAMHSA', 'RAND Multiplied SE')]
stderrdict['DataErr[ROUT]'] = (
    tssd.loc[('Rx OUD no PY heroin NSDUH', 'Standard Error of Weighted Mean')] 
    + tssd.loc[('Rx OUD + H NSDUH RAND', 'RAND Multiplied SE')])
stderrdict['DataErr[HUD]'] = tssd.loc[('HUD NSDUH RAND', 'RAND Multiplied SE')]

stderrs = pd.concat(stderrdict, axis=1).T # Concatenate stderr series and transpose
stderrs.columns = stderrs.columns.astype('str')

fits = pd.read_csv(f'{baserunname}_main_fits.tab', sep='\t', index_col=0)
fits = pd.concat([fits, stderrs])

fits.to_csv(f'{baserunname}_main_fits.tab', sep='\t')

# Calculate goodness-of-fit statistics
fits = fits[fits.columns[::4]]
gofs = [[*calc_gof(fits, f'SimVar[{elm}]', f'DataVar[{elm}]')] for elm in fitdict.keys()]

gofdf = pd.DataFrame(gofs, index=fitdict.values(), 
                     columns=['MAEN', 'MAPE', 'R2', 'MSE', 'Um', 'Us', 'Uc'])
gofdf.loc['Average'] = gofdf.iloc[0:-3].mean() # Leave out calculated aggregates from average
gofdf.to_csv(f'{baserunname}_GOF.tab', sep='\t')
display(gofdf)

Unnamed: 0,MAEN,MAPE,R2,MSE,Um,Us,Uc
Rx misuse,0.069915,0.077364,0.896295,469789600000.0,0.00524,0.288046,0.706714
Rx OUD no heroin,0.098521,0.102244,0.774939,48094650000.0,0.03482,0.050344,0.914836
Rx OUD with heroin,0.253866,0.31012,0.717824,2028300000.0,0.001739,0.170136,0.828125
Nondisordered heroin use,0.266286,0.269278,0.20087,41428040000.0,0.004651,0.145329,0.85002
Heroin use disorder,0.116095,0.118851,0.836071,48852550000.0,0.007466,0.032355,0.960179
MOUD Tx (buprenorphine),0.030501,0.11189,0.997619,181098200.0,0.006632,0.071948,0.92142
MOUD Tx (methadone),0.015621,0.024677,0.953486,231711500.0,0.031656,0.089511,0.878834
MOUD Tx (Vivitrol),0.021976,0.039537,0.999564,88206.77,0.205188,0.155055,0.639757
Rx misuse initiation (own Rx),0.09805,0.110611,0.506337,1508846000.0,0.000663,0.000492,0.998844
Rx misuse initiation (diverted),0.047379,0.054549,0.952787,8577663000.0,0.007944,0.012123,0.979933


In [5]:
##### COMPILE AND EXPORT INPUT VALUES AND SELECTED YEAR-BY-YEAR VALUES #####

# Assemble input time series projection values
t = pd.read_csv(f'{baserunname}_final_{basescens[0][:-4]}_vars.tab', sep='\t', index_col=0)
p = pd.read_csv(f'{baserunname}_final_{scenariolist[0][:-4]}_vars.tab', sep='\t', index_col=0)
n = pd.read_csv(f'{baserunname}_final_{scenariolist[1][:-4]}_vars.tab', sep='\t', index_col=0)

inputslist = [f'Input\t{endyear}\t{projyear}\tOptimistic\tPessimistic\n'] # Initialise with column labels
for proj in proj_subs:
    endval = t.loc[f'Projection output data[{proj}]', str(endyear)]
    projval = t.loc[f'Projection output data[{proj}]', str(projyear)]
    posval = p.loc[f'Projection output data[{proj}]', str(projyear)]
    negval = n.loc[f'Projection output data[{proj}]', str(projyear)]
    inputslist.append(f'{proj}\t{endval}\t{projval}\t{posval}\t{negval}\n')
    
with open(f'{baserunname}_inputs.tab', 'w') as f:
    f.writelines(inputslist)
del t

# Compile yearvals output for specified variables and years from sensitivity projections
t = pd.read_csv(f'{baserunname}_final_{basescens[0][:-4]}_vars.tab', sep='\t', index_col=0)
s = pd.read_csv(f'{baserunname}_sens_{basescens[0][:-4]}_clean.tab', sep='\t', index_col=[0,1])

vartext = [f'Year\tVal\t{yv_percs[0]}\t{yv_percs[1]}\n'] # Initialise with column labels
for var in yearvals:
    vartext.extend(get_year_values(t, s, var, years, yv_percs, var))

# Add prior values
for prior in priorlist:
    vartext.extend(get_year_values(t, s, f'SimPrior[{prior[0]}]', [prior[1]], yv_percs, prior[2]))

# Get projection end values for each main scenario
for cin in (basescens[:1] + scenariolist[0:2]):
    t = pd.read_csv(f'{baserunname}_final_{cin[:-4]}_vars.tab', sep='\t', index_col=0)
    s = pd.read_csv(f'{baserunname}_sens_{cin[:-4]}_clean.tab', sep='\t', index_col=[0,1])
    
    for var in projvars:
        vartext.extend(get_year_values(t, s, var, [str(projyear)], yv_percs, cin[:-4] + ' ' + var))

with open(f'{baserunname}_yearvals.tab', 'w') as f:
    f.writelines(vartext)

In [6]:
##### ALTERNATIVE PROJECTION ASSUMPTIONS SENSITIVITY ANALYSIS #####

dflist = [] # Initialise empty container

# Iterate through using each basescen as reference point
for cin in basescens[0:2]:
    first = f'{baserunname}_sens_{cin[:-4]}_clean.tab' # Specify reference scenario

    # Calculate comparison for each projection assumption
    vals_chgs = []
    for proj in proj_subs:
        second = f'{baserunname}_sens_{cin[:-4]}{proj}_clean.tab'
        vals_chgs.append(compare_vals(first, second, projvars, projyear))
    dflist.append(pd.DataFrame(vals_chgs, index=proj_subs, columns=projvars))

avgchgdf = (dflist[0] - dflist[1]) / 2 # NOTE: expressed as delta from basescens[0] to [1]
dflist.append(avgchgdf)

# Assemble and export comparison results
cols = [f'{cin} {var}' for cin in ['Base', 'Cnst', 'Avg'] for var in projvars]
chgsdf = pd.concat(dflist, axis=1)
chgsdf.loc['MAC'] = abs(chgsdf).mean() # Calculate mean absolute change
chgsdf.columns = cols
chgsdf.to_csv(f'{baserunname}_proj_changes.tab', sep='\t')

chgsdf

Unnamed: 0,Base Projected cumulative overdose deaths,Base Projected cumulative UD person years,Cnst Projected cumulative overdose deaths,Cnst Projected cumulative UD person years,Avg Projected cumulative overdose deaths,Avg Projected cumulative UD person years
Fent,-0.197057,0.015097,0.244989,-0.015885,-0.221023,0.015491
NxKD,0.043895,-0.003185,-0.040737,0.002592,0.042316,-0.002889
HPI,0.000369,6e-05,-0.000338,-6.8e-05,0.000354,6.4e-05
BMDCap,0.001204,0.000282,-0.001389,-0.000398,0.001297,0.00034
MMTCap,0.070109,0.013703,-0.062156,-0.013356,0.066132,0.013529
VivCap,0.003535,0.00202,-0.003451,-0.00189,0.003493,0.001955
PtRx,0.021198,0.030998,-0.026068,-0.035875,0.023633,0.033436
RxPP,0.002668,0.00386,-0.004451,-0.006372,0.00356,0.005116
RxDur,-0.004363,-0.006037,0.00833,0.011625,-0.006346,-0.008831
MME,0.009216,0.021112,-0.013833,-0.027296,0.011525,0.024204


In [7]:
##### PRODUCE SUMMARY TABLES FROM POLICY ANALYSIS #####

# Process annual and cumulative main results for each scenario and baseline case
for cin in basescens:
    # Read in baseline results
    b = pd.read_csv(f'{baserunname}_final_{cin[:-4]}_vars.tab', sep='\t', index_col=0)
    resdict = {'Baseline': b.loc[annvars]}
    curdict = {'Baseline': b.loc[projvars]}
    cumdf = pd.DataFrame(columns=projvars) # Initialise container dataframe
    cumdf.loc['Baseline'] = [b.loc[var, str(projyear)] - b.loc[var, str(polstart)] 
                             for var in projvars] # Re-zero to polstart year value
    del b # Clear results to free up memory

    for pol in policylist:
        # Read in results for each scenario
        scen = cin[:-4] + pol[:-4]
        t = pd.read_csv(f'{baserunname}_final_{scen}_vars.tab', sep='\t', index_col=0)
        resdict[pol[:-4]] = t.loc[annvars]
        curdict[pol[:-4]] = t.loc[projvars]
        cumdf.loc[pol[:-4]] = [t.loc[var, str(projyear)] - t.loc[var, str(polstart)] 
                               for var in projvars] # Re-zero to polstart year value
        
    # Compile cumulative and annual results dataframes
    resdf = pd.concat(resdict, names=['Scenario', 'Var'])
    curdf = pd.concat(curdict, names=['Scenario', 'Var'])
    curdf = curdf.subtract(curdf[str(polstart)], axis=0)
    resdf = pd.concat([resdf, curdf])
    resdf = resdf.reorder_levels(['Var', 'Scenario']).sort_index()
    resdf = resdf.loc[:, str(polstart):]
    
    # Calculate % changes
    chgdict = {}
    for var in annvars: # Calculate and append for annual results
        chgvar = f'% change in {var}'
        chgdict[chgvar] = ((resdf.loc[var] - resdf.loc[(var, 'Baseline')]) 
                           / resdf.loc[(var, 'Baseline')]) # Calculate % change from baseline
    chgdf = pd.concat(chgdict)
    resdf = resdf.append(chgdf)
    
    for var in projvars: # Calculate and append for cumulative results
        chgvar = f'% change in {var}'
        cumdf[chgvar] = (cumdf[var] - cumdf.loc['Baseline', var])/ cumdf.loc['Baseline', var]

    # Rename scenarios with specified labels
    resdf.rename(polnames, inplace=True)
    cumdf.rename(polnames, inplace=True)

    resdf.to_csv(f'{baserunname}_{cin[:-4]}_PolRes.tab', sep='\t')
    cumdf.to_csv(f'{baserunname}_{cin[:-4]}_PolTot.tab', sep='\t')

    
# Process annual results with CrI quantiles from full sensitivity sample
pollist = [f'{basescens[0][:-4]}{cin[:-4]}' for cin in policylist] # Compile list of scenarios

poldict = {} # Initialise container for relevant results
# Add main and sens results for each scenario to container
for scen in pollist:
    t = pd.read_csv(f'{baserunname}_final_{scen}_vars.tab', sep='\t', index_col=0)
    s = pd.read_csv(f'{baserunname}_sens_{scen}_clean.tab', sep='\t', index_col=[0,1])
    t.columns = t.columns.astype(float)
    s.columns = s.columns.astype(float)
    
    scendict = {}
    for var in polvars: # Add expected values from baserun to sensitivity dataframe
        scendict[var] = s.loc[var].sort_values([polstart, styear, endyear])
        scendict[var].loc['EV'] = t.loc[var]
    poldict[scen] = pd.concat(scendict, keys=polvars, names=['Var', 'Run'])
    del s, t # Clear results to free up memory

# Compile new dataframe of scenario results with full sample
projtable = pd.concat(poldict, names=['Scen', 'Var', 'Run'])
projtable.to_csv(f'{baserunname}_polprojraw.tab', sep='\t')

p = projtable.loc[:, polstart:] # Subset results to relevant years
p.index = p.index.droplevel('Run')

# Calculate annual value quantiles at each time step based on full sample
polprojdict = {}
for scen in pollist:
    scenpercdict = {}
    for var in polvars:
        scenpercdict[var] = p.loc[(scen, var)].iloc[:-1].quantile(polquants)
        scenpercdict[var].loc['EV'] = p.loc[(scen, var)].iloc[-1]
    
    polprojdict[scen] = pd.concat(scenpercdict, keys=polvars, names=['Var', 'Perc'])

# Calculate % change quantiles at each time step based on full sample
polpercdict = {}
for scen in pollist[1:]:
    # First calculate % change across the entire sample
    percs = (p.loc[scen] - p.loc[pollist[0]])/p.loc[pollist[0]]
    
    # Then take quantiles for the % change value at each time step
    scenpercdict = {}
    for var in polvars:
        scenpercdict[var] = percs.loc[var].iloc[:-1].quantile(polquants)
        scenpercdict[var].loc['EV'] = percs.loc[var].iloc[-1]
    
    polpercdict[scen] = pd.concat(scenpercdict, keys=polvars, names=['Var', 'Perc'])

# Rename scenarios with specified labels
polrenames = dict([[scen, polnames[cin[:-4]]] for scen, cin in zip(pollist, policylist)])

# Save compiled tables of annual value and % change quantiles
polprojtable = pd.concat(polprojdict, names=['Scen', 'Var', 'Perc'])
polprojtable.rename(index=polrenames, level=0, inplace=True)
polprojtable.to_csv(f'{baserunname}_polprojann.tab', sep='\t')

polperctable = pd.concat(polpercdict, names=['Scen', 'Var', 'Perc'])
polperctable.rename(index=polrenames, level=0, inplace=True)
polperctable.to_csv(f'{baserunname}_polprojperc.tab', sep='\t')

In [8]:
##### LOOP KNOCKOUT ANALYSIS PANEL #####

# Set up labels for loop knockout keys
lknamedict = {'av': 'Availability', 'pr': 'Perceived risk', 'si': 'Social influence'}

lkdfdict = {} # Initiatlise container for results

# Iterate through loop knockout keys and compile deactivated and re-estimated results from each
for key in lkdict.keys():
    lkdfdict['Deactivated ' + lknamedict[key]] = compile_sens_panel(
        baserunname, 'lk_run', key, basescens[0][:-4], outvars, projvars, 
        str(endyear), str(projyear), dropvars=lkdict[key], params=False)
    lkdfdict['Recalibrated w/o ' + lknamedict[key]] = compile_sens_panel(
        baserunname, 'lk', key, basescens[0][:-4], outvars, projvars, 
        str(endyear), str(projyear), dropvars=lkdict[key])

# Compile and export results
lkdf = pd.concat(lkdfdict, axis=1).T
lkdf.to_csv(f'{baserunname}_lk_sens.tab', sep='\t')
lkdf

Unnamed: 0,Cumulative overdose deaths,Cumulative UD person years,Projected cumulative overdose deaths,Projected cumulative UD person years,Med elasticity,Max elasticity,Avg MAEN,Max MAEN
Deactivated Availability,-0.250672,-0.170013,-0.400782,-0.155946,0.0,0.0,0.227831,0.534612
Recalibrated w/o Availability,-0.001151,-0.002813,0.06165,0.142808,0.045423,2.64415,0.139565,0.329296
Deactivated Perceived risk,1.121378,0.268582,3.081437,0.918938,0.0,0.0,1.972365,12.376209
Recalibrated w/o Perceived risk,-0.005124,0.004091,0.126429,0.00707,0.059089,18.6013,0.155176,0.300739
Deactivated Social influence,-0.138857,-0.045306,-0.320325,-0.091862,0.0,0.0,0.236695,0.50538
Recalibrated w/o Social influence,-0.004294,-0.001294,0.115741,-0.033767,0.071636,2.469114,0.135563,0.293534


In [9]:
##### PARAMETRIC ASSUMPTIONS SENSITIVITY ANALYSIS PANEL #####

# Compile runnames from variable names in sensvars
sensdict = dict([[''.join([w[0] for w in regex.findall(r"[\w']+", var)]), var] 
                 for var in sensvars])

assmdfdict = {} # Initiatlise container for results

# Iterate through sensvars names and compile results from each
for key in sensdict.keys():
    # Compile high and low scenario results panels
    high = compile_sens_panel(baserunname, 'assm', f'{key}_L', basescens[0][:-4], outvars, 
                              projvars, str(endyear), str(projyear))
    low = compile_sens_panel(baserunname, 'assm', f'{key}_H', basescens[0][:-4], outvars, 
                             projvars, str(endyear), str(projyear))
    
    # Concatenate and take average
    var = pd.concat({'H': high, 'L': low}, axis=1)
    var['avg'] = (abs(var['H']) + abs(var['L'])) / 2 * np.sign(var['H']) # Take sign from H change
    var['avg'].iloc[0:6] = var['avg'].iloc[0:6] / sensrange # Convert to elasticity
    assmdfdict[sensdict[key]] = var['avg']

# Compile and export results
assmdf = pd.concat(assmdfdict, axis=1).T
assmdf.to_csv(f'{baserunname}_assm_sens.tab', sep='\t')
assmdf

Unnamed: 0,Cumulative overdose deaths,Cumulative UD person years,Projected cumulative overdose deaths,Projected cumulative UD person years,Med elasticity,Max elasticity,Avg MAEN,Max MAEN
ADF substitutability factor,-0.008695,-0.005959,-0.003984,-0.005064,0.048377,-2.568796,0.126883,0.26612
Effect of MOUD Tx on OD death rate[Bup],0.000925,-0.003711,-0.033869,0.008052,0.031899,-1.53401,0.126881,0.266322
Effect of MOUD Tx on OD death rate[MMT],-0.000682,-0.000727,-0.019236,0.008357,0.024752,-1.987798,0.126915,0.266277
Effect of MOUD Tx on OD death rate[Viv],0.001289,-0.002716,-0.01402,0.016326,0.067332,-2.628936,0.126943,0.266231
OxyContin withdrawal magnitude,-0.002139,0.008638,-0.015501,-0.010958,0.036614,2.984977,0.126833,0.266107
Perceived risk decrease time,-0.002428,-0.00616,-0.006774,-0.00642,0.040778,-2.040129,0.126871,0.266137
Perceived risk increase time,-0.002522,0.00918,0.014261,0.00451,0.031492,-0.800005,0.126917,0.266071
Perceived risk weight NFOD,-0.002036,-0.00449,-0.008184,-0.014776,0.045095,-2.226558,0.126848,0.266227
Probability OD witnessed,0.002475,-0.011777,0.072349,-0.026882,0.040181,-1.604609,0.126833,0.266626
Probability of calling emergency services,-0.000579,-0.002404,-0.011966,-0.008052,0.035107,2.159412,0.12687,0.266203


In [10]:
##### ALTERNATIVE DATA CONDITIONS SENSITIVITY ANALYSIS PANEL #####

# Set up labels for alternative data conditions
aldtnamedict = {'Data2019': 'Excluding 2020 data'}

aldtdfdict = {} # Initiatlise container for results

# Iterate through sensvars names and compile results from each
for key, val in aldtnamedict.items():
    # Compile high and low scenario results panels
    aldtdfdict[val] = compile_sens_panel(baserunname, 'test', key, basescens[0][:-4], 
                                         outvars, projvars, str(endyear), str(projyear))

# Compile and export results
aldtdf = pd.concat(aldtdfdict, axis=1).T
aldtdf.to_csv(f'{baserunname}_aldt_sens.tab', sep='\t')
aldtdf


Unnamed: 0,Cumulative overdose deaths,Cumulative UD person years,Projected cumulative overdose deaths,Projected cumulative UD person years,Med elasticity,Max elasticity,Avg MAEN,Max MAEN
Excluding 2020 data,-0.007885,0.006924,0.019055,0.102588,0.009327,0.500095,0.123994,0.251937


In [11]:
##### HOLDOUT DATA ANALYSIS PROCESSING #####

# Clean and process results files
clean_final(baserunname, basescens[0][:-4], name='hold_final')
clean_sens(baserunname, basescens[0][:-4], fitlist, name='hold_sens', repvars=['RepVar'])

# Read in predicted CrI values and actual data values
holdfits = pd.read_csv(f'{baserunname}_hold_sens_Base_fits.tab', sep='\t', index_col=[0,1])
holdfits = holdfits.loc[:, :str(float(endyear))]
holdfits.drop([f'RepVar[{var}]' for var in hold_excl], inplace=True, level=0)
holdfits = holdfits.reorder_levels(['Perc', 'Var']).sort_index()

mainfits = pd.read_csv(f'{baserunname}_main_fits.tab', sep='\t', index_col=[0])
datavars = [f'DataVar[{var[0]}]' for var in fitlist]
datafits = mainfits.loc[datavars].sort_index()
datafits.drop([f'DataVar[{var}]' for var in hold_excl], inplace=True)

# Create Boolean df tracking which values are within which percent CrIs
for bds in hold_percs:
    
    # Reindex data values dataframe to use same keys
    datafits.columns = holdfits.loc[bds[0]].columns
    datafits.index = holdfits.loc[bds[0]].index
    
    # Compare data values to credible interval limits for entire dataframe
    booldf = ((datafits > holdfits.loc[bds[0]]) & (datafits < holdfits.loc[bds[1]]))
    booldf.where(datafits.notna(), pd.NA, inplace=True) # Ensure NAs are coded as NA, not False
    booldf = booldf[booldf.columns[::4]] # Reduce down to annual resolution
    booldf = booldf.loc[:, str(float(holdoutyear+1)):] # Limit to predicted years

    # Create 'asterisk' version of Boolean output (for use in tables)
    stardf = booldf.copy()
    stardf.where(booldf==True, '*', inplace=True)
    stardf.mask(booldf==True, '', inplace=True)
    
    # Create results table of data values and predicted ranges for all vars and prediction years
    holddfdict = {}
    for yr in booldf.columns:
        data, lowers, uppers = (col.astype('int64').apply('{:.2e}'.format) for col in 
                                [datafits[yr], holdfits.loc[bds[0]][yr], holdfits.loc[bds[1]][yr]])
        yrvals = [f'{val}{star} ({lower}-{upper})' for val, star, lower, upper 
                  in zip(data, stardf[yr], lowers, uppers)]
        holddfdict[yr] = pd.Series(yrvals)
        
    holddf = pd.concat(holddfdict, axis=1)
    holddf.index = booldf.index # To assign RepVar names to output
    holddf.columns = pd.to_numeric(holddf.columns).astype('int')
    holddf.to_csv(f'{baserunname}_holdout.tab', sep='\t')
    booldf.to_csv(f'{baserunname}_holdout_bool.tab', sep='\t')
    
    # 
    predtotal = booldf.size
    predright = booldf.sum().sum()
    predshort = booldf.drop([f'RepVar[{var}]' for var in hold_drop]).size
    predshrgt = booldf.drop([f'RepVar[{var}]' for var in hold_drop]).sum().sum()
    
    display(booldf)
    display(predtotal, predright, predshort, predshrgt)

'Processing X8_hold_final_Base.tab...'

Unnamed: 0_level_0,2013.0,2014.0,2015.0,2016.0,2017.0,2018.0,2019.0,2020.0
Var,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
RepVar[HUD],True,True,True,True,True,True,False,True
RepVar[InHD],True,False,True,False,True,False,False,False
RepVar[InHO],True,True,True,True,True,True,True,True
RepVar[InHR],True,False,True,False,True,False,True,True
RepVar[InHT],True,False,True,False,True,False,True,False
RepVar[InRD],True,True,True,True,True,True,True,True
RepVar[InRM],True,True,True,True,False,False,True,True
RepVar[NDHU],True,False,True,False,False,False,False,False
RepVar[ODHC],False,False,False,False,True,True,True,True
RepVar[ODRB],True,True,True,True,True,True,False,True


120

86

104

78

In [12]:
##### SYNDATA CrI PROCESSING #####

# Read in data
syndf = pd.read_csv(f'{baserunname}_syndata_results.tab', sep='\t', index_col=[0, 1])
syndf = syndf.reorder_levels(['Perc', 'Run']).sort_index()

spdfdict = {} # Initialise container for results

# Create Boolean df tracking which values are within which percent CrIs
for perc in syn_reppercs:
    bds = strbds_from_perc(perc) # Calculate CrI bounds for each percent CrI
    spdfdict[perc] = ((syndf.loc['True'] > syndf.loc[bds[0]]) 
                      & (syndf.loc['True'] < syndf.loc[bds[1]]))

# Calculate distance of estimate from median relative to main CrI
bdsmain = strbds_from_perc(syn_mainperc)
spdfdict[f'dist{syn_mainperc}'] = abs((syndf.loc['Value'] - syndf.loc['True']) / 
                                      (syndf.loc[bdsmain[1]] - syndf.loc[bdsmain[0]]))

# Compile and export percent CrI calculations
synpercdf = pd.concat(spdfdict, names=['Perc', 'Run'])
synpercdf.to_csv(f'{baserunname}_syndata_intervals.tab', sep='\t')

# Collapse Boolean df to get mean percentages within each CrI
means = synpercdf.mean(axis=1).groupby('Perc').mean()
means[f'Dist{syn_mainperc}Med'] = np.nanmedian(spdfdict[f'dist{syn_mainperc}'])
means.to_csv(f'{baserunname}_syndata_means.tab', sep='\t')
means

Perc
50           0.348630
80           0.600000
90           0.715068
95           0.776712
98           0.822603
dist95       0.431584
Dist95Med    0.284392
dtype: float64

In [36]:
##### CALCULATE VALUES FOR SUMMARYTEXT #####

# Pull values for fentanyl counterfactual ODs
t = pd.read_csv(f'{baserunname}_final_{basescens[0][:-4]}_vars.tab', sep='\t', index_col=0)
nft = pd.read_csv(f'{baserunname}_final_{scenariolist[0][:-4]}_vars.tab', sep='\t', index_col=0)
nofentods = nft.loc['Cumulative overdose deaths', '2019']
nofentodsdata = np.sum((t.loc['Total overdose deaths NVSS'] 
                        - t.loc['Total overdose deaths base Rx NVSS'] 
                        - t.loc['Total overdose deaths base heroin NVSS'])[::4])
del t, nft

# Calculate MCMC sample size
mcsample = mcsettings['MCLIMIT'] - mcsettings['MCBURNIN']

# Calculate PSRF percentages below 1.1 and 1.2 key thresholds
mcout = pd.read_csv(f'{baserunname}_main_MC_MCMC_stats.tab', sep='\t', index_col=0)
psrfs = [i for i in mcout.index if 'PSRF' in i]
psrfs.remove('PSRF Payoff')
mcout = mcout.loc[psrfs]
mcout.columns = mcout.columns.astype('float').astype('int')
mcout = mcout[mcout.columns[mcout.columns > mcsettings['MCBURNIN']]].dropna(axis=1)
psrf12 = np.nanmean(mcout < 1.2)
psrf11 = np.nanmean(mcout < 1.1)
del mcout

# Get parameter numbers
t = pd.read_csv(f'{baserunname}_params.tab', sep='\t', index_col=0)
iscs = len([idx for idx in t.index if 'Initial stock correction' in idx])
estpars = len(t.index) - iscs
del t

# Compile summarytext
summarytext = [
    f"Exogenous inputs\t{len(proj_subs)}\n", 
    f"Calibration time series\t{len(fitlist) - 3}\n", 
    f"MCMC total\t{mcsettings['MCLIMIT']}\n", 
    f"MCMC burnin\t{mcsettings['MCBURNIN']}\n", 
    f"MCMC sample\t{mcsample}\n", 
    f"MCMC PSRF < 1.2\t{psrf12}\t< 1.1\t{psrf11}\n", 
    f"Sensitivity sample\t{int(mcsample * samplefrac)}\n", 
    f"Sensitivity analysis range\t{sensrange}\n", 
    f"Syndata sets\t{synsample}\n", 
    f"Estimated parameters (no ISCs)\t{estpars}\n", 
    f"Initial stock corrections\t{iscs}\n", 
    f"Cumulative OD deaths without fentanyl\t{int(nofentods)}\n", 
    f"Cumulative synth-involved OD deaths DATA\t{int(nofentodsdata)}\n", 
    f"Confidence interval estimated params\t{round(param_percs[-1] - param_percs[0], 3)}\n", 
    f"Confidence interval estimated params\t{syn_mainperc/100}\n", 
    f"Holdout cutoff year\t{holdoutyear}\n", 
    f"Total holdout years\t{endyear - holdoutyear}\n", 
    f"Total holdout datapoints\t{predtotal}\n", 
    f"Holdout datapoints in pred. interval\t{predright}\n"
]

# Calculate projection differences for key outcomes expressed as delta from 'base' to 'cnst'
first = f'{baserunname}_sens_{basescens[0][:-4]}_clean.tab'
second = f'{baserunname}_sens_{basescens[1][:-4]}_clean.tab'
basecomps = compare_vals(first, second, projvars, projyear)

summarytext.extend([f"Base-Cnst delta for {var}\t{val}\n" for var, val in zip(projvars, basecomps)])

# Read fixed parameter values from .mdl file
mdl = f"../{simsettings['model']}"
summarytext.append("\n\nFixed parameter values\n")
summarytext.extend([f'{var}\t{get_value(mdl, var)}\n' for var in paramvals])

# Create relative Tx-seeking rate table
ot = 1
ob = get_value(mdl, "Tx seeking fraction Bup Rx OUD")
om = round((ot - ob) * get_value(mdl, "Tx seeking fraction MMT Rx OUD relative"), 5)
ov = round(ot - ob - om, 5)
ht = get_value(mdl, "Tx seeking rate HUD relative to Rx OUD no H")
hb = round(ht * get_value(mdl, "Tx seeking fraction Bup HUD"), 5)
hm = round((ht - hb) * get_value(mdl, "Tx seeking fraction MMT HUD relative"), 5)
hv = round(ht - hb - hm, 5)

summarytext.extend(["\n\n", "Relative Tx seeking rates\n", 
                    f"OUD\t{ot}\t{ob}\t{om}\t{ov}\n", f"HUD\t{ht}\t{hb}\t{hm}\t{hv}\n"])

# Export summary text file
with open(f"{baserunname}_summary.txt", 'w') as summaryfile:
    summaryfile.writelines(summarytext)

display(summarytext)


['Exogenous inputs\t11\n',
 'Calibration time series\t15\n',
 'MCMC total\t2500000\n',
 'MCMC burnin\t1500000\n',
 'MCMC sample\t1000000\n',
 'MCMC PSRF < 1.2\t0.987546699875467\t< 1.1\t0.950186799501868\n',
 'Sensitivity sample\t5000\n',
 'Sensitivity analysis range\t0.1\n',
 'Syndata sets\t20\n',
 'Estimated parameters (no ISCs)\t53\n',
 'Initial stock corrections\t20\n',
 'Cumulative OD deaths without fentanyl\t476507\n',
 'Cumulative synth-involved OD deaths DATA\t228109\n',
 'Confidence interval estimated params\t0.9\n',
 'Confidence interval estimated params\t0.95\n',
 'Holdout cutoff year\t2012\n',
 'Total holdout years\t8\n',
 'Total holdout datapoints\t120\n',
 'Holdout datapoints in pred. interval\t86\n',
 'Base-Cnst delta for Projected cumulative overdose deaths\t-0.0705067212778524\n',
 'Base-Cnst delta for Projected cumulative UD person years\t0.08633543938693883\n',
 '\n\nFixed parameter values\n',
 'Perceived risk weight NFOD\t0.1\n',
 'OxyContin withdrawal magnitude\t0.

In [37]:
##### SEND MAIN OUTPUTS TO SUBFOLDER FOR EASY ACCESS #####

os.chdir(basedir)
os.chdir('./Results')
os.makedirs('./ResMain', exist_ok=True)

resmain = ['assm_sens.tab', 'aldt_sens.tab', 'GOF.tab', 'inputs.tab', 'lk_sens.tab', 
           'params.tab', 'polprojann.tab', 'polprojperc.tab', 'proj_changes.tab', 
           'holdout.tab', 'syndata_means.tab', 'yearvals.tab', 'summary.txt']

for res in resmain:
    copy(f'./{baserunname}_{res}', './ResMain')

In [None]:
000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000