In [1]:
import pandas as pd
import os

In [2]:
#random notes and settings
#set exporter.fhir.export = false
#set exporter.csv.export = true

#encounters want reason code == 55680006 (drug overdose)
#encounters want description == 'Death Certification'

#run_synthea -p 10000 -s 10000 -cs 12345 -m opioid_addiction Maine Bangor

#^^this command generates 10000 people (-p) with the seed 10000 (-s) and the provider seed of 12345 (-cs) using the opioid_addiction module (-m) in Bangor, ME

In [3]:
def makeEncountersDF(path, seeds):
    '''
    Go to the path and grab all encounters.csv and put them in one dataframe with an extra column for seed

    For each seed, reads <path>/bangor_s<seed>/encounters.csv with every column
    loaded as a string, and tags its rows with the seed in a 'seed' column.

    Parameters
    ----------
    path : str
        Directory that contains the per-seed 'bangor_s<seed>' folders.
    seeds : iterable
        Seeds to load. A seed whose encounters.csv cannot be found is
        reported on stdout and skipped.

    Returns
    -------
    pandas.DataFrame
        The encounters of all seeds that were found, stacked in the order the
        seeds were given (row labels are kept as read, so they restart at 0
        for each file). An empty dataframe if no file was found.
    '''
    frames = []
    for seed in seeds:
        csv_path = os.path.join(path, 'bangor_s' + str(seed), 'encounters.csv')
        try:
            encounters = pd.read_csv(csv_path, dtype=str)
        except FileNotFoundError:
            # Only a missing file/folder is skipped. Any other failure (bad CSV,
            # programming error) must surface instead of being reported as a
            # missing file.
            print('File for seed', str(seed), 'does not exist...skipping')
            continue
        encounters['seed'] = seed
        frames.append(encounters)

    # pd.concat raises on an empty list, so keep the "nothing found" result explicit.
    if not frames:
        return pd.DataFrame()
    # One concat at the end instead of DataFrame.append in the loop:
    # append was removed in pandas 2.0 and re-copied the whole frame each pass.
    return pd.concat(frames)

In [4]:
def getODEncounters(df):
    '''
    Select the drug overdose encounters from an encounters dataframe.

    A row is kept when its REASONCODE equals the string '55680006' (drug overdose).
    Overdose deaths -- DESCRIPTION 'Death Certification' -- carry the same reason
    code, so they are part of the result as well.
    '''
    is_overdose = df['REASONCODE'].eq('55680006')
    return df.loc[is_overdose]

In [5]:
def getODDeaths(df):
    '''
    Select the drug overdose deaths from an encounters dataframe.

    A row is kept only when both conditions hold:
    DESCRIPTION == 'Death Certification' and REASONCODE == '55680006' (drug overdose)
    '''
    is_death_cert = df['DESCRIPTION'].eq('Death Certification')
    is_overdose = df['REASONCODE'].eq('55680006')
    return df.loc[is_death_cert & is_overdose]

In [6]:
def getODstats(df):
    '''
    Patient level sample statistics on the probability of death per drug overdose encounter.

    Returns one row per (PATIENT, seed) pair that has at least one overdose
    encounter, with the columns:
        OD_count        -- number of overdose encounters (deaths included)
        OD_death        -- number of overdose death certifications (0 when none)
        prDeath         -- OD_death / OD_count
        weight          -- the patient's share of all overdose encounters
        weightedPrDeath -- weight * prDeath; summing this column gives the
                           weighted sample pr(death)
    '''
    keys = ['PATIENT', 'seed']

    def count_per_patient(encounters, label):
        # rows with a REASONCODE per patient/seed pair, stored under `label`
        counts = encounters.groupby(keys, as_index=False)['REASONCODE'].count()
        return counts.rename(columns={'REASONCODE': label})

    overdoses = count_per_patient(getODEncounters(df), 'OD_count')
    deaths = count_per_patient(getODDeaths(df), 'OD_death')

    # left join keeps every overdose patient; no matching death row -> NaN -> 0
    od = overdoses.merge(deaths, how='left', on=keys).fillna(0)

    total_overdoses = od['OD_count'].sum()
    od['prDeath'] = od['OD_death'] / od['OD_count']
    od['weight'] = od['OD_count'] / total_overdoses
    od['weightedPrDeath'] = od['weight'] * od['prDeath']

    return od

In [7]:
# folder holding one 'bangor_s<seed>' CSV export per seed
path = r'C:\repos\Synthea\output'
# seeds of the runs to combine
seeds = [
    10000, 13370, 23123, 33555, 39093,
    45000, 51327, 65888, 74982, 82388,
]

#pull in data
df = makeEncountersDF(path=path, seeds=seeds)

In [12]:
#calculate overdose stats
od_df = getODstats(df)

# unweighted: plain average of the per-patient probabilities
print(od_df.loc[:, 'prDeath'].mean())

# weighted by each patient's share of overdose encounters
print(od_df.loc[:, 'weightedPrDeath'].sum())

0.02204020446763761
0.022825977174022828


In [14]:
# show the per-patient overdose table returned by getODstats
od_df

Unnamed: 0,PATIENT,seed,OD_count,OD_death,prDeath,weight,weightedPrDeath
0,0010705a-860d-9f3e-b048-805d265066c9,45000,4,0.0,0.0,0.000202,0.00000
1,001510a4-99b1-89f2-2a8e-4784b653ec84,45000,2,0.0,0.0,0.000101,0.00000
2,00178119-0081-9c85-875e-cadf915d257e,10000,1,0.0,0.0,0.000050,0.00000
3,001c57fb-5353-fca7-a5bd-09338a7940a0,13370,3,0.0,0.0,0.000151,0.00000
4,0030aba4-03d4-dbbe-efa4-674f74e42a22,74982,2,1.0,0.5,0.000101,0.00005
...,...,...,...,...,...,...,...
5266,ff887290-dd71-7bf8-1ec3-163ace581c61,23123,5,0.0,0.0,0.000252,0.00000
5267,ffd1383b-049d-8d60-77a7-eb9e6cedd796,74982,3,0.0,0.0,0.000151,0.00000
5268,ffd95e98-e213-1250-cb5b-50fd7b682891,74982,2,0.0,0.0,0.000101,0.00000
5269,ffe7475c-3345-6b58-7e73-a4283460329d,13370,6,0.0,0.0,0.000303,0.00000
