In [1]:
import pandas as pd
import os

In [2]:
#random notes and settings
#set exporter.fhir.export = false
#set exporter.csv.export = true

#encounters want reason code == 55680006 (drug overdose)
#encounters want description == 'Death Certification'

#run_synthea -p 10000 -s 10000 -cs 12345 -m opioid_addiction Maine Bangor

#^^this command generates 10000 people (-p) with the seed 10000 (-s) and the provider seed of 12345 (-cs) using the opioid_addiction module (-m) in Bangor, ME

In [3]:
def makeEncountersDF(path, seeds):
    '''
    Combine the encounters.csv file of every requested seed into one dataframe.

    Each seed's file is read from <path>/bangor_s<seed>/encounters.csv with every
    column loaded as a string, and a 'seed' column is added so each row records
    which seed it came from.

    Parameters
    ----------
    path : str
        Folder that contains the per-seed 'bangor_s<seed>' directories.
    seeds : iterable
        Seeds whose encounters should be loaded.

    Returns
    -------
    pandas.DataFrame
        The rows of every file that was found, stacked in seed order (each file
        keeps its own row index). An empty dataframe when no file was found.
    '''
    frames = []
    for seed in seeds:
        csv_path = os.path.join(path, 'bangor_s' + str(seed), 'encounters.csv')
        try:
            encounters = pd.read_csv(csv_path, dtype=str)
        except FileNotFoundError:
            # only a missing file is skipped; any other failure (bad CSV, permissions)
            # is allowed to surface instead of being reported as "does not exist"
            print('File for seed', str(seed), 'does not exist...skipping')
            continue
        encounters['seed'] = seed
        frames.append(encounters)

    if not frames:
        return pd.DataFrame()
    # a single concat replaces the repeated DataFrame.append calls (removed in pandas 2.0)
    return pd.concat(frames)

In [4]:
def getODEncounters(df):
    '''
    Select the drug overdose encounters from an encounters dataframe.

    A row is kept when its REASONCODE equals the string '55680006' (drug overdose).
    No filter is applied on DESCRIPTION, so overdose deaths
    (DESCRIPTION 'Death Certification') are part of the result as well.
    '''
    is_overdose = df['REASONCODE'].eq('55680006')
    return df.loc[is_overdose]

In [5]:
def getODDeaths(df):
    '''
    Select the drug overdose deaths from an encounters dataframe.

    A row is kept only when both conditions hold:
    DESCRIPTION == 'Death Certification' and REASONCODE == '55680006'
    '''
    is_death_cert = df['DESCRIPTION'].eq('Death Certification')
    is_overdose = df['REASONCODE'].eq('55680006')
    return df.loc[is_death_cert & is_overdose]

In [6]:
def getODstats(df):
    '''
    Build patient level sample statistics on the probability of death per drug overdose encounter.

    Returns one row per (PATIENT, seed) that has at least one overdose encounter, with columns:
      OD_count        - number of overdose encounters
      OD_death        - number of overdose death encounters (0 when the patient has none)
      prDeath         - OD_death / OD_count
      weight          - OD_count divided by the total OD_count over all rows
      weightedPrDeath - weight * prDeath; summing this column gives the weighted sample pr(death)
    '''
    keys = ['PATIENT', 'seed']

    # overdose encounters counted per patient and seed
    visits = getODEncounters(df).groupby(keys, as_index=False)['REASONCODE'].count()
    visits = visits.rename(columns={'REASONCODE': 'OD_count'})

    # overdose deaths counted per patient and seed
    deaths = getODDeaths(df).groupby(keys, as_index=False)['REASONCODE'].count()
    deaths = deaths.rename(columns={'REASONCODE': 'OD_death'})

    # left join keeps every patient with an overdose; patients without a death get 0
    merged = visits.merge(deaths, how='left', on=keys).fillna(0)

    return merged.assign(
        prDeath=lambda t: t['OD_death'] / t['OD_count'],
        weight=lambda t: t['OD_count'] / sum(t['OD_count']),
        weightedPrDeath=lambda t: t['weight'] * t['prDeath'],
    )

In [7]:
# folder that holds the per-seed output directories (bangor_s<seed>) read by makeEncountersDF
path = r'C:\repos\Synthea\output'
# alternative seed lists kept for reference; only the uncommented assignment below takes effect
#seeds = [10000, 13370, 23123, 33555, 39093, 45000, 51327, 65888, 74982, 82388]
#seeds = [12345]
seeds = [22222]

#pull in data: one dataframe with the encounters of every seed plus a 'seed' column
df = makeEncountersDF(path, seeds)

In [8]:
#calculate overdose stats (one row per patient and seed with at least one overdose encounter)
od_df = getODstats(df)

# unweighted mean of the per-patient probabilities of death
print(od_df['prDeath'].mean())

# weighted probability of death: each patient is weighted by their share of all overdose encounters
print(od_df['weightedPrDeath'].sum())

0.020320082728193224
0.020590814469521542


In [13]:
# number of distinct PATIENT values in df (unique() also counts a missing value, if any, as one entry)
len(df['PATIENT'].unique())

4560698

In [9]:
# display the per-patient overdose statistics returned by getODstats
od_df

Unnamed: 0,PATIENT,seed,OD_count,OD_death,prDeath,weight,weightedPrDeath
0,00000c94-212f-7940-b8c1-692423cfad98,22222,4,1.0,0.250,0.000005,0.000001
1,00006d99-ad13-aae4-c40a-002954805bdf,22222,2,1.0,0.500,0.000002,0.000001
2,00007137-723e-ff4b-3ab4-01f180988500,22222,4,0.0,0.000,0.000005,0.000000
3,00009bfd-2bca-3101-730c-a218315c191e,22222,5,0.0,0.000,0.000006,0.000000
4,0000a8f2-d6a4-ab21-294f-c13ac2f45034,22222,8,1.0,0.125,0.000009,0.000001
...,...,...,...,...,...,...,...
229212,fffef610-5bd7-c85a-f0e7-051b8a7a522e,22222,5,0.0,0.000,0.000006,0.000000
229213,ffff0ae8-7753-176d-bf7a-f5ee0a0cda7c,22222,4,0.0,0.000,0.000005,0.000000
229214,ffff1883-c1c0-4a88-2ed9-d71aaf5449c0,22222,3,0.0,0.000,0.000003,0.000000
229215,ffff5e59-6e15-a258-444a-ea4a47d92a64,22222,4,0.0,0.000,0.000005,0.000000


In [4]:
def makeMedicationsDF(path, seeds):
    '''
    Combine the medications.csv file of every requested seed into one dataframe.

    Each seed's file is read from <path>/bangor_s<seed>/medications.csv with every
    column loaded as a string, and a 'seed' column is added so each row records
    which seed it came from.

    Parameters
    ----------
    path : str
        Folder that contains the per-seed 'bangor_s<seed>' directories.
    seeds : iterable
        Seeds whose medications should be loaded.

    Returns
    -------
    pandas.DataFrame
        The rows of every file that was found, stacked in seed order (each file
        keeps its own row index). An empty dataframe when no file was found.
    '''
    frames = []
    for seed in seeds:
        csv_path = os.path.join(path, 'bangor_s' + str(seed), 'medications.csv')
        try:
            medications = pd.read_csv(csv_path, dtype=str)
        except FileNotFoundError:
            # only a missing file is skipped; any other failure (bad CSV, permissions)
            # is allowed to surface instead of being reported as "does not exist"
            print('File for seed', str(seed), 'does not exist...skipping')
            continue
        medications['seed'] = seed
        frames.append(medications)

    if not frames:
        return pd.DataFrame()
    # a single concat replaces the repeated DataFrame.append calls (removed in pandas 2.0)
    return pd.concat(frames)

In [5]:
# NOTE: this cell rebinds path, seeds and df, replacing the values set by the encounters cells
# folder that holds the per-seed output directories (bangor_s<seed>) read by makeMedicationsDF
path = r'\\lmi.org\Data\Ser_Del\HlthMgmt\Civ\RstricOpen\SyntheaChallenge\data'
seeds = [10000, 13370, 23123, 33555, 39093, 45000, 51327, 65888, 74982, 82388]

#pull in data: one dataframe with the medications of every seed plus a 'seed' column
df = makeMedicationsDF(path, seeds)



In [6]:
# display the combined medications dataframe
df

Unnamed: 0,START,STOP,PATIENT,PAYER,ENCOUNTER,CODE,DESCRIPTION,BASE_COST,PAYER_COVERAGE,DISPENSES,TOTALCOST,REASONCODE,REASONDESCRIPTION,seed
0,2013-01-31T17:08:25Z,2013-01-31T17:23:25Z,cc98085d-fb73-1c5a-b82a-6fd874d16956,d47b3510-2895-3b70-9897-342d681c769d,70731a88-ec28-e3ff-4a3c-51ceef1ea6e6,1660014,1 ML Epinephrine 1 MG/ML Injection,2.53,0.00,1,2.53,,,10000
1,2013-01-31T17:08:25Z,2013-01-31T17:23:25Z,cc98085d-fb73-1c5a-b82a-6fd874d16956,d47b3510-2895-3b70-9897-342d681c769d,70731a88-ec28-e3ff-4a3c-51ceef1ea6e6,834357,3 ML Amiodarone hydrocholoride 50 MG/ML Prefil...,27.01,0.00,1,27.01,,,10000
2,2013-01-31T17:08:25Z,2013-01-31T17:23:25Z,cc98085d-fb73-1c5a-b82a-6fd874d16956,d47b3510-2895-3b70-9897-342d681c769d,70731a88-ec28-e3ff-4a3c-51ceef1ea6e6,1190795,Atropine Sulfate 1 MG/ML Injectable Solution,11.17,0.00,1,11.17,,,10000
3,2016-07-25T12:47:23Z,,5e090cdb-677c-1b38-bb24-2a360f6a489a,42c4fca7-f8a9-3cd1-982a-dd9751bf3e2a,d895e693-8d64-659a-f1c1-673c559e76a0,309362,Clopidogrel 75 MG Oral Tablet,9.00,0.00,58,522.00,,,10000
4,2016-07-25T12:47:23Z,,5e090cdb-677c-1b38-bb24-2a360f6a489a,42c4fca7-f8a9-3cd1-982a-dd9751bf3e2a,d895e693-8d64-659a-f1c1-673c559e76a0,312961,Simvastatin 20 MG Oral Tablet,8.70,0.00,58,504.60,,,10000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6741,1999-08-03T22:48:51Z,,2de3335a-1c17-56fc-ea59-655de45a2508,7caa7254-5050-3b5e-9eae-bd5ea30e809c,13825764-e4b8-2c78-63c6-520e09cfe9b1,897718,Verapamil Hydrochloride 40 MG,41.29,0.00,264,10900.56,,,82388
6742,1999-08-03T22:48:51Z,,2de3335a-1c17-56fc-ea59-655de45a2508,7caa7254-5050-3b5e-9eae-bd5ea30e809c,13825764-e4b8-2c78-63c6-520e09cfe9b1,197604,Digoxin 0.125 MG Oral Tablet,38.50,0.00,264,10164.00,,,82388
6743,2004-03-24T18:13:30Z,,15bde7a3-17ea-a127-1c7c-00fc87c27d0b,b1c428d6-4f07-31e0-90f0-68ffa6ff8c76,8ebc9b95-cb1d-b408-882e-a5d28adb43f7,855332,Warfarin Sodium 5 MG Oral Tablet,41.12,0.00,208,8552.96,,,82388
6744,2004-03-24T18:13:30Z,,15bde7a3-17ea-a127-1c7c-00fc87c27d0b,b1c428d6-4f07-31e0-90f0-68ffa6ff8c76,8ebc9b95-cb1d-b408-882e-a5d28adb43f7,897718,Verapamil Hydrochloride 40 MG,52.71,0.00,208,10963.68,,,82388


In [7]:
# summarize the columns, non-null counts and dtypes of the combined medications dataframe
df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 67789 entries, 0 to 6745
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   START              67789 non-null  object
 1   STOP               43827 non-null  object
 2   PATIENT            67789 non-null  object
 3   PAYER              67789 non-null  object
 4   ENCOUNTER          67789 non-null  object
 5   CODE               67789 non-null  object
 6   DESCRIPTION        67789 non-null  object
 7   BASE_COST          67789 non-null  object
 8   PAYER_COVERAGE     67789 non-null  object
 9   DISPENSES          67789 non-null  object
 10  TOTALCOST          67789 non-null  object
 11  REASONCODE         0 non-null      object
 12  REASONDESCRIPTION  0 non-null      object
 13  seed               67789 non-null  int64 
dtypes: int64(1), object(13)
memory usage: 7.8+ MB


In [10]:
# YEAR = first four characters of the START string (e.g. '2013' from '2013-01-31T17:08:25Z'); adds the column to df in place
df['YEAR'] = df['START'].str.slice(stop=4)

In [16]:
# count medication rows per patient, year and medication (CODE + DESCRIPTION);
# count() tallies the non-null ENCOUNTER values in each group
df_grouped = df.groupby(['PATIENT', 'YEAR', 'CODE', 'DESCRIPTION'])['ENCOUNTER'].count() \
    .reset_index(name='Number of Prescriptions')

In [18]:
# preview the first rows of the per-patient, per-year prescription counts
df_grouped.head()

Unnamed: 0,PATIENT,YEAR,CODE,DESCRIPTION,Number of Prescriptions
0,000341fd-e879-21b3-aa5b-13cda0050ba9,1976,705129,Nitroglycerin 0.4 MG/ACTUAT Mucosal Spray,1
1,000341fd-e879-21b3-aa5b-13cda0050ba9,1991,312961,Simvastatin 20 MG Oral Tablet,1
2,000341fd-e879-21b3-aa5b-13cda0050ba9,1994,197361,Amlodipine 5 MG Oral Tablet,1
3,000341fd-e879-21b3-aa5b-13cda0050ba9,1997,309362,Clopidogrel 75 MG Oral Tablet,1
4,000341fd-e879-21b3-aa5b-13cda0050ba9,2002,259255,Atorvastatin 80 MG Oral Tablet,1


In [19]:
# write the grouped counts to prescription_info.csv (relative path) without the row index
df_grouped.to_csv("prescription_info.csv", index=False)