In [35]:
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
from itertools import combinations
from collections import defaultdict

In [36]:
# Path template for the raw CSV folders; the `%i` placeholder is filled in by the
# cells that read the files (they format it with a year).
data_fp = '/Users/JakeCanfield/Documents/Trauma_Surgery_Research/Data/Raw_data/PUF AY %i/CSV/'

# Numeric MECHANISM code -> label, as noted in PUF dictionary.
# Codes are consecutive starting at 1, so they are generated from the label order.
mechanism_code_dict = dict(enumerate([
    'Cut/pierce',
    'Drowning/submersion',
    'Fall',
    'Fire/flame',
    'Hot object/substance',
    'Firearm',
    'Machinery',
    'MVT Occupant',
    'MVT Motorcyclist',
    'MVT Pedal cyclist',
    'MVT Pedestrian',
    'MVT Unspecified',
    'MVT Other',
    'Pedal cyclist, other',
    'Pedestrian, other',
    'Transport, other',
    'Natural/environmental,  Bites and stings',
    'Natural/environmental,  Other',
    'Overexertion',
    'Poisoning',
    'Struck by, against',
    'Suffocation',
    'Other specified and classifiable',
    'Other specified, not elsewhere classifiable',
    'Unspecified',
    'Adverse effects, medical care',
    'Adverse effects, drugs',
], start=1))

# Numeric TRAUMATYPE code -> label, as noted in PUF Dictionary (note the gap: 9 follows 4).
trauma_type_code_dict = {
    1: 'Blunt',
    2: 'Penetrating',
    3: 'Burn',
    4: 'Other/unspecified',
    9: 'Activity Code - Not Valid as a Primary E-Code',
}

# Numeric SEX code -> label.
sex_code_dict = dict(enumerate(['Male', 'Female', 'Unknown'], start=1))

# Vital-sign columns written to the output tables: the EMS* columns first,
# then the un-prefixed ones.
vitals_cols = [
    'EMSSBP', 'EMSRESPIRATORYRATE', 'EMSPULSERATE', 'EMSPULSEOXIMETRY', 'EMSTOTALGCS',
    'TEMPERATURE', 'SBP', 'RESPIRATORYRATE', 'PULSERATE', 'PULSEOXIMETRY', 'TOTALGCS',
]

# Identifier / descriptor columns that precede the vitals in every output table.
required_cols = ['INC_KEY', 'SEX', 'TRAUMATYPE', 'MECHANISM', 'AGEYEARS']

In [37]:
# Build one vitals CSV per (cutoff, year) for the 2017-2020 releases.
#
# Phase 1 reads each year's PUF_TRAUMA / PUF_ECODE_LOOKUP exactly once and keeps
# only the rows that can qualify under *any* cutoff. Phase 2 applies each cutoff
# to those small per-year frames. The rows, columns, file names and the order of
# the printed lines are the same as when every (cutoff, year) pair re-read the
# CSVs; only the redundant I/O is gone.
years = range(2017, 2021, 1)
cutofftimes = [5, 10, 15, 20, 60]  # compared against HMRRHGCTRLSURGMINS (minutes)

# Output template, filled with (cutofftime, year).
vitals_out_fp = '/Users/JakeCanfield/Documents/Trauma_Surgery_Research/data/Vitals/vitals_%i_mins_df_%i.csv'


def _load_surgery_candidates(year):
    """Read one year's raw tables and return everything that does not depend on the cutoff.

    Parameters
    ----------
    year : int
        Used to fill the `%i` placeholder of the module-level `data_fp`.

    Returns
    -------
    tuple
        ``(candidates, ecode_dict, mechanism_dict, traumatype_dict)`` where
        ``candidates`` holds the PUF_TRAUMA rows with HMRRHGCTRLSURGTYPE == 3.0
        whose HMRRHGCTRLSURGMINS is <= EDMINS, and the three dicts map
        ECODE -> ECODE_DESC, ECODE_DESC -> MECHANISM and ECODE_DESC -> TRAUMATYPE.

    Raises
    ------
    KeyError
        If an expected column is missing from either file.
    """
    trauma = pd.read_csv(data_fp % year + 'PUF_TRAUMA.csv')
    lookup = pd.read_csv(data_fp % year + 'PUF_ECODE_LOOKUP.csv')

    # Header case is normalised so the same column names work for every year.
    trauma.columns = map(str.upper, trauma.columns)
    lookup.columns = map(str.upper, lookup.columns)
    # NOTE: this cell used to upper-case ICDPROCEDURE_df / ICDPROCEDURE_LOOKUP_df
    # here as well. Neither frame is loaded or used in this cell, so on a fresh
    # kernel that raised NameError; the two statements were removed.

    # Surgery-type code 3.0 only (presumably thoracotomy - confirm against the
    # PUF dictionary). `.copy()` detaches the subset from the full-size frame so
    # the column assignments below are unambiguous and the big frame can be freed.
    candidates = trauma.loc[trauma['HMRRHGCTRLSURGTYPE'] == 3.0].copy()

    if year in range(2019, 2021):
        # For these years the minute columns are derived from the *HRS columns.
        candidates['HMRRHGCTRLSURGMINS'] = candidates['HMRRHGCTRLSURGHRS'] * 60
        candidates['EDMINS'] = candidates['EDDISCHARGEHRS'] * 60

    # Keep rows whose surgery time is not later than the ED time. NaN on either
    # side compares False, so such rows are dropped.
    candidates = candidates.loc[candidates['HMRRHGCTRLSURGMINS'] <= candidates['EDMINS']]

    lookup = lookup.loc[:, ['ECODE', 'ECODE_DESC', 'MECHANISM', 'TRAUMATYPE']]
    ecode_dict = dict(zip(lookup['ECODE'], lookup['ECODE_DESC']))
    mechanism_dict = dict(zip(lookup['ECODE_DESC'], lookup['MECHANISM']))
    traumatype_dict = dict(zip(lookup['ECODE_DESC'], lookup['TRAUMATYPE']))
    return candidates, ecode_dict, mechanism_dict, traumatype_dict


# Phase 1: one pass over the raw files per year.
candidates_by_year = {year: _load_surgery_candidates(year) for year in years}

# Phase 2: apply every cutoff to the cached per-year candidates.
for cutofftime in cutofftimes:
    for year in years:
        candidates, ecode_dict, mechanism_dict, traumatype_dict = candidates_by_year[year]

        TRAUMA_df = candidates.loc[candidates['HMRRHGCTRLSURGMINS'] <= cutofftime].copy()

        # E-code -> description (codes missing from the lookup are left as they are) ...
        TRAUMA_df['PRIMARYECODEICD10'] = TRAUMA_df['PRIMARYECODEICD10'].replace(ecode_dict)
        # ... then description -> numeric code (NaN when unmatched) -> label.
        TRAUMA_df['MECHANISM'] = TRAUMA_df['PRIMARYECODEICD10'].map(mechanism_dict).replace(mechanism_code_dict)
        TRAUMA_df['TRAUMATYPE'] = TRAUMA_df['PRIMARYECODEICD10'].map(traumatype_dict).replace(trauma_type_code_dict)
        TRAUMA_df['SEX'] = TRAUMA_df['SEX'].replace(sex_code_dict)

        TRAUMA_df = TRAUMA_df.loc[:, required_cols + vitals_cols].copy()

        # Negative numeric entries become NaN. Vectorised equivalent of the former
        # element-wise applymap (deprecated in recent pandas): NaN < 0 is False,
        # so existing NaNs are untouched.
        numeric_columns = TRAUMA_df.select_dtypes(include=[np.number]).columns
        TRAUMA_df[numeric_columns] = TRAUMA_df[numeric_columns].mask(TRAUMA_df[numeric_columns] < 0)

        print('Number of Cases in %i (Cutoff after arrival = %i minutes):'%(year, cutofftime), len(TRAUMA_df))

        TRAUMA_df.to_csv(vitals_out_fp%(cutofftime, year), index=False)

Number of Cases in 2017 (Cutoff after arrival = 5 minutes): 434
Number of Cases in 2018 (Cutoff after arrival = 5 minutes): 459
Number of Cases in 2019 (Cutoff after arrival = 5 minutes): 580


  exec(code_obj, self.user_global_ns, self.user_ns)


Number of Cases in 2020 (Cutoff after arrival = 5 minutes): 772
Number of Cases in 2017 (Cutoff after arrival = 10 minutes): 656
Number of Cases in 2018 (Cutoff after arrival = 10 minutes): 690
Number of Cases in 2019 (Cutoff after arrival = 10 minutes): 814
Number of Cases in 2020 (Cutoff after arrival = 10 minutes): 1061
Number of Cases in 2017 (Cutoff after arrival = 15 minutes): 775
Number of Cases in 2018 (Cutoff after arrival = 15 minutes): 792
Number of Cases in 2019 (Cutoff after arrival = 15 minutes): 994
Number of Cases in 2020 (Cutoff after arrival = 15 minutes): 1272
Number of Cases in 2017 (Cutoff after arrival = 20 minutes): 859
Number of Cases in 2018 (Cutoff after arrival = 20 minutes): 880
Number of Cases in 2019 (Cutoff after arrival = 20 minutes): 1081
Number of Cases in 2020 (Cutoff after arrival = 20 minutes): 1385
Number of Cases in 2017 (Cutoff after arrival = 60 minutes): 1105
Number of Cases in 2018 (Cutoff after arrival = 60 minutes): 1109
Number of Cases in 2

In [39]:
# Build one vitals CSV per (cutoff, year) for the 2007-2016 releases.
#
# Phase 1 reads every raw CSV once per year and shrinks each table to the
# incidents that qualify under the largest cutoff. Phase 2 applies each cutoff
# to those small tables. Any incident that qualifies under a smaller cutoff
# also qualifies under the largest one, so the rows written per (cutoff, year)
# are the same as when every pair re-read the CSVs from disk.
years = range(2007, 2017)
cutofftimes = [5, 10, 15, 20, 60]  # compared against the *MINS columns (minutes)

# Output template, filled with (cutofftime, year).
vitals_out_fp = '/Users/JakeCanfield/Documents/Trauma_Surgery_Research/data/Vitals/vitals_%i_mins_df_%i.csv'

# Years for which PUF_PM.csv is read (hemorrhage-control surgery columns).
pm_years = range(2013, 2017, 1)


def _restrict(frame, keys):
    """Return an independent copy of the rows of `frame` whose INC_KEY is in `keys`."""
    return frame.loc[frame['INC_KEY'].isin(keys)].copy()


def _keys_within(hits, minutes_col, limit):
    """Return the INC_KEY values (as a list) of rows where `minutes_col` <= `limit`."""
    return hits.loc[hits[minutes_col] <= limit, 'INC_KEY'].tolist()


def _load_year_tables(year, max_cutoff):
    """Read one year's raw tables and return the parts that do not depend on the cutoff.

    Parameters
    ----------
    year : int
        Used to fill the `%i` placeholder of the module-level `data_fp`.
    max_cutoff : int or float
        Largest cutoff that will be applied later; tables are pre-filtered to the
        incidents that qualify under it.

    Returns
    -------
    dict
        ``pcode_hits`` / ``pm_hits``: qualifying rows (INC_KEY plus the minutes
        column) from PUF_PCODE / PUF_PM, ``pm_hits`` being None when PUF_PM is
        not read for this year;
        ``ecode``, ``demo``, ``ed``, ``vitals``, ``pm``: tables restricted to the
        qualifying incidents (``pm`` is None when not read);
        ``ecode_dict``, ``mechanism_dict``, ``traumatype_dict``: lookups built
        from PUF_ECODEDES.

    Raises
    ------
    KeyError
        If an expected column is missing from a file read without `usecols`.
    """
    fp = data_fp % year

    ecodedes = pd.read_csv(fp + 'PUF_ECODEDES.csv', usecols=['INJTYPE', 'MECHANISM', 'ECODE', 'ECODEDES'])
    ecode = pd.read_csv(fp + 'PUF_ECODE.csv', usecols=['INC_KEY', 'ECODE']).sort_values('INC_KEY')
    demo = pd.read_csv(fp + 'PUF_DEMO.csv', usecols=['INC_KEY', 'GENDER', 'AGE']).sort_values('INC_KEY')
    vitals = pd.read_csv(fp + 'PUF_VITALS.csv', usecols=['INC_KEY', 'VSTYPE', 'SBP', 'RR', 'PULSE', 'OXYSAT', 'TEMP', 'GCSTOT']).sort_values('INC_KEY')
    ed = pd.read_csv(fp + 'PUF_ED.csv', usecols=['INC_KEY', 'EDMIN']).sort_values('INC_KEY')
    # PUF_PCODEDES.csv was previously read here too, but nothing used it.

    # --- procedure-code route -------------------------------------------------
    pcode = pd.read_csv(fp + 'PUF_PCODE.csv')
    pcode.columns = map(str.upper, pcode.columns)
    # Some years name the column HOURTOPROC; rename is a no-op where it is absent.
    pcode = pcode.rename(columns={'HOURTOPROC': 'HOURTOPRO'})
    # Procedure code 34.02 only (the original variables call these thoracotomies).
    pcode = pcode.loc[pcode['PCODE'] == 34.02, ['INC_KEY', 'HOURTOPRO']].copy()
    pcode['PROCEDUREMINS'] = pcode['HOURTOPRO'] * 60
    pcode = pd.merge(pcode, ed[['INC_KEY', 'EDMIN']], on='INC_KEY', how='left')
    # Procedure no later than EDMIN (NaN on either side drops the row) and within
    # the largest cutoff.
    pcode_hits = pcode.loc[
        (pcode['PROCEDUREMINS'] <= pcode['EDMIN']) & (pcode['PROCEDUREMINS'] <= max_cutoff),
        ['INC_KEY', 'PROCEDUREMINS'],
    ]

    # --- hemorrhage-control route (only for years where PUF_PM is read) -------
    pm = None
    pm_hits = None
    if year in pm_years:
        pm = pd.read_csv(fp + 'PUF_PM.csv', usecols=['inc_key', 'HEMORRHAGE_CTRL_STYPE_CODE', 'HEMORRHAGE_CTRL_STYPE_DESC', 'HemorrhageCtrlMins', 'HemorrhageCtrlDays']).sort_values('inc_key')
        pm.columns = map(str.upper, pm.columns)
        pm = pd.merge(pm, ed[['INC_KEY', 'EDMIN']], on='INC_KEY', how='left')
        # Surgery-type code 3.0 (presumably thoracotomy - confirm against the PUF
        # dictionary), performed no later than EDMIN and within the largest cutoff.
        pm_hits = pm.loc[
            (pm['HEMORRHAGE_CTRL_STYPE_CODE'] == 3.0)
            & (pm['HEMORRHAGECTRLMINS'] <= pm['EDMIN'])
            & (pm['HEMORRHAGECTRLMINS'] <= max_cutoff),
            ['INC_KEY', 'HEMORRHAGECTRLMINS'],
        ]

    # Every incident that can qualify under any cutoff <= max_cutoff.
    pm_keys = [] if pm_hits is None else pm_hits['INC_KEY'].tolist()
    superset_keys = np.unique(pm_keys + pcode_hits['INC_KEY'].tolist())

    return {
        'pcode_hits': pcode_hits,
        'pm_hits': pm_hits,
        'ecode': _restrict(ecode, superset_keys),
        'demo': _restrict(demo, superset_keys),
        'ed': _restrict(ed, superset_keys),
        'vitals': _restrict(vitals, superset_keys),
        'pm': None if pm is None else _restrict(pm, superset_keys),
        # All three lookups come from the same frame so their rows line up.
        'ecode_dict': dict(zip(ecodedes['ECODE'], ecodedes['ECODEDES'])),
        'mechanism_dict': dict(zip(ecodedes['ECODEDES'], ecodedes['MECHANISM'])),
        'traumatype_dict': dict(zip(ecodedes['ECODEDES'], ecodedes['INJTYPE'])),
    }


# Phase 1: one pass over the raw files per year.
max_cutoff = max(cutofftimes)
tables_by_year = {year: _load_year_tables(year, max_cutoff) for year in years}

# Phase 2: apply every cutoff to the cached per-year tables.
for cutofftime in cutofftimes:
    for year in years:
        tables = tables_by_year[year]

        # Incidents qualifying through either route under this cutoff.
        pm_keys = [] if tables['pm_hits'] is None else _keys_within(tables['pm_hits'], 'HEMORRHAGECTRLMINS', cutofftime)
        thoracotomy_keys = np.unique(pm_keys + _keys_within(tables['pcode_hits'], 'PROCEDUREMINS', cutofftime))

        ECODE_df = _restrict(tables['ecode'], thoracotomy_keys)
        DEMO_df = _restrict(tables['demo'], thoracotomy_keys)
        ED_df = _restrict(tables['ed'], thoracotomy_keys)
        VITALS_df = _restrict(tables['vitals'], thoracotomy_keys)

        # E-code -> description (codes missing from the lookup are left as they are),
        # then description -> MECHANISM / INJTYPE value (NaN when unmatched) -> label.
        ECODE_df['ECODE'] = ECODE_df['ECODE'].replace(tables['ecode_dict'])
        ECODE_df['MECHANISM'] = ECODE_df['ECODE'].map(tables['mechanism_dict']).replace(mechanism_code_dict)  # code->label step may not be needed
        ECODE_df['INJTYPE'] = ECODE_df['ECODE'].map(tables['traumatype_dict']).replace(trauma_type_code_dict)  # code->label step may not be needed
        DEMO_df['GENDER'] = DEMO_df['GENDER'].replace(sex_code_dict)  # may not be needed

        # One row per incident, one column per (VSTYPE, measurement) pair, named
        # VSTYPE followed by the measurement (the rename below expects e.g. 'EDSBP').
        # pivot raises ValueError if an incident repeats a VSTYPE.
        VITALS_pivot = VITALS_df.pivot(index='INC_KEY', columns='VSTYPE', values=['SBP', 'RR', 'PULSE', 'OXYSAT', 'TEMP', 'GCSTOT'])
        VITALS_pivot.columns = [f'{col[1]}{col[0]}' for col in VITALS_pivot.columns]
        VITALS_pivot = VITALS_pivot.reset_index()

        # Left-join everything onto the E-code rows, in the same order as before:
        # DEMO, ED, (PM when available), vitals.
        right_tables = [DEMO_df, ED_df]
        if tables['pm'] is not None:
            right_tables.append(_restrict(tables['pm'], thoracotomy_keys))
        right_tables.append(VITALS_pivot)

        TRAUMA_df = ECODE_df
        for right in right_tables:
            TRAUMA_df = pd.merge(TRAUMA_df, right, on='INC_KEY', how='left')

        # Align the column names with the ones listed in required_cols / vitals_cols.
        TRAUMA_df = TRAUMA_df.rename(columns={'INJTYPE':'TRAUMATYPE','GENDER':'SEX', 'AGE':'AGEYEARS',
                                             'EDSBP':'SBP', 'EDRR':'RESPIRATORYRATE', 'EMSRR':'EMSRESPIRATORYRATE',
                                             'EDPULSE':'PULSERATE', 'EMSPULSE':'EMSPULSERATE', 'EDOXYSAT':'PULSEOXIMETRY',
                                             'EMSOXYSAT':'EMSPULSEOXIMETRY', 'EDTEMP':'TEMPERATURE',
                                             'EDGCSTOT':'TOTALGCS', 'EMSGCSTOT':'EMSTOTALGCS'})
        TRAUMA_df = TRAUMA_df.loc[:, required_cols + vitals_cols].copy()

        # Negative numeric entries become NaN. Vectorised equivalent of the former
        # element-wise applymap (deprecated in recent pandas): NaN < 0 is False,
        # so existing NaNs are untouched.
        numeric_columns = TRAUMA_df.select_dtypes(include=[np.number]).columns
        TRAUMA_df[numeric_columns] = TRAUMA_df[numeric_columns].mask(TRAUMA_df[numeric_columns] < 0)

        print('Number of Cases in %i (Cutoff after arrival = %i minutes):'%(year, cutofftime), len(TRAUMA_df))

        TRAUMA_df.to_csv(vitals_out_fp%(cutofftime, year), index=False)

Number of Cases in 2007 (Cutoff after arrival = 5 minutes): 400
Number of Cases in 2008 (Cutoff after arrival = 5 minutes): 655
Number of Cases in 2009 (Cutoff after arrival = 5 minutes): 544
Number of Cases in 2010 (Cutoff after arrival = 5 minutes): 285
Number of Cases in 2011 (Cutoff after arrival = 5 minutes): 356
Number of Cases in 2012 (Cutoff after arrival = 5 minutes): 418
Number of Cases in 2013 (Cutoff after arrival = 5 minutes): 439
Number of Cases in 2014 (Cutoff after arrival = 5 minutes): 498
Number of Cases in 2015 (Cutoff after arrival = 5 minutes): 612
Number of Cases in 2016 (Cutoff after arrival = 5 minutes): 433
Number of Cases in 2007 (Cutoff after arrival = 10 minutes): 400
Number of Cases in 2008 (Cutoff after arrival = 10 minutes): 758
Number of Cases in 2009 (Cutoff after arrival = 10 minutes): 645
Number of Cases in 2010 (Cutoff after arrival = 10 minutes): 396
Number of Cases in 2011 (Cutoff after arrival = 10 minutes): 484
Number of Cases in 2012 (Cutoff aft