In [1]:
import torch
import pandas as pd
import tarfile
import os
import matplotlib.pyplot as plt
import numpy as np

In [2]:
if torch.cuda.is_available():
    device = torch.device('cuda')
    print("CUDA is available. Using GPU.")
else:
    device = torch.device('cpu')
    print("CUDA is not available. Using CPU.")

CUDA is available. Using GPU.


In [3]:
patient_information = pd.read_csv('/scratch/users/shrestp/mover/EPIC_EMR/EPIC_EMR/EMR/patient_information.csv')
patient_information = patient_information.drop_duplicates()
hours_df = pd.read_csv('/scratch/users/shrestp/mover/EPIC_EMR/EPIC_EMR/EMR/hours_fb_df_new_with_inner_greater10.csv', index_col=0)

In [4]:
def agg_func(series):
    if series.nunique() == 1:
        return series.iloc[0]
    else:
        return '|'.join(map(str, series.unique()))

In [5]:
patient_information['HOSP_ADMSN_TIME'] = pd.to_datetime(patient_information['HOSP_ADMSN_TIME'])
patient_information['HOSP_DISCH_TIME'] = pd.to_datetime(patient_information['HOSP_DISCH_TIME'])
hours_df['HOSP_ADMSN_TIME'] = pd.to_datetime(hours_df['HOSP_ADMSN_TIME'])
hours_df['HOSP_DISCH_TIME'] = pd.to_datetime(hours_df['HOSP_DISCH_TIME'])

  patient_information['HOSP_ADMSN_TIME'] = pd.to_datetime(patient_information['HOSP_ADMSN_TIME'])
  patient_information['HOSP_DISCH_TIME'] = pd.to_datetime(patient_information['HOSP_DISCH_TIME'])


In [6]:
patient_information.columns

Index(['LOG_ID', 'MRN', 'DISCH_DISP_C', 'DISCH_DISP', 'HOSP_ADMSN_TIME',
       'HOSP_DISCH_TIME', 'LOS', 'ICU_ADMIN_FLAG', 'SURGERY_DATE',
       'BIRTH_DATE', 'HEIGHT', 'WEIGHT', 'SEX', 'PRIMARY_ANES_TYPE_NM',
       'ASA_RATING_C', 'ASA_RATING', 'PATIENT_CLASS_GROUP', 'PATIENT_CLASS_NM',
       'PRIMARY_PROCEDURE_NM', 'IN_OR_DTTM', 'OUT_OR_DTTM',
       'AN_START_DATETIME', 'AN_STOP_DATETIME'],
      dtype='object')

In [7]:
# Merge demographic data
cols = ['MRN', 'LOG_ID', 'ICU_ADMIN_FLAG', 'SURGERY_DATE', 'DISCH_DISP_C', 'DISCH_DISP', 'BIRTH_DATE', 'SEX', 'PRIMARY_PROCEDURE_NM', 'HOSP_ADMSN_TIME', 'HOSP_DISCH_TIME', 'ASA_RATING_C', 'ASA_RATING']
final_df = pd.merge(hours_df, patient_information[cols], on=['MRN', 'HOSP_ADMSN_TIME', 'HOSP_DISCH_TIME'], how='inner')
final_df

Unnamed: 0,MRN,HOSP_ADMSN_TIME,HOSP_DISCH_TIME,HOURS,TOTAL_TIME_PRESSORS,LOG_ID,ICU_ADMIN_FLAG,SURGERY_DATE,DISCH_DISP_C,DISCH_DISP,BIRTH_DATE,SEX,PRIMARY_PROCEDURE_NM,ASA_RATING_C,ASA_RATING
0,a90ec3a77eb38e3f,2020-01-26 12:24:00,2020-01-28 00:55:00,0.000000,26.016667,50e3005ac618afb0,Yes,1/26/20 0:00,3.0,Expired,80,Male,"REPAIR, AAA, ENDOVASCULAR",3.0,Severe Systemic Disease
1,a90ec3a77eb38e3f,2020-01-26 12:24:00,2020-01-28 00:55:00,0.000000,26.016667,174438c70c5d4725,Yes,1/27/20 0:00,3.0,Expired,80,Male,"LAPAROTOMY, EXPLORATORY",5.0,Moribund
2,9924af750a97222f,2021-05-08 08:54:00,2021-06-26 18:25:00,0.000000,27.683333,f31b5001242515a4,Yes,5/8/21 0:00,6.0,Skilled Nursing Facility,35,Male,"CRANIECTOMY OR CRANIOTOMY, EMERGENT",,
3,9924af750a97222f,2021-05-08 08:54:00,2021-06-26 18:25:00,0.000000,27.683333,6d04f434bd2ffd77,Yes,5/30/21 0:00,6.0,Skilled Nursing Facility,35,Male,"LAPAROTOMY, EXPLORATORY",5.0,Moribund
4,0043f49c66d1a29a,2020-04-12 11:04:00,2020-05-01 15:11:00,0.000000,7.283333,558d26139a0cdfc0,Yes,4/22/20 0:00,15.0,Home Routine,64,Male,"HIPEC - CHEMOTHERAPY, INTRAPERITONEAL, HYPERTH...",3.0,Severe Systemic Disease
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4991,f8f45cc5d02951fa,2021-11-20 05:46:00,2021-11-29 04:56:00,0.143549,120.400000,c9fcad3baed6cb24,Yes,11/22/21 0:00,3.0,Expired,52,Male,CRANIECTOMY,4.0,Incapacitating Disease
4992,6b4848288939cb28,2021-05-13 14:05:00,2021-05-27 17:08:00,0.000000,36.000000,49621de94a49d744,Yes,5/13/21 0:00,6.0,Skilled Nursing Facility,53,Male,IR ANGIO PELVIS,3.0,Severe Systemic Disease
4993,6b4848288939cb28,2021-05-13 14:05:00,2021-05-27 17:08:00,0.000000,36.000000,f7d6617699612cd5,Yes,5/16/21 0:00,6.0,Skilled Nursing Facility,53,Male,EGD (ESOPHAGOGASTRODUODENOSCOPY),4.0,Incapacitating Disease
4994,6b4848288939cb28,2021-05-13 14:05:00,2021-05-27 17:08:00,0.000000,36.000000,35fe8538cfbfa032,Yes,5/17/21 0:00,6.0,Skilled Nursing Facility,53,Male,"LAPAROTOMY, EXPLORATORY",4.0,Incapacitating Disease


In [8]:
final_df.columns

Index(['MRN', 'HOSP_ADMSN_TIME', 'HOSP_DISCH_TIME', 'HOURS',
       'TOTAL_TIME_PRESSORS', 'LOG_ID', 'ICU_ADMIN_FLAG', 'SURGERY_DATE',
       'DISCH_DISP_C', 'DISCH_DISP', 'BIRTH_DATE', 'SEX',
       'PRIMARY_PROCEDURE_NM', 'ASA_RATING_C', 'ASA_RATING'],
      dtype='object')

In [9]:
# Merge diagnoses and post-op complications
post_op_complications = pd.read_csv('/scratch/users/shrestp/mover/EPIC_EMR/EPIC_EMR/EMR/patient_post_op_complications.csv')
patient_visit = pd.read_csv('/scratch/users/shrestp/mover/EPIC_EMR/EPIC_EMR/EMR/patient_visit.csv')
patient_visit = patient_visit.drop_duplicates()
patient_visit.rename(columns={'mrn': 'MRN'}, inplace=True)
diagnosis_and_complications = pd.merge(patient_visit[['LOG_ID', 'MRN', 'diagnosis_code', 'dx_name']],
                                       post_op_complications[['LOG_ID', 'MRN', 'Element_abbr']],
                                       on=['LOG_ID', 'MRN'],
                                       how='inner')
diagnosis_and_complications = diagnosis_and_complications.drop_duplicates()

In [10]:
diagnosis_and_complications['Element_abbr'].value_counts()
#keep = ['Respiratory', 'Cardiovascular', 'Airway', 'Neurological', 'Metabolic', 'Administra']

Element_abbr
AN Post-op Complications    108891
Other                         1172
Respiratory                   1058
Cardiovascular                 959
Airway                         440
Neurological                   229
Metabolic                      175
Administrative                 172
Medication                     135
Injury/Infection               112
Regional                        97
Chronic Pain                    31
Name: count, dtype: int64

In [11]:
#diagnosis_and_complications = diagnosis_and_complications[diagnosis_and_complications['Element_abbr'].isin(keep)]

In [12]:
diagnosis_and_complications_final = diagnosis_and_complications.groupby(['MRN', 'LOG_ID']).agg(agg_func).reset_index()
diagnosis_and_complications_final

Unnamed: 0,MRN,LOG_ID,diagnosis_code,dx_name,Element_abbr
0,0000c7ccaa009343,47fbfb9938d6067b,592.0,Nephrolithiasis|Kidney stone,AN Post-op Complications
1,0000c7ccaa009343,bd16a67880da1e77,789.00,Left flank pain,AN Post-op Complications
2,0000e45237d1fc96,611e9c50425dbf55,,"Gunshot wound of right lower extremity, initia...",AN Post-op Complications
3,000181ce0ebf1e12,be9a505e236e5b67,193,Papillary thyroid carcinoma (CMS-HCC),AN Post-op Complications
4,00018b4d9acb258b,189a1d9df810fe26,307.9|nan|786.05|786.09|790.4,"Agitation|Impaired functional mobility, balanc...",AN Post-op Complications
...,...,...,...,...,...
46248,fff634f64e07b66a,98183e21fcda44c9,585.6,ESRD (end stage renal disease) on dialysis (CM...,AN Post-op Complications
46249,fffb5d2e00cf670f,ca86eca1473b99bf,,"Ruptured globe of right eye, initial encounter",AN Post-op Complications
46250,fffc39a39b57eee7,f18c7d34c4192c9b,,"Pseudoaneurysm of AV hemodialysis fistula, ini...",AN Post-op Complications
46251,fffcb59d973c7e4c,c293e9fef7b0f159,427.89,SVT (supraventricular tachycardia) (CMS-HCC),AN Post-op Complications


In [13]:
# Merge all data
final_df_merged = pd.merge(final_df, diagnosis_and_complications_final, on=['LOG_ID', 'MRN'], how='inner')
final_df_merged

Unnamed: 0,MRN,HOSP_ADMSN_TIME,HOSP_DISCH_TIME,HOURS,TOTAL_TIME_PRESSORS,LOG_ID,ICU_ADMIN_FLAG,SURGERY_DATE,DISCH_DISP_C,DISCH_DISP,BIRTH_DATE,SEX,PRIMARY_PROCEDURE_NM,ASA_RATING_C,ASA_RATING,diagnosis_code,dx_name,Element_abbr
0,a90ec3a77eb38e3f,2020-01-26 12:24:00,2020-01-28 00:55:00,0.000000,26.016667,174438c70c5d4725,Yes,1/27/20 0:00,3.0,Expired,80,Male,"LAPAROTOMY, EXPLORATORY",5.0,Moribund,996.1|441.4,Endoleak post (EVAR) endovascular aneurysm rep...,AN Post-op Complications
1,9924af750a97222f,2021-05-08 08:54:00,2021-06-26 18:25:00,0.000000,27.683333,6d04f434bd2ffd77,Yes,5/30/21 0:00,6.0,Skilled Nursing Facility,35,Male,"LAPAROTOMY, EXPLORATORY",5.0,Moribund,953.4|852.06|807.00|596.89|924.9|263.0|nan|852...,Brachial plexus injury|Subarachnoid hemorrhage...,AN Post-op Complications
2,0043f49c66d1a29a,2020-04-12 11:04:00,2020-05-01 15:11:00,0.000000,7.283333,558d26139a0cdfc0,Yes,4/22/20 0:00,15.0,Home Routine,64,Male,"HIPEC - CHEMOTHERAPY, INTRAPERITONEAL, HYPERTH...",3.0,Severe Systemic Disease,,"Malignant neoplasm of stomach, unspecified loc...",AN Post-op Complications
3,373548e045944a83,2020-05-07 20:03:00,2020-05-28 12:45:00,0.000000,85.500000,02bd9af670b0e3ff,Yes,5/13/20 0:00,15.0,Home Routine,77,Male,CABG (CORONARY ARTERY BYPASS GRAFT),4.0,Incapacitating Disease,410.70|424.1|414.00|250.00,NSTEMI (non-ST elevated myocardial infarction)...,AN Post-op Complications
4,86068244828e169a,2020-05-20 06:38:00,2020-06-06 18:10:00,0.352941,9.916667,e1a38fc999223ad9,Yes,5/25/20 0:00,20.0,Home Healthcare IP Admit Related,61,Male,VIDEO-ASSISTED THORACOSCOPIC SURGERY (VATS),4.0,Incapacitating Disease,518.81|511.9|571.8|511.89|nan,Respiratory failure (CMS-HCC)|Pleural effusion...,AN Post-op Complications
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1671,e02d03f6176e4acc,2022-12-22 12:51:00,2023-01-16 17:56:00,0.000000,70.200000,53d6ef96fbf4509c,Yes,12/22/22 0:00,4.0,Rehab Facility (not this hospital),68,Male,"DECOMPRESSION, SPINE, CERVICAL, POSTERIOR APPR...",3.0,Severe Systemic Disease,,Fall from ground level,AN Post-op Complications
1672,3c29654ee67e7be5,2020-01-27 18:23:00,2020-01-29 22:01:00,0.000000,7.133333,46112e6602b47eac,Yes,1/29/20 0:00,16.0,Hospice Facility,78,Female,"LAPAROSCOPY, DIAGNOSTIC, WITH EXPLORATORY LAPA...",5.0,Moribund,432.1,Subdural hematoma (CMS-HCC),AN Post-op Complications
1673,8ce554d6384c34da,2023-01-02 00:37:00,2023-01-17 15:05:00,0.000000,18.333333,0b1afc87c797535e,Yes,1/8/23 0:00,15.0,Home Routine,62,Male,"REPAIR, ASCENDING AORTA AND AORTIC ARCH",3.0,Severe Systemic Disease,263.0|441.2,Moderate protein-calorie malnutrition (CMS-HCC...,AN Post-op Complications
1674,f8f45cc5d02951fa,2021-11-20 05:46:00,2021-11-29 04:56:00,0.143549,120.400000,c9fcad3baed6cb24,Yes,11/22/21 0:00,3.0,Expired,52,Male,CRANIECTOMY,4.0,Incapacitating Disease,707.03|434.91,Sacral decubitus ulcer|Acute right MCA stroke ...,AN Post-op Complications


In [14]:
def drop_duplicate_columns(df):
    # Create a DataFrame to store unique columns
    df_unique = df.T.drop_duplicates().T
    return df_unique

In [15]:
# Apply the function to the DataFrame
final_df_merged = drop_duplicate_columns(final_df_merged)

In [16]:
final_df_merged.columns

Index(['MRN', 'HOSP_ADMSN_TIME', 'HOSP_DISCH_TIME', 'HOURS',
       'TOTAL_TIME_PRESSORS', 'LOG_ID', 'ICU_ADMIN_FLAG', 'SURGERY_DATE',
       'DISCH_DISP_C', 'DISCH_DISP', 'BIRTH_DATE', 'SEX',
       'PRIMARY_PROCEDURE_NM', 'ASA_RATING_C', 'ASA_RATING', 'diagnosis_code',
       'dx_name', 'Element_abbr'],
      dtype='object')

In [17]:
def bucket_location(location):
    if location in ['Home Routine', 'Home Healthcare IP Admit Related', 'Home Healthcare Outside 3 Days', 'Home Health w Planned Readmit']:
        return 'HOME'
    elif location in ['Expired', 'Coroner']:
        return 'EXPIRED'
    else:
        return 'OTHER'

In [18]:
# bucket outcome
final_df_merged['DISCH_DISP_bucketed'] = final_df_merged['DISCH_DISP'].apply(bucket_location)

In [19]:
collapsed_df_final = final_df_merged.groupby(['MRN', 'HOSP_ADMSN_TIME', 'HOSP_DISCH_TIME']).agg(agg_func).reset_index()

In [20]:
collapsed_df_final

Unnamed: 0,MRN,HOSP_ADMSN_TIME,HOSP_DISCH_TIME,HOURS,TOTAL_TIME_PRESSORS,LOG_ID,ICU_ADMIN_FLAG,SURGERY_DATE,DISCH_DISP_C,DISCH_DISP,BIRTH_DATE,SEX,PRIMARY_PROCEDURE_NM,ASA_RATING_C,ASA_RATING,diagnosis_code,dx_name,Element_abbr,DISCH_DISP_bucketed
0,0043f49c66d1a29a,2020-04-12 11:04:00,2020-05-01 15:11:00,0.000000,7.283333,558d26139a0cdfc0,Yes,4/22/20 0:00,15.0,Home Routine,64,Male,"HIPEC - CHEMOTHERAPY, INTRAPERITONEAL, HYPERTH...",3.0,Severe Systemic Disease,,"Malignant neoplasm of stomach, unspecified loc...",AN Post-op Complications,HOME
1,008e2790ba97ce57,2021-06-20 14:26:00,2021-07-07 14:15:00,0.000000,279.000000,5a3c9156d257ebfa,Yes,6/21/21 0:00,20.0,Home Healthcare IP Admit Related,59,Female,ERCP (ENDOSCOPIC RETROGRADE CHOLANGIOPANCREATO...,4.0,Incapacitating Disease,578.9|nan|518.81|285.9|584.9|285.1|574.50|482.0,GI bleed|Cirrhosis of liver with ascites (CMS-...,AN Post-op Complications,HOME
2,009a069af41372ba,2021-07-07 03:27:00,2021-08-09 10:57:00,0.000000,10.250000,4a4f22722c0ab73e,Yes,7/9/21 0:00,20.0,Home Healthcare IP Admit Related,30,Male,"IRRIGATION AND DEBRIDEMENT, EXTREMITY",4.0,Incapacitating Disease,305.1|401.9|nan|434.91|512.89|E819.9|959.9|959...,Current smoker|Hypertension|Acute deep vein th...,AN Post-op Complications,HOME
3,00a274fce726fd9e,2019-09-24 17:58:00,2019-10-06 12:50:00,0.000000,12.100000,2d94e5f599f37ba0,Yes,9/30/19 0:00,15.0,Home Routine,61,Male,CABG (CORONARY ARTERY BYPASS GRAFT),4.0,Incapacitating Disease,410.70|790.6,NSTEMI (non-ST elevated myocardial infarction)...,AN Post-op Complications,HOME
4,00f1f53d9d620194,2022-08-09 13:48:00,2022-08-16 17:35:00,0.000000,97.616667,6b0085d333250ab6,Yes,8/12/22 0:00,16.0,Hospice Facility,61,Female,GI EGD EUS FNA,3.0,Severe Systemic Disease,782.4|578.1|790.5|574.50,Jaundice|Melena|Elevated alkaline phosphatase ...,AN Post-op Complications|Cardiovascular,OTHER
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1652,ff6f438387a2f822,2018-11-06 15:54:00,2018-11-22 15:40:00,0.000000,11.416667,2158919443f9f462,Yes,11/13/18 0:00,15.0,Home Routine,60,Male,"REPLACEMENT, AORTIC VALVE",4.0,Incapacitating Disease,428.0|428.9,Acute on chronic congestive heart failure (CMS...,AN Post-op Complications,HOME
1653,ffa83cc8c6472255,2022-06-25 09:44:00,2022-07-28 23:35:00,0.000000,395.300000,73be2a55e2002335,Yes,6/26/22 0:00,30.0,Long Term Care Facility,57,Male,"CRANIOTOMY, FOR ANEURYSM OR AVM CLIPPING, WITH...",3.0,Severe Systemic Disease,435.9|437.3|nan,Cerebral vasospasm|Aneurysm of middle cerebral...,AN Post-op Complications,OTHER
1654,ffd4085849ae2443,2019-04-30 16:17:00,2019-05-11 16:44:00,0.000000,32.316667,5f6e3e156c0aa0d3,Yes,5/8/19 0:00,6.0,Skilled Nursing Facility,63,Male,"ENDARTERECTOMY, CAROTID",3.0,Severe Systemic Disease,437.0|434.91|272.4|433.10|443.9|444.22|790.29|...,Intracranial atherosclerosis|Acute ischemic ri...,AN Post-op Complications,OTHER
1655,ffe4b7d7fa95554d,2021-06-10 05:11:00,2021-07-15 20:40:00,0.018392,567.283333,e8a53b58e0a341a1,Yes,6/10/21 0:00,3.0,Expired,85,Female,HEMIGLOSSECTOMY,3.0,Severe Systemic Disease,427.31|nan|511.9|518.81|276.4|141.9|284.19|584.9,Paroxysmal atrial fibrillation (CMS-HCC)|Acute...,AN Post-op Complications,EXPIRED


In [33]:
# one hot encode post-op complications
df_postop = collapsed_df_final[['Element_abbr']]
df_postop['Element_abbr'] = df_postop['Element_abbr'].str.split('|')
all_complications = list(set([comp for sublist in df_postop['Element_abbr'] for comp in sublist]))

def encode_as_vector(complications, all_complications):
    return [1 if comp in complications else 0 for comp in all_complications]

df_postop['Element_abbr'] = df_postop['Element_abbr'].apply(lambda x: encode_as_vector(x, all_complications))
collapsed_df_final['Element_abbr_encoded'] = df_postop['Element_abbr']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_postop['Element_abbr'] = df_postop['Element_abbr'].str.split('|')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_postop['Element_abbr'] = df_postop['Element_abbr'].apply(lambda x: encode_as_vector(x, all_complications))


In [34]:
all_complications

['Cardiovascular',
 'AN Post-op Complications',
 'Injury/Infection',
 'Airway',
 'Respiratory',
 'Other',
 'Neurological',
 'Medication',
 'Administrative',
 'Metabolic']

In [35]:
df_postop = collapsed_df_final[['Element_abbr']]

In [36]:
df_postop['Element_abbr'] = df_postop['Element_abbr'].str.split('|')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_postop['Element_abbr'] = df_postop['Element_abbr'].str.split('|')


In [37]:
test = [comp for sublist in df_postop['Element_abbr'] for comp in sublist]

In [38]:
pd.Series(test).value_counts()

AN Post-op Complications    1657
Cardiovascular                59
Other                         44
Respiratory                   37
Airway                        14
Metabolic                     12
Neurological                  10
Administrative                 8
Medication                     5
Injury/Infection               2
Name: count, dtype: int64

In [39]:
collapsed_df_final['Element_abbr'].value_counts()

Element_abbr
AN Post-op Complications                                      1499
AN Post-op Complications|Cardiovascular                         32
AN Post-op Complications|Other                                  29
AN Post-op Complications|Respiratory                            19
Cardiovascular|AN Post-op Complications                         10
AN Post-op Complications|Metabolic                               5
Respiratory|AN Post-op Complications                             5
AN Post-op Complications|Airway                                  5
AN Post-op Complications|Medication                              4
AN Post-op Complications|Neurological                            4
Other|AN Post-op Complications                                   4
AN Post-op Complications|Cardiovascular|Respiratory              4
AN Post-op Complications|Administrative                          3
Airway|AN Post-op Complications                                  3
Other|AN Post-op Complications|Cardiovascular    

In [40]:
df_postop

Unnamed: 0,Element_abbr
0,[AN Post-op Complications]
1,[AN Post-op Complications]
2,[AN Post-op Complications]
3,[AN Post-op Complications]
4,"[AN Post-op Complications, Cardiovascular]"
...,...
1652,[AN Post-op Complications]
1653,[AN Post-op Complications]
1654,[AN Post-op Complications]
1655,[AN Post-op Complications]


In [41]:
def keep_highest(value):
    if pd.isna(value):  # Handle NaN values
        return np.nan
    if isinstance(value, str):  # Check if the value is a string
        if '|' in value:  # If there's a '|', split the value and keep the highest
            values = [float(v) for v in value.split('|')]
            return max(values)
        else:
            return float(value)  # If there's no '|', return the value as a float
    else:
        return value  # If it's already a numeric type, return as is

In [44]:
# keep highest ASA scor
collapsed_df_final['ASA_RATING_C'] = collapsed_df_final['ASA_RATING_C'].apply(keep_highest)

In [45]:
collapsed_df_final.to_csv('/scratch/users/shrestp/mover/EPIC_EMR/EPIC_EMR/EMR/all_data_final_inner_greater10.csv')
collapsed_df_final

Unnamed: 0,MRN,HOSP_ADMSN_TIME,HOSP_DISCH_TIME,HOURS,TOTAL_TIME_PRESSORS,LOG_ID,ICU_ADMIN_FLAG,SURGERY_DATE,DISCH_DISP_C,DISCH_DISP,BIRTH_DATE,SEX,PRIMARY_PROCEDURE_NM,ASA_RATING_C,ASA_RATING,diagnosis_code,dx_name,Element_abbr,DISCH_DISP_bucketed,Element_abbr_encoded
0,0043f49c66d1a29a,2020-04-12 11:04:00,2020-05-01 15:11:00,0.000000,7.283333,558d26139a0cdfc0,Yes,4/22/20 0:00,15.0,Home Routine,64,Male,"HIPEC - CHEMOTHERAPY, INTRAPERITONEAL, HYPERTH...",3.0,Severe Systemic Disease,,"Malignant neoplasm of stomach, unspecified loc...",AN Post-op Complications,HOME,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
1,008e2790ba97ce57,2021-06-20 14:26:00,2021-07-07 14:15:00,0.000000,279.000000,5a3c9156d257ebfa,Yes,6/21/21 0:00,20.0,Home Healthcare IP Admit Related,59,Female,ERCP (ENDOSCOPIC RETROGRADE CHOLANGIOPANCREATO...,4.0,Incapacitating Disease,578.9|nan|518.81|285.9|584.9|285.1|574.50|482.0,GI bleed|Cirrhosis of liver with ascites (CMS-...,AN Post-op Complications,HOME,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
2,009a069af41372ba,2021-07-07 03:27:00,2021-08-09 10:57:00,0.000000,10.250000,4a4f22722c0ab73e,Yes,7/9/21 0:00,20.0,Home Healthcare IP Admit Related,30,Male,"IRRIGATION AND DEBRIDEMENT, EXTREMITY",4.0,Incapacitating Disease,305.1|401.9|nan|434.91|512.89|E819.9|959.9|959...,Current smoker|Hypertension|Acute deep vein th...,AN Post-op Complications,HOME,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
3,00a274fce726fd9e,2019-09-24 17:58:00,2019-10-06 12:50:00,0.000000,12.100000,2d94e5f599f37ba0,Yes,9/30/19 0:00,15.0,Home Routine,61,Male,CABG (CORONARY ARTERY BYPASS GRAFT),4.0,Incapacitating Disease,410.70|790.6,NSTEMI (non-ST elevated myocardial infarction)...,AN Post-op Complications,HOME,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
4,00f1f53d9d620194,2022-08-09 13:48:00,2022-08-16 17:35:00,0.000000,97.616667,6b0085d333250ab6,Yes,8/12/22 0:00,16.0,Hospice Facility,61,Female,GI EGD EUS FNA,3.0,Severe Systemic Disease,782.4|578.1|790.5|574.50,Jaundice|Melena|Elevated alkaline phosphatase ...,AN Post-op Complications|Cardiovascular,OTHER,"[1, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1652,ff6f438387a2f822,2018-11-06 15:54:00,2018-11-22 15:40:00,0.000000,11.416667,2158919443f9f462,Yes,11/13/18 0:00,15.0,Home Routine,60,Male,"REPLACEMENT, AORTIC VALVE",4.0,Incapacitating Disease,428.0|428.9,Acute on chronic congestive heart failure (CMS...,AN Post-op Complications,HOME,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
1653,ffa83cc8c6472255,2022-06-25 09:44:00,2022-07-28 23:35:00,0.000000,395.300000,73be2a55e2002335,Yes,6/26/22 0:00,30.0,Long Term Care Facility,57,Male,"CRANIOTOMY, FOR ANEURYSM OR AVM CLIPPING, WITH...",3.0,Severe Systemic Disease,435.9|437.3|nan,Cerebral vasospasm|Aneurysm of middle cerebral...,AN Post-op Complications,OTHER,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
1654,ffd4085849ae2443,2019-04-30 16:17:00,2019-05-11 16:44:00,0.000000,32.316667,5f6e3e156c0aa0d3,Yes,5/8/19 0:00,6.0,Skilled Nursing Facility,63,Male,"ENDARTERECTOMY, CAROTID",3.0,Severe Systemic Disease,437.0|434.91|272.4|433.10|443.9|444.22|790.29|...,Intracranial atherosclerosis|Acute ischemic ri...,AN Post-op Complications,OTHER,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
1655,ffe4b7d7fa95554d,2021-06-10 05:11:00,2021-07-15 20:40:00,0.018392,567.283333,e8a53b58e0a341a1,Yes,6/10/21 0:00,3.0,Expired,85,Female,HEMIGLOSSECTOMY,3.0,Severe Systemic Disease,427.31|nan|511.9|518.81|276.4|141.9|284.19|584.9,Paroxysmal atrial fibrillation (CMS-HCC)|Acute...,AN Post-op Complications,EXPIRED,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
