# Procedures Events MV

Table contains procedures on patients

In [1]:
import pandas as pd  # type: ignore
import os 
import glob 
from datetime import datetime
import numpy as np

import warnings
# Temporarily suppress FutureWarnings
with warnings.catch_warnings():
    warnings.simplefilter("ignore", FutureWarning)

In [2]:
directory = '/Users/maxb/Library/CloudStorage/OneDrive-UniversityofWaterloo/Hospital Research/Datasets/MIMIC-III_demo'
os.chdir(directory)

proc_df = pd.read_csv("PROCEDUREEVENTS_MV.csv")

m,n = proc_df.shape

# Converting to Date Time object
for i in range(m):                                                                 
   proc_df.loc[i, 'starttime'] = datetime.strptime(proc_df.loc[i, 'starttime'], '%Y-%m-%d %H:%M:%S')
   proc_df.loc[i, 'endtime'] = datetime.strptime(proc_df.loc[i, 'endtime'], '%Y-%m-%d %H:%M:%S')

Creating Features

In [3]:
proc_df["duration"] = proc_df['endtime'] - proc_df['starttime']                 # Duration of Procedure

Removing Unnecessary features

In [4]:
col_drops = ["row_id", "valueuom", "location", "locationcategory", "ordercategoryname", "secondaryordercategoryname", 
                 "comments_editedby", "comments_canceledby", "comments_date", "statusdescription", "cancelreason", 
                 "continueinnextdept", "isopenbag", "orderid", "linkorderid", "storetime", "starttime", "endtime"]

for i in col_drops:
    proc_df = proc_df.drop(i, axis=1)

proc_df.head()

Unnamed: 0,subject_id,hadm_id,icustay_id,itemid,value,cgid,ordercategorydescription,duration
0,42367,139932,250305,224263,4520,16890,Task,"3 days, 3:20:00"
1,42367,139932,250305,225204,12957,16890,Task,"8 days, 23:57:00"
2,42367,139932,250305,225792,21485,18693,Task,"14 days, 22:05:00"
3,42367,139932,250305,221214,1,16890,Electrolytes,0:01:00
4,42367,139932,250305,221223,1,16890,Electrolytes,0:01:00


In [5]:
# Testing Columns
print(proc_df['ordercategorydescription'].unique())

['Task' 'Electrolytes']


In [6]:
cols = list(proc_df.columns)

for i in cols:
    check_nan = proc_df[i].isnull().values.any()
    if check_nan == True:
        print("Nan value found")
        break

# All Nan Values removed - Table is cleaned

In [7]:
print("MV Procedures:", proc_df.shape)
proc_df.head()

MV Procedures: (753, 8)


Unnamed: 0,subject_id,hadm_id,icustay_id,itemid,value,cgid,ordercategorydescription,duration
0,42367,139932,250305,224263,4520,16890,Task,"3 days, 3:20:00"
1,42367,139932,250305,225204,12957,16890,Task,"8 days, 23:57:00"
2,42367,139932,250305,225792,21485,18693,Task,"14 days, 22:05:00"
3,42367,139932,250305,221214,1,16890,Electrolytes,0:01:00
4,42367,139932,250305,221223,1,16890,Electrolytes,0:01:00


In [8]:
proc_df.to_csv('/Users/maxb/Library/CloudStorage/OneDrive-UniversityofWaterloo/Hospital Research/Scripts/Cleaned MIMIC-III Datasets/MV_PROCEDURES.csv')

In [9]:
proc_df[proc_df['subject_id'] == 42367]

Unnamed: 0,subject_id,hadm_id,icustay_id,itemid,value,cgid,ordercategorydescription,duration
0,42367,139932,250305,224263,4520,16890,Task,"3 days, 3:20:00"
1,42367,139932,250305,225204,12957,16890,Task,"8 days, 23:57:00"
2,42367,139932,250305,225792,21485,18693,Task,"14 days, 22:05:00"
3,42367,139932,250305,221214,1,16890,Electrolytes,0:01:00
4,42367,139932,250305,221223,1,16890,Electrolytes,0:01:00
5,42367,139932,250305,225459,1,21476,Electrolytes,0:01:00
6,42367,139932,250305,223253,1,18804,Electrolytes,0:01:00
7,42367,139932,250305,224275,5663,18804,Task,"3 days, 22:23:00"
8,42367,139932,250305,225459,1,17461,Electrolytes,0:01:00
9,42367,139932,250305,225470,1,17461,Electrolytes,0:01:00


In [None]:
picd_df.to_csv('/Users/maxb/Library/CloudStorage/OneDrive-UniversityofWaterloo/Hospital Research/Scripts/Cleaned MIMIC-III Datasets/ICD_PROCEDURES.csv')