In [1]:
import pandas as pd
import numpy as np
import os
import timeit
from tqdm.notebook import trange, tqdm

In [2]:
# Directory constants. Both keep a trailing slash because the cells below build
# file paths by plain string concatenation.
# NOTE(review): CHARTEVENTS_BY_ICUSTAY_ID is not referenced in the visible cells —
# presumably the full (non-reduced) parquet directory; confirm before removing.
CHARTEVENTS_BY_ICUSTAY_ID = 'data/parquet/'
# Directory that is listed with os.listdir and read with pd.read_parquet below.
REDUCED_CE_BY_ICUSTAY_ID = 'data_demo/samples/'

In [3]:
# os.listdir returns entries in arbitrary order. Later cells pick files by
# position (e.g. [0], [5]), so sort the listing to make that selection
# reproducible across runs and platforms.
chartevents_dir_list = sorted(os.listdir(REDUCED_CE_BY_ICUSTAY_ID))
len(chartevents_dir_list)

6

In [4]:
# Read the entry at position 5 of the directory listing and renumber its rows
# from 0, discarding whatever index was stored in the parquet file.
test = pd.read_parquet(
    os.path.join(REDUCED_CE_BY_ICUSTAY_ID, chartevents_dir_list[5])
).reset_index(drop=True)


![Glasgow](assets/images/GCS.jpg)

https://www.firstaidforfree.com/glasgow-coma-scale-gcs-first-aiders/

|	Chart Event	|	Dim	|	Normal	| Initial dtype | Change to dtype |
| --- | :--- | :--- | :--- | :--- |
|	1. Glasgow coma scale eye opening	|	4	|	4 Spontaneously	| str | int |
|	2. Glasgow coma scale verbal response	|	5	|	5 Oriented	| str | int |
|	3. Glasgow coma scale motor response	|	6	|	6 Obeys Commands	| str | int |
|	4. Glasgow coma scale total	|	13	|	15	| none/int | int |
|	5. Capillary refill rate	|	2	|	Normal < 3 secs	| str | int |
|	6. Diastolic blood pressure	|	1	|	70	| int | int |
|	7. Systolic blood pressure	|	1	|	105	| int | int |
|	8. Mean blood pressure	|	1	|	87.5	| int/float | int |
|	9. Heart Rate	|	1	|	80	| int | int |
|	10. Glucose	|	1	|	85	| int | int |
|	11. Fraction inspired oxygen	|	1	|	0.21	| int/float | float? |
|	12. Oxygen saturation	|	1	|	97.5	| int | int |
|	13. Respiratory rate	|	1	|	15	| int | int |
|	14. Body Temperature	|	1	|	37	| float | float |
|	15. pH	|	1	|	7.4	| float | float |
|	16. Weight	|	1	|	80.7	| float | float |
|	17. Height	|	1	|	168.8	| float | float |


Since most columns contain many `NaN` values (which NumPy integer dtypes cannot represent), should every numeric column simply be stored as a float?

In [6]:
# Display every second column among the first 34 (positions 0, 2, ..., 32).
test.iloc[:,0:34:2]

Unnamed: 0,GCS_EYE,GCS_MOTOR,GCS_VERBAL,GCS_TOTAL,CAPILLARY_REFILL,D_BLOOD_PRESSURE,M_BLOOD_PRESSURE,S_BLOOD_PRESSURE,HEART_RATE,GLUCOSE,FRAC_OXYGEN,O2_SAT,RESP_RATE,BODY_TEMP,PH,WEIGHT,HEIGHT
0,Spontaneously,Obeys Commands,Oriented,,,78.0,92.0,138.0,109,149.0,,98.0,17,98.0,,49.3,
1,,,,,,95.0,111.0,157.0,112,,,99.0,15,,,,
2,,,,,,79.0,88.0,116.0,86,,,99.0,6,,,,
3,Spontaneously,Obeys Commands,Oriented,,,96.0,104.0,132.0,88,127.0,,99.0,15,96.9,,,
4,,,,,,102.0,109.0,140.0,87,,,99.0,7,,,,
5,,,,,,93.0,102.0,131.0,84,,,100.0,8,,,,
6,,,,,,105.0,114.0,143.0,89,,,99.0,9,,,,
7,,,,,,98.0,108.0,141.0,93,,,99.0,12,,,,
8,Spontaneously,Obeys Commands,Oriented,,,84.0,94.0,122.0,85,,,99.0,10,97.5,,,
9,,,,,,77.0,86.0,115.0,82,247.0,,99.0,10,,,,


In [7]:
def gcs_eye_transform(x):
    """Convert one raw GCS eye-opening chart value to its numeric score.

    The raw values come in two spellings: with a leading score
    (e.g. ``'4 Spontaneously'``, ``'2 To pain'``) and without it
    (e.g. ``'Spontaneously'``, ``'To Pain'``), with inconsistent casing.
    Both spellings are accepted.

    Parameters
    ----------
    x : str or missing value
        Raw chart value. Anything that is not a string (``None``, float
        ``NaN``) is treated as missing.

    Returns
    -------
    float
        1.0-4.0 for a recognised label, ``np.nan`` for missing or
        unrecognised input (including the literal string ``'nan'``).
    """
    scores = {
        'no response': 1.0,
        'to pain': 2.0,
        'to speech': 3.0,
        'spontaneously': 4.0,
    }
    if not isinstance(x, str):
        return np.nan

    label = x.strip().lower()
    # Strip a leading numeric score such as the '4' in '4 spontaneously';
    # the text label alone decides the result.
    head, _, tail = label.partition(' ')
    if head.isdigit():
        label = tail.strip()
    return scores.get(label, np.nan)

In [8]:
# File and directory names used by this notebook.
CHARTEVENTS_FILENAME = 'mimic-iii/CHARTEVENTS.csv'
READMISSION_FILENAME = 'data/readmission.csv'
SAMPLES_DIR = 'data/samples/'
DATASTORE_FILENAME = 'samples.h5'
FEATHER_EXT = '.feather'

# Chunk size passed to pd.read_csv when streaming the chart-events CSV.
ROWS_TO_READ = 1_000_000
# Row count used only to size the progress bar of the chunked read.
MAX_ROWS_CHARTEVENTS = 330_712_483

skip_rows = 0

In [9]:
# Column names supplied to pd.read_csv via `names=` (the file's own header row
# is skipped there), listed in file order.
chartevents_columns = [
    "ROW_ID",
    "SUBJECT_ID",
    "HADM_ID",
    "ICUSTAY_ID",
    "ITEMID",
    "CHARTTIME",
    "STORETIME",
    "CGID",
    "VALUE",
    "VALUENUM",
    "VALUEUOM",
    "WARNING",
    "ERROR",
    "RESULTSTATUS",
    "STOPPED",
]

In [10]:
# One record per GCS chart event. The scan cell looks these up by position
# (0 = eye, 1 = motor, 2 = verbal, 3 = total) and reads only 'ITEMID'.
# NOTE(review): the 'CHAREVENT' key looks like a typo for 'CHARTEVENT'; it is a
# runtime key, so it is kept unchanged here.
event_to_id = [
    {
        'CHAREVENT': 'GCS_EYE',
        'DESCRIPTION': '',
        'ITEMID': [184, 220739],
        'UNIT': ['NONE', 'NONE'],
    },
    {
        'CHAREVENT': 'GCS_MOTOR',
        'DESCRIPTION': '',
        'ITEMID': [454, 223901],
        'UNIT': ['NONE', 'NONE'],
    },
    {
        'CHAREVENT': 'GCS_VERBAL',
        'DESCRIPTION': '',
        'ITEMID': [723, 223900],
        'UNIT': ['NONE', 'NONE'],
    },
    {
        'CHAREVENT': 'GCS_TOTAL',
        'DESCRIPTION': 'Sum of the 3 GCS events',
        'ITEMID': [198],
        'UNIT': ['NONE'],
    },
]

In [11]:
# Read every chart-events column as `object` so pandas performs no type
# inference while parsing; numeric casts happen explicitly afterwards.
chartevents_dtype = dict.fromkeys(
    (
        'ROW_ID',
        'SUBJECT_ID',
        'HADM_ID',
        'ICUSTAY_ID',
        'ITEMID',
        'CHARTTIME',
        'STORETIME',
        'CGID',
        'VALUE',
        'VALUENUM',
        'VALUEUOM',
        'WARNING',
        'ERROR',
        'RESULTSTATUS',
        'STOPPED',
    ),
    object,
)

In [12]:
# Number of progress-bar ticks: whole chunks in the file plus one.
total = MAX_ROWS_CHARTEVENTS // ROWS_TO_READ + 1

# Each chunk appends its own distinct VALUE strings, so the same value can
# appear many times in these lists; de-duplication happens in a later cell.
eye_unique, motor_unique, verbal_unique, total_unique = [], [], [], []

# A row is only kept when all of these identifying fields are present.
required = ['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID', 'CHARTTIME', 'ITEMID']

with pd.read_csv(
    CHARTEVENTS_FILENAME,
    names=chartevents_columns,
    dtype=chartevents_dtype,
    skiprows=1,
    chunksize=ROWS_TO_READ,
) as file:
    for chartevents in tqdm(file, total=total):
        chartevents = chartevents.dropna(subset=required).drop(columns='ROW_ID')
        # ITEMID is parsed as object; cast it so it matches the integer IDs
        # stored in event_to_id.
        chartevents = chartevents.astype({'ITEMID': np.int64})
        item_ids = chartevents['ITEMID']

        # event_to_id positions: 0 = eye, 1 = motor, 2 = verbal, 3 = total.
        eye_df = chartevents[item_ids.isin(event_to_id[0]['ITEMID'])]
        motor_df = chartevents[item_ids.isin(event_to_id[1]['ITEMID'])]
        verbal_df = chartevents[item_ids.isin(event_to_id[2]['ITEMID'])]
        total_df = chartevents[item_ids.isin(event_to_id[3]['ITEMID'])]

        eye_unique.extend(eye_df['VALUE'].unique().tolist())
        motor_unique.extend(motor_df['VALUE'].unique().tolist())
        verbal_unique.extend(verbal_df['VALUE'].unique().tolist())
        total_unique.extend(total_df['VALUE'].unique().tolist())
        

  0%|          | 0/331 [00:00<?, ?it/s]

In [13]:
import csv

In [14]:
# Persist the four collected value lists, one CSV row per list, in the order
# eye, motor, verbal, total.
# NOTE(review): the csv docs recommend opening the file with newline=''. Without
# it, some platforms emit an extra blank line after each row, and the later
# cells that index rows 0/2/4/6 appear to rely on that layout — change both
# sides together if this is ever fixed.
with open('unique.txt', 'w') as f:
    w = csv.writer(f)
    w.writerows([eye_unique, motor_unique, verbal_unique, total_unique])
    
    
    

In [15]:
# import csv

In [16]:
# Load every row of unique.txt back as a list of strings, in file order
# (missing values come back as the literal text 'nan').
gcs = []
with open('unique.txt', 'r') as f:
    r = csv.reader(f)
    gcs.extend(r)


In [20]:
# Depending on the platform that wrote unique.txt, blank rows may be interleaved
# between the data rows. Report the length of each non-empty row instead of
# hard-coding the even indices 0/2/4/6, which raise IndexError when no blank
# rows are present.
print(*[len(row) for row in gcs if row], sep='\n')

1070
1537
1540
2224


In [21]:
# Blank rows may be interleaved between the data rows depending on the platform
# that wrote unique.txt, so select the data rows by content rather than by the
# hard-coded positions 0/2/4/6.
gcs_rows = [row for row in gcs if row]
if len(gcs_rows) != 4:
    raise ValueError(
        f"expected 4 non-empty rows (eye, motor, verbal, total), found {len(gcs_rows)}"
    )

# Collapse the per-chunk duplicates down to the distinct values, keeping
# first-seen order.
eye_unique      = pd.unique(np.array(gcs_rows[0]))
motor_unique    = pd.unique(np.array(gcs_rows[1]))
verbal_unique   = pd.unique(np.array(gcs_rows[2]))
total_unique    = pd.unique(np.array(gcs_rows[3]))

In [23]:
# Number of distinct raw values per GCS component, one count per line
# (eye, motor, verbal, total).
print(
    len(eye_unique),
    len(motor_unique),
    len(verbal_unique),
    len(total_unique),
    sep='\n',
)

8
13
13
14


In [30]:
# Show the distinct raw labels for eye, motor and verbal, one list per line.
print(list(eye_unique), list(motor_unique), list(verbal_unique), sep='\n')


['4 Spontaneously', '1 No Response', '2 To pain', '3 To speech', 'nan', 'To Speech', 'Spontaneously', 'To Pain']
['6 Obeys Commands', '5 Localizes Pain', '1 No Response', '4 Flex-withdraws', 'nan', '2 Abnorm extensn', '3 Abnorm flexion', 'Localizes Pain', 'Obeys Commands', 'Flex-withdraws', 'No response', 'Abnormal Flexion', 'Abnormal extension']
['5 Oriented', '1.0 ET/Trach', '4 Confused', '2 Incomp sounds', '1 No Response', 'nan', '3 Inapprop words', 'No Response-ETT', 'Oriented', 'No Response', 'Confused', 'Incomprehensible sounds', 'Inappropriate Words']


In [31]:
# # eye
# ['4 Spontaneously', '1 No Response', '2 To pain', '3 To speech', 'nan', 'To Speech', 'Spontaneously', 'To Pain']
# ['1 No Response', '2 To pain', '3 To speech','4 Spontaneously']
# #   1           2       3          4
# ['response', 'pain', 'speech', 'spont']

# # motor
# ['6 Obeys Commands', '5 Localizes Pain', '1 No Response', '4 Flex-withdraws', 'nan', '2 Abnorm extensn', '3 Abnorm flexion', 
#  'Localizes Pain', 'Obeys Commands', 'Flex-withdraws', 'No response', 'Abnormal Flexion', 'Abnormal extension']
# ['1 No Response', '2 Abnorm extension', '3 Abnormal flexion', '4 Flex-withdraws', '5 Localizes Pain', '6 Obeys Commands']

# #verbal
# [ '1.0 ET/Trach',   'nan',  'No Response-ETT', 'Oriented', 'No Response', 'Confused', 'Incomprehensible sounds', 'Inappropriate Words']
# ['1 No Response', '2 Incomp sounds', '3 Inapprop words','4 Confused', '5 Oriented',]
# ['1 No Response', '2 Incomprehensible sounds', '3 Inappropriate words','4 Confused', '5 Oriented',]

# pd.Series.str.startswith(pat, na=None)


# Lookup tables from every raw GCS label seen in the data (with and without the
# leading score) to its score, kept as a string and cast to float after
# Series.map.
#
# np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed.
# NOTE(review): '__missing__' is an ordinary string key here; Series.map already
# yields NaN for labels absent from the dict, so the entry is kept only for
# backward compatibility.
gcs_eye_map = {
    '__missing__': np.nan,
    '4 Spontaneously': '4',
    '1 No Response': '1',
    '2 To pain': '2',
    '3 To speech': '3',
    'To Speech': '3',
    'Spontaneously': '4',
    'To Pain': '2',
}
gcs_motor_map = {
    '__missing__': np.nan,
    '6 Obeys Commands': '6',
    '5 Localizes Pain': '5',
    '1 No Response': '1',
    '4 Flex-withdraws': '4',
    '2 Abnorm extensn': '2',
    '3 Abnorm flexion': '3',
    'Localizes Pain': '5',
    'Obeys Commands': '6',
    'Flex-withdraws': '4',
    'No response': '1',
    'Abnormal Flexion': '3',
    'Abnormal extension': '2',
}
gcs_verbal_map = {
    '__missing__': np.nan,
    '5 Oriented': '5',
    '1.0 ET/Trach': '1',
    '4 Confused': '4',
    '2 Incomp sounds': '2',
    '1 No Response': '1',
    '3 Inapprop words': '3',
    'No Response-ETT': '1',
    'Oriented': '5',
    'No Response': '1',
    'Confused': '4',
    'Incomprehensible sounds': '2',
    'Inappropriate Words': '3',
}

# gcs = [
#     {'EYE':}
# ]
# 'spontaneously'

In [36]:
# Load the first sample, translate the three GCS label columns to float scores
# and replace GCS_TOTAL with their sum (NaN whenever any component is NaN).
test = (
    pd.read_parquet(os.path.join(REDUCED_CE_BY_ICUSTAY_ID, chartevents_dir_list[0]))
    .reset_index(drop=True)
    .assign(
        GCS_EYE=lambda df: df['GCS_EYE'].map(gcs_eye_map).astype(np.float64),
        GCS_MOTOR=lambda df: df['GCS_MOTOR'].map(gcs_motor_map).astype(np.float64),
        GCS_VERBAL=lambda df: df['GCS_VERBAL'].map(gcs_verbal_map).astype(np.float64),
    )
    .assign(
        GCS_TOTAL=lambda df: df['GCS_EYE'] + df['GCS_MOTOR'] + df['GCS_VERBAL'],
    )
)
# Columns at positions 0, 2, 4 and 6.
test.iloc[:, 0:7:2]

Unnamed: 0,GCS_EYE,GCS_MOTOR,GCS_VERBAL,GCS_TOTAL
0,,,,
1,,,,
2,4.0,6.0,5.0,15.0
3,,,,
4,,,,
5,,,,
6,4.0,6.0,5.0,15.0
7,,,,
8,,,,
9,,,,


In [37]:
# Inspect the per-column dtypes of `test` as it stands after the preceding cell.
test.dtypes

GCS_EYE                 float64
GCS_EYE_ID              float64
GCS_MOTOR               float64
GCS_MOTOR_ID            float64
GCS_VERBAL              float64
GCS_VERBAL_ID           float64
GCS_TOTAL               float64
GCS_TOTAL_ID             object
CAPILLARY_REFILL         object
CAPILLARY_REFILL_ID     float64
D_BLOOD_PRESSURE         object
D_BLOOD_PRESSURE_ID     float64
M_BLOOD_PRESSURE         object
M_BLOOD_PRESSURE_ID     float64
S_BLOOD_PRESSURE         object
S_BLOOD_PRESSURE_ID     float64
HEART_RATE               object
HEART_RATE_ID           float64
GLUCOSE                  object
GLUCOSE_ID              float64
FRAC_OXYGEN              object
FRAC_OXYGEN_ID           object
O2_SAT                   object
O2_SAT_ID               float64
RESP_RATE                object
RESP_RATE_ID            float64
BODY_TEMP                object
BODY_TEMP_ID            float64
PH                       object
PH_ID                   float64
WEIGHT                   object
WEIGHT_I

In [None]:
GCS_EYE                 float64
GCS_EYE_ID              float64
GCS_MOTOR               float64
GCS_MOTOR_ID            float64
GCS_VERBAL              float64
GCS_VERBAL_ID           float64
GCS_TOTAL               float64
GCS_TOTAL_ID             object
CAPILLARY_REFILL         object
CAPILLARY_REFILL_ID     float64
D_BLOOD_PRESSURE         object
D_BLOOD_PRESSURE_ID     float64
M_BLOOD_PRESSURE         object
M_BLOOD_PRESSURE_ID     float64
S_BLOOD_PRESSURE         object
S_BLOOD_PRESSURE_ID     float64
HEART_RATE               object
HEART_RATE_ID           float64
GLUCOSE                  object
GLUCOSE_ID              float64
FRAC_OXYGEN              object
FRAC_OXYGEN_ID           object
O2_SAT                   object
O2_SAT_ID               float64
RESP_RATE                object
RESP_RATE_ID            float64
BODY_TEMP                object
BODY_TEMP_ID            float64
PH                       object
PH_ID                   float64
WEIGHT                   object
WEIGHT_ID               float64
HEIGHT                   object
HEIGHT_ID               float64
GCS_EYE_IND               int64
GCS_MOTOR_IND             int64
GCS_VERBAL_IND            int64
GCS_TOTAL_IND             int64
CAPILLARY_REFILL_IND      int64
D_BLOOD_PRESSURE_IND      int64
M_BLOOD_PRESSURE_IND      int64
S_BLOOD_PRESSURE_IND      int64
HEART_RATE_IND            int64
GLUCOSE_IND               int64
FRAC_OXYGEN_IND           int64
O2_SAT_IND                int64
RESP_RATE_IND             int64
BODY_TEMP_IND             int64
PH_IND                    int64
WEIGHT_IND                int64
HEIGHT_IND                int64
dtype: object