# 23-08-11

An initial exploration of the MIMIC-III dataset with Shiv Meka at RPH.

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Shell escape: list the contents of the current working directory.
!ls

## Loading Data

**ADMISSIONS**: 
* Each row corresponds to a specific hospital admission, uniquely identified by `HADM_ID`

**DIAGNOSES_ICD**: 
* Each row corresponds to a diagnosis code associated with a specific hospital admission. 
* `SEQ_NUM` indicates the order in which the diagnosis codes were recorded during that specific patient's admission. 
* We could use this to provide context about the chronological sequence of diagnoses given to a patient during their hospital stay. We would have to look at what point in time these were updated.

In [None]:
# ADMISSIONS: one row per hospital admission, uniquely identified by HADM_ID.
# 58976 recorded admissions
df_admissions = pd.read_csv('data/ADMISSIONS.csv')

In [None]:
# DIAGNOSES_ICD: one row per diagnosis code attached to a hospital admission.
df_disease = pd.read_csv('data/DIAGNOSES_ICD.csv')

In [None]:
# D_ICD_DIAGNOSES: dictionary of ICD9 codes. This notebook uses its ICD9_CODE
# column as the join key and its SHORT_TITLE column for text searches.
df_ICD_desc = pd.read_csv('data/D_ICD_DIAGNOSES.csv')

In [None]:
# Attach the ICD9 description columns to every diagnosis row.
# This is an inner join (the pandas default), so diagnosis rows whose ICD9_CODE
# has no match in the dictionary are dropped from full_df.
full_df = df_disease.merge(df_ICD_desc, how='inner', on='ICD9_CODE')
full_df

## ICD9 Codes

In [None]:
# Example: every diagnosis row recorded for admission 172335, showing that a
# single admission can carry several ICD codes.
df_disease.loc[df_disease['HADM_ID'] == 172335]

In [None]:
# Dictionary entries whose short title mentions sepsis
# (matches the spellings "sepsis" and "Sepsis" only).
df_ICD_desc.loc[
    df_ICD_desc['SHORT_TITLE'].map(lambda title: 'sepsis' in title or 'Sepsis' in title)
]

In [None]:
# Collect, as a NumPy array, the ICD9 codes whose short title contains "seps"
# (compared in lower case, so the match is case-insensitive).
icd_sepsis = df_ICD_desc.loc[
    df_ICD_desc['SHORT_TITLE'].map(lambda title: 'seps' in title.lower()),
    'ICD9_CODE',
].values

In [None]:
# Keep only the diagnosis rows whose ICD9 code is one of the sepsis codes.
# Series.isin does the membership test in one vectorised pass instead of
# scanning icd_sepsis once per diagnosis row.
df_t01 = df_disease[df_disease['ICD9_CODE'].isin(icd_sepsis)]

In [None]:
# First line: number of sepsis-coded diagnosis rows.
# Second line: number of distinct patients (SUBJECT_ID) among those rows.
print(len(df_t01), 'incidences of sepsis')
print(len(df_t01['SUBJECT_ID'].unique()), 'individuals that have had sepsis')

In [None]:
# Patients who have been admitted on multiple occasions.
# Keep the admission rows of every SUBJECT_ID that appears at least twice; the
# groupby-filter is expensive, so it is evaluated once and the result reused.
df_readmitted = df_admissions.groupby('SUBJECT_ID').filter(lambda grp: len(grp) >= 2)
print(df_readmitted['SUBJECT_ID'].unique().shape[0], 'patients have been admitted on multiple occassions')

<span style="color:red; font-size:30px;">I don't think that I got everything from here!</span>

In [None]:
# Patients who have had sepsis: the distinct SUBJECT_IDs among the merged
# diagnosis rows whose short title mentions sepsis.
# The match is case-insensitive so titles that start with "Sepsis" are found
# as well; na=False treats a missing title as "no match".
full_df.loc[
    full_df['SHORT_TITLE'].str.contains('sepsis', case=False, regex=False, na=False),
    'SUBJECT_ID',
].unique()

In [None]:
# TODO: merge with ADMISSIONS, because we don't have a time variable here

In [None]:
# A patient can have multiple admissions, and each admission can have multiple ICD codes.
# TODO: look at pathways in neo4j

## Sweetviz EDA

An auto EDA tool. Click 'association' for all the correlation matrices. 

In [None]:
import sweetviz

In [None]:
# Profile only the first 10,000 LABEVENTS rows; a report over every row
# (sweetviz.analyze on the full table) takes a long time to process.
# The sample is read straight from the CSV so this cell runs on a fresh kernel
# without depending on a `df_lab` frame created in another cell.
df_lab_sample = pd.read_csv('data/LABEVENTS.csv', nrows=10000)
analysis = sweetviz.analyze(df_lab_sample)

In [None]:
# Show the Sweetviz report stored in `analysis` via its show_notebook() method.
analysis.show_notebook()

# LABEVENTS

In [None]:
# Load the lab events table and the lab item dictionary; this notebook joins
# the two frames on their shared ITEMID column.
df_lab = pd.read_csv('data/LABEVENTS.csv')
df_lab_desc = pd.read_csv('data/D_LABITEMS.csv')

In [None]:
# Display the lab item dictionary loaded from D_LABITEMS.csv.
df_lab_desc

In [None]:
# Attach the D_LABITEMS description columns to each lab event through the
# shared ITEMID key (inner join, the pandas default). The result is only
# displayed, not stored.
df_lab.merge(df_lab_desc, on='ITEMID')

In [None]:
# Reference range value
# - An indicator for abnormal results.
# - Open question: is there a reference range field in MIMIC-III?

# Distinct values that occur in the FLAG column of the lab events.
pd.unique(df_lab['FLAG'])

In [None]:
# Whether a lab result is abnormal is indicated by the 'FLAG' column of LABEVENTS

# LABITEMS
Tells you about organisms in the blood
MICROBIOLOGYEVENTS.csv

In [None]:

# Every distinct lab item identifier (ITEMID) in the D_LABITEMS dictionary.
df_lab_desc['ITEMID'].unique()

In [None]:
# Attach item-dictionary rows to microbiology events via the specimen item id.
# The frames are loaded here so the cell runs on a fresh kernel.
# NOTE(review): assumes D_ITEMS.csv and MICROBIOLOGYEVENTS.csv sit in data/ next
# to the other tables, that SPEC_ITEMID is a MICROBIOLOGYEVENTS column, and that
# the D_ITEMS key column is named ITEMID — confirm against the MIMIC-III schema.
df_items = pd.read_csv('data/D_ITEMS.csv')
df_micro = pd.read_csv('data/MICROBIOLOGYEVENTS.csv')
pd.merge(df_micro, df_items, left_on='SPEC_ITEMID', right_on='ITEMID')

In [None]:
# Item dictionary rows whose LINKSTO value is exactly 'microbiologyevents'.
# Requires `df_items` to already be present in the kernel namespace.
df_items.loc[df_items['LINKSTO'].eq('microbiologyevents')]

In [None]:
# CAREVU is the bedside monitor

In [None]:
# Distinct values found in the LINKSTO column of the item dictionary.
# Requires `df_items` to already be present in the kernel namespace.
df_items.LINKSTO.unique()

In [None]:
# Sepsis patients: rows of the merged diagnosis table whose short title
# contains "sepsis" or "Sepsis".
df_sepsis = full_df.loc[
    full_df['SHORT_TITLE'].map(lambda title: ('sepsis' in title) or ('Sepsis' in title))
]

In [None]:
# Display the sepsis-related diagnosis rows held in df_sepsis.
df_sepsis

In [None]:
# Patient -> admission -> diagnosis relation.
# entity_relation keeps the identifying columns of the merged diagnosis table;
# the grouped expression lists each admission's ICD9 codes in SEQ_NUM order.
# NOTE(review): the original row mask and aggregation were left unfinished
# (`full_df[full_df]` and `.apply()` with no function both raise); confirm that
# a per-admission list of codes is the intended relation.
entity_relation = full_df[['SUBJECT_ID', 'HADM_ID', 'SEQ_NUM', 'ICD9_CODE', 'SHORT_TITLE']]
(
    entity_relation
    .sort_values('SEQ_NUM')
    .groupby(['SUBJECT_ID', 'HADM_ID'])['ICD9_CODE']
    .apply(list)
)