# eICU Collaborative Research Database

# Workshop 1: Exploring the `patient` table

In [None]:
# Import libraries
import pandas as pd
import psycopg2
import matplotlib.pyplot as plt
import seaborn as sbn

In [None]:
# Plot settings
%matplotlib inline
plt.style.use('ggplot')
fontsize = 20 # size for x and y ticks
plt.rcParams['legend.fontsize'] = fontsize
plt.rcParams.update({'font.size': fontsize})

In [None]:
# Database config
sqluser = "postgres"        # role used to connect
dbname = "eicu"             # database to connect to
schema_name = "eicu_crd"    # schema placed on the search path

In [None]:
# Open a connection using the settings above, then put the configured
# schema on the search path so later queries can name tables directly
con = psycopg2.connect(user=sqluser, dbname=dbname)
cur = con.cursor()
cur.execute('SET search_path to {}'.format(schema_name))

## 1. Display list of tables

In [None]:
# List every table in the configured schema, in alphabetical order.
# The schema name is bound as a query parameter and compared with `=`:
# under LIKE the underscore in 'eicu_crd' is a single-character wildcard,
# so the previous pattern could also match other schema names.
query = """
SELECT tablename
FROM pg_catalog.pg_tables
WHERE schemaname = %(schema)s
ORDER BY tablename;
"""

list_of_tables = pd.read_sql_query(query, con, params={'schema': schema_name})

In [None]:
# display the table names returned by the query above
list_of_tables

## 2. Reviewing the patient population

In [None]:
# SQL that pulls every column of every row from the patient table
query = """
SELECT *
FROM patient
"""

# echo the SQL so it can be checked before it is run
print(query)

In [None]:
# execute the SQL over the open connection and keep the result as a dataframe
patient_tab = pd.read_sql_query(sql=query, con=con)

In [None]:
# preview the first few rows of the patient dataframe
patient_tab.head()

In [None]:
# list all of the column names in the patient dataframe
patient_tab.columns

In [None]:
# narrow the view to a handful of columns and preview the first rows
columns = [
    'patientunitstayid',
    'gender',
    'age',
    'unitdischargestatus',
]
patient_tab.loc[:, columns].head()

In [None]:
# sort the age column, then list each distinct value once
age_col = 'age'
patient_tab.loc[:, age_col].sort_values().unique()

In [None]:
# try plotting a histogram of the raw age column
# NOTE: this cell is expected to raise an error - what is the error?
# (the next cell converts the ages to numbers before plotting again)
figsize = (18,8)
patient_tab[age_col].plot(kind='hist',
                             figsize=figsize, 
                             fontsize=fontsize)

In [None]:
# add a numeric copy of the age column
# errors='coerce' turns any value that cannot be parsed as a number into NaN
agenum_col = 'age_num'
numeric_ages = pd.to_numeric(patient_tab[age_col], errors='coerce')
patient_tab[agenum_col] = numeric_ages

# list the distinct numeric ages in sorted order
patient_tab[agenum_col].sort_values().unique()

In [None]:
# histogram of the numeric ages created in the previous cell
figsize = (18, 8)
patient_tab[agenum_col].plot.hist(figsize=figsize, fontsize=fontsize)

In [None]:
# summary statistics for the numeric age column
# (values that could not be parsed as numbers are NaN in this column)
patient_tab[agenum_col].describe()

## Exercise

- What is the mean age of patients in this calculation? Why is this lower than the true mean?
- Use the `describe()` method to explore the `admissionweight` of patients. What issue do you see?

In [None]:
# Treat admission weights above the 99th percentile as outliers.
weight_col = 'admissionweight'
quant = patient_tab[weight_col].quantile(0.99)

# Blank out only the weight value of the outlying rows. Assigning through a
# bare boolean mask (patient_tab[mask] = None) would overwrite every column
# of those rows and wipe the rest of each patient's record.
patient_tab.loc[patient_tab[weight_col] > quant, weight_col] = float('nan')

In [None]:
# summary statistics for the admission weights after applying the threshold
patient_tab[weight_col].describe()

## 3. Reviewing data for a single patient stay 

`patientunitstayid` is a unique ID assigned to each stay in the ICU (it is similar to icustay_id in MIMIC-III).

### 3.1. Patient characteristics

In [None]:
# select a single ICU stay; the queries in the rest of this section
# filter on this patientunitstayid
patientunitstayid = 141168

In [None]:
# SQL that pulls the patient-table row(s) for the selected ICU stay
query = """
SELECT *
FROM patient
WHERE patientunitstayid = {stay_id}
""".format(stay_id=patientunitstayid)

# echo the SQL so it can be checked before it is run
print(query)

In [None]:
# execute the SQL over the open connection and keep the result as a dataframe
unitstay = pd.read_sql_query(sql=query, con=con)

In [None]:
# preview the first few rows returned for the selected stay
unitstay.head()

In [None]:
# display the column names of the single-stay dataframe
unitstay.columns

In [None]:
# narrow the single-stay dataframe to a handful of columns
columns = [
    'patientunitstayid',
    'gender',
    'age',
    'unitdischargestatus',
]
unitstay.loc[:, columns]

In [None]:
# summary statistics for the selected stay; the dataframe loaded in this
# section is `unitstay` (no variable named `patient` is defined anywhere)
unitstay.describe()

### Exercise

- What does `patienthealthsystemstayid` represent? (hint, see: http://eicu-crd.mit.edu/eicutables/patient/)
- What year was the patient admitted to the ICU? What year was she discharged?
- What was the status of the patient upon discharge from the unit?
- Explore some other patients.

### 3.2. Periodic vital signs

In [None]:
# SQL that pulls the vitalperiodic rows for the selected ICU stay
query = """
SELECT *
FROM vitalperiodic
WHERE patientunitstayid = {stay_id}
""".format(stay_id=patientunitstayid)

# echo the SQL so it can be checked before it is run
print(query)

In [None]:
# execute the SQL over the open connection and keep the result as a dataframe
vitalperiodic = pd.read_sql_query(sql=query, con=con)

In [None]:
# preview the first few rows of the periodic vital signs
vitalperiodic.head()

In [None]:
# display the full list of column names in the periodic vital signs dataframe
vitalperiodic.columns

In [None]:
# order the observations chronologically: observationoffset is the time
# in minutes from ICU admission
vitalperiodic = vitalperiodic.sort_values('observationoffset')
vitalperiodic.head()

In [None]:
# keep the time offset plus the measurement columns, then index the
# dataframe by the offset so it becomes the x-axis when plotted
columns = [
    'observationoffset',
    'temperature', 'sao2', 'heartrate', 'respiration',
    'cvp', 'etco2',
    'systemicsystolic', 'systemicdiastolic', 'systemicmean',
    'pasystolic', 'padiastolic', 'pamean',
    'st1', 'st2', 'st3',
    'icp',
]

vitalperiodic = vitalperiodic.loc[:, columns].set_index('observationoffset')
vitalperiodic.head()

In [None]:
# draw every vital-sign column against the time offset on one set of axes
figsize = (18, 8)
title = 'Vital signs from admission to ICU for patientunitstayid = {stay_id} \n'.format(
    stay_id=patientunitstayid)
ax = vitalperiodic.plot(figsize=figsize, fontsize=fontsize, title=title)

# move the legend outside the plot area, to the right of the axes
ax.legend(bbox_to_anchor=(1.0, 0.5), loc='center left')
ax.title.set_size(fontsize)
ax.set_xlabel("Minutes after admission to the ICU")
ax.set_ylabel("Absolute value")

### 3.3. Aperiodic vital signs

In [None]:
# SQL that pulls the vitalaperiodic rows for the selected ICU stay
query = """
SELECT *
FROM vitalaperiodic
WHERE patientunitstayid = {stay_id}
""".format(stay_id=patientunitstayid)

# echo the SQL so it can be checked before it is run
print(query)

In [None]:
# execute the SQL over the open connection and keep the result as a dataframe
vitalaperiodic = pd.read_sql_query(sql=query, con=con)

In [None]:
# preview the first few rows of the aperiodic vital signs
vitalaperiodic.head()

In [None]:
# display the column names of the aperiodic vital signs dataframe
vitalaperiodic.columns

In [None]:
# plot the aperiodic vital signs dataframe with the default plot settings
vitalaperiodic.plot()

### 3.4. Infusion drugs

In [None]:
# SQL that pulls the infusiondrug rows for the selected ICU stay
query = """
SELECT *
FROM infusiondrug
WHERE patientunitstayid = {stay_id}
""".format(stay_id=patientunitstayid)

# echo the SQL so it can be checked before it is run
print(query)

In [None]:
# execute the SQL over the open connection and keep the result as a dataframe
infusiondrug = pd.read_sql_query(sql=query, con=con)

In [None]:
# preview the first few rows of the infusion drug records
infusiondrug.head()

### 3.5. Laboratory test results

In [None]:
# SQL that pulls the lab rows for the selected ICU stay
query = """
SELECT *
FROM lab
WHERE patientunitstayid = {stay_id}
""".format(stay_id=patientunitstayid)

# echo the SQL so it can be checked before it is run
print(query)

In [None]:
# execute the SQL over the open connection and keep the result as a dataframe
lab = pd.read_sql_query(sql=query, con=con)

In [None]:
# preview the first few rows of the laboratory results
lab.head()