In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import psycopg2
import os 
import shutil
# below imports are used to print out pretty pandas dataframes
from IPython.display import display, HTML
from sklearn.model_selection import train_test_split
import warnings
# Suppress every warning, from every library, for the rest of the session.
# NOTE(review): this also hides deprecation warnings; consider a narrower filter.
warnings.filterwarnings("ignore")
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply matplotlib's 'ggplot' style sheet to all subsequent figures.
plt.style.use('ggplot')

ModuleNotFoundError: No module named 'psycopg2'

In [5]:
# Information used to create a database connection.
# Every setting can be overridden through an environment variable so that
# credentials do not have to be edited into (and shared with) the notebook.
# The fallback values reproduce the previous hard-coded local setup, including
# the password defaulting to the user name.
sqluser = os.environ.get('MIMIC_DB_USER', 'postgres')
sqlpassword = os.environ.get('MIMIC_DB_PASSWORD', sqluser)
dbname = os.environ.get('MIMIC_DB_NAME', 'mimic')
schema_name = os.environ.get('MIMIC_DB_SCHEMA', 'mimiciii')

# Connect to postgres with a copy of the MIMIC-III database
con = psycopg2.connect(dbname=dbname, user=sqluser, password=sqlpassword)

# The statement below is prepended to queries to ensure they select from the right schema
query_schema = 'set search_path to ' + schema_name + ';'

In [6]:
# ICD-9 codes that define the cohort. Only '51882' is active; the broader
# candidate list that was tried before is
# ('51881', '51882', '51884', '51851', '51852', '51853', '769').
# A one-element tuple is fine here: the codes are sent as a bound query
# parameter, so no dummy entry is needed to keep the generated SQL well-formed.
ARDS_list = ('51882',)

# NOTE(review): the join matches on subject_id only, so every ICU admission of
# a matching subject is returned, not just the admission carrying the code.
# Confirm whether DIAGNOSES_ICD.hadm_id should be part of the join condition.
query = query_schema + """
SELECT DISTINCT icustays.subject_id, icustays.hadm_id
FROM icustays
JOIN DIAGNOSES_ICD ON DIAGNOSES_ICD.subject_id = icustays.subject_id
WHERE icd9_code IN %(icd9_codes)s
"""

# psycopg2 adapts a Python tuple to a parenthesised SQL list suitable for IN.
df = pd.read_sql_query(query, con, params={"icd9_codes": ARDS_list})

if df.empty:
    # Fail with an explicit message instead of the generic error raised by
    # sampling from an empty frame.
    raise ValueError(f"No ICU stay found for ICD-9 codes {ARDS_list}.")

# Draw one (subject_id, hadm_id) row; the fixed seed makes the pick reproducible.
# Note that len(df) counts distinct (subject_id, hadm_id) pairs.
subject_id, hadm_id = df.sample(1, random_state=0).values[0]
print(f"Subject {subject_id}  selected randomly from a cohort of {len(df)} patients.")

Subject 1063  selected randomly from a cohort of 691 patients.


In [7]:
# Fetch every CHARTEVENTS row of the selected admission, joined with its
# D_ITEMS entry.
# NOTE: SELECT * returns the columns of both tables, so a name present in both
# (e.g. itemid) appears twice in the resulting frame.
query = query_schema + """
SELECT *
FROM CHARTEVENTS
JOIN D_ITEMS ON CHARTEVENTS.itemid = D_ITEMS.itemid
WHERE hadm_id = %(hadm_id)s
"""
# hadm_id was unpacked from a NumPy array; convert it to a builtin int, which
# the database driver can bind as a query parameter.
df_patient_mimic_format = pd.read_sql_query(
    query, con, params={"hadm_id": int(hadm_id)}
)

In [8]:
# Reference instant for relative times: the earliest charttime among the
# patient's chart events.
time_start = df_patient_mimic_format.loc[:, "charttime"].min()

In [12]:
# Turn each chart event into a sparse record: the elapsed time in hours since
# time_start, plus a single column named after the event's label that holds
# the event's value.
events = zip(
    df_patient_mimic_format["charttime"],
    df_patient_mimic_format["label"],
    df_patient_mimic_format["value"],
)
rows = [
    {"time": (charttime - time_start).total_seconds() / 3600, label: value}
    for charttime, label, value in events
]

# One row per event and one column per distinct label, ordered by elapsed time.
df_patient = (
    pd.DataFrame(rows)
    .sort_values(by="time")
    .reset_index(drop=True)
)
df_patient

Unnamed: 0,time,PainLevel Acceptable,Waveform-Vent,Inspired Gas Temp,IV Site Appear #1,IV Site Appear #2,IV Site Appear #3,IV Site Appear #4,ImpSkin Character #1,ImpSkin Treatment #1,...,GI#1 Tube PlaceCheck,GI Intub #1 [Type],GI#1 Tube [Status],FiO2 Set,Family Communication,Flow-By (lpm),Flow-by Sensitivity,FIO2 Alarm-High,FIO2 Alarm-Low,NBP Mean
0,0.0,,,,,,,,,,...,,,,,,,,,,
1,0.0,,,,,,,,,,...,,,,,,,,,,
2,0.0,,,,,,,,,,...,,,,,,,,,,
3,0.0,,,,,,,,,,...,,,,,,,,,,
4,0.0,,,,,,,,,,...,,,,.30000001192092896,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12841,545.1,,,,,,,,,,...,,,,,,,,,,
12842,545.1,,,,,,,,,,...,,,,,,,,,,
12843,545.1,,,,,,,,,,...,,,,,,,,,,
12844,545.1,,,,,,,,,,...,,,,.40000000596046448,,,,,,


In [15]:
# Propagate each column's last non-missing value down the rows, so a row
# carries the most recent observation of every variable seen so far.
df_patient = df_patient.ffill(axis=0)

Unnamed: 0,time,PainLevel Acceptable,Waveform-Vent,Inspired Gas Temp,IV Site Appear #1,IV Site Appear #2,IV Site Appear #3,IV Site Appear #4,ImpSkin Character #1,ImpSkin Treatment #1,...,GI#1 Tube PlaceCheck,GI Intub #1 [Type],GI#1 Tube [Status],FiO2 Set,Family Communication,Flow-By (lpm),Flow-by Sensitivity,FIO2 Alarm-High,FIO2 Alarm-Low,NBP Mean
1000,26.1,Yes,RAMP,,WNL,WNL,WNL,,,,...,,,,.30000001192092896,Family Visited,,,,,87.333297729492188
1001,26.1,Yes,RAMP,,WNL,WNL,WNL,,,,...,,,,.30000001192092896,Family Visited,,,,,86.333297729492188
1002,26.1,Yes,RAMP,,WNL,WNL,WNL,,,,...,,,,.30000001192092896,Family Visited,,,,,86.333297729492188
1003,26.1,Yes,RAMP,,WNL,WNL,WNL,,,,...,,,,.30000001192092896,Family Visited,,,,,86.333297729492188
1004,26.1,Yes,RAMP,,WNL,WNL,WNL,,,,...,,,,.30000001192092896,Family Visited,,,,,86.333297729492188
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12841,545.1,Yes,RAMP,34,WNL,WNL,WNL,WNL,Abrasion,Duoderm,...,Done,PEG,Feeding,.40000000596046448,Family Visited,6,2,40,25,79.333297729492188
12842,545.1,Yes,RAMP,34,WNL,WNL,WNL,WNL,Abrasion,Duoderm,...,Done,PEG,Feeding,.40000000596046448,Family Visited,6,2,40,25,79.333297729492188
12843,545.1,Yes,RAMP,34,WNL,WNL,WNL,WNL,Abrasion,Duoderm,...,Done,PEG,Feeding,.40000000596046448,Family Visited,6,2,40,25,79.333297729492188
12844,545.1,Yes,RAMP,34,WNL,WNL,WNL,WNL,Abrasion,Duoderm,...,Done,PEG,Feeding,.40000000596046448,Family Visited,6,2,40,25,79.333297729492188


In [11]:
project_dir = r"/home/julien/Documents/stage"
save_loc = r"data/MIMIC/patients"
# The absolute project_dir must come first: os.path.join discards every
# component that precedes an absolute one, so the previous argument order
# silently dropped save_loc from the destination.
# NOTE(review): "subject_id" is a literal folder name here, not the selected
# patient's id — confirm this is the intended layout.
patient_dir = os.path.join(project_dir, save_loc, r"subject_id")
# makedirs also creates missing parent folders, and exist_ok makes re-running
# the cell safe when the folder is already there.
os.makedirs(patient_dir, exist_ok=True)
# Write the patient's table without the positional index column.
df_patient.to_csv(os.path.join(patient_dir, f"patient_{subject_id}.csv"), index=False)