In [1]:
%load_ext google.cloud.bigquery

In [2]:
from google.cloud import bigquery
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Shared BigQuery client used by the query cells and helpers in this notebook.
# It is constructed with no arguments, so the project and credentials are
# presumably resolved from the runtime environment — TODO confirm.
client = bigquery.Client()

In [4]:
# create query scripts

def create_query_scripts(table_name, dataset="my-msc-project-345300.MIMIC_III"):
    """Build the per-table SQL prefix that ``select_data`` completes.

    The returned text deliberately ends with ``WHERE SUBJECT_ID = `` so that
    the caller can append one subject id per request.

    Parameters
    ----------
    table_name : str
        Name of the table inside ``dataset`` (for example ``'admissions'``).
    dataset : str, optional
        ``project.dataset`` qualifier placed in front of ``table_name``.
        Defaults to the location this notebook has always queried, so existing
        one-argument calls produce exactly the same SQL as before.

    Returns
    -------
    str
        ``"SELECT * FROM <dataset>.<table_name> WHERE SUBJECT_ID = "``.
    """
    return f"SELECT * FROM {dataset}.{table_name} WHERE SUBJECT_ID = "

In [5]:
# Query data from Google BigQuery

def select_data(subject_ids, query):
    """Run ``query`` once per subject and stack the results into one frame.

    Parameters
    ----------
    subject_ids : iterable
        Subject identifiers; each one is converted with ``str()`` and appended
        to ``query``.
    query : str
        SQL prefix that becomes a complete statement once a subject id is
        appended (for example the output of ``create_query_scripts``).

    Returns
    -------
    pandas.DataFrame
        Rows from every per-subject result, concatenated in iteration order.
        Each result keeps its own index labels, so labels can repeat across
        subjects. If no result has any rows, the last (empty) result is
        returned so its columns are preserved; if ``subject_ids`` is empty, an
        empty frame with no columns is returned.
    """
    # Collect the per-subject frames first and concatenate once at the end:
    # growing a DataFrame inside the loop re-copies all earlier rows each time.
    frames = [
        client.query(query + str(subject_id)).result().to_dataframe()
        for subject_id in subject_ids
    ]

    # Empty results contribute no rows, so leave them out of the concat.
    non_empty = [frame for frame in frames if not frame.empty]
    if non_empty:
        return pd.concat(non_empty, axis=0)

    return frames[-1].copy() if frames else pd.DataFrame()

## Select patient sample

In [6]:
# Sample five rows from the patients table.
# NOTE(review): rand() is not seeded here, so the sample is expected to differ
# between runs — confirm whether reproducibility matters for this notebook.
query_get_random_patients = "SELECT * FROM my-msc-project-345300.MIMIC_III.patients order by rand() limit 5;"
random_patients_df = (
    client.query(query_get_random_patients)
    .result()
    .to_dataframe()
)

random_patients_df.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,GENDER,DOB,DOD,DOD_HOSP,DOD_SSN,EXPIRE_FLAG
0,16093,17011,M,2068-07-10,2131-06-02,2131-06-02,2131-06-02,1
1,43454,88315,F,1882-01-31,NaT,NaT,NaT,0
2,13879,14674,M,2064-05-02,NaT,NaT,NaT,0
3,3106,3282,M,2122-11-11,NaT,NaT,NaT,0
4,28548,30369,M,2119-03-09,NaT,NaT,NaT,0


In [7]:
# Keep only the SUBJECT_ID column of the sampled patients; these ids are what
# the per-table sample queries are filtered on.
patients_subject_ids = random_patients_df.loc[:, 'SUBJECT_ID']
patients_subject_ids

0    17011
1    88315
2    14674
3     3282
4    30369
Name: SUBJECT_ID, dtype: Int64

### Patients and transfer information

In [8]:
admissions_sample_df = select_data(patients_subject_ids, create_query_scripts('admissions'))

In [9]:
callout_sample_df = select_data(patients_subject_ids, create_query_scripts('callout'))

In [10]:
icustays_sample_df = select_data(patients_subject_ids, create_query_scripts('icustays'))

In [11]:
services_sample_df = select_data(patients_subject_ids, create_query_scripts('services'))

In [12]:
transfers_sample_df = select_data(patients_subject_ids, create_query_scripts('transfers'))

### ICU Information

In [13]:
caregivers_sample_df = None

In [14]:
chartevents_sample_df = select_data(patients_subject_ids, create_query_scripts('chartevents'))

In [15]:
datetimeevents_sample_df = select_data(patients_subject_ids, create_query_scripts('datetimeevents'))

In [16]:
inputevents_cv_sample_df = select_data(patients_subject_ids, create_query_scripts('inputevents_cv'))

In [17]:
inputevents_mv_sample_df = select_data(patients_subject_ids, create_query_scripts('inputevents_mv'))

In [18]:
outputevents_sample_df = select_data(patients_subject_ids, create_query_scripts('outputevents'))

In [19]:
procedureevents_mv_sample_df = select_data(patients_subject_ids, create_query_scripts('procedureevents_mv'))

### Hospital records

In [20]:
cptevents_sample_df = select_data(patients_subject_ids, create_query_scripts('cptevents'))

In [21]:
diagnoses_icd_sample_df = select_data(patients_subject_ids, create_query_scripts('diagnoses_icd'))

In [22]:
drgcodes_sample_df = select_data(patients_subject_ids, create_query_scripts('drgcodes'))

In [23]:
labevents_sample_df = select_data(patients_subject_ids, create_query_scripts('labevents'))

In [24]:
microbiologyevents_sample_df = select_data(patients_subject_ids, create_query_scripts('microbiologyevents'))

In [25]:
prescriptions_sample_df = select_data(patients_subject_ids, create_query_scripts('prescriptions'))

In [26]:
procedures_icd_sample_df = select_data(patients_subject_ids, create_query_scripts('procedures_icd'))

## Combine samples

In [82]:
patients_df = pd.merge(random_patients_df.drop('ROW_ID', axis=1), admissions_sample_df.drop('ROW_ID', axis=1), on=['SUBJECT_ID'], how='outer')

In [83]:
patients_df = pd.merge(patients_df, callout_sample_df.drop('ROW_ID', axis=1), on=['SUBJECT_ID', 'HADM_ID'], how='outer')

In [84]:
patients_df = pd.merge(patients_df, icustays_sample_df.drop('ROW_ID', axis=1), on=['SUBJECT_ID', 'HADM_ID'], how='outer')

In [85]:
patients_df = pd.merge(patients_df, services_sample_df.drop('ROW_ID', axis=1), on=['SUBJECT_ID', 'HADM_ID'], how='outer')

In [86]:
# icustays_sample_df was already joined in full by an earlier cell; here the
# columns named in the drop list are removed and the remainder is joined again
# on (SUBJECT_ID, HADM_ID). Without de-duplication, a key pair that appears on
# several icustays rows is matched against each of its own rows and the
# combined frame is multiplied. drop_duplicates() collapses those repeats so
# this join can no longer inflate the row count.
# NOTE(review): this cell looks like a leftover from drafting the transfers
# join and may be removable altogether — confirm.
patients_df = pd.merge(
    patients_df,
    icustays_sample_df.drop(
        ['ROW_ID', 'ICUSTAY_ID', 'DBSOURCE', 'FIRST_CAREUNIT', 'LAST_CAREUNIT',
         'FIRST_WARDID', 'LAST_WARDID', 'INTIME', 'OUTTIME', 'LOS'],
        axis=1,
    ).drop_duplicates(),
    on=['SUBJECT_ID', 'HADM_ID'],
    how='outer',
)

In [87]:
patients_df = pd.merge(patients_df, transfers_sample_df.drop(['ROW_ID', 'ICUSTAY_ID', 'DBSOURCE', 'CURR_CAREUNIT', 'CURR_WARDID', \
                                                'INTIME', 'OUTTIME', 'LOS'], axis=1), on=['SUBJECT_ID', 'HADM_ID'], how='outer')

In [88]:
len(patients_df)

26

---

In [89]:
df1 = pd.merge(random_patients_df.drop('ROW_ID', axis=1), datetimeevents_sample_df.drop('ROW_ID', axis=1), on=['SUBJECT_ID'], how='outer')

In [103]:
df2 = pd.merge(random_patients_df.drop('ROW_ID', axis=1), inputevents_cv_sample_df.drop('ROW_ID', axis=1), on=['SUBJECT_ID'], how='outer')

In [None]:
df3 = pd.merge(random_patients_df.drop('ROW_ID', axis=1), inputevents_mv_sample_df.drop('ROW_ID', axis=1), on=['SUBJECT_ID'], how='outer')

In [99]:
df4 = pd.merge(random_patients_df.drop('ROW_ID', axis=1), outputevents_sample_df.drop('ROW_ID', axis=1), on=['SUBJECT_ID'], how='outer')

Unnamed: 0,CHARTTIME_datetimeevents,CHARTTIME_inputevents_cv
0,NaT,NaT
1,2182-02-01 08:27:00,NaT
2,2182-02-01 00:03:00,NaT
3,2182-02-01 16:38:00,NaT
4,2182-02-02 08:34:00,NaT
...,...,...
2879,NaT,2116-09-07 22:00:00
2880,NaT,2116-09-07 17:00:00
2881,NaT,2116-09-09 01:00:00
2882,NaT,2116-09-09 12:00:00


In [96]:
# Outer-join the inputevents_mv sample onto df2 (sampled patients merged with
# the inputevents_cv sample) on (SUBJECT_ID, HADM_ID, ICUSTAY_ID).
# The suffixes name the table each overlapping column comes from: the left
# operand is built from inputevents_cv, the right operand is inputevents_mv.
df3 = pd.merge(
    df2,
    inputevents_mv_sample_df.drop(['ROW_ID'], axis=1),
    on=['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID'],
    how='outer',
    suffixes=('_inputevents_cv', '_inputevents_mv'),
)

Index(['SUBJECT_ID', 'GENDER', 'DOB', 'DOD', 'DOD_HOSP', 'DOD_SSN',
       'EXPIRE_FLAG', 'HADM_ID', 'ICUSTAY_ID', 'ITEMID', 'CHARTTIME',
       'RESULTSTATUS', 'STOPPED', 'AMOUNT', 'AMOUNTUOM', 'RATE', 'RATEUOM',
       'ORDERID', 'LINKORDERID', 'NEWBOTTLE', 'ORIGINALAMOUNT',
       'ORIGINALAMOUNTUOM', 'ORIGINALROUTE', 'ORIGINALRATE', 'ORIGINALRATEUOM',
       'ORIGINALSITE'],
      dtype='object')