In [44]:
%load_ext google.cloud.bigquery

The google.cloud.bigquery extension is already loaded. To reload it, use:
  %reload_ext google.cloud.bigquery


In [45]:
import os

import matplotlib.pyplot as plt
import pandas as pd
from google.cloud import bigquery

In [46]:
# BigQuery client shared by the query cells below. No arguments are passed,
# so project and credentials are presumably taken from the environment —
# TODO confirm for your setup.
client = bigquery.Client()

In [47]:
# Build the SQL query prefix for one MIMIC_III table

def create_query_scripts(table_name):
    """Return the SELECT prefix for one MIMIC_III table.

    The returned text ends with ``" WHERE SUBJECT_ID = "``; the caller appends
    the subject ID to obtain a complete query.
    """
    select_from = "SELECT * FROM my-msc-project-345300.MIMIC_III."
    subject_filter = " WHERE SUBJECT_ID = "
    return select_from + table_name + subject_filter

In [48]:
# Query data from Google BigQuery

def select_data(subject_ids, query, bq_client=None):
    """Run ``query`` once per subject ID and stack the results row-wise.

    Args:
        subject_ids: Iterable of subject identifiers; each one is converted
            with ``str()`` and appended to ``query``.
        query: SQL text that ends where the subject ID belongs (for example
            the prefix returned by ``create_query_scripts``).
        bq_client: Optional object exposing
            ``query(sql).result().to_dataframe()``. Defaults to the
            notebook-level ``client``.

    Returns:
        pandas.DataFrame: every non-empty per-subject result concatenated
        along the rows, with the per-result indexes kept as they are. If all
        results are empty, a copy of the last result is returned; if
        ``subject_ids`` is empty, an empty ``DataFrame`` is returned.
    """
    active_client = client if bq_client is None else bq_client

    # Collect the per-subject frames first and concatenate once; growing a
    # DataFrame with pd.concat inside the loop re-copies all earlier rows
    # on every iteration.
    frames = [
        active_client.query(query + str(subject_id)).result().to_dataframe()
        for subject_id in subject_ids
    ]

    non_empty = [frame for frame in frames if not frame.empty]
    if non_empty:
        return pd.concat(non_empty, axis=0)
    if frames:
        # Every query came back empty: hand back the last result.
        return frames[-1].copy()
    # pd.concat raises on an empty list, so handle "no subjects" explicitly.
    return pd.DataFrame()

## Select patient sample

In [49]:
# Draw a reproducible pseudo-random sample of patients. ORDER BY RAND() gave
# a different sample on every run; ordering by a hash of the salted
# SUBJECT_ID returns the same rows each time as long as the table itself is
# unchanged. Change SAMPLE_SALT to draw a different sample.
SAMPLE_SIZE = 1000
SAMPLE_SALT = "patients-sample-1"
query_get_random_patients = (
    # Backticks quote the full table path, whose project ID contains dashes.
    "SELECT * FROM `my-msc-project-345300.MIMIC_III.patients` "
    f"ORDER BY FARM_FINGERPRINT(CONCAT(CAST(SUBJECT_ID AS STRING), '{SAMPLE_SALT}')) "
    f"LIMIT {SAMPLE_SIZE};"
)
random_patients_df = client.query(query_get_random_patients).result().to_dataframe()

random_patients_df.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,GENDER,DOB,DOD,DOD_HOSP,DOD_SSN,EXPIRE_FLAG
0,1201,1265,M,2140-07-31,2192-04-08,NaT,2192-04-08,1
1,17722,18756,F,1840-12-01,NaT,NaT,NaT,0
2,13435,14206,F,2101-05-30,NaT,NaT,NaT,0
3,41348,80222,F,2080-02-11,NaT,NaT,NaT,0
4,21368,22639,M,2058-08-17,NaT,NaT,NaT,0


In [50]:
# Create the output directory first so this cell also works on a fresh
# checkout where ./temp_sets does not exist yet.
os.makedirs("./temp_sets", exist_ok=True)
random_patients_df.to_csv("./temp_sets/patients.csv")

In [51]:
# Take the SUBJECT_ID column of the sampled patients; these IDs are passed to
# every select_data call below. The bare name on the last line displays it.
patients_subject_ids = random_patients_df['SUBJECT_ID']
patients_subject_ids

0       1265
1      18756
2      14206
3      80222
4      22639
       ...  
995     4602
996    21490
997    27982
998    54600
999    25982
Name: SUBJECT_ID, Length: 1000, dtype: Int64

### Patients and transfer information

In [None]:
# Pull admissions rows for the sampled patients (one query per SUBJECT_ID).
admissions_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='admissions'),
)

In [None]:
# Save the admissions sample as CSV under ./temp_sets/.
admissions_sample_df.to_csv("./temp_sets/admissions.csv")

In [None]:
# Pull callout rows for the sampled patients (one query per SUBJECT_ID).
callout_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='callout'),
)

In [None]:
# Save the callout sample as CSV under ./temp_sets/.
callout_sample_df.to_csv("./temp_sets/callout.csv")

In [None]:
# Pull icustays rows for the sampled patients (one query per SUBJECT_ID).
icustays_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='icustays'),
)

In [None]:
# Save the icustays sample as CSV under ./temp_sets/.
icustays_sample_df.to_csv("./temp_sets/icustays.csv")

In [None]:
# Pull services rows for the sampled patients (one query per SUBJECT_ID).
services_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='services'),
)

In [None]:
# Save the services sample as CSV under ./temp_sets/.
services_sample_df.to_csv("./temp_sets/services.csv")

In [None]:
# Pull transfers rows for the sampled patients (one query per SUBJECT_ID).
transfers_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='transfers'),
)

In [None]:
# Save the transfers sample as CSV under ./temp_sets/.
transfers_sample_df.to_csv("./temp_sets/transfers.csv")

### ICU Information

In [None]:
# NOTE(review): placeholder — no caregivers query is run in the visible cells
# and none of them reads this name. Confirm whether caregivers data is needed.
caregivers_sample_df = None

In [None]:
# Pull chartevents rows for the sampled patients (one query per SUBJECT_ID).
# The argument is the BigQuery table name, so it must not carry the
# ./temp_sets/ output-path prefix.
chartevents_sample_df = select_data(patients_subject_ids, create_query_scripts('chartevents'))

In [None]:
# Save the chartevents sample as CSV under ./temp_sets/.
chartevents_sample_df.to_csv("./temp_sets/chartevents.csv")

In [None]:
# Pull datetimeevents rows for the sampled patients (one query per SUBJECT_ID).
datetimeevents_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='datetimeevents'),
)

In [None]:
# Save the datetimeevents sample as CSV.
# NOTE(review): written to the working directory, while earlier samples go
# to ./temp_sets/ — confirm which location is intended.
datetimeevents_sample_df.to_csv("datetimeevents.csv")

In [None]:
# Pull inputevents_cv rows for the sampled patients (one query per SUBJECT_ID).
inputevents_cv_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='inputevents_cv'),
)

In [None]:
# Save the inputevents_cv sample as CSV.
# NOTE(review): written to the working directory, while earlier samples go
# to ./temp_sets/ — confirm which location is intended.
inputevents_cv_sample_df.to_csv("inputevents_cv.csv")

In [None]:
# Pull inputevents_mv rows for the sampled patients (one query per SUBJECT_ID).
inputevents_mv_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='inputevents_mv'),
)

In [None]:
# Save the inputevents_mv sample as CSV.
# NOTE(review): written to the working directory, while earlier samples go
# to ./temp_sets/ — confirm which location is intended.
inputevents_mv_sample_df.to_csv("inputevents_mv.csv")

In [None]:
# Pull outputevents rows for the sampled patients (one query per SUBJECT_ID).
outputevents_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='outputevents'),
)

In [None]:
# Save the outputevents sample as CSV.
# NOTE(review): written to the working directory, while earlier samples go
# to ./temp_sets/ — confirm which location is intended.
outputevents_sample_df.to_csv("outputevents.csv")

In [None]:
# Pull procedureevents_mv rows for the sampled patients (one query per SUBJECT_ID).
procedureevents_mv_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='procedureevents_mv'),
)

In [None]:
# Save the procedureevents_mv sample as CSV.
# NOTE(review): written to the working directory, while earlier samples go
# to ./temp_sets/ — confirm which location is intended.
procedureevents_mv_sample_df.to_csv("procedureevents_mv.csv")

### Hospital records

In [None]:
# Pull cptevents rows for the sampled patients (one query per SUBJECT_ID).
cptevents_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='cptevents'),
)

In [None]:
# Save the cptevents sample as CSV.
# NOTE(review): written to the working directory, while earlier samples go
# to ./temp_sets/ — confirm which location is intended.
cptevents_sample_df.to_csv("cptevents.csv")

In [None]:
# Pull diagnoses_icd rows for the sampled patients (one query per SUBJECT_ID).
diagnoses_icd_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='diagnoses_icd'),
)

In [None]:
# Save the diagnoses_icd sample as CSV.
# NOTE(review): written to the working directory, while earlier samples go
# to ./temp_sets/ — confirm which location is intended.
diagnoses_icd_sample_df.to_csv("diagnoses_icd.csv")

In [None]:
# Pull drgcodes rows for the sampled patients (one query per SUBJECT_ID).
drgcodes_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='drgcodes'),
)

In [None]:
# Save the drgcodes sample as CSV.
# NOTE(review): written to the working directory, while earlier samples go
# to ./temp_sets/ — confirm which location is intended.
drgcodes_sample_df.to_csv("drgcodes.csv")

In [None]:
# Pull labevents rows for the sampled patients (one query per SUBJECT_ID).
labevents_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='labevents'),
)

In [None]:
# Save the labevents sample as CSV.
# NOTE(review): written to the working directory, while earlier samples go
# to ./temp_sets/ — confirm which location is intended.
labevents_sample_df.to_csv("labevents.csv")

In [None]:
# Pull microbiologyevents rows for the sampled patients (one query per SUBJECT_ID).
microbiologyevents_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='microbiologyevents'),
)

In [None]:
# Save the microbiologyevents sample as CSV.
# NOTE(review): written to the working directory, while earlier samples go
# to ./temp_sets/ — confirm which location is intended.
microbiologyevents_sample_df.to_csv("microbiologyevents.csv")

In [None]:
# Pull prescriptions rows for the sampled patients (one query per SUBJECT_ID).
prescriptions_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='prescriptions'),
)

In [None]:
# Save the prescriptions sample as CSV.
# NOTE(review): written to the working directory, while earlier samples go
# to ./temp_sets/ — confirm which location is intended.
prescriptions_sample_df.to_csv("prescriptions.csv")

In [None]:
# Pull procedures_icd rows for the sampled patients (one query per SUBJECT_ID).
procedures_icd_sample_df = select_data(
    subject_ids=patients_subject_ids,
    query=create_query_scripts(table_name='procedures_icd'),
)

In [None]:
# Save the procedures_icd sample as CSV.
# NOTE(review): written to the working directory, while earlier samples go
# to ./temp_sets/ — confirm which location is intended.
procedures_icd_sample_df.to_csv("procedures_icd.csv")

## Combine samples

In [None]:
# Start the combined table: sampled patients outer-joined with their
# admissions on SUBJECT_ID. ROW_ID is dropped from both sides first.
patients_df = random_patients_df.drop(columns='ROW_ID').merge(
    admissions_sample_df.drop(columns='ROW_ID'),
    how='outer',
    on=['SUBJECT_ID'],
)

In [None]:
# Add the callout columns, matching on subject and hospital admission.
# patients_df is reassigned from itself, so re-running this cell on its own
# merges callout a second time.
patients_df = patients_df.merge(
    callout_sample_df.drop(columns='ROW_ID'),
    how='outer',
    on=['SUBJECT_ID', 'HADM_ID'],
)

In [None]:
# Add the icustays columns, matching on subject and hospital admission.
# patients_df is reassigned from itself, so re-running this cell on its own
# merges icustays a second time.
patients_df = patients_df.merge(
    icustays_sample_df.drop(columns='ROW_ID'),
    how='outer',
    on=['SUBJECT_ID', 'HADM_ID'],
)

In [None]:
# Add the services columns, matching on subject and hospital admission.
# patients_df is reassigned from itself, so re-running this cell on its own
# merges services a second time.
patients_df = patients_df.merge(
    services_sample_df.drop(columns='ROW_ID'),
    how='outer',
    on=['SUBJECT_ID', 'HADM_ID'],
)

In [None]:
# NOTE(review): icustays has already been merged into patients_df by an
# earlier cell. Assuming the MIMIC-III ICUSTAYS schema, only the join keys
# (SUBJECT_ID, HADM_ID) remain after this drop — TODO confirm. Merging that
# frame as-is repeats every patients_df row once per ICU stay of the same
# admission, so duplicate rows are removed from the right-hand side first.
# Confirm whether this second icustays merge is needed at all.
patients_df = pd.merge(
    patients_df,
    icustays_sample_df.drop(
        ['ROW_ID', 'ICUSTAY_ID', 'DBSOURCE', 'FIRST_CAREUNIT', 'LAST_CAREUNIT',
         'FIRST_WARDID', 'LAST_WARDID', 'INTIME', 'OUTTIME', 'LOS'],
        axis=1,
    ).drop_duplicates(),
    on=['SUBJECT_ID', 'HADM_ID'],
    how='outer',
)

In [None]:
# Add the transfers columns that survive the drop below, matching on subject
# and hospital admission. patients_df is reassigned from itself, so
# re-running this cell on its own merges transfers a second time.
patients_df = patients_df.merge(
    transfers_sample_df.drop(
        columns=[
            'ROW_ID',
            'ICUSTAY_ID',
            'DBSOURCE',
            'CURR_CAREUNIT',
            'CURR_WARDID',
            'INTIME',
            'OUTTIME',
            'LOS',
        ]
    ),
    how='outer',
    on=['SUBJECT_ID', 'HADM_ID'],
)

In [None]:
# Save the combined patient table as CSV.
# NOTE(review): written to the working directory, while the first samples go
# to ./temp_sets/ — confirm which location is intended.
patients_df.to_csv("patients_info.csv")

---

In [None]:
# Sampled patients outer-joined with their datetimeevents rows on SUBJECT_ID.
df1 = random_patients_df.drop(columns='ROW_ID').merge(
    datetimeevents_sample_df.drop(columns='ROW_ID'),
    how='outer',
    on=['SUBJECT_ID'],
)

In [None]:
# Sampled patients outer-joined with their inputevents_cv rows on SUBJECT_ID.
df2 = random_patients_df.drop(columns='ROW_ID').merge(
    inputevents_cv_sample_df.drop(columns='ROW_ID'),
    how='outer',
    on=['SUBJECT_ID'],
)

In [None]:
# Sampled patients outer-joined with their inputevents_mv rows on SUBJECT_ID.
df3 = random_patients_df.drop(columns='ROW_ID').merge(
    inputevents_mv_sample_df.drop(columns='ROW_ID'),
    how='outer',
    on=['SUBJECT_ID'],
)

In [None]:
# Sampled patients outer-joined with their outputevents rows on SUBJECT_ID.
df4 = random_patients_df.drop(columns='ROW_ID').merge(
    outputevents_sample_df.drop(columns='ROW_ID'),
    how='outer',
    on=['SUBJECT_ID'],
)

In [None]:
# Sampled patients outer-joined with their procedureevents_mv rows on SUBJECT_ID.
df5 = random_patients_df.drop(columns='ROW_ID').merge(
    procedureevents_mv_sample_df.drop(columns='ROW_ID'),
    how='outer',
    on=['SUBJECT_ID'],
)

---

## Build Bayesian Network