In [2]:
import pandas as pd
import os
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL

from google.cloud import bigquery
from google.oauth2 import service_account


# Connection settings are read from the environment so that no credentials
# are stored in the notebook itself.
PG_HOST = os.getenv('PG_HOST')
# os.getenv returns None when the variable is unset and int(None) raises a
# TypeError (an empty string raises ValueError). Fall back to PostgreSQL's
# default port, 5432, in both cases.
PG_PORT = int(os.getenv('PG_PORT') or 5432)
PG_DATABASE = os.getenv('PG_DATABASE')
PG_USER = os.getenv('PG_USER')
PG_PASSWORD = os.getenv('PG_PASSWORD')

# Build the connection URL from its parts; URL.create takes the password as a
# separate argument instead of it being spliced into a URL string by hand.
url = URL.create(
    drivername="postgresql",
    username=PG_USER,
    password=PG_PASSWORD,
    host=PG_HOST,
    port=PG_PORT,
    database=PG_DATABASE
)

# Engine shared by the PostgreSQL query cells below.
engine = create_engine(url)


In [3]:
from google.cloud import bigquery

# BigQuery client built with no explicit arguments (application default
# credentials, ADC).
client = bigquery.Client()

# Smoke-test the connection: fetch the datasets visible to the client's project.
project = client.project
datasets = list(client.list_datasets())

if not datasets:
    print(f"No datasets found in project {project}.")
else:
    print(f"Datasets in project {project}:")
    for dataset in datasets:
        print(f"\t{dataset.dataset_id}")


Datasets in project physionet-data-435019:
	mimiciii


In [13]:
# Fresh BigQuery client for this cell.
client = bigquery.Client()

# Submit the job: average VALUENUM per subject and item label, restricted to
# subjects with ICD9 code 99591 or 99592 and to ITEMIDs 211 and 220045.
query = client.query(
    """
SELECT C.SUBJECT_ID,ITEM.LABEL, AVG(C.VALUENUM) AS AVG_VALUE
FROM `physionet-data.mimiciii_clinical.chartevents` AS C
INNER JOIN `physionet-data.mimiciii_clinical.diagnoses_icd` AS D ON D.SUBJECT_ID = C.SUBJECT_ID
JOIN `physionet-data.mimiciii_clinical.d_items` AS ITEM ON ITEM.ITEMID = C.ITEMID
WHERE D.ICD9_CODE IN ('99591', '99592') AND (C.ITEMID  IN (211, 220045))
GROUP BY C.SUBJECT_ID, ITEM.LABEL
ORDER BY C.SUBJECT_ID ASC
"""
)

# Wait for the job to finish and materialise the rows as a DataFrame.
sepsis_admissions_df = query.to_dataframe()

sepsis_admissions_df.head()

Unnamed: 0,SUBJECT_ID,LABEL,AVG_VALUE
0,21,Heart Rate,76.756098
1,38,Heart Rate,88.505956
2,61,Heart Rate,92.366972
3,62,Heart Rate,52.927711
4,64,Heart Rate,95.223684


## Patient Query

In [20]:
# One row per distinct (patient, admission) combination for admissions that
# carry ICD9 code 99591 or 99592, with the age at admission in whole years.
query = text("""SELECT DISTINCT
    p.SUBJECT_ID,
    p.GENDER,
    p.DOB,
    p.DOD,
    p.EXPIRE_FLAG,
    a.HADM_ID,
    a.ADMITTIME,
    a.DISCHTIME,
    a.ADMISSION_TYPE,
    EXTRACT(YEAR FROM AGE(a.ADMITTIME, p.DOB)) AS AGE_AT_ADMISSION
FROM
    mimiciii.patients p
JOIN
    mimiciii.diagnoses_icd d
    ON p.SUBJECT_ID = d.SUBJECT_ID
JOIN
    mimiciii.admissions a
    ON d.HADM_ID = a.HADM_ID
WHERE
    d.ICD9_CODE IN ('99591', '99592');
""")

# The context manager closes the connection once the rows have been read.
with engine.connect() as connection:
    sepsisPatient_df = pd.read_sql_query(sql=query, con=connection)

# Preview the result.
sepsisPatient_df.head()

Unnamed: 0,subject_id,gender,dob,dod,expire_flag,hadm_id,admittime,dischtime,admission_type,age_at_admission
0,88081,F,2063-11-20,2138-10-29,1,113344,2138-09-12 20:44:00,2138-10-29 19:50:00,EMERGENCY,74.0
1,17112,F,2027-08-04,2105-05-17,1,121197,2105-01-10 12:34:00,2105-01-16 16:50:00,EMERGENCY,77.0
2,48453,M,2143-06-15,2201-01-11,1,183977,2200-11-15 22:15:00,2200-12-22 16:22:00,EMERGENCY,57.0
3,18471,M,2099-08-14,2175-07-23,1,150976,2175-07-14 02:36:00,2175-07-23 02:30:00,EMERGENCY,75.0
4,20385,F,2085-12-02,NaT,0,177271,2167-03-28 05:11:00,2167-04-10 16:15:00,EMERGENCY,81.0


## Heart Rate Query

In [16]:
# ITEMIDs used to pick heart-rate rows out of chartevents.
# NOTE(review): the original comment called these "example" IDs - confirm they
# are the complete set wanted for this analysis.
HEART_RATE_ITEMIDS = (211, 220045)

# Render the IDs as a plain comma-separated list. Interpolating the tuple
# itself only produces valid SQL for two or more elements: a one-element tuple
# renders as "(211,)" and an empty one as "()". int() also rejects anything
# that is not a number before it can reach the SQL text.
if not HEART_RATE_ITEMIDS:
    raise ValueError("HEART_RATE_ITEMIDS must contain at least one ITEMID")
heart_rate_itemid_list = ", ".join(str(int(itemid)) for itemid in HEART_RATE_ITEMIDS)

# Per-admission heart-rate summary for sepsis admissions:
#   sepsis_patients          distinct patient/admission rows with ICD9 99591/99592
#   heart_rate_measurements  every charted heart-rate row; VALUE is kept only
#                            when it is a plain integer string, otherwise NULL
#   aggregated_heart_rate    mean, number of usable values and number of
#                            unusable (NULL or non-numeric) values per HADM_ID
# Previously NULL and non-numeric rows were filtered out before aggregation,
# so missing_values could never be anything but 0. mean_heart_rate and
# measured_values are computed from exactly the same rows as before.
comprehensive_query = text(f"""
WITH sepsis_patients AS (
    SELECT DISTINCT
        p.SUBJECT_ID,
        p.GENDER,
        p.DOB,
        p.DOD,
        p.EXPIRE_FLAG,
        a.HADM_ID,
        a.ADMITTIME,
        a.DISCHTIME,
        a.ADMISSION_TYPE,
        EXTRACT(YEAR FROM AGE(a.ADMITTIME, p.DOB)) AS AGE_AT_ADMISSION
    FROM
        mimiciii.patients p
    JOIN
        mimiciii.diagnoses_icd d
        ON p.SUBJECT_ID = d.SUBJECT_ID
    JOIN
        mimiciii.admissions a
        ON d.HADM_ID = a.HADM_ID
    WHERE
        d.ICD9_CODE IN ('99591', '99592')
),
heart_rate_measurements AS (
    SELECT
        c.HADM_ID,
        -- NULL unless VALUE is a plain integer string
        CASE WHEN c.VALUE ~ '^[0-9]+$' THEN c.VALUE END AS heart_rate
    FROM
        mimiciii.chartevents c
    WHERE
        c.ITEMID IN ({heart_rate_itemid_list})
),
aggregated_heart_rate AS (
    SELECT
        hr.HADM_ID,
        AVG(CAST(hr.heart_rate AS FLOAT)) AS mean_heart_rate,
        COUNT(hr.heart_rate) AS measured_values,
        COUNT(*) - COUNT(hr.heart_rate) AS missing_values
    FROM
        heart_rate_measurements hr
    GROUP BY
        hr.HADM_ID
)
SELECT
    sp.subject_id,
    ah.mean_heart_rate,
    ah.measured_values,
    ah.missing_values
FROM
    sepsis_patients sp
LEFT JOIN
    aggregated_heart_rate ah
    ON sp.HADM_ID = ah.HADM_ID
ORDER BY
    sp.SUBJECT_ID,
    sp.ADMITTIME
""")


# Run the query; the context manager closes the connection afterwards.
# The LEFT JOIN keeps sepsis admissions that have no heart-rate rows at all;
# their three summary columns come back as NaN.
with engine.connect() as connection:
    sepsis_with_hr_df = pd.read_sql_query(comprehensive_query, connection)

# Display the first few rows of the DataFrame
print("Sepsis Patients with Heart Rate Data:")
sepsis_with_hr_df.head()


Sepsis Patients with Heart Rate Data:


Unnamed: 0,subject_id,mean_heart_rate,measured_values,missing_values
0,21,76.867841,227.0,0.0
1,38,88.488226,637.0,0.0
2,61,89.208333,48.0,0.0
3,62,52.927711,83.0,0.0
4,64,95.223684,76.0,0.0


In [19]:
# Mean charted heart rate (VALUENUM for ITEMIDs 211 and 220045) per patient
# who has ICD9 code 99591 or 99592, with exactly one row per SUBJECT_ID.
#
# Changes from the earlier form of this query:
#   * Grouping is by SUBJECT_ID only. The old GROUP BY also included
#     ITEM.LABEL, which was not selected, so a subject whose two ITEMIDs had
#     different labels would have produced two indistinguishable rows and
#     duplicated that patient in the left merge on subject_id further down.
#     The d_items join existed only to supply that label and is dropped.
#   * The sepsis filter is an EXISTS test rather than a join on SUBJECT_ID,
#     so chart rows are no longer repeated once per matching diagnosis row.
#     The repetition was uniform per subject, so the averages are unaffected.
#   * The string is no longer an f-string; it had no placeholders.
heart_rate_query = text("""
SELECT C.SUBJECT_ID, AVG(C.VALUENUM) AS HEARTRATE_MEAN
FROM mimiciii.chartevents AS C
WHERE C.ITEMID IN (211, 220045)
  AND EXISTS (
      SELECT 1
      FROM mimiciii.diagnoses_icd AS D
      WHERE D.SUBJECT_ID = C.SUBJECT_ID
        AND D.ICD9_CODE IN ('99591', '99592')
  )
GROUP BY C.SUBJECT_ID
ORDER BY C.SUBJECT_ID ASC
""")

# The context manager closes the connection once the rows have been read.
with engine.connect() as connection:
    patient_heart_rate = pd.read_sql_query(heart_rate_query, connection)

patient_heart_rate.head()

Unnamed: 0,subject_id,heartrate_mean
0,21,76.756098
1,38,88.505956
2,61,92.366972
3,62,52.927711
4,64,95.223684


In [21]:
# Attach each patient's mean heart rate to the admission rows. A left merge
# keeps every row of sepsisPatient_df; rows without a match get NaN.
merge = sepsisPatient_df.merge(patient_heart_rate, how='left', on='subject_id')

merge.head()

Unnamed: 0,subject_id,gender,dob,dod,expire_flag,hadm_id,admittime,dischtime,admission_type,age_at_admission,heartrate_mean
0,88081,F,2063-11-20,2138-10-29,1,113344,2138-09-12 20:44:00,2138-10-29 19:50:00,EMERGENCY,74.0,101.286408
1,17112,F,2027-08-04,2105-05-17,1,121197,2105-01-10 12:34:00,2105-01-16 16:50:00,EMERGENCY,77.0,90.204004
2,48453,M,2143-06-15,2201-01-11,1,183977,2200-11-15 22:15:00,2200-12-22 16:22:00,EMERGENCY,57.0,102.121698
3,18471,M,2099-08-14,2175-07-23,1,150976,2175-07-14 02:36:00,2175-07-23 02:30:00,EMERGENCY,75.0,84.683168
4,20385,F,2085-12-02,NaT,0,177271,2167-03-28 05:11:00,2167-04-10 16:15:00,EMERGENCY,81.0,75.5
