In [2]:
from google.cloud import bigquery
from google.oauth2 import service_account

# Path to your service account key file
SERVICE_ACCOUNT_FILE = "mimic-sergi.json"
PROJECT_ID = "ogi-uci-i61"

# Create credentials and client
credentials = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE)

client = bigquery.Client(credentials=credentials, project=PROJECT_ID)

# Example query
query = "SELECT 'Hello, World!' AS greeting"
query_job = client.query(query)

# Fetch results
results = query_job.result()

for row in results:
    print(row.greeting)

Hello, World!


In [3]:
## Export ICU ICH Cohort from MIMIC-IV to CSV

## SQL query
query = """
WITH ich_admissions AS (
  SELECT DISTINCT hadm_id
  FROM `ogi-uci-i61.mimiciv_hosp.diagnoses_icd` i
  WHERE (i.icd_version = 9 AND i.icd_code = '431')
     OR (i.icd_version = 10 AND i.icd_code IN ('I610','I611','I612','I613','I614','I615','I616','I618','I619'))
),

chartevents_avg AS (
  SELECT
    c.stay_id,
    AVG(CASE WHEN itemid = 220045 AND valuenum > 0 THEN valuenum END) AS heart_rate_avg,
    AVG(CASE WHEN itemid = 220052 AND valuenum BETWEEN 30 AND 200 THEN valuenum END) AS mbp_avg,
    AVG(CASE WHEN itemid = 220210 AND valuenum BETWEEN 5 AND 60 THEN valuenum END) AS resp_rate_avg,
    AVG(CASE WHEN itemid = 220277 AND valuenum BETWEEN 70 AND 100 THEN valuenum END) AS spo2_avg,
    AVG(CASE WHEN itemid = 220602 AND valuenum IS NOT NULL THEN valuenum END) AS chloride_avg,
    AVG(CASE WHEN itemid = 220615 AND valuenum IS NOT NULL THEN valuenum END) AS creatinine_avg,
    AVG(CASE WHEN itemid = 220645 AND valuenum IS NOT NULL THEN valuenum END) AS sodium_avg
  FROM `ogi-uci-i61.mimiciv_icu.chartevents` c
  JOIN `ogi-uci-i61.mimiciv_icu.icustays` i ON c.stay_id = i.stay_id
  WHERE c.itemid IN (220045, 220052, 220210, 220277, 220602, 220615, 220645)
    AND c.valuenum IS NOT NULL
    AND c.charttime BETWEEN i.intime AND i.outtime
  GROUP BY c.stay_id
),

cbc_labs AS (
  SELECT
    subject_id,
    hadm_id,
    AVG(wbc) AS wbc_avg,
    MIN(hemoglobin) AS hgb_min
  FROM `ogi-uci-i61.mimiciv_derived.complete_blood_count`
  GROUP BY subject_id, hadm_id
),

chem_labs AS (
  SELECT
    subject_id,
    hadm_id,
    AVG(glucose) AS glucose_avg
  FROM `ogi-uci-i61.mimiciv_derived.chemistry`
  GROUP BY subject_id, hadm_id
),

coag_labs AS (
  SELECT
    subject_id,
    hadm_id,
    MAX(inr) AS inr_max,
    MAX(pt) AS pt_max
  FROM `ogi-uci-i61.mimiciv_derived.coagulation`
  GROUP BY subject_id, hadm_id
),

labevents AS (
  SELECT
    subject_id,
    hadm_id,
    AVG(CASE WHEN itemid = 51249 THEN valuenum END) AS mchc_avg,
    AVG(CASE WHEN itemid = 51254 THEN valuenum END) AS monocytes_avg,
    AVG(CASE WHEN itemid = 51256 THEN valuenum END) AS neutrophils_avg,
    AVG(CASE WHEN itemid = 50861 THEN valuenum END) AS alt_avg
  FROM `ogi-uci-i61.mimiciv_hosp.labevents`
  WHERE itemid IN (51249, 51254, 51256) AND valuenum IS NOT NULL
  GROUP BY subject_id, hadm_id
)

SELECT
  a.subject_id,
  a.hadm_id,
  a.stay_id,
  p.gender,
  p.anchor_age AS age,
  a.intime,
  a.outtime,
  a.first_careunit,
  TIMESTAMP_DIFF(a.outtime, a.intime, HOUR) AS icu_los_hours,
  TIMESTAMP_DIFF(adm.dischtime, adm.admittime, HOUR) AS hosp_los_hours,
  adm.discharge_location,
  adm.hospital_expire_flag,

  STRING_AGG(CASE WHEN LOWER(d.long_title) LIKE '%intracerebral hemorrhage%' THEN i.icd_code END, '; ') AS ich_icd_codes,
  STRING_AGG(CASE
               WHEN LOWER(d.long_title) LIKE '%intracerebral hemorrhage%' THEN CONCAT(i.icd_code, ': ', d.long_title)
               ELSE i.icd_code
             END, '; ') AS all_diagnoses,

  s.gcs_verbal,
  s.gcs_motor,
  s.gcs_eyes,

  ce.heart_rate_avg,
  ce.mbp_avg,
  ce.resp_rate_avg,
  ce.spo2_avg,
  ce.chloride_avg,
  ce.creatinine_avg,
  ce.sodium_avg,

  charlson.charlson_comorbidity_index,
  apsi.apsiii,
  cbc.wbc_avg,
  cbc.hgb_min,
  chem.glucose_avg,
  coag.inr_max,
  coag.pt_max,
  lab.mchc_avg,
  lab.monocytes_avg,
  lab.neutrophils_avg,
  lab.alt_avg,

  CASE WHEN hyper.hadm_id IS NOT NULL THEN 1 ELSE 0 END AS has_hypertension,
  CASE WHEN hydro.hadm_id IS NOT NULL THEN 1 ELSE 0 END AS has_hydrocephalus,
  CASE WHEN cad.hadm_id IS NOT NULL THEN 1 ELSE 0 END AS has_cad,
  CASE WHEN anticoag.subject_id IS NOT NULL THEN 1 ELSE 0 END AS has_anticoagulation,
  CASE WHEN neuroproc.hadm_id IS NOT NULL THEN 1 ELSE 0 END AS had_neurosurgery,
  CASE WHEN addmeds.subject_id IS NOT NULL THEN 1 ELSE 0 END AS on_statins_or_antiplatelets,

  readmit.readmitted_stay

FROM `ogi-uci-i61.mimiciv_icu.icustays` a
JOIN `ogi-uci-i61.mimiciv_hosp.patients` p ON a.subject_id = p.subject_id
JOIN `ogi-uci-i61.mimiciv_hosp.admissions` adm ON a.hadm_id = adm.hadm_id
JOIN ich_admissions ich ON a.hadm_id = ich.hadm_id

LEFT JOIN `ogi-uci-i61.mimiciv_hosp.diagnoses_icd` i ON a.hadm_id = i.hadm_id
LEFT JOIN `ogi-uci-i61.mimiciv_hosp.d_icd_diagnoses` d ON i.icd_code = d.icd_code AND i.icd_version = d.icd_version

LEFT JOIN (
  SELECT
    stay_id,
    gcs_verbal,
    gcs_motor,
    gcs_eyes,
    ROW_NUMBER() OVER (PARTITION BY stay_id ORDER BY charttime) AS rn
  FROM `ogi-uci-i61.mimiciv_derived.gcs`
) s ON a.stay_id = s.stay_id AND s.rn = 1

LEFT JOIN chartevents_avg ce ON a.stay_id = ce.stay_id

LEFT JOIN (
  SELECT
    a1.subject_id,
    a1.stay_id,
    MIN(a2.stay_id) AS readmitted_stay
  FROM `ogi-uci-i61.mimiciv_icu.icustays` a1
  JOIN `ogi-uci-i61.mimiciv_icu.icustays` a2
    ON a1.subject_id = a2.subject_id AND a2.intime > a1.outtime
  GROUP BY a1.subject_id, a1.stay_id
) readmit ON a.subject_id = readmit.subject_id AND a.stay_id = readmit.stay_id

LEFT JOIN `ogi-uci-i61.mimiciv_derived.charlson` charlson ON a.hadm_id = charlson.hadm_id
LEFT JOIN `ogi-uci-i61.mimiciv_derived.apsiii` apsi ON a.stay_id = apsi.stay_id
LEFT JOIN cbc_labs cbc ON a.subject_id = cbc.subject_id AND a.hadm_id = cbc.hadm_id
LEFT JOIN chem_labs chem ON a.subject_id = chem.subject_id AND a.hadm_id = chem.hadm_id
LEFT JOIN coag_labs coag ON a.subject_id = coag.subject_id AND a.hadm_id = coag.hadm_id
LEFT JOIN labevents lab ON a.subject_id = lab.subject_id AND a.hadm_id = lab.hadm_id

LEFT JOIN (
  SELECT DISTINCT hadm_id
  FROM `ogi-uci-i61.mimiciv_hosp.diagnoses_icd`
  WHERE icd_code LIKE '401%' OR icd_code LIKE 'I10%'
) hyper ON a.hadm_id = hyper.hadm_id

LEFT JOIN (
  SELECT DISTINCT hadm_id
  FROM `ogi-uci-i61.mimiciv_hosp.diagnoses_icd`
  WHERE icd_code LIKE '331.3' OR icd_code LIKE 'G91%'
) hydro ON a.hadm_id = hydro.hadm_id

LEFT JOIN (
  SELECT DISTINCT hadm_id
  FROM `ogi-uci-i61.mimiciv_hosp.diagnoses_icd`
  WHERE icd_code LIKE '414%' OR icd_code LIKE 'I25%'
) cad ON a.hadm_id = cad.hadm_id

LEFT JOIN (
  SELECT DISTINCT subject_id
  FROM `ogi-uci-i61.mimiciv_hosp.prescriptions`
  WHERE REGEXP_CONTAINS(LOWER(drug), r'(warfarin|heparin|apixaban|rivaroxaban|dabigatran)')
    AND drug_type = 'MAIN'
) anticoag ON a.subject_id = anticoag.subject_id

LEFT JOIN (
  SELECT DISTINCT subject_id
  FROM `ogi-uci-i61.mimiciv_hosp.prescriptions`
  WHERE REGEXP_CONTAINS(LOWER(drug), r'(aspirin|clopidogrel|atorvastatin|rosuvastatin)')
    AND drug_type = 'MAIN'
) addmeds ON a.subject_id = addmeds.subject_id

LEFT JOIN (
  SELECT DISTINCT hadm_id
  FROM `ogi-uci-i61.mimiciv_hosp.procedures_icd`
  WHERE icd_code LIKE '01%' OR icd_code LIKE '02%'
) neuroproc ON a.hadm_id = neuroproc.hadm_id

GROUP BY
  a.subject_id, a.hadm_id, a.stay_id, a.intime, a.outtime, a.first_careunit,
  p.gender, p.anchor_age,
  adm.admittime, adm.dischtime, adm.discharge_location, adm.hospital_expire_flag,
  s.gcs_verbal, s.gcs_motor, s.gcs_eyes,
  ce.heart_rate_avg, ce.mbp_avg, ce.resp_rate_avg, ce.spo2_avg, ce.chloride_avg, ce.creatinine_avg, ce.sodium_avg,
  charlson.charlson_comorbidity_index,
  apsi.apsiii,
  cbc.wbc_avg, cbc.hgb_min,
  chem.glucose_avg,
  coag.inr_max, coag.pt_max,
  lab.mchc_avg, lab.monocytes_avg, lab.neutrophils_avg, lab.alt_avg,
  hyper.hadm_id, hydro.hadm_id, cad.hadm_id,
  anticoag.subject_id, addmeds.subject_id, neuroproc.hadm_id,
  readmit.readmitted_stay
"""

# Execute Query and Export to CSV
df = client.query(query).to_dataframe()
df.to_csv("data/mimiciv_ich_readmission_raw.csv", index=False)
print("✅ Exported to mimiciv_ich_readmission_raw.csv")

df

✅ Exported to mimiciv_ich_readmission_raw.csv


Unnamed: 0,subject_id,hadm_id,stay_id,gender,age,intime,outtime,first_careunit,icu_los_hours,hosp_los_hours,...,monocytes_avg,neutrophils_avg,alt_avg,has_hypertension,has_hydrocephalus,has_cad,has_anticoagulation,had_neurosurgery,on_statins_or_antiplatelets,readmitted_stay
0,14389026,20511782,38744411,F,68,2132-04-23 19:10:00,2132-05-01 17:27:59,Neuro Intermediate,190,356,...,,,,1,0,0,1,0,1,
1,11879886,23534220,31864759,F,85,2173-05-07 18:18:00,2173-05-11 16:16:04,Trauma SICU (TSICU),94,120,...,,,,0,0,0,1,0,1,
2,14233915,20865902,32876000,M,88,2171-08-28 03:56:14,2171-08-28 13:18:06,Cardiac Vascular Intensive Care Unit (CVICU),10,110,...,4.1,75.65,,0,0,1,1,1,1,
3,19105785,28676166,35379461,M,71,2183-04-17 17:17:19,2183-04-20 19:45:42,Neuro Stepdown,74,199,...,7.6,61.35,,1,0,0,1,0,1,
4,14321092,23715132,36291653,M,65,2139-01-11 18:54:26,2139-02-05 22:51:09,Neuro Intermediate,604,1580,...,9.4,72.95,,1,1,0,1,1,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3616,10850590,24689143,33808014,F,45,2156-10-01 11:50:33,2156-10-12 18:37:04,Neuro Intermediate,271,417,...,,,,1,1,0,1,0,0,
3617,18141825,27499520,36971491,M,69,2116-11-04 03:11:00,2116-11-07 14:12:08,Neuro Intermediate,83,136,...,4.1,84.80,,1,0,0,1,0,0,
3618,12179057,23530529,37208670,F,82,2139-10-19 03:11:05,2139-10-20 07:00:51,Trauma SICU (TSICU),28,228,...,7.4,81.75,,1,0,1,1,0,1,32028609
3619,14047315,25184157,34170428,F,53,2161-05-15 17:44:02,2161-05-19 15:08:26,Surgical Intensive Care Unit (SICU),94,1025,...,5.3,86.15,,0,1,0,1,1,1,
