<a href="https://colab.research.google.com/github/DEP04929/ESICMDatathon2026/blob/main/Weaning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Prerequisites for AmsterdamUMCdb


In [None]:
# Google Cloud project id -- replace the value with *your* own project.
# It is exported as GOOGLE_CLOUD_PROJECT and passed to the %%bigquery cells via --project.
PROJECT_ID = "esicmdatathon2026" #@param {type:"string"}


In [None]:
# Where the AmsterdamUMCdb tables live: project and dataset are combined into
# the default dataset for queries.
DATASET_PROJECT_ID = "amsterdamumcdb" #@param {type:"string"}
DATASET_ID = "van_gogh_2026_datathon" #@param {type:"string"}
# NOTE(review): LOCATION is not referenced by any cell visible here -- confirm it is still needed.
LOCATION = "eu" #@param {type:"string"}

In [None]:
import os
from google.colab import auth

# Export the project id through the environment variable that all libraries check.
os.environ.update({"GOOGLE_CLOUD_PROJECT": PROJECT_ID})

# Authenticate the current Colab user, then confirm on stdout.
auth.authenticate_user()
print("Authenticated")


In [None]:
%load_ext google.colab.data_table
from google.colab.data_table import DataTable

# change default limits:
DataTable.max_columns = 50
DataTable.max_rows = 30000


In [None]:
from google.cloud.bigquery import magics
from google.cloud import bigquery

# Default query job configuration for the %%bigquery magic: unqualified table
# names in the SQL cells resolve against "<dataset project>.<dataset>".
def_config = bigquery.job.QueryJobConfig(
    default_dataset=f"{DATASET_PROJECT_ID}.{DATASET_ID}"
)
magics.context.default_query_job_config = def_config


In [None]:
import pandas as pd
import numpy as np

# `plt` must be the pyplot interface: the top-level `matplotlib` package does
# not provide plot()/subplots()/show(), so aliasing it as `plt` breaks plotting calls.
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')

# Detecting weaning from ventilator data
Only very simplified checks worked. A PEEP, FiO2 or tidal volume (TV) record is taken as evidence of ongoing ventilation, and the number of such records in the following 2, 6 and 48 hours is counted.

In [None]:
%%bigquery ventpat --project $PROJECT_ID
-- Candidate weaning time points: one row per heart-rate record of a ventilated patient.
-- A ventilator record with PEEP, FiO2 or TV > 0 counts as evidence of ventilation.
-- check2hrs / check6hrs / check48hrs count such records in the window that starts
-- 1 minute after the row and ends 2 / 6 / 48 hours after it.
with vt as (
  -- ventilator settings in long format: one row per person, time and setting
  select person_id, measurement_datetime
  , case when measurement_concept_id = 3022875 then 'PEEP'
         when measurement_concept_id in (3025408, 2000000204) then 'FiO2'
         when measurement_concept_id = 3012410 then 'TV'
         -- when measurement_concept_id = 2000000250 then 'CPAP'
         end as concept_name
  , value_as_number
  from measurement m
  where measurement_concept_id in (
          3022875 -- Positive end expiratory pressure setting Ventilator
        , 3025408 -- Oxygen/Inspired gas Respiratory system by O2 Analyzer --on ventilator
        , 2000000204 -- fio2 setting
        , 3012410 -- Tidal volume setting Ventilator
        -- , 2000000250 -- Ventilation mode Ventilator non-invasive, very few entries, not used!
  )
  -- and person_id = 18 -- 407 -- 18 -- testing only
  and value_as_number is not null
),
vent as (
  -- wide format: one row per person and time with columns PEEP, FiO2, TV
  -- (max() resolves several values of the same setting at the same time)
  select * from vt
  PIVOT ( max(value_as_number) for concept_name in ('PEEP', 'FiO2', 'TV'))
),
hr as (
  -- heart-rate records; they drive the rows of the final result
  select person_id, measurement_datetime
  , value_as_number as hf_ekg
  from measurement m
  where measurement_concept_id in (
        21490872 -- Heart rate.beat-to-beat by EKG
  )
  and person_id in (select distinct person_id from vent) -- only ventilated patients are of interest
),
tube as (
  -- recorded airway tube diameters
  select person_id, measurement_datetime, value_as_number as tubesize
  from measurement m
  where measurement_concept_id in ( 36305611 -- Tracheostomy tube diameter
                                  , 21491186 -- Endotracheal tube Diameter
  )
),
bga as (
  -- oxygen partial pressure from blood gas analysis
  select person_id, measurement_datetime
  , value_as_number as PaO2
  from measurement m
  where measurement_concept_id in (
          3027315 -- Oxygen [Partial pressure] in Blood
        , 3027801 -- Oxygen [Partial pressure] in Arterial blood
  )
  and value_as_number is not null
  and unit_source_value = 'kPa' -- only kPa, ignore mmHg values
  and value_as_number > 0
),
died as (
  select distinct person_id, death_datetime
  from death
  where person_id in (select person_id from vent)
),
ventall as (
  -- make a summary of all values before proceeding
  select hr.person_id, hr.measurement_datetime, hr.hf_ekg
  , vent.PEEP, vent.FiO2, vent.TV
  , bga.PaO2
  -- PaO2 / FiO2 ratio, only where both values are available
  , case when bga.PaO2 is not null and vent.FiO2 > 0
         then (bga.PaO2 * 100.0 / vent.FiO2) else null end as pfratio
  , tube.tubesize, died.death_datetime
  , (select count(*)
       from vent nxt
      where vent.person_id = nxt.person_id
        and nxt.measurement_datetime between timestamp_add(vent.measurement_datetime, interval 1 minute)
                                         and timestamp_add(vent.measurement_datetime, interval 2 HOUR)
        and (nxt.FiO2 > 0 or nxt.PEEP > 0 or nxt.TV > 0) -- any vent?
    ) as check2hrs
  , (select count(*)
       from vent nxt
      where vent.person_id = nxt.person_id
        and nxt.measurement_datetime between timestamp_add(vent.measurement_datetime, interval 1 minute)
                                         and timestamp_add(vent.measurement_datetime, interval 6 HOUR)
        and (nxt.FiO2 > 0 or nxt.PEEP > 0 or nxt.TV > 0) -- any vent?
    ) as check6hrs
  -- problems with very short stays < 48 hours
  , (select count(*)
       from vent nxt
      where vent.person_id = nxt.person_id
        and nxt.measurement_datetime between timestamp_add(vent.measurement_datetime, interval 1 minute)
                                         and timestamp_add(vent.measurement_datetime, interval 48 HOUR)
        and (nxt.FiO2 > 0 or nxt.PEEP > 0 or nxt.TV > 0 -- resume vent?
             -- death in 48hrs? Explicit 0-48 h window: a day-granularity diff <= 2
             -- is not a 48-hour limit and would also accept deaths before this row.
             or (died.death_datetime between vent.measurement_datetime
                                         and timestamp_add(vent.measurement_datetime, interval 48 HOUR))
             -- NOTE(review): tube is joined on the row's own timestamp, so this only
             -- sees a tube size recorded at exactly this time, not within 48 hours.
             or tube.tubesize > 0)
    ) as check48hrs
  from hr
  -- ventilator values are attached only when recorded at exactly the heart-rate timestamp;
  -- rows without a match get NULL settings and counts of 0
  left outer join vent
    on vent.person_id = hr.person_id
   and vent.measurement_datetime = hr.measurement_datetime
  -- NOTE(review): several PaO2 values within +/- 30 minutes duplicate the heart-rate row
  left outer join bga
    on hr.person_id = bga.person_id
   and bga.measurement_datetime between timestamp_add(hr.measurement_datetime, INTERVAL -30 MINUTE)
                                    and timestamp_add(hr.measurement_datetime, INTERVAL 30 MINUTE)
  left outer join tube
    on hr.person_id = tube.person_id
   and hr.measurement_datetime = tube.measurement_datetime
  left outer join died
    on hr.person_id = died.person_id
)
select a.*
from ventall a
-- keeps rows with exactly one ventilator record in the 2-hour and in the 6-hour window
where (check2hrs = 1 and check6hrs = 1)  --this means weaning for 2-6 hrs.
order by person_id, measurement_datetime


These are the weaning episodes.

In [None]:
# Preview the first 100 rows of the weaning query result.
# Random check of a single patient: ventpat[ventpat['person_id']==63325]
ventpat.head(n=100)

# Validation of assumptions
Very short stays are a problem: the count also becomes zero when there are simply no more entries for the patient.

In [None]:
%%bigquery test --project $PROJECT_ID
-- Validation query: same logic as the weaning query, restricted to one test patient
-- and returned without the final check2hrs/check6hrs filter, so every
-- heart-rate row and its window counts can be inspected.
with vt as (
  -- ventilator settings in long format: one row per person, time and setting
  select person_id, measurement_datetime
  , case when measurement_concept_id = 3022875 then 'PEEP'
         when measurement_concept_id in (3025408, 2000000204) then 'FiO2'
         when measurement_concept_id = 3012410 then 'TV'
         -- when measurement_concept_id = 2000000250 then 'CPAP'
         end as concept_name
  , value_as_number
  from measurement m
  where measurement_concept_id in (
          3022875 -- Positive end expiratory pressure setting Ventilator
        , 3025408 -- Oxygen/Inspired gas Respiratory system by O2 Analyzer --on ventilator
        , 2000000204 -- fio2 setting
        , 3012410 -- Tidal volume setting Ventilator
        -- , 2000000250 -- Ventilation mode Ventilator non-invasive, very few entries, not used!
  )
  and person_id = 87 -- testing only 63325  407 18
  and value_as_number is not null
),
vent as (
  -- wide format: one row per person and time with columns PEEP, FiO2, TV
  select * from vt
  PIVOT ( max(value_as_number) for concept_name in ('PEEP', 'FiO2', 'TV'))
),
hr as (
  -- heart-rate records; they drive the rows of the final result
  select person_id, measurement_datetime
  , value_as_number as hf_ekg
  from measurement m
  where measurement_concept_id in (
        21490872 -- Heart rate.beat-to-beat by EKG
  )
  and person_id in (select distinct person_id from vent) -- only ventilated patients are of interest
),
tube as (
  -- recorded airway tube diameters
  select person_id, measurement_datetime, value_as_number as tubesize
  from measurement m
  where measurement_concept_id in ( 36305611 -- Tracheostomy tube diameter
                                  , 21491186 -- Endotracheal tube Diameter
  )
),
bga as (
  -- oxygen partial pressure from blood gas analysis
  select person_id, measurement_datetime
  , value_as_number as PaO2
  from measurement m
  where measurement_concept_id in (
          3027315 -- Oxygen [Partial pressure] in Blood
        , 3027801 -- Oxygen [Partial pressure] in Arterial blood
  )
  and value_as_number is not null
  and unit_source_value = 'kPa' -- only kPa, ignore mmHg values
  and value_as_number > 0
),
died as (
  select distinct person_id, death_datetime
  from death
  where person_id in (select person_id from vent)
)
-- make a summary of all values before proceeding
select hr.person_id, hr.measurement_datetime, hr.hf_ekg
, vent.PEEP, vent.FiO2, vent.TV
, bga.PaO2
-- PaO2 / FiO2 ratio, only where both values are available
, case when bga.PaO2 is not null and vent.FiO2 > 0
       then (bga.PaO2 * 100.0 / vent.FiO2) else null end as pfratio
, tube.tubesize, died.death_datetime
, (select count(*)
     from vent nxt
    where vent.person_id = nxt.person_id
      and nxt.measurement_datetime between timestamp_add(vent.measurement_datetime, interval 1 minute)
                                       and timestamp_add(vent.measurement_datetime, interval 2 HOUR)
      and (nxt.FiO2 > 0 or nxt.PEEP > 0 or nxt.TV > 0) -- any vent?
  ) as check2hrs
, (select count(*)
     from vent nxt
    where vent.person_id = nxt.person_id
      and nxt.measurement_datetime between timestamp_add(vent.measurement_datetime, interval 1 minute)
                                       and timestamp_add(vent.measurement_datetime, interval 6 HOUR)
      and (nxt.FiO2 > 0 or nxt.PEEP > 0 or nxt.TV > 0) -- any vent?
  ) as check6hrs
, (select count(*)
     from vent nxt
    where vent.person_id = nxt.person_id
      and nxt.measurement_datetime between timestamp_add(vent.measurement_datetime, interval 1 minute)
                                       and timestamp_add(vent.measurement_datetime, interval 48 HOUR)
      and (nxt.FiO2 > 0 or nxt.PEEP > 0 or nxt.TV > 0 -- resume vent?
           -- death in 48hrs? Explicit 0-48 h window: a day-granularity diff <= 2
           -- is not a 48-hour limit and would also accept deaths before this row.
           or (died.death_datetime between vent.measurement_datetime
                                       and timestamp_add(vent.measurement_datetime, interval 48 HOUR))
           -- NOTE(review): tube is joined on the row's own timestamp, so this only
           -- sees a tube size recorded at exactly this time, not within 48 hours.
           or tube.tubesize > 0)
  ) as check48hrs
from hr
-- ventilator values are attached only when recorded at exactly the heart-rate timestamp;
-- rows without a match get NULL settings and counts of 0
left outer join vent
  on vent.person_id = hr.person_id
 and vent.measurement_datetime = hr.measurement_datetime
-- NOTE(review): several PaO2 values within +/- 30 minutes duplicate the heart-rate row
left outer join bga
  on hr.person_id = bga.person_id
 and bga.measurement_datetime between timestamp_add(hr.measurement_datetime, INTERVAL -30 MINUTE)
                                  and timestamp_add(hr.measurement_datetime, INTERVAL 30 MINUTE)
left outer join tube
  on hr.person_id = tube.person_id
 and hr.measurement_datetime = tube.measurement_datetime
left outer join died
  on hr.person_id = died.person_id
order by person_id, measurement_datetime

In [None]:
# Display the validation result for the single test patient.
# Optional row filter to look only at records after a given time:
# [test['measurement_datetime']>'2010-08-22 16:49:00+00:00']
test