<a href="https://colab.research.google.com/github/DEP04929/ESICMDatathon2026/blob/main/Weaning20260120.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pre-requisites for Amsterdam UMC DB


In [None]:
# Google Cloud project id used by this notebook -- set *your* own project here.
# It is exported as GOOGLE_CLOUD_PROJECT and passed to every `%%bigquery --project` cell.
# The trailing `#@param` annotation renders the value as a Colab form field.
PROJECT_ID = "esicmdatathon2026" #@param {type:"string"}


In [None]:
# Default dataset for AmsterdamUMCdb: project and dataset are combined into
# "<DATASET_PROJECT_ID>.<DATASET_ID>" so queries can use unqualified table names.
DATASET_PROJECT_ID = 'amsterdamumcdb' #@param {type:"string"}
DATASET_ID = 'van_gogh_2026_datathon' #@param {type:"string"}
# NOTE(review): LOCATION is not referenced by any cell visible in this notebook -- confirm it is still needed.
LOCATION = 'eu' #@param {type:"string"}

In [None]:
import os
from google.colab import auth

# Export the project id through the environment variable the Google client libraries read.
os.environ.update({"GOOGLE_CLOUD_PROJECT": PROJECT_ID})

# Run the Colab sign-in flow for the current user, then confirm it completed.
auth.authenticate_user()
print('Authenticated')


In [None]:
# Load Colab's interactive data table extension for DataFrame output.
%load_ext google.colab.data_table
from google.colab.data_table import DataTable

# change default limits:
# raise the class-level column and row limits of the interactive table
DataTable.max_columns = 50
DataTable.max_rows = 30000


In [None]:
from google.cloud.bigquery import magics
from google.cloud import bigquery

# Default job configuration for every %%bigquery cell: unqualified table names
# resolve against "<DATASET_PROJECT_ID>.<DATASET_ID>".
def_config = bigquery.QueryJobConfig(
    default_dataset=f"{DATASET_PROJECT_ID}.{DATASET_ID}",
)
magics.context.default_query_job_config = def_config


In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
# use seaborn's 'darkgrid' style for the plots in this notebook
sns.set_style('darkgrid')

# Vent data detect weaning
Only very simplified checks worked. I used charted inspiratory pressure and tidal volume (TV) as evidence of invasive ventilation and counted the hours over which they were charted. An episode counts as a failure if any of these parameters is charted again within 48 hours, or if the patient dies within 48 hours.

In [None]:
%%bigquery ventpat --project $PROJECT_ID
-- Builds `ventpat`: heart-rate time points joined to ventilator settings, blood gases,
-- tube size and death, restricted to candidate weaning episodes and labelled
-- 'Failed' / 'Success'.
with vent as
(
  -- vt: ventilator-setting measurements in long form, tagged with a short column name
  with vt as ( select person_id, measurement_datetime
  , case when measurement_concept_id = 3007469 then 'BR'
         when measurement_concept_id = 3022875 and lower(measurement_source_value) like '%cpap%' then 'PEEPcpap'
         when measurement_concept_id = 3022875 and lower(measurement_source_value) like '%epap%' then 'PEEPepap'
         when measurement_concept_id = 3022875 then 'PEEP'
         when measurement_concept_id = 2000000209 then 'Pinspset'
         when measurement_concept_id = 2000000211 then 'Pinsp'
         when measurement_concept_id = 2000000101 then 'Fi02ECMO'
         when measurement_concept_id = 2000000204 then 'FiO2'
         when measurement_concept_id = 3012410 then 'TV'
         when measurement_concept_id = 3000461 then 'PS'
       end as concept_name
  , value_as_number
  from measurement m
  where measurement_concept_id in (
        3007469 -- Breath rate setting Ventilator
        , 3022875 -- Positive end expiratory pressure setting Ventilator
        , 2000000209 -- Inspiratory Pressure Above PEEP Set
        , 2000000211 -- Inspiratory Pressure Above PEEP
        , 2000000101 -- ECMO FiO2
        , 2000000204 -- fio2 setting
        , 3012410 -- Tidal volume setting Ventilator
        , 3000461 -- Pressure support setting Ventilator
  )
  -- and person_id = 18 -- 407 -- 18 -- testing only
  and value_as_number is not null
  )
  -- one row per (person_id, measurement_datetime), one column per setting;
  -- no ORDER BY here: ordering inside a CTE does not affect the final result
  select * from vt
  PIVOT ( max(value_as_number) for concept_name in ('BR', 'PEEPcpap', 'PEEPepap', 'PEEP', 'Pinspset', 'Pinsp',
                                                    'Fi02ECMO', 'FiO2', 'TV', 'PS'))
),
-- hr: heart-rate records, used as the time axis of the result
hr as (
select person_id, measurement_datetime
, value_as_number as hf_ekg
from measurement m
where measurement_concept_id in (
    21490872 --Heart rate.beat-to-beat by EKG
)
and person_id in (select distinct person_id from vent ) -- i only am interested in vent patients
),
-- tube: charted tube diameters
tube as (
  select person_id, measurement_datetime, value_as_number as tubesize
  from measurement m
  where measurement_concept_id in ( 36305611 -- Tracheostomy tube diameter
                                  , 21491186 -- Endotracheal tube Diameter
  )
),
-- bga: positive PaO2 values charted in kPa
bga as (
select person_id, measurement_datetime
  ,  value_as_number as PaO2
  from measurement m
  where measurement_concept_id in (
        3027315 -- Oxygen [Partial pressure] in Blood
        ,3027801 -- Oxygen [Partial pressure] in Arterial blood
  )
  and value_as_number is not null
  and unit_source_value = 'kPa' -- only kPa, ignore mmHg values
  and value_as_number > 0
), died as (
select distinct person_id, death_datetime
  from death
  where person_id in (select person_id from vent)
)
, ventall as (
-- make a summary of all values before proceeding
select hr.person_id, hr.measurement_datetime, hr.hf_ekg
, vent.BR, vent.PEEPcpap, vent.PEEPepap, vent.PEEP, vent.Pinspset, vent.Pinsp
, vent.Fi02ECMO, vent.FiO2, vent.TV, vent.PS
, bga.PaO2,
case when bga.PaO2 is not null and vent.Fio2 >0
        then ( bga.PaO2*100.0 / vent.Fio2)  else null end as pfratio_kPa
, tube.tubesize, died.death_datetime
-- death no later than 48 hours after this ventilator record; the former
-- day-granularity diff (<= 2) also admitted deaths up to almost 72 hours later.
-- Deaths stamped before the record still count, exactly as before.
, case when died.death_datetime <= timestamp_add(vent.measurement_datetime, interval 48 HOUR)
      then 1 else 0 end as death_in_48hrs
-- number of ventilator records in the following 2 hours that carry FiO2, PEEP or TV
, (select count(*)
        from vent b
        where vent.person_id = b.person_id
        and b.measurement_datetime between timestamp_add( vent.measurement_datetime, interval 1 MINUTE)
                                and timestamp_add(vent.measurement_datetime, interval 2 HOUR )
        and (b.FiO2 >0 or b.PEEP >0 or b.TV > 0) -- any vent?
                                ) as check2hrs
-- number of ventilator records in the following 6 hours that carry an inspiratory pressure or TV
, (select count(*)
        from vent b
        where vent.person_id = b.person_id
        and b.measurement_datetime between timestamp_add( vent.measurement_datetime, interval 1 MINUTE)
                                and timestamp_add(vent.measurement_datetime, interval 6 HOUR )
        and (b.Pinspset >0 or b.Pinsp >0 or b.TV > 0) -- any vent?
                                ) as check6hrs
-- problems with very short stays < 48 hours
-- NOTE(review): tube.tubesize below is the tube row joined to the current heart-rate
-- time point (+/- 30 min), not a tube record inside the 48 h window -- confirm intent.
, (select count(*)
        from vent b
        where vent.person_id = b.person_id
        and b.measurement_datetime between timestamp_add( vent.measurement_datetime, interval 1 MINUTE)
                                and timestamp_add(vent.measurement_datetime, interval 48 HOUR )
        and (b.Pinspset >0 or b.Pinsp >0 or b.TV > 0 -- resume vent?
              or tube.tubesize >0 ) -- tube in 48hrs?
                                ) as check48hrs
from hr
-- ventilator settings charted at exactly the same time as the heart rate
left outer join vent
on vent.person_id = hr.person_id
and vent.measurement_datetime = hr.measurement_datetime
-- blood gas within +/- 30 minutes of the heart-rate time point
left outer join bga
on hr.person_id = bga.person_id
and bga.measurement_datetime between timestamp_add(hr.measurement_datetime , INTERVAL -30 MINUTE)
            and timestamp_add(hr.measurement_datetime , INTERVAL 30 MINUTE)
-- tube size within +/- 30 minutes of the heart-rate time point
left outer join tube
on hr.person_id = tube.person_id
and hr.measurement_datetime between timestamp_add(tube.measurement_datetime , INTERVAL -30 MINUTE)
            and TIMESTAMP_ADD(tube.measurement_datetime , INTERVAL 30 MINUTE)
left outer join died
on hr.person_id = died.person_id
)
select a.*
, case when check48hrs > 1 or death_in_48hrs = 1
       then 'Failed' else 'Success' end as outcome
from ventall a
where check2hrs = 1 and check6hrs = 1  --this means weaning for 2-6 hrs.
order by person_id, measurement_datetime


# Validation of assumptions


I found 12620 possible weaning episodes.

In [None]:
# summary statistics of the candidate weaning episodes returned by the query
ventpat.describe()

# ? Success vs Failure weaning plausibility check

Check whether the assumed weaning outcomes (failed vs. success) are plausible.

In [None]:
# Count the episodes per outcome label.
outcome_counts = ventpat['outcome'].value_counts()

# Draw the shares as a pie chart on an explicit square figure.
fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    outcome_counts,
    labels=outcome_counts.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=sns.color_palette('pastel'),
)
ax.set_title('Distribution of Weaning Outcomes (Failed vs. Success)')
ax.axis('equal')  # equal aspect ratio keeps the pie circular
plt.show()


# ? Check cases for plausibility

In [None]:
# @title why tubesize so rare?

# Show up to 100 episodes that carry a charted tube diameter above 1.
has_tube = ventpat['tubesize'] > 1
ventpat.loc[has_tube].head(100)
# spot check for a single patient: ventpat[ventpat['person_id']==45453]

In [None]:
%%bigquery test --project $PROJECT_ID
# @title Trace one patient for plausibility
-- The cell magic has to be the first line of the cell, so the Colab title sits below it
-- ('#' starts a comment in GoogleSQL as well).
-- Same pipeline as the `ventpat` query, restricted to one patient and without the
-- episode filter, so every heart-rate time point of that patient can be inspected.
with vent as
(
  -- vt: ventilator-setting measurements in long form, tagged with a short column name
  with vt as ( select person_id, measurement_datetime
  , case when measurement_concept_id = 3007469 then 'BR'
         when measurement_concept_id = 3022875 and lower(measurement_source_value) like '%cpap%' then 'PEEPcpap'
         when measurement_concept_id = 3022875 and lower(measurement_source_value) like '%epap%' then 'PEEPepap'
         when measurement_concept_id = 3022875 then 'PEEP'
         when measurement_concept_id = 2000000209 then 'Pinspset'
         when measurement_concept_id = 2000000211 then 'Pinsp'
         when measurement_concept_id = 2000000101 then 'Fi02ECMO'
         when measurement_concept_id = 2000000204 then 'FiO2'
         when measurement_concept_id = 3012410 then 'TV'
         when measurement_concept_id = 3000461 then 'PS'
       end as concept_name
  , value_as_number
  from measurement m
  where measurement_concept_id in (
        3007469 -- Breath rate setting Ventilator
        , 3022875 -- Positive end expiratory pressure setting Ventilator
        , 2000000209 -- Inspiratory Pressure Above PEEP Set
        , 2000000211 -- Inspiratory Pressure Above PEEP
        , 2000000101 -- ECMO FiO2
        , 2000000204 -- fio2 setting
        , 3012410 -- Tidal volume setting Ventilator
        , 3000461 -- Pressure support setting Ventilator
  )

  /************** please set the person_id for checking purpose *****/
  and person_id = 22938
  /******************************************************************/
  and value_as_number is not null
  )
  -- one row per (person_id, measurement_datetime), one column per setting;
  -- no ORDER BY here: ordering inside a CTE does not affect the final result
  select * from vt
  PIVOT ( max(value_as_number) for concept_name in ('BR', 'PEEPcpap', 'PEEPepap', 'PEEP', 'Pinspset', 'Pinsp',
                                                    'Fi02ECMO', 'FiO2', 'TV', 'PS'))
),
-- hr: heart-rate records, used as the time axis of the result
hr as (
select person_id, measurement_datetime
, value_as_number as hf_ekg
from measurement m
where measurement_concept_id in (
    21490872 --Heart rate.beat-to-beat by EKG
)
and person_id in (select distinct person_id from vent ) -- i only am interested in vent patients
),
-- tube: charted tube diameters
tube as (
  select person_id, measurement_datetime, value_as_number as tubesize
  from measurement m
  where measurement_concept_id in ( 36305611 -- Tracheostomy tube diameter
                                  , 21491186 -- Endotracheal tube Diameter
  )
),
-- bga: positive PaO2 values charted in kPa
bga as (
select person_id, measurement_datetime
  ,  value_as_number as PaO2
  from measurement m
  where measurement_concept_id in (
        3027315 -- Oxygen [Partial pressure] in Blood
        ,3027801 -- Oxygen [Partial pressure] in Arterial blood
  )
  and value_as_number is not null
  and unit_source_value = 'kPa' -- only kPa, ignore mmHg values
  and value_as_number > 0
), died as (
select distinct person_id, death_datetime
  from death
  where person_id in (select person_id from vent)
)
, ventall as (
-- make a summary of all values before proceeding
select hr.person_id, hr.measurement_datetime, hr.hf_ekg
, vent.BR, vent.PEEPcpap, vent.PEEPepap, vent.PEEP, vent.Pinspset, vent.Pinsp
, vent.Fi02ECMO, vent.FiO2, vent.TV, vent.PS
, bga.PaO2,
case when bga.PaO2 is not null and vent.Fio2 >0
        then ( bga.PaO2*100.0 / vent.Fio2)  else null end as pfratio_kPa
, tube.tubesize, died.death_datetime
-- death no later than 48 hours after this ventilator record; the former
-- day-granularity diff (<= 2) also admitted deaths up to almost 72 hours later.
-- Deaths stamped before the record still count, exactly as before.
, case when died.death_datetime <= timestamp_add(vent.measurement_datetime, interval 48 HOUR)
      then 1 else 0 end as death_in_48hrs
-- number of ventilator records in the following 2 hours that carry FiO2, PEEP or TV
, (select count(*)
        from vent b
        where vent.person_id = b.person_id
        and b.measurement_datetime between timestamp_add( vent.measurement_datetime, interval 1 MINUTE)
                                and timestamp_add(vent.measurement_datetime, interval 2 HOUR )
        and (b.FiO2 >0 or b.PEEP >0 or b.TV > 0) -- any vent?
                                ) as check2hrs
-- number of ventilator records in the following 6 hours that carry an inspiratory pressure or TV
, (select count(*)
        from vent b
        where vent.person_id = b.person_id
        and b.measurement_datetime between timestamp_add( vent.measurement_datetime, interval 1 MINUTE)
                                and timestamp_add(vent.measurement_datetime, interval 6 HOUR )
        and (b.Pinspset >0 or b.Pinsp >0 or b.TV > 0) -- any vent?
                                ) as check6hrs
-- problems with very short stays < 48 hours
-- NOTE(review): tube.tubesize below is the tube row joined to the current heart-rate
-- time point (+/- 30 min), not a tube record inside the 48 h window -- confirm intent.
, (select count(*)
        from vent b
        where vent.person_id = b.person_id
        and b.measurement_datetime between timestamp_add( vent.measurement_datetime, interval 1 MINUTE)
                                and timestamp_add(vent.measurement_datetime, interval 48 HOUR )
        and (b.Pinspset >0 or b.Pinsp >0 or b.TV > 0 -- resume vent?
              or tube.tubesize >0 ) -- tube in 48hrs?
                                ) as check48hrs
from hr
-- ventilator settings charted at exactly the same time as the heart rate
left outer join vent
on vent.person_id = hr.person_id
and vent.measurement_datetime = hr.measurement_datetime
-- blood gas within +/- 30 minutes of the heart-rate time point
left outer join bga
on hr.person_id = bga.person_id
and bga.measurement_datetime between timestamp_add(hr.measurement_datetime , INTERVAL -30 MINUTE)
            and timestamp_add(hr.measurement_datetime , INTERVAL 30 MINUTE)
-- tube size within +/- 30 minutes of the heart-rate time point
left outer join tube
on hr.person_id = tube.person_id
and hr.measurement_datetime between timestamp_add(tube.measurement_datetime , INTERVAL -30 MINUTE)
            and TIMESTAMP_ADD(tube.measurement_datetime , INTERVAL 30 MINUTE)
left outer join died
on hr.person_id = died.person_id
)
select a.*
-- flags are 1 or NULL; no rows are filtered out in this trace
-- NOTE(review): this flag uses OR, whereas the `ventpat` query requires both checks (AND) -- confirm intent
, case when  check2hrs = 1 or check6hrs = 1 then 1 end as possiblewean
, case when check48hrs > 1 then 1 end as invasivevent
from ventall a
order by person_id, measurement_datetime


In [None]:
# display the per-time-point trace returned by the query above
test

In [None]:
%%bigquery test --project $PROJECT_ID
-- All visit_occurrence rows of person 22938.
SELECT *
FROM visit_occurrence
WHERE person_id = 22938



In [None]:
# display the visit records returned by the query above
test

# ? sedation drugs
I don't know how to detect whether the sedatives were reduced. Each drug administration is recorded as a quantity over a start–end time range. Should I assume the quantity was delivered at a constant rate over that range?

In [None]:
%%bigquery test --project $PROJECT_ID
-- Drug exposures of person 22938 that overlap the window
-- 2018-12-08 14:00 - 15:46 (+00:00).
select c.concept_id, c.concept_name, d.drug_source_value, d.quantity, d.dose_unit_source_value
, d.drug_exposure_start_datetime, d.drug_exposure_end_datetime
from drug_exposure d inner join concept c
on d.drug_concept_id = c.concept_id
-- interval overlap: the exposure starts before the window ends AND ends after it starts.
-- The former "start < window start OR end > window end" also matched exposures
-- that lay entirely before or entirely after the window.
where d.drug_exposure_start_datetime <= '2018-12-08 15:46:00+00:00'
  -- exposures without an end time are treated as ending at their start time
  and coalesce(d.drug_exposure_end_datetime, d.drug_exposure_start_datetime) >= '2018-12-08 14:00:00+00:00'
  and d.person_id = 22938
order by d.drug_exposure_start_datetime


In [None]:
# display the drug exposures returned by the query above
test