# Cohort selection

- Inclusion criteria: > 18y, ischemic stroke, inpatient/non-transferred, not refusing to participate
- Exclusion criteria: ICU stay < 12h, hospitalisation > 7d after stroke onset


Requirements:
* MIMIC-III in a PostgreSQL database


In [None]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2

# below imports are used to print out pretty pandas dataframes
from IPython.display import display, HTML

%matplotlib inline
plt.style.use('ggplot')

# Credentials and location of the local PostgreSQL instance holding MIMIC-III
sqluser = 'postgres'
sqlpassword = 'postgres'
dbname = 'mimic'
schema_name = 'mimiciii'

# Open the connection to the MIMIC-III copy.
# Password-less alternative: psycopg2.connect(dbname=dbname, user=sqluser)
connection_settings = dict(
    dbname=dbname,
    user=sqluser,
    password=sqlpassword,
    port=5000,
    host='localhost',
)
con = psycopg2.connect(**connection_settings)

# Prefix prepended to every query so table names resolve inside the MIMIC schema
query_schema = f'set search_path to {schema_name};'

Cohort selection will begin with three tables: *patients*, *admissions*, and *icustays*:

* *patients*: information about a patient that does not change - e.g. date of birth, genotypical sex
* *admissions*: information recorded on hospital admission - admission type (elective, emergency), time of admission
* *icustays*: information recorded on intensive care unit admission - primarily admission and discharge time

As MIMIC-III is primarily an intensive care unit (ICU) database, the focus will be on patients admitted to and discharged from the ICU. That is, rather than selecting our cohort based on the individual patient (identified by `subject_id` in the database), we will usually want to select our cohort based on the ICU stay (identified by `icustay_id`).

# Exclude based on length of stay, first ICU stay, age

In [None]:
# Preview of the first exclusion flags on a 100-row sample of ICU stays:
#   exclusion_los        - ICU stay shorter than 12 hours
#   exclusion_age        - younger than 18 years at ICU admission
#   exclusion_first_stay - not the patient's first ICU stay
# The sample is ordered before LIMIT so that re-running the cell returns the
# same rows; a LIMIT without ORDER BY yields an arbitrary subset.
query = query_schema + """
WITH co AS
(
SELECT icu.subject_id, icu.hadm_id, icu.icustay_id
, EXTRACT(EPOCH FROM icu.outtime - icu.intime)/60.0/60.0 as icu_length_of_stay_h
, EXTRACT('epoch' from icu.intime - pat.dob) / 60.0 / 60.0 / 24.0 / 365.242 as age
, RANK() OVER (PARTITION BY icu.subject_id ORDER BY icu.intime) AS icustay_id_order
FROM icustays icu
INNER JOIN patients pat
  ON icu.subject_id = pat.subject_id
ORDER BY icu.subject_id, icu.intime, icu.icustay_id
LIMIT 100
)
SELECT
  co.subject_id, co.hadm_id, co.icustay_id, co.icu_length_of_stay_h
  , co.age
  , co.icustay_id_order
  , CASE
        WHEN co.icu_length_of_stay_h < 12 then 1
    ELSE 0 END
    AS exclusion_los
  , CASE
        WHEN co.age < 18 then 1
    ELSE 0 END
    AS exclusion_age
  , CASE
        WHEN co.icustay_id_order != 1 THEN 1
    ELSE 0 END
    AS exclusion_first_stay
FROM co
ORDER BY co.subject_id, co.icustay_id_order, co.icustay_id
"""
df = pd.read_sql_query(query, con)
df

# Exclusion based on discharge diagnosis

Ischemic stroke is identified by the ICD-9-CM codes 433, 434 and 436.

In [None]:
# Preview of the discharge-diagnosis flag on a 10-row sample.
# Joining diagnoses_icd on hadm_id repeats each ICU stay once per matching
# diagnosis row. Rows of the same stay tie on intime, so RANK() gives every
# row of the first stay the value 1 and any later stay a value > 1, which is
# all the `!= 1` test needs.
# The sample is ordered before LIMIT so that re-running the cell returns the
# same rows; a LIMIT without ORDER BY yields an arbitrary subset.
query = query_schema + """
WITH co AS
(
SELECT icu.subject_id, icu.hadm_id, icu.icustay_id, dx.icd9_code
, EXTRACT(EPOCH FROM icu.outtime - icu.intime)/60.0/60.0 as icu_length_of_stay_h
, EXTRACT('epoch' from icu.intime - pat.dob) / 60.0 / 60.0 / 24.0 / 365.242 as age
, RANK() OVER (PARTITION BY icu.subject_id ORDER BY icu.intime) AS icustay_id_order
FROM icustays icu
INNER JOIN patients pat
  ON icu.subject_id = pat.subject_id
INNER JOIN diagnoses_icd dx
  ON icu.hadm_id = dx.hadm_id
ORDER BY icu.subject_id, icu.intime, icu.icustay_id, dx.icd9_code
LIMIT 10
)
SELECT
  co.subject_id, co.hadm_id, co.icustay_id, co.icu_length_of_stay_h
  , co.age
  , co.icustay_id_order
  , co.icd9_code
  , CASE
        WHEN co.icu_length_of_stay_h < 12 then 1
    ELSE 0 END
    AS exclusion_los
  , CASE
        WHEN co.age < 18 then 1
    ELSE 0 END
    AS exclusion_age
  , CASE
        WHEN co.icustay_id_order != 1 THEN 1
    ELSE 0 END
    AS exclusion_first_stay
  , CASE
        WHEN co.icd9_code LIKE '433%' OR co.icd9_code LIKE '434%' OR co.icd9_code LIKE '436%'  THEN 0
    ELSE 1 END
    AS exclusion_discharge_diagnosis
FROM co
ORDER BY co.subject_id, co.icustay_id_order, co.icustay_id, co.icd9_code
"""
df = pd.read_sql_query(query, con)
df

# Exclusion based on admission diagnosis

- Check whether the admission diagnosis is one of the selected strings listed below
- Check whether the admission diagnosis contains "stroke" or "CVA"

In [None]:
# Free-text admission diagnoses treated as equivalent to a stroke admission.
# The values are compared verbatim against the admission diagnosis column, so
# the misspellings and irregular spacing below are intentional and must not be
# "corrected". Every entry ends with a comma: a missing comma makes Python
# silently fuse two neighbouring literals into one string that matches nothing.
# Backslashes are written as "\\" so each value holds one literal backslash.
stroke_admission_diagnosis_equivalents = [
    'STROKE;TELEMETRY;TRANSIENT ISCHEMIC ATTACK',
    'STROKE/TIA',
    'CEREBROVASCULAR ACCIDENT',
    'STROKE;TELEMETRY',
    'STROKE;TELEMETRY;TELEMETRY',
    'STROKE',
    'STROKE-TRANSIENT ISCHEMIC ATTACK',
    'CEREBRAL VASCULAR ACCIDENT',
    'BRAIN STEM STROKE',
    'TRANSIENT ISCHEMIC ATTACK',
    'CEREBELLAR INFARCT',
    'ACUTE STROKE',
    'STROKE;TELEMETRY; TIA',
    'STROKE;TIA',
    'RIGHT FACIAL DROOP',
    'STROKE;TRANSIENT ISCHEMIC ATTACK',
    'ARTERIAL OCCLUSION',
    'R/O STROKE',
    'CEREBROVASCULAR ACCIDENT;TELEMETRY',
    'STROKE,TRANSIENT ISCHEMIC ATTACK',
    'LEFT CEBELLAR ISCHEMIC STROKE',
    'CEREBELLAR INFACTS',
    'STROKE;TELEMETRY;TELEMETRY;TELEMETRY',
    'UNRESPONSIVENESS, CVA VS. SEIZURE',
    'NEW CVA',
    'LEFT OCCIPITAL STROKE',
    'AORTIC DISSECTION;STROKE',
    'CEREBRAL VASCULAR ACCIDENT;HYPERCARBIA',
    'LT ARM WEAKNESS;R/P CVA',
    'STROKE;RHABDO,URINARY TRACT INFECTION',
    'SRTROKE/TIA',
    'CONFUDION, TEMPORAL MASS/STROKE',
    'CEREBELUM STROKE',
    'ACUTE CVA',
    'ISCHEMIC STROKE',
    'STROKE WITH HEAD BLEED',
    'VERTEBRAL DISSECTION;STROKE;TELEMETRY',
    'S/P STROKE',
    'TIA',
    'STROKE;TELEMETRY; TRANSIENT ISCHEMIC ATTACK',
    'APHASIA',
    'RIGHT SIDED WEAKNESS;ANEMIA',
    'STROKE;TELEMETRY;TRANSIENT ISCHEMIC ATTACK;TELEMETRY',
    'ACUTE ISCHEMIC STROKE',
    'DEEP VEIN THROMBOSIS;LEFT LEG WEAKNESS',
    'RIGHT MCA STROKE',
    'ACUTE STROKE;TELEMETRY',
    'CORONARY ARTERY DISEASE;TRANSIENT ISCHEMIC ATTACK',
    'MCA;STROKE',
    'STROKE,MI',
    'MCA STOKE S/P THROMBOLYSIS',
    'CVA, BRADYCARDIA',
    'LEFT PCA STROKE',
    'THROMBOTIC THROMBOCYTOPENIC PUPURA;? STROKE',
    'STROKE /TIA',
    'BILATERAL CEREBELLAR STROKE',
    'CEREBELLAR INFARCTION',
    'ACUTE CVA;TELEMETRY',
    '? CVA',
    'STROKE/ TIA',
    'CEREBELLAR CVA',
    'CEREBRAL VASCULAR ACCIDENT VS TRANSIENT ISCHEMIC ATTACK',
    'CVA;HYPOTENSION',
    'L SIDED WEAKNESS',
    'CEREBROVASCULAR ACCIDENT;CAROTID DISSECTION',
    'CEREBROVASCULAR ACCIDENT;FEV',
    'STRUKE',
    'RIGHT LEG WEAKNESS;DEMENTIA',
    'BRAIN STEM CVA',
    'RIGHT ARM  WEAKNESS',
    'RT MCA STROKE',
    'UNRESPONSIVE/? STROKE',
    'STROKE; TIA',
    'MCA STROKE',
    'CEREBELLER STOKE',
    'CVA;TELEMETRY',
    'BASILAR STROKE',
    'CN 111 PALSY',
    'TRANSIENT ISCHEMIC ATTACK;ICA CLOT',
    'DYSPHASIA',
    'STROKE-R/O VASCULITIS',
    'SUBCORTICAL STROKE',
    'BASILLAR OCCLUSION',
    'VERTEBRAL ARTERY DISSECTION',
    'CEREBRALVASCULAR ACCIDENT',
    'CEREBROVASCULAR ACCIDENT-RT HEMIPARESIS',
    'VERTEBRAL ARTERY OCCLUSION',
    'CODE STROKE',
    'RULE-OUT MYOCARDIAL INFARCTION;TELEMETRY;?CVA',
    'CEREBROBASVULAR ACCIDENT',
    'STROKE;TELEMETRY;TRANSIENT ISCHEMIC ATTACK;SHINGLES',
    'R/O CVA',
    'LT SIDED STROKE',
    'LEFT CEREBRAL VASCULAR ACCIDENT',
    'UNABLE TO AMBULATE',
    'PONTINE STROKE',
    'CEREBRO VASCULAR ACCIDENT',
    'STROKE.TIA',
    'MYOCARDIAL INFARCTION;STROKE',
    'SLURRED SPEACH;R/O STROKE',
    'CEREBRAL INFARCT',
    'RIGHT POST CIRCULATING ARTERY STROKE',
    'ENCEPHALOPATHY',
    'ACUTE CEREBRAL VASCULAR ACCIDENT',
    'RIGHT ANTERIOR CEREBRAL ARTERY STROKE',
    'VERTEBRAL ARTEY DISSECTION',
    'CEREBRAL VASCULAR ACCIDENT VS SEIZURE',
    'S/P FAL',
    'RIGHT MIDDLE CEREBRAL ARTERY STROKE',
    'FAILURE TO THRIVE;VOMITING',
    'CHEST PAIN,ARM NUMBNESS',
    'MENTAL STATUS CHANGE',
    'VERTIGO',
    'CVA VS.SEIZURES',
    'LEFT CVA',
    'STRIKE;TRANSIENT ISCHEMIC ATTACK',
    'MCA STROKE;COMP SYNDROME;RHABDOMYOLISIS',
    'CORONARY ARTERY DISEASE;TIA',
    'FALL',
    'RIGHT MCA STROKE;TELEMETRY',
    'R/O CVA;SYNCOPE',
    'R/O VERTEBRAL ARTERY DISSECTION',
    'R/O CEREBROVASCULAR ACCIDENT',
    'WEAKNESS;RULE-OUT MYOCARDIAL INFARCTION',
    'MENTAL STATUS CHANGES',
    'RULE-OUT MYOCARDIAL INFARCTION;TELEMETRY;CVA',
    'SEIZURE;STROKE;TELEMETRY;TELEMETRY',
    'RIGHT-SIDED WEAKNESS',
    'TRANSIENT ISCHEMIC ATTACK\\CAROTID STENT',
    'CORONARY ARTERY DISEASE;TIA;CAROTID STENOSIS\\CATH/STENT PLACEMENT',
    'CVA,INTRACRANIAL HEMORAGE',
    'DEMENTIA;FALLS',
    'HEMIPLEGIA',
    'CEREBRAL VASCULAR ACCIDENT;RESPIRATORY FAILURE;RAPIDN A-FIB',
    'SECOND DEGREE BLOCK;? STROKE',
    'BASILAR THROMBOSIS',
    'STOKE;TRANSIENT ISCHEMIC ATTACK',
    'LEFT SIDED HEMIPARESIS',
    'R-SIDED WEAKNESS',
    'STROKE;TELEMETRY;TRANSINET ISCHEMIC ATTACK',
    'STROKE,TIA',
    'CHANGE IN MENTAL STATUS',
    'TIA;A-FIB',
    'ALTERED MENTAL STATUS',
    'LEFT MIDDLE CEREBRAL ARTERY STROKE;R/O MI',
    'STROKE,DEHYDRATION,ALTERED MENTAL STATUS',
    'STROKE,RIGHT MCA',
    'SUBACUTE STROKE',
    'WEAKNESS;TELEMTRY',
    'LEFT SIDED CVA',
    'VERTEBRAL BASALAR INSUFFICENCY/SDA',
    'CEREBROVASCULAR ACCIDENT;PNEUMONIA',
    'STROKE,ENDOCARDITIS',
    'APNEA,MENTAL STATUS CHANGES',
    'TRANSIENT ISCHEMIC ATTACK; CVA',
    'ACUTE CEREBRAL INFARCTION',
    'LEFT SIDED WEAKNESS;BRADYCARDIA',
    'CEREBROVASCULAR ACCIDENT;S/P TPA',
    'FEVER;? CVA',
    'S/P CVA;CAROTID STENOSIS\\CAROTID ANGIOGRAM',
    'RT CEREBELLAR INFARCTION',
    'CEREBRAL VASCULAR ACCIDENT-R/O MYOCARDIAL INFARCTION',
    'BASAL STEM STROKE',
]

In [None]:
# Render the diagnosis list as a SQL tuple literal, e.g. ('A','B','C'), for use
# after an IN operator. Single quotes inside a value are doubled so that a
# diagnosis containing an apostrophe cannot terminate the SQL string early.
stroke_admission_diagnosis_equivalents_string = '(' + ','.join(
    "'" + dx_equivalent.replace("'", "''") + "'"
    for dx_equivalent in stroke_admission_diagnosis_equivalents
) + ')'

In [None]:
# Full (unsampled) query computing every exclusion flag, one row per
# ICU stay x diagnosis code. A flag value of 1 means "exclude".
# The IN list is interpolated from stroke_admission_diagnosis_equivalents_string
# rather than pasted in by hand, so this cell cannot drift from the list
# defined above and uses the same construction as final_selection_query.
query = query_schema + """
WITH co AS
(
SELECT icu.subject_id, icu.hadm_id, icu.icustay_id, dx.icd9_code, pat.dob
, EXTRACT(EPOCH FROM outtime - intime)/60.0/60.0 as icu_length_of_stay_h
, EXTRACT('epoch' from icu.intime - pat.dob) / 60.0 / 60.0 / 24.0 / 365.242 as age
, RANK() OVER (PARTITION BY icu.subject_id ORDER BY icu.intime) AS icustay_id_order
, admissions.diagnosis, admissions.admission_type, admissions.admittime, admissions.dischtime

FROM icustays icu
INNER JOIN patients pat
  ON icu.subject_id = pat.subject_id
INNER JOIN diagnoses_icd dx
  ON icu.hadm_id = dx.hadm_id
INNER JOIN admissions admissions
  ON icu.hadm_id = admissions.hadm_id
)

SELECT
  co.subject_id, co.hadm_id, co.icustay_id, co.admittime, co.dischtime, co.dob, co.icu_length_of_stay_h
  , co.age
  , co.icustay_id_order
  , co.icd9_code
  , co.admission_type
  , co.diagnosis as admission_dx
  , CASE
    WHEN co.icu_length_of_stay_h < 12 then 1
    ELSE 0 END
    AS exclusion_los
  , CASE
        WHEN co.age < 18 then 1
    ELSE 0 END
    AS exclusion_age
  , CASE
        WHEN co.icustay_id_order != 1 THEN 1
    ELSE 0 END
    AS exclusion_first_stay
  , CASE
        WHEN co.icd9_code LIKE '433%' OR co.icd9_code LIKE '434%' OR co.icd9_code LIKE '436%'  THEN 0
    ELSE 1 END
    AS exclusion_discharge_diagnosis

  , CASE
        WHEN co.admission_type = 'EMERGENCY' OR co.admission_type = 'URGENT' THEN 0
        ELSE 1 END
    AS exclusion_non_urgent

    , CASE
        WHEN LOWER(co.diagnosis) LIKE '%stroke%' OR LOWER(co.diagnosis) LIKE '%cva%'
        OR co.diagnosis IN """ + stroke_admission_diagnosis_equivalents_string + """ THEN 0
        ELSE 1 END
    AS exclusion_admission_diagnosis

FROM co

"""
df = pd.read_sql_query(query, con)
df

## Summarise exclusion criteria

In [None]:
# Row-level overview: for every exclusion_* flag, report how many rows of df
# carry the flag, then how many rows carry at least one flag.
n_rows = df.shape[0]
count_line = '{:20s} {:5d} ({:2.2f}%)'

print('{:20s} {:5d}'.format('Observations', n_rows))

# idxExcl accumulates, per row, whether any exclusion flag is set
idxExcl = np.zeros(n_rows, dtype=bool)
for col in df.columns:
    if "exclusion_" not in col:
        continue
    n_flagged = df[col].sum()
    print(count_line.format(col, n_flagged, n_flagged*100.0/n_rows))
    idxExcl = idxExcl | (df[col] == 1)

# print a summary of how many were excluded in total
n_excluded = np.sum(idxExcl)
print('')
print(count_line.format('Total excluded', n_excluded, n_excluded*100.0/n_rows))
print(count_line.format('Total remaining', n_rows - n_excluded, 100 - n_excluded*100.0/n_rows))

## Exclusion by patient records

In [None]:
# Admission-level overview (one record per hadm_id).
# An admission counts as excluded by a criterion when none of its rows passes
# that criterion (flag == 0). The totals use the same unit: an admission
# remains only if at least one of its rows passes every criterion, which is
# the condition the final selection query applies in its WHERE clause.
all_hadm_ids = set(df.hadm_id.unique())
n_admissions = df.hadm_id.nunique()
print('{:20s} {:5d}'.format('Number of Patient records', n_admissions))

# idxExcl stays a row-level mask: True where any exclusion flag is set
idxExcl = np.zeros(df.shape[0],dtype=bool)
for col in df.columns:
    if "exclusion_" in col:
        passing_hadm_ids = set(df[df[col] == 0].hadm_id.unique())
        n_excluded = len(all_hadm_ids - passing_hadm_ids)
        print('{:20s} {:5d} ({:2.2f}%)'.format(col, n_excluded, n_excluded*100.0/n_admissions))
        idxExcl = (idxExcl) | (df[col]==1)

# print a summary of how many admissions were excluded in total
n_remaining = df.loc[~idxExcl, 'hadm_id'].nunique()
n_total_excluded = n_admissions - n_remaining
print('')
print('{:20s} {:5d} ({:2.2f}%)'.format('Total excluded', n_total_excluded, n_total_excluded*100.0/n_admissions))
print('{:20s} {:5d} ({:2.2f}%)'.format('Total remaining', n_remaining, n_remaining*100.0/n_admissions))

In [None]:
# Count admissions that have no row passing all three stroke-admission
# criteria (discharge diagnosis, admission diagnosis, urgent/emergency).
passes_stroke_criteria = (
    (df["exclusion_discharge_diagnosis"] == 0)
    & (df["exclusion_admission_diagnosis"] == 0)
    & (df["exclusion_non_urgent"] == 0)
)
stroke_hadm_ids = set(df[passes_stroke_criteria].hadm_id.unique())
non_stroke_hadm_ids = set(df.hadm_id.unique()) - stroke_hadm_ids
print(f'Not admitted for acute ischemic stroke: {len(non_stroke_hadm_ids)}')

# Save final selection query

In [None]:
# Reusable SQL fragment defining the CTE `selection`, which carries every
# exclusion flag (1 = exclude). It deliberately ends after the CTE: the caller
# appends its own SELECT ... FROM selection. The schema prefix is not included.
final_selection_query = f"""
WITH selection AS
(
WITH co AS
(
SELECT icu.subject_id, icu.hadm_id, icu.icustay_id, dx.icd9_code
, EXTRACT(EPOCH FROM outtime - intime)/60.0/60.0 as icu_length_of_stay_h
, EXTRACT('epoch' from icu.intime - pat.dob) / 60.0 / 60.0 / 24.0 / 365.242 as age
, RANK() OVER (PARTITION BY icu.subject_id ORDER BY icu.intime) AS icustay_id_order
, admissions.diagnosis, admissions.admission_type

FROM icustays icu
INNER JOIN patients pat
  ON icu.subject_id = pat.subject_id
INNER JOIN diagnoses_icd dx
  ON icu.hadm_id = dx.hadm_id
INNER JOIN admissions admissions
  ON icu.hadm_id = admissions.hadm_id
)

SELECT
  co.subject_id, co.hadm_id, co.icustay_id, co.icu_length_of_stay_h
  , co.age
  , co.icustay_id_order
  , co.icd9_code
  , co.admission_type
  , co.diagnosis as admission_dx
  , CASE
    WHEN co.icu_length_of_stay_h < 12 then 1
    ELSE 0 END
    AS exclusion_los
  , CASE
        WHEN co.age < 18 then 1
    ELSE 0 END
    AS exclusion_age
  , CASE
        WHEN co.icustay_id_order != 1 THEN 1
    ELSE 0 END
    AS exclusion_first_stay
  , CASE
        WHEN co.icd9_code LIKE '433%' OR co.icd9_code LIKE '434%' OR co.icd9_code LIKE '436%'  THEN 0
    ELSE 1 END
    AS exclusion_discharge_diagnosis

  , CASE
        WHEN co.admission_type = 'EMERGENCY' OR co.admission_type = 'URGENT' THEN 0
        ELSE 1 END
    AS exclusion_non_urgent

    , CASE
        WHEN LOWER(co.diagnosis) LIKE '%stroke%' OR LOWER(co.diagnosis) LIKE '%cva%'
        OR co.diagnosis IN {stroke_admission_diagnosis_equivalents_string} THEN 0
        ELSE 1 END
    AS exclusion_admission_diagnosis

FROM co
)
"""

In [None]:
# Switch controlling whether the final selection query is written to disk
# by the following cell; left off by default.
save = False

In [None]:
# Write the selection CTE to a .sql file, but only when `save` is enabled.
if save:
    save_path = 'patient_selection_query.sql'
    # Explicit encoding keeps the file identical regardless of the platform's
    # default locale encoding.
    with open(save_path, "w", encoding="utf-8") as text_file:
        text_file.write(final_selection_query)

## How to apply selection query

In [None]:
# Usage example for the saved CTE: append a SELECT that keeps only the rows
# for which every exclusion flag is 0.
cohort_filter = """
SELECT selection.subject_id, selection.hadm_id, selection.icustay_id  , selection.age
  , selection.icustay_id_order
  , selection.icd9_code

FROM selection

WHERE selection.exclusion_discharge_diagnosis = 0
    AND selection.exclusion_first_stay = 0
    AND selection.exclusion_age = 0
    AND selection.exclusion_los = 0
    AND selection.exclusion_non_urgent = 0
    AND selection.exclusion_admission_diagnosis = 0

"""
query = ''.join((query_schema, final_selection_query, cohort_filter))
final_df = pd.read_sql_query(query, con)
final_df

In [None]:
# Number of distinct ICU stays in the final cohort (final_df may hold several
# rows for the same stay, one per selected diagnosis code)
final_df['icustay_id'].nunique()

In [None]:
# Close the database connection opened at the top of the notebook;
# any cell that queries through `con` must be run before this one.
con.close()

<!-- TODO: steal from hst-953 course -->