In [None]:
# Import necessary libraries
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.path as path
import seaborn as sns
from IPython.display import display, HTML
from google.colab import auth
from google.cloud import bigquery

In [None]:
# Run the Colab user-authentication flow (google.colab.auth).
# Presumably this supplies the credentials used by the BigQuery client
# created later in the notebook -- confirm if running outside Colab.
auth.authenticate_user()

In [None]:
# Google Cloud project that the BigQuery client is created for.
# Replace the placeholder with a real project id before running.
project_id = 'INSERT PROJECT ID'

# Single BigQuery client reused by every query in this notebook.
client = bigquery.Client(project=project_id)

# Find hospital admissions for mechanically ventilated patients who stayed in the MICU

In [None]:
# Join the ventilation classification rows to their ICU stays and keep only
# rows flagged MechVent = 1 whose first AND last care unit match MICU.
vented_job = client.query('''
SELECT *
FROM `physionet-data.mimiciii_derived.ventilation_classification` v
JOIN `physionet-data.mimiciii_clinical.icustays` icu
  ON v.icustay_id = icu.ICUSTAY_ID
WHERE
v.MechVent = 1
AND
icu.FIRST_CAREUNIT LIKE '%MICU%'
AND
icu.LAST_CAREUNIT LIKE '%MICU%'
''')

# Materialise the query result as a pandas DataFrame.
vented = vented_job.to_dataframe()

vented.shape

In [None]:
# Keep a single row (the first one encountered) per hospital admission.
vented_unique = vented.drop_duplicates(subset='HADM_ID', keep='first')
vented_unique.shape

# Find hospital admissions for patients who had a diagnosis of pneumonia

In [None]:
# Pull every diagnosis row whose ICD-9 long title mentions pneumonia.
#
# BigQuery's LIKE operator is case-sensitive, so the title is lower-cased
# before matching. Without LOWER(), titles in which the word only appears
# capitalised (e.g. titles that begin with "Pneumonia ...") are silently
# left out of the result.
pneumonia = client.query('''
SELECT *
FROM `physionet-data.mimiciii_clinical.diagnoses_icd` dx
JOIN `physionet-data.mimiciii_clinical.d_icd_diagnoses` icd
  ON dx.ICD9_CODE = icd.ICD9_CODE
WHERE
LOWER(icd.LONG_TITLE) LIKE '%pneumonia%'
''').to_dataframe()

pneumonia.shape

In [None]:
# Collapse the diagnosis rows to one row (the first encountered) per hospital admission.
pneumonia_unique = pneumonia.drop_duplicates(subset='HADM_ID', keep='first')
pneumonia_unique.shape

In [None]:
# ICD-9 long titles that match the pneumonia text search but should not count
# towards the pneumonia cohort; rows carrying these titles are excluded.
names_to_remove = [
    'Other specified vaccinations against streptococcus pneumoniae [pneumococcus]',
    'Need for prophylactic vaccination and inoculation against streptococcus pneumoniae [pneumococcus] and influenza',
    'Cryptogenic organizing pneumonia',
    'Congenital pneumonia',
    'Abscess of lung without pneumonia',
    'Congenital pneumonia, unspecified',
    'Congenital pneumonia due to staphylococcus',
    'Idiopathic interstitial pneumonia, not otherwise specified',
    'Congenital pneumonia due to other bacterial agents',
    'Desquamative interstitial pneumonia',
    'Congenital pneumonia due to Escherichia coli',
    'Infection by Histoplasma capsulatum, pneumonia',
    'Tuberculous pneumonia [any form], tubercle bacilli found (in sputum) by microscopy',
    'Whooping cough, unspecified species without pneumonia',
    'Congenital pneumonia due to Pseudomonas',
    'Congenital pneumonia due to viral agent',
    'Hypostatic pneumonia, unspecified organism',
    'Lymphoid interstitial pneumonia',
    'Congenital pneumonia due to streptococcus, group B',
    'Tuberculous pneumonia [any form], tubercle bacilli not found (in sputum) by microscopy, but found by bacterial culture',
    'Tuberculous pneumonia [any form], unspecified',
    'Idiopathic lymphoid interstitial pneumonia'
]

# Filter the FULL diagnosis table first, then collapse to one row per admission.
# Filtering the already de-duplicated frame would drop an admission whenever the
# single row kept for it happened to carry an excluded title, even if the same
# admission also had a qualifying pneumonia diagnosis on another row (and which
# row is kept depends on the row order returned by the query).
is_excluded_title = pneumonia['LONG_TITLE'].isin(names_to_remove)
pneumonia_clean = (
    pneumonia.loc[~is_excluded_title]
    .drop_duplicates(subset=['HADM_ID'])
)

In [None]:
# Display (rows, columns) of the pneumonia table after the excluded titles were removed.
pneumonia_clean.shape

# Merge the hospital admissions for the mechanically ventilated MICU patients and the pneumonia patients

In [None]:
# Inner join on HADM_ID: keep only admissions present in both the ventilated
# MICU table and the cleaned pneumonia table. indicator=True adds a `_merge`
# column recording the source of each row.
vent_pneumo_cohort = pd.merge(
    vented_unique,
    pneumonia_clean,
    on='HADM_ID',
    how='inner',
    indicator=True,
)
vent_pneumo_cohort.shape

In [None]:
# Single-column frame holding the cohort's hospital admission ids.
cohort_hadm_ids = vent_pneumo_cohort['HADM_ID'].to_frame()

# Find the admission notes for the hospital admissions represented in our cohort

In [None]:
# Fetch every note whose DESCRIPTION contains the text "Admission".
admission_notes_job = client.query('''
SELECT * FROM `physionet-data.mimiciii_notes.noteevents` n
WHERE n.DESCRIPTION like "%Admission%"
''')

# Materialise the query result as a pandas DataFrame.
all_admission_notes = admission_notes_job.to_dataframe()

In [None]:
# Display (rows, columns) of the admission-notes table.
all_admission_notes.shape

In [None]:
# Inner join on HADM_ID: keep only the admission notes that belong to cohort admissions.
# indicator=True adds a `_merge` column recording the source of each row.
cohort_admission_notes = pd.merge(
    cohort_hadm_ids,
    all_admission_notes,
    on='HADM_ID',
    how='inner',
    indicator=True,
)

In [None]:
# Note DESCRIPTION values to exclude from the cohort's admission notes.
note_types_to_remove = [
    'Social Work Admission Note',
    'Physician Surgical Admission Note',
    'Physician Attending / Resident Admission Note - MI',
    'ICU Fellow Admission Note - MICU attending addendum',
    'Physician Fellow Admission Note - MICU attending addendum',
]

# Boolean mask marking the notes whose description is on the exclusion list;
# keep every row where the mask is False.
is_removed_note_type = cohort_admission_notes['DESCRIPTION'].isin(note_types_to_remove)
cohort_admission_notes_clean = cohort_admission_notes.loc[~is_removed_note_type]

In [None]:
# Display (rows, columns) of the cohort admission notes after the excluded note types were removed.
cohort_admission_notes_clean.shape

# Count the unique patient hospital admission events (HADM_ID) these notes represent (so we can take one note for every hospital admission event)

In [None]:
# Number of distinct hospital admissions that still have at least one admission note.
len(pd.unique(cohort_admission_notes_clean['HADM_ID']))

# Export notes corpus as needed