<a href="https://colab.research.google.com/github/Anthony-Tafoya/Form_Trainer/blob/main/eMERGE_MIMICIV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MIMICIV eMERGE Diabetes Dataset Generator

Goal: The goal of this colab is to filter for patients with type-2 diabetes

**MIMICIV is accessed through Google BigQuery so in order to test this on your own, you need to replace the project_id in the client method and authenticate with your own Google account**

Method: eMERGE is a reliable and interpretable rule-based algorithm for the identification of T2D cases and controls in EHRs. The logic was based on the flow chart and paper below (please refer to the flow chart in the paper to get a better idea of the algorithm)

Overall, 11422 patients were filtered compare dot the 12375 patients in the paper and I welcome suggestions on how to improve the pipeline!

Afterward, I parsed the MIMICIV for patients eligible for my diabetes dataset and use GloVe 6b for the doctor's embeddings

https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10283086/pdf/2281.pdf

In [None]:
# Install the BigQuery client library
!pip install google-cloud-bigquery
!pip install pandas_gbq



In [None]:
# Importing all the necessayr packages
import os
import numpy as np
import pandas_gbq
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.path as path
from google.colab import auth
from google.cloud import bigquery
from IPython.display import display, HTML

In [None]:
def filter_diagnosis(client):
    # Define the SQL queries as a Python string
    na_sql =  """
        SELECT * FROM `physionet-data.mimiciv_hosp.diagnoses_icd`
        WHERE
            NOT REGEXP_CONTAINS(icd_code, r'250[0-9]*[02]$') AND -- Excludes for ICD Code Diabetes Type II
            NOT REGEXP_CONTAINS(icd_code, r'250[0-9]*[13]$')     -- Excludes for ICD Code Diabetes Type I
        """

    t2_sql = """
        SELECT * FROM `physionet-data.mimiciv_hosp.diagnoses_icd`
        WHERE
            REGEXP_CONTAINS(icd_code, r'250[0-9]*[02]$') AND -- Includes for ICD Code Diabetes Type II
            NOT REGEXP_CONTAINS(icd_code, r'250[0-9]*[13]$') AND -- Excludes for ICD Code Diabetes Type I
            icd_code NOT IN ('25010', '25012') -- Excluding 250.10 and 250.12
        """

    # Execute the query for no diabetes diagnosis
    na_query_job = client.query(na_sql)
    na_results_df = na_query_job.to_dataframe()

    # Execute the query for t2 diagnosis
    t2_query_job = client.query(t2_sql)
    t2_results_df = t2_query_job.to_dataframe()

    na_ids = set(na_results_df['subject_id'])
    t2_ids = set(t2_results_df['subject_id'])

    return na_ids, t2_ids

In [None]:
def filter_t1_prescriptions(client, ids):
    # T1DM medications
    t1_medications = ['insulin', 'pramlintide']

    # Lowercase and format the list for SQL query
    formatted_meds_list = "', '".join([med.lower() for med in t1_medications])

    # Construct the SQL query
    t1_query = f"""
    SELECT DISTINCT subject_id
    FROM `physionet-data.mimiciv_hosp.prescriptions`
    WHERE LOWER(drug) IN ('{formatted_meds_list}')
    """
    t1_query_job = client.query(t1_query)
    t1_results_df = t1_query_job.to_dataframe()

    # Convert result to intersecting set for efficient processing
    satisfied_ids = set(t1_results_df['subject_id']) & ids

    # Set operation in Python to find not satisfied subject_ids
    not_satisfied_ids = ids - satisfied_ids

    return not_satisfied_ids, satisfied_ids


In [None]:
def filter_t2_prescriptions(client, ids):
    # T2DM medications
    t2_medications = [
        "acetohexamide", "tolazamide", "chlorpropamide", "glipizide", "glyburide",
        "glimepiride", "repaglinide", "nateglinide", "metformin", "rosiglitazone",
        "pioglitazone", "troglitazone", "acarbose", "miglitol", "sitagliptin",
        "exenatide", "alogliptin", "saxagliptin", "linagliptin", "ertugliflozin",
        "dapagliflozin", "empagliflozin", "canagliflozin", "dulaglutide", "semaglutide",
        "liraglutide", "lixisenatide", "colesevelam", "bromocriptine"
    ]

    # Lowercase and format the list for SQL query
    formatted_meds_list = "', '".join([med.lower() for med in t2_medications])

    # Construct the SQL query
    t2_query = f"""
    SELECT DISTINCT subject_id
    FROM `physionet-data.mimiciv_hosp.prescriptions`
    WHERE LOWER(drug) IN ('{formatted_meds_list}')
    """
    t2_query_job = client.query(t2_query)
    t2_results_df = t2_query_job.to_dataframe()

    # Convert result to intersecting set for efficient processing
    satisfied_ids = set(t2_results_df['subject_id']) & ids

    # Set operation in Python to find not satisfied subject_ids
    not_satisfied_ids = ids - satisfied_ids

    return not_satisfied_ids, satisfied_ids


In [None]:
def connect_to_MIMIC():
    # Construct a BigQuery client object
    auth.authenticate_user()
    print('Authenticated')
    project_id = 'agile-kite-406408'
    client = bigquery.Client(project=project_id)
    return client

In [None]:
# Make BigQuery connection to MIMIC
client = connect_to_MIMIC()

Authenticated


In [None]:
# Filter by diagnosis
na_diagnosed, t2_diagnosed = filter_diagnosis(client)

In [None]:
# Checking the length for debugging
print(len(na_diagnosed),len(t2_diagnosed))

180618 23092


In [None]:
# Filter by mediciation
"""
These methods are structrued by (not included, included)
At the end of these calls, the following will be relevant

ud_abn_lab - Undiagnosed patients that need abnormal lab analysis
d_abn_lab - Diagnosed patients that need abnormal lab analysis
t1_medicine - Diagnosed patients who have a t1 medicine
t1t2_medicine - Diagnosed patients who have both t1 and t2 medicines
t2_medicine - Diagnosed patients who have a t2 medicine
"""
_, ud_abn_lab = filter_t2_prescriptions(client, na_diagnosed)
na_medicine, t1_medicine = filter_t1_prescriptions(client, t2_diagnosed)
d_abn_lab, t2_medicine = filter_t2_prescriptions(client, na_medicine)
t1_medicine, t1t2_medicine = filter_t2_prescriptions(client, t1_medicine)

KeyboardInterrupt: 

In [None]:
# Checking the length for debugging
print(len(na_medicine), len(t1_medicine), len(t2_medicine), len(t1t2_medicine), len(ud_abn_lab), len(d_abn_lab))

In [None]:
def filter_medicine_by_date(client, ids):
    # T1DM medications
    t1_medications = ['insulin', 'pramlintide']

    # T2DM medications
    t2_medications = [
        "acetohexamide", "tolazamide", "chlorpropamide", "glipizide", "glyburide",
        "glimepiride", "repaglinide", "nateglinide", "metformin", "rosiglitazone",
        "pioglitazone", "troglitazone", "acarbose", "miglitol", "sitagliptin",
        "exenatide", "alogliptin", "saxagliptin", "linagliptin", "ertugliflozin",
        "dapagliflozin", "empagliflozin", "canagliflozin", "dulaglutide", "semaglutide",
        "liraglutide", "lixisenatide", "colesevelam", "bromocriptine"
    ]

    # Lowercase and format the list for SQL query
    formatted_t1_meds_list = "', '".join([med.lower() for med in t1_medications])
    formatted_t2_meds_list = "', '".join([med.lower() for med in t2_medications])

    # Construct the SQL query that compares earliest dates
    query = f"""
    SELECT DISTINCT t2.subject_id
    FROM (
        SELECT subject_id, MIN(starttime) as earliest_t2_time
        FROM `physionet-data.mimiciv_hosp.prescriptions`
        WHERE LOWER(drug) IN ('{formatted_t2_meds_list}')
        GROUP BY subject_id
    ) t2
    INNER JOIN (
        SELECT subject_id, MIN(starttime) as earliest_t1_time
        FROM `physionet-data.mimiciv_hosp.prescriptions`
        WHERE LOWER(drug) IN ('{formatted_t1_meds_list}')
        GROUP BY subject_id
    ) t1 ON t2.subject_id = t1.subject_id
    WHERE t2.earliest_t2_time < t1.earliest_t1_time;
    """

    query_job = client.query(query)
    results_df = query_job.to_dataframe()

    # Convert result to intersecting set for efficient processing
    satisfied_ids = set(results_df['subject_id']) & ids

    # Set operation in Python to find not satisfied subject_ids
    not_satisfied_ids = ids - satisfied_ids

    return not_satisfied_ids, satisfied_ids

In [None]:
# Filter by date
"""
Given the ids of a patient with a t2 diagnosis, t1 medicine prescription,
and t2 medicine prescription, we only want to keep the ones where the t2
was prescribed at an earlier start time than t1
"""
test, t2_b_t1_medicine = filter_medicine_by_date(client, t1t2_medicine)

In [None]:
# Checking the length for debugging
print(len(t2_b_t1_medicine))

924


In [None]:
def filter_t2_count_diagnoses(client, ids):
    # SQL query to select subject_ids with 2 or more T2DM diagnoses based on ICD codes
    query = """
    SELECT subject_id, COUNT(*) as num_t2dm_diagnoses
    FROM `physionet-data.mimiciv_hosp.diagnoses_icd`
    WHERE
      REGEXP_CONTAINS(icd_code, r'250[0-9]*[02]$') -- Includes ICD codes for T2DM
      AND icd_code NOT IN ('25010', '25012') -- Excludes specific ICD codes for T2DM
    GROUP BY subject_id
    HAVING num_t2dm_diagnoses >= 2
    """

    # Execute the query
    query_job = client.query(query)
    results_df = query_job.to_dataframe()

    # Convert result to intersecting set for efficient processing
    satisfied_ids = set(results_df['subject_id']) & ids

    # Set operation in Python to find not satisfied subject_ids
    not_satisfied_ids = ids - satisfied_ids

    return not_satisfied_ids, satisfied_ids

In [None]:
# Keep diagnosed patients who have no t2 medicine if they have >= 2 diagnosis
_, twod_t1_medicine = filter_t2_count_diagnoses(client, t1_medicine)

In [None]:
# Checking the length for debugging
print(len(twod_t1_medicine))

7983


In [None]:
def filter_abnormal_chartevents_values(client, ids):
    # SQL query to select subject_ids with abnormal DM-related chart values and filter out outliers
    query = """
    WITH max_glucose_per_patient AS (
        SELECT subject_id, MAX(valuenum) as max_glucose -- Groups glucose values to take max
        FROM `physionet-data.mimiciv_icu.chartevents`
        WHERE itemid IN (220621, 225664, 226537) -- These item IDs are assumed to be fasting glucose measurements
        GROUP BY subject_id
    ),
    abnormal_glucose AS (
        SELECT subject_id
        FROM max_glucose_per_patient
        WHERE max_glucose >= 125 -- Selects patients with a max fasting glucose ≥ 125 mg/dl
        AND max_glucose < 1000 -- Selects patients with a max fasting glucose that are not outliers ≥ 1000 mg/dl
    )
    SELECT DISTINCT subject_id
    FROM abnormal_glucose
    """

    # Execute the query
    query_job = client.query(query)
    results_df = query_job.to_dataframe()

    # Convert result to a set for efficient processing
    abnormal_ids = set(results_df['subject_id'])

    # Set operation in Python to find satisfied and not satisfied subject_ids
    satisfied_ids = abnormal_ids & ids
    not_satisfied_ids = ids - satisfied_ids

    return not_satisfied_ids, satisfied_ids

def filter_abnormal_labevents_values(client, ids):
    # SQL query to select subject_ids with abnormal DM-related lab values
    query = """
    WITH max_glucose_per_patient AS (
        SELECT subject_id, MAX(valuenum) as max_glucose -- Groups glucose values to take max
        FROM `physionet-data.mimiciv_hosp.labevents`
        WHERE itemid IN (50809, 50931, 52569) -- These item IDs are assumed to be fasting glucose measurements
        GROUP BY subject_id
    ),
    abnormal_glucose AS (
        SELECT subject_id
        FROM max_glucose_per_patient
        WHERE max_glucose >= 125 AND max_glucose < 1000 -- Selects patients with a max fasting glucose ≥ 125 mg/dl and below 1000 mg/dl
    ),
    abnormal_hba1c AS (
        SELECT subject_id
        FROM `physionet-data.mimiciv_hosp.labevents`
        WHERE itemid = 50852 AND valuenum >= 6.5  -- Selects patients with a Hba1c <  6.5%
    ),
    combined_abnormal AS (
        SELECT subject_id FROM abnormal_glucose
        UNION DISTINCT
        SELECT subject_id FROM abnormal_hba1c
    )
    SELECT subject_id
    FROM combined_abnormal;
    """

    # Execute the query
    query_job = client.query(query)
    results_df = query_job.to_dataframe()

    # Convert result to a set for efficient processing
    abnormal_ids = set(results_df['subject_id'])

    # Set operation in Python to find satisfied and not satisfied subject_ids
    satisfied_ids = abnormal_ids & ids
    not_satisfied_ids = ids - satisfied_ids

    return not_satisfied_ids, satisfied_ids

In [None]:
"""For patients who do have T2 diagnosis or T2 medicine, keep
   if they have an abnormal lab test which could mean one of
   the two conditions:

   1) Patient has a max fasting glucose ≥ 125 mg/dl and < 1000 mg/dl
   2) Patient has a % Hemoglobin A1c ≥ 6.5%"""

# Filtering the abnormal labs for undiagnosed patients and deleting duplicates with sets
print(len(ud_abn_lab))
_, ud_abn_lab_chart = filter_abnormal_chartevents_values(client, ud_abn_lab)
_, ud_abn_lab_lab = filter_abnormal_labevents_values(client, ud_abn_lab)
ud_abn_lab = ud_abn_lab_chart & ud_abn_lab_lab

# Filtering the abnormal labs for diagnosed patients and deleting duplicates with sets
print(len(d_abn_lab))
_, d_abn_lab_chart = filter_abnormal_chartevents_values(client, d_abn_lab)
_, d_abn_lab_lab = filter_abnormal_labevents_values(client, d_abn_lab)
d_abn_lab = d_abn_lab_chart & d_abn_lab_lab

5197
2790


In [None]:
# Checking the length for debugging
print(len(ud_abn_lab), len(d_abn_lab))

2498 300


In [None]:
# Combining patients with abnormal tests
t2_patient_ids = ud_abn_lab.union(d_abn_lab)

# Adding patients with t2 medicine before t1
t2_patient_ids = t2_patient_ids.union(t2_b_t1_medicine)

# Adding patients with t1 medicine but two diagnosis
t2_patient_ids = t2_patient_ids.union(twod_t1_medicine)

# Adding patients with t2 medicine and t2 diagnosis
t2_patient_ids = t2_patient_ids.union(t2_medicine)

In [None]:
# Checking the length for debugging
print(len(t2_patient_ids))
t2_diabetes_ids = t2_patient_ids

11422


In [None]:
"""This set of code constructs the diabetes dataset"""
def extracting_notes(client, ids):
  # SQL Query to extract the id, gender, and age
  query = f"""
    SELECT DISTINCT subject_id,
       FIRST_VALUE(text) OVER (PARTITION BY subject_id ORDER BY charttime) AS first_note
       FROM `physionet-data.mimiciv_note.discharge`
    """

  # Execute the query
  query_job = client.query(query)
  results_df = query_job.to_dataframe()

  # Fitlering for patients with diabetes
  filtered_df = results_df[results_df['subject_id'].isin(ids)]

  return filtered_df

def extracting_treatments(client, ids):
  # SQL Query to extract T2DM treatment
    t2_medications = [
        "acetohexamide", "tolazamide", "chlorpropamide", "glipizide", "glyburide",
        "glimepiride", "repaglinide", "nateglinide", "metformin", "rosiglitazone",
        "pioglitazone", "troglitazone", "acarbose", "miglitol", "sitagliptin",
        "exenatide", "alogliptin", "saxagliptin", "linagliptin", "ertugliflozin",
        "dapagliflozin", "empagliflozin", "canagliflozin", "dulaglutide", "semaglutide",
        "liraglutide", "lixisenatide", "colesevelam", "bromocriptine"
    ]

    # Lowercase and format the list for SQL query
    formatted_t2_meds_list = "', '".join([med.lower() for med in t2_medications])

    # Construct the SQL query that retrieves prescriptions
    query = f"""
        SELECT subject_id, STRING_AGG(DISTINCT LOWER(drug), ', ') AS prescriptions
          FROM `physionet-data.mimiciv_hosp.prescriptions`
          WHERE LOWER(drug) IN ('{formatted_t2_meds_list}')
          GROUP BY subject_id
        """

    # Execute the query
    query_job = client.query(query)
    results_df = query_job.to_dataframe()

    # Filter the query
    filtered_df = results_df[results_df['subject_id'].isin(ids)]

    return filtered_df

def extracting_hba1c(client, ids):
    # SQL Query to fetch all HbA1c values for each subject.
    query = """
    SELECT subject_id, valuenum, charttime
    FROM `physionet-data.mimiciv_hosp.labevents`
    WHERE itemid = 50852
    ORDER BY subject_id, charttime DESC
    """

    # Execute the query
    query_job = client.query(query)
    results_df = query_job.to_dataframe()

    # Dataframe preprocessing
    results_df['charttime'] = pd.to_datetime(results_df['charttime'])
    measurement_counts = results_df.groupby('subject_id').size()

    # Get the most recent value for each subject
    most_recent_values = results_df.groupby('subject_id').first().reset_index()

    # Calculate percentiles for each subject, excluding the most recent value
    percentiles = results_df.groupby('subject_id')['valuenum'].apply(
        lambda x: x.iloc[1:].quantile([0.25, 0.5, 0.75]) if len(x) > 1 else pd.Series(['NaN'] * 3, index=[0.25, 0.5, 0.75])
    ).unstack()

    # Combine the most recent values with the percentiles and cleanup dataset
    final_df = most_recent_values.merge(percentiles, on='subject_id')
    filtered_df = final_df[final_df['subject_id'].isin(ids)]
    filtered_df = filtered_df.rename(columns={'valuenum': 'recent_hba1c'})
    filtered_df.drop('charttime', axis=1, inplace=True)

    return filtered_df

def extracting_age_gender(client, ids):
  # SQL Query to extract the id, gender, and age
  query = f"""
    SELECT subject_id, gender, anchor_age
    FROM `physionet-data.mimiciv_hosp.patients`
    """

  # Execute the query
  query_job = client.query(query)
  results_df = query_job.to_dataframe()

  # Fitlering for patients with diabetes
  filtered_df = results_df[results_df['subject_id'].isin(ids)]

  return filtered_df

def extracting_race(client, ids):
  # SQL query to extract patient race
  query = f"""
      SELECT DISTINCT subject_id,
      FIRST_VALUE(race) OVER (PARTITION BY subject_id ORDER BY admittime) AS first_race
      FROM `physionet-data.mimiciv_hosp.admissions`;
    """

  # Execute the query
  query_job = client.query(query)
  results_df = query_job.to_dataframe()

  # Filtering for patients with diabetes
  filtered_df = results_df[results_df['subject_id'].isin(ids)]

  return filtered_df

In [None]:
# Extract patient demographics
age_gender = extracting_age_gender(client, t2_diabetes_ids)
race = extracting_race(client, t2_diabetes_ids)

# Merge the DataFrames based on subject_ds
age_gender_race = age_gender.merge(race, on='subject_id')

In [None]:
# Extract patient hemoglobin values
hba1c = extracting_hba1c(client, t2_diabetes_ids)
age_gender_race_hba1c = age_gender_race.merge(hba1c, on='subject_id')

In [None]:
# Extracting treatment history
treatments = extracting_treatments(client, t2_diabetes_ids)

# Assuming age_gender_race_hba1c and treatments are already defined DataFrames
age_gender_race_hba1c_treatement = age_gender_race_hba1c.merge(treatments, on='subject_id', how='left')

# Replace NaN in the treatment-related columns with 'No Treatment'
age_gender_race_hba1c_treatement.fillna({'prescriptions': 'NaN'}, inplace=True)

In [None]:
# Extracting notes
notes = extracting_notes(client, t2_diabetes_ids)

# Assuming age_gender_race_hba1c_treatement and notes are already defined DataFrames
complete_dataset = age_gender_race_hba1c_treatement.merge(notes, on='subject_id', how='left')

In [None]:
# Importing GloVe to begin processing note embeddings
import os
import urllib.request
urllib.request.urlretrieve('https://nlp.stanford.edu/data/glove.6B.zip','glove.6B.zip')
!unzip "/content/glove.6B.zip" -d "/content/"

emmbed_dict = {}
with open('/content/glove.6B.200d.txt','r') as f:
  for line in f:
    values = line.split()
    word = values[0]
    vector = np.asarray(values[1:],'float32')
    emmbed_dict[word]=vector

Archive:  /content/glove.6B.zip
replace /content/glove.6B.50d.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
# Methods to produce embedding for doctor's notes
def preprocess_text(text):
    # Simple tokenization and cleaning
    tokens = text.lower().split()
    # Remove placeholders and punctuation
    tokens = [token.replace('_', '').replace(':', '').replace('.', '').replace(',', '') for token in tokens]
    return tokens

def get_text_vector(text, embedding_dict):
    # Preprocess and tokenize the text
    tokens = preprocess_text(text)
    # Retrieve vectors for each token, ignoring out-of-vocabulary words
    vectors = [embedding_dict[word] for word in tokens if word in embedding_dict]
    # Handle case where no words had vectors (return zero vector)
    if not vectors:
        return np.zeros(next(iter(embedding_dict.values())).shape)
    # Aggregate vectors (mean pooling in this example)
    text_vector = np.mean(vectors, axis=0)
    return text_vector

def note_to_embedding(note):
    # If the note is NaN (missing), return a zero vector
    if pd.isna(note):
        return np.zeros(200)
    # Otherwise, get the text vector
    return get_text_vector(note, emmbed_dict)

In [None]:
# Apply the function to each note and store the result in a new column
complete_dataset['note_embedding'] = complete_dataset['first_note'].apply(note_to_embedding)

In [None]:
# Convert all column values and names to strings
complete_dataset = complete_dataset.astype(str)
complete_dataset.rename(columns={.25: 'percentile_25', .5: 'percentile_50', .75: 'percentile_75'}, inplace=True)

# Upload the dataset to Google BigQuery table
complete_dataset.to_gbq(destination_table='mimic_diabetes.patient_data',
          project_id='agile-kite-406408',
          if_exists='replace')

ArrowTypeError: Expected a string or bytes dtype, got int64

In [None]:
# These are some util methods for testing IGNORE FOR MAIN FUNCTION
def get_column_names(client, table_name):
    query = f"""
    SELECT column_name
    FROM `physionet-data.mimiciv_hosp.INFORMATION_SCHEMA.COLUMNS`
    WHERE table_name = '{table_name}'
    """
    query_job = client.query(query)
    results = query_job.result()

    # Extract the column names
    columns = [row.column_name for row in results]
    return columns

column_names = get_column_names(client, "labevents")
print(column_names)

def get_example_rows(client, table_name):
    query = f"""
    SELECT *
    FROM `physionet-data.mimiciv_hosp.{table_name}`
    LIMIT 5
    """
    query_job = client.query(query)
    results = query_job.result()

    # Iterate over the rows and print them
    for row in results:
        print(row)

# Now call the function
get_example_rows(client, "labevents")

['labevent_id', 'subject_id', 'hadm_id', 'specimen_id', 'itemid', 'order_provider_id', 'charttime', 'storetime', 'value', 'valuenum', 'valueuom', 'ref_range_lower', 'ref_range_upper', 'flag', 'priority', 'comments']
Row((1058, 10000117, None, 25331308, 50965, 'P51F7B', datetime.datetime(2175, 1, 27, 16, 15), datetime.datetime(2175, 1, 27, 21, 11), '59', 59.0, 'pg/mL', 15.0, 65.0, None, 'ROUTINE', None), {'labevent_id': 0, 'subject_id': 1, 'hadm_id': 2, 'specimen_id': 3, 'itemid': 4, 'order_provider_id': 5, 'charttime': 6, 'storetime': 7, 'value': 8, 'valuenum': 9, 'valueuom': 10, 'ref_range_lower': 11, 'ref_range_upper': 12, 'flag': 13, 'priority': 14, 'comments': 15})
Row((4784, 10000935, None, 69202241, 51010, 'P61VDC', datetime.datetime(2187, 2, 26, 8, 45), datetime.datetime(2187, 2, 26, 12, 41), '680', 680.0, 'pg/mL', 240.0, 900.0, None, 'ROUTINE', None), {'labevent_id': 0, 'subject_id': 1, 'hadm_id': 2, 'specimen_id': 3, 'itemid': 4, 'order_provider_id': 5, 'charttime': 6, 'storet