# Adult_ED_Cohort_no_allergy 

In [1]:
import pandas as pd
from google.cloud import bigquery;
from google.cloud.bigquery import dbapi;
%load_ext google.cloud.bigquery
from google.cloud import bigquery
# client=bigquery.Client()
client = bigquery.Client("som-nero-phi-jonc101")



In [2]:
def find_unique_orders(df, return_combinations=False,):
    """
    Find the number of unique combinations of specific columns in a DataFrame.

    Parameters:
        df (pd.DataFrame): The input DataFrame.
        return_combinations (bool): If True, return the unique combinations as a DataFrame.
                                   If False, return only the count of unique combinations.

    Returns:
        int or pd.DataFrame: The number of unique combinations or a DataFrame of unique combinations.
    """
    # Drop duplicates based on the specified columns
    subset = ['anon_id', 'pat_enc_csn_id_coded', 'order_proc_id_coded', 'order_time_jittered_utc']
    unique_combinations = df.drop_duplicates(subset=subset)
    
    # Count the number of unique combinations
    num_unique_combinations = len(unique_combinations)
    
    # Return the result based on the return_combinations flag
    if return_combinations:
        return unique_combinations
    else:
        return num_unique_combinations

In [3]:
# Define a function to find antibiotic matches in the medication name
def find_antibiotics(med_name, antibiotic_list):
    if pd.isna(med_name):
        return None
    matches = [j for j in antibiotic_list if j.lower() in med_name.lower()]
    return matches if matches else "No Match"


In [4]:
# clean_antibiotic.csv is downloaded from the following URL: https://docs.google.com/spreadsheets/d/1NUBXLbMZ4n3A0-b-waVQGTOmcFSi0rSp/edit?gid=1568991887#gid=1568991887 
clean_antibiotic = pd.read_csv("clean_antibiotic.csv") # the top 50 is the same as prior_antibiotics_list in bigquery
antibiotic_list = clean_antibiotic["antibiotic_name"]
clean_antibiotic

Unnamed: 0,antibiotic_name,prescription_count,class_name
0,Nitrofurantoin,253258,
1,Cephalexin,213739,
2,Piperacillin-Tazobactam-Dextrs,194573,
3,Sulfamethoxazole-Trimethoprim,169941,
4,Ciprofloxacin Hcl,166196,
...,...,...,...
84,Ceftazidime,643,
85,Zyvox,579,
86,Methenamine Mandelate,568,
87,Rifabutin,535,


<img src="Aim_4_Cohort_Flowchart_previous/Aim_4_Cohort_Flowchart_previous.001.png" alt="Previous Adult ED Flowchart" width="500">

<img src="Aim_4_Cohort_Flowchart/Aim_4_Cohort_Flowchart.001.png" alt="Adult ED Flowchart" width="500">
<img src="Aim_4_Cohort_Flowchart/Aim_4_Cohort_Flowchart.002.png" alt="Adult ED Flowchart" width="500">

# ---------------------------------------**Step 1** -------------------------------------------

### Refactor Fateme's code for urine culture only
Reference: https://github.com/HealthRex/CDSS/blob/master/scripts/antibiotic-susceptibility/sql/queries/microbiology_cultures_cohort_query.sql

In [7]:
%%bigquery --use_rest_api all_urine_cohort
-- This query is the main query for creating a table named microbiology_cultures_cohort that forms the basis of the microbiology cultures cohort. 
-- The table is generated through a series of steps, each designed to filter and enrich the dataset. 
-- Once the main cohort table is created, additional features will be added to this table to complete the dataset for analysis.


######################################################################################## 
-- Create or replace the cohort table named microbiology_urine_cultures_cohort
######################################################################################## 


-- Step 1: Extract microbiology cultures for specific types (URINE)
WITH microbiology_cultures AS (
    SELECT DISTINCT
        op.anon_id, 
        op.pat_enc_csn_id_coded, 
        op.order_proc_id_coded, 
        op.order_time_jittered_utc, 
        op.ordering_mode,
        lr.result_time_jittered_utc, # added the result time too
        CASE 
            WHEN op.description LIKE '%URINE%' THEN 'URINE'
            ELSE 'OTHER' 
        END AS culture_description  -- Capture the culture type
    FROM 
        `som-nero-phi-jonc101.shc_core_2023.order_proc` op
    INNER JOIN
        `som-nero-phi-jonc101.shc_core_2023.lab_result` lr
    ON
        op.order_proc_id_coded = lr.order_id_coded
    WHERE
        op.order_type LIKE "Microbiology%"
        AND (op.description LIKE "%URINE%")
) # Only keep urine culture
select * from microbiology_cultures

Query is running:   0%|          |

Downloading:   0%|          |

In [11]:
print("the unique order number for urine only culture is {}".format(find_unique_orders(all_urine_cohort)))

the unique order number for urine only culture is 544780


<img src="Aim_4_Cohort_Flowchart_stepwise/Aim_4_Cohort_Flowchart_stepwise.001.png" alt="Step 1" width="500">

# ---------------------------------------**Step 2** -------------------------------------------

In [14]:
%%bigquery --use_rest_api starting_cohort
-- This query is the main query for creating a table named microbiology_cultures_cohort that forms the basis of the microbiology cultures cohort. 
-- The table is generated through a series of steps, each designed to filter and enrich the dataset. 
-- Once the main cohort table is created, additional features will be added to this table to complete the dataset for analysis.


######################################################################################## 
-- Create or replace the cohort table named microbiology_urine_cultures_cohort
######################################################################################## 


-- Step 1: Extract microbiology cultures for specific types (URINE)
WITH microbiology_cultures AS (
    SELECT DISTINCT
        op.anon_id, 
        op.pat_enc_csn_id_coded, 
        op.order_proc_id_coded, 
        op.order_time_jittered_utc, 
        op.ordering_mode,
        lr.result_time_jittered_utc, # added the result time too
        CASE 
            WHEN op.description LIKE '%URINE%' THEN 'URINE'
            ELSE 'OTHER' 
        END AS culture_description  -- Capture the culture type
    FROM 
        `som-nero-phi-jonc101.shc_core_2023.order_proc` op
    INNER JOIN
        `som-nero-phi-jonc101.shc_core_2023.lab_result` lr
    ON
        op.order_proc_id_coded = lr.order_id_coded
    WHERE
        op.order_type LIKE "Microbiology%"
        AND (op.description LIKE "%URINE%")
), # Only keep urine culture


######################################################################################## 
-- Step 2: Filter for adult patients only
########################################################################################    

adult_microbiology_cultures AS (
    SELECT 
        mc.anon_id, 
        mc.pat_enc_csn_id_coded, 
        mc.order_proc_id_coded, 
        mc.order_time_jittered_utc, 
        mc.ordering_mode,
        mc.culture_description, -- Include culture_description here
        mc.result_time_jittered_utc

    FROM 
        microbiology_cultures mc
    INNER JOIN
        `som-nero-phi-jonc101.shc_core_2023.demographic` demo
    USING
        (anon_id)
    WHERE
        DATE_DIFF(CAST(mc.order_time_jittered_utc as DATE), demo.BIRTH_DATE_JITTERED, YEAR) >= 18
),

    
######################################################################################## 
-- Step 3: Identify culture orders within the prior two weeks
########################################################################################     
    
order_in_prior_two_weeks AS (
    SELECT DISTINCT
         auc.order_proc_id_coded
    FROM 
        `som-nero-phi-jonc101.shc_core_2023.order_proc` op
    INNER JOIN
        `som-nero-phi-jonc101.shc_core_2023.lab_result` lr
    ON
        op.order_proc_id_coded = lr.order_id_coded
    INNER JOIN
        adult_microbiology_cultures auc 
    ON
        op.anon_id = auc.anon_id
    WHERE
        op.order_type LIKE "Microbiology%"
        AND (op.description LIKE "%URINE%")
        AND auc.order_time_jittered_utc > op.order_time_jittered_utc
        AND TIMESTAMP_DIFF(auc.order_time_jittered_utc, op.order_time_jittered_utc, DAY) < 14
),

    
######################################################################################## 
-- Step 4: Exclude cultures with a prior culture order in the last two weeks
########################################################################################       

included_microbiology_cultures AS (
    SELECT DISTINCT
        amc.*
    FROM 
        adult_microbiology_cultures amc
    WHERE 
        amc.order_proc_id_coded NOT IN (SELECT order_proc_id_coded FROM order_in_prior_two_weeks)
),

###########################################################################################################
-- Step 5: Flag cultures as positive if they have corresponding entries in the culture_sensitivity table
###########################################################################################################    

all_cultures_with_flag AS (
    SELECT 
        imc.anon_id, 
        imc.pat_enc_csn_id_coded, 
        imc.order_proc_id_coded, 
        imc.order_time_jittered_utc, 
        imc.result_time_jittered_utc,
        imc.ordering_mode,
        imc.culture_description,
        IF(cs.order_proc_id_coded IS NOT NULL, 1, 0) AS was_positive
    FROM 
        included_microbiology_cultures imc
    LEFT JOIN 
        (SELECT DISTINCT order_proc_id_coded FROM `som-nero-phi-jonc101.shc_core_2023.culture_sensitivity`) cs
    ON 
        imc.order_proc_id_coded = cs.order_proc_id_coded
),


#########################################################################################################################
-- Step 6: Get detailed information for positive cultures, clean antibiotic names, and exclude non-antibiotic entries
#########################################################################################################################     
positive_culture_details AS (
    SELECT 
        cs.order_proc_id_coded,
        cs.organism,
        -- Clean and standardize the antibiotic name using the updated cleaning approach
        INITCAP(TRIM(
          REGEXP_REPLACE(
          REGEXP_REPLACE(
          REGEXP_REPLACE(
          REGEXP_REPLACE(
            REGEXP_REPLACE(
                REGEXP_REPLACE(
                    LOWER(cs.antibiotic),
                    'penicillin[^a-z].*$', 'penicillin'
                ),
                '^[^a-z]*|\\s+\\S*[^a-z\\s]+.*$|\\.+$', ''
            ),
              '\\s*\\d+(\\.\\d+)?\\s*(mg|mcg|gram|ml|%)', ''  -- Remove dosages or concentrations
          ),
          '\\(.*?\\)', ''  -- Remove text in parentheses
        ),' in.*$|tablet|capsule|intravenous|piggyback|' ||
                            'solution|suspension|oral|sodium|chloride|' ||
                            'injection|citrate|soln|dextrose|iv|' ||
                            'macrocrystals|macrocrystal|axetil|potassium|packet|' ||
                            'monohydrate|ethylsuccinate|powder|mandelate|' ||
                            'hyclate|hcl|hippurate|tromethamine|' ||
                            'million|unit|syrup|chewable|delayed|mphase|' ||
                            'release|benzathine|syringe|dispersible|' ||
                            'sulfate|procaine|blue|hyos|sod*phos|' ||
                            'susp|and|fosamil|extended|succinate|granules|' ||
                            'delay|pot|ext|rel|cyam|salicylate|salicyl|' ||
                            'sodphos|methylene|stearate|synergy', ''                   
        ),
        '\\d|\\sfor\\s*|/ml\\s*|\\sml\\s*|\\-+\\s*|\\,+\\s*',''
        )
        )
      )  AS antibiotic,
        cs.suscept AS susceptibility
    FROM 
        som-nero-phi-jonc101.shc_core_2023.culture_sensitivity cs
    INNER JOIN (
        -- Subquery to get antibiotic counts for those with more than 10 occurrences
        SELECT 
          INITCAP(TRIM(
          REGEXP_REPLACE(
          REGEXP_REPLACE(
          REGEXP_REPLACE(
          REGEXP_REPLACE(
            REGEXP_REPLACE(
                REGEXP_REPLACE(
                    LOWER(antibiotic),
                    'penicillin[^a-z].*$', 'penicillin'
                ),
                '^[^a-z]*|\\s+\\S*[^a-z\\s]+.*$|\\.+$', ''
            ),
              '\\s*\\d+(\\.\\d+)?\\s*(mg|mcg|gram|ml|%)', ''  -- Remove dosages or concentrations
          ),
          '\\(.*?\\)', ''  -- Remove text in parentheses
        ),' in.*$|tablet|capsule|intravenous|piggyback|' ||
                            'solution|suspension|oral|sodium|chloride|' ||
                            'injection|citrate|soln|dextrose|iv|' ||
                            'macrocrystals|macrocrystal|axetil|potassium|packet|' ||
                            'monohydrate|ethylsuccinate|powder|mandelate|' ||
                            'hyclate|hcl|hippurate|tromethamine|' ||
                            'million|unit|syrup|chewable|delayed|mphase|' ||
                            'release|benzathine|syringe|dispersible|' ||
                            'sulfate|procaine|blue|hyos|sod*phos|' ||
                            'susp|and|fosamil|extended|succinate|granules|' ||
                            'delay|pot|ext|rel|cyam|salicylate|salicyl|' ||
                            'sodphos|methylene|stearate|synergy', ''                   
        ),
        '\\d|\\sfor\\s*|/ml\\s*|\\sml\\s*|\\-+\\s*|\\,+\\s*',''
        )
        )
      )AS cleaned_antibiotic,
            COUNT(*) AS count
        FROM 
            som-nero-phi-jonc101.shc_core_2023.culture_sensitivity
        GROUP BY 
            cleaned_antibiotic
        HAVING 
            COUNT(*) >= 10  -- Include only antibiotics that appear 10 times or more
    ) AS antibiotic_counts 
    ON 


      INITCAP(TRIM(
          REGEXP_REPLACE(
          REGEXP_REPLACE(
          REGEXP_REPLACE(
          REGEXP_REPLACE(
            REGEXP_REPLACE(
                REGEXP_REPLACE(
                    LOWER(cs.antibiotic),
                    'penicillin[^a-z].*$', 'penicillin'
                ),
                '^[^a-z]*|\\s+\\S*[^a-z\\s]+.*$|\\.+$', ''
            ),
              '\\s*\\d+(\\.\\d+)?\\s*(mg|mcg|gram|ml|%)', ''  -- Remove dosages or concentrations
          ),
          '\\(.*?\\)', ''  -- Remove text in parentheses
        ),' in.*$|tablet|capsule|intravenous|piggyback|' ||
                            'solution|suspension|oral|sodium|chloride|' ||
                            'injection|citrate|soln|dextrose|iv|' ||
                            'macrocrystals|macrocrystal|axetil|potassium|packet|' ||
                            'monohydrate|ethylsuccinate|powder|mandelate|' ||
                            'hyclate|hcl|hippurate|tromethamine|' ||
                            'million|unit|syrup|chewable|delayed|mphase|' ||
                            'release|benzathine|syringe|dispersible|' ||
                            'sulfate|procaine|blue|hyos|sod*phos|' ||
                            'susp|and|fosamil|extended|succinate|granules|' ||
                            'delay|pot|ext|rel|cyam|salicylate|salicyl|' ||
                            'sodphos|methylene|stearate|synergy', ''                   
        ),
        '\\d|\\sfor\\s*|/ml\\s*|\\sml\\s*|\\-+\\s*|\\,+\\s*',''
        )
        )
      )= antibiotic_counts.cleaned_antibiotic
    WHERE
        NOT (
            cs.antibiotic LIKE '%InBasket%'  
            OR cs.antibiotic LIKE '%Beta Lactamase%'  
            OR cs.antibiotic LIKE '%BlaZ PCR%'  
            OR cs.antibiotic LIKE '%Carbapenemase%'  
            OR cs.antibiotic LIKE '%D-Test%'  
            OR cs.antibiotic LIKE '%Esbl%'  
            OR cs.antibiotic LIKE '%ermPCR%'  
            OR cs.antibiotic LIKE '%Mupirocin%'  
            OR cs.antibiotic LIKE '%IMP%'  
            OR cs.antibiotic LIKE '%Inducible Clindamycin%'  
            OR cs.antibiotic LIKE '%INTERNAL CONTROL%'  
            OR cs.antibiotic LIKE '%KPC%'  
            OR cs.antibiotic LIKE '%MecA PCR%'  
            OR cs.antibiotic LIKE '%NDM%'  
            OR cs.antibiotic LIKE '%Ox Plate Screen%'  
            OR cs.antibiotic LIKE '%OXA-48-LIKE%'  
            OR cs.antibiotic LIKE '%VIM%'  
            OR cs.antibiotic LIKE '%Method%'  
            OR cs.antibiotic LIKE '%INH%'   
            OR cs.antibiotic LIKE '%Polymyxin B%' 
            OR cs.antibiotic LIKE '%Nalidixic%'   
            OR cs.antibiotic LIKE '%Flucytosine%' 
            OR cs.antibiotic LIKE '%Rifampin%' 
            OR cs.antibiotic LIKE '%Ethambutol%' 
            OR cs.antibiotic LIKE '%Pyrazinamide%' 
            OR cs.antibiotic LIKE '%Clofazimine%' 
            OR cs.antibiotic LIKE '%Rifabutin%' 
            OR cs.antibiotic IN ('Posaconazole','Penicillin/Ampicillin','Omadacycline', 'Amphotericin B', 'Polymixin B', 'Fluconazole', 'Itraconazole', 'Caspofungin', 'Voriconazole', 'Anidulafungin', 'Micafungin', 'Isavuconazole', 'Antibiotic', 'OXA48-LIKE PCR', 'ESBL confirmation test', 'Oxacillin Screen')
        )
)

#########################################################################################################################
  -- Step 7: Final selection of required fields
#########################################################################################################################   

SELECT 
    acwf.anon_id,
    acwf.pat_enc_csn_id_coded,
    acwf.order_proc_id_coded,
    acwf.order_time_jittered_utc,
    acwf.result_time_jittered_utc,
    acwf.ordering_mode,
    acwf.culture_description,
    acwf.was_positive,
    pcd.organism,
    pcd.antibiotic,
    CASE
        WHEN pcd.susceptibility IS NULL THEN NULL  -- Keep NULL values unchanged
        WHEN pcd.susceptibility IN ('Susceptible', 'Not Detected', 'Negative' ) THEN 'Susceptible'
        WHEN pcd.susceptibility IN ('Resistant', 'Non Susceptible', 'Positive', 'Detected') THEN 'Resistant'
        WHEN pcd.susceptibility IN ('Intermediate', 'Susceptible - Dose Dependent') THEN 'Intermediate'
        WHEN pcd.susceptibility IN ('No Interpretation', 'Not done', 'Inconclusive', 'See Comment') THEN 'Inconclusive'
        WHEN pcd.susceptibility IN ('Synergy', 'No Synergy') THEN 'Synergism'
        ELSE 'Unknown'  -- Mark unexpected values as Unknown
    END AS susceptibility
FROM
    all_cultures_with_flag acwf
LEFT JOIN
    positive_culture_details pcd
ON
    acwf.order_proc_id_coded = pcd.order_proc_id_coded
WHERE
    -- Exclude rows where susceptibility would be 'Unknown'
    (pcd.susceptibility IS NULL OR
    pcd.susceptibility IN ('Susceptible', 'Positive', 'Detected',
                           'Resistant', 'Non Susceptible', 'Negative',
                           'Intermediate', 'Susceptible - Dose Dependent',
                           'No Interpretation', 'Not done', 'Inconclusive', 'See Comment',
                           'Synergy', 'No Synergy', 'Not Detected'));


Query is running:   0%|          |

Downloading:   0%|          |

In [15]:
print("the unique order number for urine only culture is {}".format(find_unique_orders(starting_cohort)))

the unique order number for urine only culture is 423185


<img src="Aim_4_Cohort_Flowchart_stepwise/Aim_4_Cohort_Flowchart_stepwise.002.png" alt="Step 1-2" width="500">

In [5]:
# %%bigquery --use_rest_api starting_cohort
# select * from `som-nero-phi-jonc101.antimicrobial_stewardship_sandy_refactor.microbiology_urine_cultures_cohort`

Query is running:   0%|          |

Downloading:   0%|          |

# ---------------------------------------**Step 3** -------------------------------------------

### Refactor Fateme's code for ward info in order to select ED patient
Reference: https://github.com/HealthRex/CDSS/blob/master/scripts/antibiotic-susceptibility/sql/queries/microbiology_cultures_ward_info.sql

In [18]:
%%bigquery --use_rest_api df_hosp_ward_info
WITH
-- Step 1: Extract ER and ICU Information from adt Table
temp_er_icu_info_adt AS (
    SELECT
        anon_id,
        pat_enc_csn_id_coded,
        CASE 
            WHEN pat_class = 'Emergency' OR pat_class = 'Emergency Services' THEN 1
            ELSE 0
        END AS hosp_ward_ER,
        CASE 
            WHEN pat_class = 'Intensive Care (IC)' THEN 1
            ELSE 0
        END AS hosp_ward_ICU,
        CASE 
            WHEN pat_lv_of_care LIKE "%Critical Care" THEN 1
            ELSE 0
        END AS hosp_ward_Critical_Care
    FROM
        `som-nero-phi-jonc101.shc_core_2023.adt`
),

-- Step 2: Extract ER Information from order_proc Table
temp_er_info_order_proc AS (
    SELECT
        anon_id,
        pat_enc_csn_id_coded,
        order_proc_id_coded,
        CASE 
            WHEN proc_pat_class = 'Emergency' OR proc_pat_class = 'Emergency Services' THEN 1
            ELSE 0
        END AS hosp_ward_ER_order_proc
    FROM
        `som-nero-phi-jonc101.shc_core_2023.order_proc`
),

-- Step 3: Combine ER and ICU Information
temp_combined_er_icu_info AS (
    SELECT
        adt.anon_id,
        adt.pat_enc_csn_id_coded,
        adt.hosp_ward_ER,
        adt.hosp_ward_ICU,
        adt.hosp_ward_Critical_Care,
        er.order_proc_id_coded,
        er.hosp_ward_ER_order_proc
    FROM
        temp_er_icu_info_adt adt
    LEFT JOIN
        temp_er_info_order_proc er
    ON
        adt.pat_enc_csn_id_coded = er.pat_enc_csn_id_coded
),

-- Step 4: Extract IP and OP Information from order_proc Table
temp_ip_op_info AS (
    SELECT
        anon_id,
        pat_enc_csn_id_coded,
        order_proc_id_coded,
        order_time_jittered_utc,
        CASE 
            WHEN ordering_mode = 'Inpatient' THEN 1
            ELSE 0
        END AS hosp_ward_IP,
        CASE 
            WHEN ordering_mode = 'Outpatient' THEN 1
            ELSE 0
        END AS hosp_ward_OP
    FROM
        `som-nero-phi-jonc101.shc_core_2023.order_proc`
),

-- Step 5: Combine All Information into One Temporary Table
temp_combined_hosp_ward_info AS (
    SELECT
        ipop.anon_id,
        ipop.pat_enc_csn_id_coded,
        ipop.order_proc_id_coded,
        ipop.order_time_jittered_utc,
        ipop.hosp_ward_IP,
        ipop.hosp_ward_OP,
        COALESCE(icu.hosp_ward_ER, 0) AS hosp_ward_ER_adt,
        COALESCE(icu.hosp_ward_ER_order_proc, 0) AS hosp_ward_ER_order_proc,
        COALESCE(icu.hosp_ward_ICU, 0) AS hosp_ward_ICU,
        COALESCE(icu.hosp_ward_Critical_Care, 0) AS hosp_ward_Critical_Care
    FROM
        temp_ip_op_info ipop
    LEFT JOIN
        temp_combined_er_icu_info icu
    ON
        ipop.pat_enc_csn_id_coded = icu.pat_enc_csn_id_coded AND ipop.order_proc_id_coded = icu.order_proc_id_coded
),

-- Step 6: Extract ICU stay based on transfer orders
temp_cohortOfInterest AS (
    SELECT DISTINCT
        pat_enc_csn_id_coded,
        hosp_disch_time_jittered_utc
    FROM `som-nero-phi-jonc101.shc_core_2023.encounter`
    WHERE hosp_disch_time_jittered_utc IS NOT NULL
),

temp_ordersTransfer AS (
    SELECT DISTINCT
        pat_enc_csn_id_coded,
        description,
        level_of_care,
        service,
        order_inst_jittered_utc
    FROM `som-nero-phi-jonc101.shc_core_2023.order_proc` AS procedures
    WHERE (description LIKE "CHANGE LEVEL OF CARE/TRANSFER PATIENT" OR description LIKE "ADMIT TO INPATIENT") AND level_of_care IS NOT NULL
),

temp_icuTransferCount AS (
    SELECT
        mc.pat_enc_csn_id_coded,
        COUNT(CASE WHEN level_of_care LIKE "Critical Care" THEN 1 END) AS numICUTransfers
    FROM
       `som-nero-phi-jonc101.antimicrobial_stewardship_sandy_refactor.microbiology_urine_cultures_cohort` mc # only change this to the starting cohort above
    LEFT JOIN
        temp_ordersTransfer ot
    ON
        mc.pat_enc_csn_id_coded = ot.pat_enc_csn_id_coded
    GROUP BY
        mc.pat_enc_csn_id_coded
),

microbiology_cultures_with_icu_flag AS (
    SELECT DISTINCT
        mc.anon_id,
        mc.pat_enc_csn_id_coded,
        mc.order_proc_id_coded,
        mc.order_time_jittered_utc,
        CASE WHEN itc.numICUTransfers > 0 THEN 1 ELSE 0 END AS icu_flag
    FROM
       `som-nero-phi-jonc101.antimicrobial_stewardship_sandy_refactor.microbiology_urine_cultures_cohort` mc
    LEFT JOIN
        temp_icuTransferCount itc
    ON
        mc.pat_enc_csn_id_coded = itc.pat_enc_csn_id_coded
)

-- Step 7: Create the Final Table with Correct Binary Indicators for Each Hospital Ward and ICU Flag
SELECT
    mc.anon_id,
    mc.pat_enc_csn_id_coded,
    mc.order_proc_id_coded,
    mc.order_time_jittered_utc,
    MAX(CASE WHEN chwi.hosp_ward_IP = 1 THEN 1 ELSE 0 END) AS hosp_ward_IP,
    MAX(CASE WHEN chwi.hosp_ward_OP = 1 THEN 1 ELSE 0 END) AS hosp_ward_OP,
    MAX(CASE WHEN chwi.hosp_ward_ER_adt = 1 OR chwi.hosp_ward_ER_order_proc = 1 THEN 1 ELSE 0 END) AS hosp_ward_ER,
    MAX(
        CASE 
            WHEN chwi.hosp_ward_ICU = 1 THEN 1 
            WHEN icu_flag.icu_flag = 1 THEN 1 
            WHEN chwi.hosp_ward_Critical_Care = 1 THEN 1
            ELSE 0 
        END
    ) AS hosp_ward_ICU
FROM
   `som-nero-phi-jonc101.antimicrobial_stewardship_sandy_refactor.microbiology_urine_cultures_cohort` mc
LEFT JOIN
    temp_combined_hosp_ward_info chwi
ON
    mc.anon_id = chwi.anon_id 
    AND mc.pat_enc_csn_id_coded = chwi.pat_enc_csn_id_coded 
    AND mc.order_proc_id_coded = chwi.order_proc_id_coded
LEFT JOIN
    microbiology_cultures_with_icu_flag icu_flag
ON
    mc.anon_id = icu_flag.anon_id 
    AND mc.pat_enc_csn_id_coded = icu_flag.pat_enc_csn_id_coded 
    AND mc.order_proc_id_coded = icu_flag.order_proc_id_coded
GROUP BY
    mc.anon_id, 
    mc.pat_enc_csn_id_coded, 
    mc.order_proc_id_coded, 
    mc.order_time_jittered_utc;

Query is running:   0%|          |

Downloading:   0%|          |

In [6]:
# %%bigquery --use_rest_api df_hosp_ward_info
# select * from `som-nero-phi-jonc101.antimicrobial_stewardship_sandy_refactor.microbiology_urine_cultures_ward_info`

Query is running:   0%|          |

Downloading:   0%|          |

### Refactor Fateme's code to find all prior med information for my starting cohort
Reference: https://github.com/HealthRex/CDSS/blob/master/scripts/antibiotic-susceptibility/sql/queries/time-to-event-augmented-queries/medication_exposure.sql


In [None]:
%%bigquery --use_rest_api microbiology_urine_cultures_prior_med_augmented
##############################################################################################################################################################
# Goal:Creating the microbiology_cultures_prior_med_augmented Table. This table indicates of a patient having been treated with a specific antibiotic before specimen collection.
##############################################################################################################################################################
# CREATE OR REPLACE TABLE `som-nero-phi-jonc101.antimicrobial_stewardship.microbiology_cultures_prior_med_augmented` AS (
WITH base_cohort AS (
    SELECT DISTINCT
        anon_id,
        pat_enc_csn_id_coded,
        order_proc_id_coded,
        order_time_jittered_utc,
        result_time_jittered_utc
    FROM 
        `som-nero-phi-jonc101.antimicrobial_stewardship_sandy_refactor.microbiology_urine_cultures_cohort`
),
cleaned_medications AS (
    SELECT 
        c.anon_id,
        c.pat_enc_csn_id_coded,
        c.order_proc_id_coded,
        c.order_time_jittered_utc,
        c.result_time_jittered_utc,
        mo.ordering_date_jittered_utc as medication_time,
        INITCAP(
            REGEXP_REPLACE(
                REGEXP_REPLACE(
                    TRIM(
                        REGEXP_REPLACE(
                            REGEXP_REPLACE(
                                REGEXP_REPLACE(
                                    LOWER(mm.name),  -- Convert to lowercase
                                    '\\s*\\d+(\\.\\d+)?\\s*(mg|mcg|gram|ml|%)', ''  -- Remove dosages or concentrations
                                ),
                                '\\(.*?\\)', ''  -- Remove text in parentheses
                            ),
                            ' in.*$|tablet|capsule|intravenous|piggyback|' ||
                            'solution|suspension|oral|sodium|chloride|' ||
                            'injection|citrate|soln|dextrose|iv|' ||
                            'macrocrystals|macrocrystal|axetil|potassium|packet|' ||
                            'monohydrate|ethylsuccinate|powder|mandelate|' ||
                            'hyclate|hcl|hippurate|tromethamine|' ||
                            'million|unit|syrup|chewable|delayed|mphase|' ||
                            'release|benzathine|syringe|dispersible|' ||
                            'sulfate|procaine|blue|hyos|sod*phos|' ||
                            'susp|and|fosamil|extended|succinate|granules|' ||
                            'delay|pot|ext|rel|cyam|salicylate|salicyl|' ||
                            'sodphos|methylene|stearate|synergy', ''  -- Remove pharmacy filler words and "synergy"
                        )
                    ),
                    '\\d|\\sfor\\s*|\\ser\\s*|\\shr\\s*|/ml\\s*|' ||
                    '\\sml\\s*|\\sv\\s*|\\sg\\s*|\\sim\\s*', ''  -- General cleaning for non-relevant patterns
                ),
                '\\s|\\/|\\.|-$', ''  -- Remove extra characters like spaces, slashes, dots, etc.
            )
        ) AS medication_name,
        TIMESTAMP_DIFF(c.order_time_jittered_utc,mo.ordering_date_jittered_utc,day) as medication_time_to_cultureTime,       
    FROM 
        base_cohort c
    LEFT JOIN 
        `som-nero-phi-jonc101.shc_core_2023.order_med` mo
        using(anon_id)  # this includes all prior meds
    LEFT JOIN 
        `som-nero-phi-jonc101.shc_core_2023.mapped_meds` mm
    ON mo.med_description = mm.name # this is problematic
),
valid_antibiotics AS (
    SELECT 
        pae.anon_id,
        pae.pat_enc_csn_id_coded,
        pae.order_proc_id_coded,
        pae.order_time_jittered_utc,
        pae.medication_name,
        pae.medication_time,
        pae.medication_time_to_cultureTime,
        CASE WHEN medication_name = 'Nitrofurantoin' THEN 'NIT'
    WHEN medication_name = 'Cephalexin' THEN 'CEP'
    WHEN medication_name = 'Piperacillin-Tazobactam-Dextrs' THEN 'PIP'
    WHEN medication_name = 'Sulfamethoxazole-Trimethoprim' THEN 'SUL'
    WHEN medication_name = 'Ciprofloxacin Hcl' THEN 'CIP'
    WHEN medication_name = 'Cefazolin' THEN 'CEF'
    WHEN medication_name = 'Cefazolin In Dextrose' THEN 'CEF1'
    WHEN medication_name = 'Levofloxacin' THEN 'LEV'
    WHEN medication_name = 'Azithromycin' THEN 'AZI'
    WHEN medication_name = 'Amoxicillin-Pot Clavulanate' THEN 'AMO'
    WHEN medication_name = 'Metronidazole In Nacl' THEN 'MET'
    WHEN medication_name = 'Ceftriaxone' THEN 'CEF2'
    WHEN medication_name = 'Vancomycin' THEN 'VAN'
    WHEN medication_name = 'Levofloxacin In' THEN 'LEV1'
    WHEN medication_name = 'Vancomycin In Dextrose' THEN 'VAN1'
    WHEN medication_name = 'Metronidazole' THEN 'MET1'
    WHEN medication_name = 'Ciprofloxacin In' THEN 'CIP1'
    WHEN medication_name = 'Doxycycline Hyclate' THEN 'DOX'
    WHEN medication_name = 'Cefpodoxime' THEN 'CEF3'
    WHEN medication_name = 'Piperacillin-Tazobactam' THEN 'PIP1'
    WHEN medication_name = 'Rifaximin' THEN 'RIF'
    WHEN medication_name = 'Vancomycin-Diluent Combo' THEN 'VAN2'
    WHEN medication_name = 'Clindamycin In' THEN 'CLI'
    WHEN medication_name = 'Amoxicillin' THEN 'AMO1'
    WHEN medication_name = 'Nitrofurantoin Macrocrystal' THEN 'NIT1'
    WHEN medication_name = 'Macrobid' THEN 'MAC'
    WHEN medication_name = 'Gentamicin-Sodium Citrate' THEN 'GEN'
    WHEN medication_name = 'Cefdinir' THEN 'CEF4'
    WHEN medication_name = 'Clindamycin Phosphate' THEN 'CLI1'
    WHEN medication_name = 'Cefoxitin' THEN 'CEF5'
    WHEN medication_name = 'Cipro' THEN 'CIP2'
    WHEN medication_name = 'Clindamycin Hcl' THEN 'CLI2'
    WHEN medication_name = 'Vancomycin In' THEN 'VAN3'
    WHEN medication_name = 'Moxifloxacin' THEN 'MOX'
    WHEN medication_name = 'Gentamicin' THEN 'GEN1'
    WHEN medication_name = 'Linezolid' THEN 'LIN'
    WHEN medication_name = 'Zithromax' THEN 'ZIT'
    WHEN medication_name = 'Erythromycin' THEN 'ERY'
    WHEN medication_name = 'Bactrim Ds' THEN 'BAC'
    WHEN medication_name = 'Fosfomycin Tromethamine' THEN 'FOS'
    WHEN medication_name = 'Cefepime' THEN 'CEF6'
    WHEN medication_name = 'Keflex' THEN 'KEF'
    WHEN medication_name = 'Colistin' THEN 'COL'
    WHEN medication_name = 'Doxycycline Monohydrate' THEN 'DOX1'
    WHEN medication_name = 'Levaquin' THEN 'LEV2'
    WHEN medication_name = 'Clarithromycin' THEN 'CLA'
    WHEN medication_name = 'Rifampin' THEN 'RIF1'
    WHEN medication_name = 'Ciprofloxacin' THEN 'CIP3'
    WHEN medication_name = 'Cefuroxime Axetil' THEN 'CEF7'
    WHEN medication_name = 'Augmentin' THEN 'AUG'
    WHEN medication_name = 'Cefadroxil' THEN 'CEF8'
    WHEN medication_name = 'Methenamine Hippurate' THEN 'MET2'
    WHEN medication_name = 'Ertapenem' THEN 'ERT'
    WHEN medication_name = 'Linezolid In Dextrose' THEN 'LIN1'
    WHEN medication_name = 'Ofloxacin' THEN 'OFL'
    WHEN medication_name = 'Penicillin' THEN 'PEN'
    WHEN medication_name = 'Silver Sulfadiazine' THEN 'SIL'
    WHEN medication_name = 'Dapsone' THEN 'DAP'
    WHEN medication_name = 'Ciprofloxacin-Dexamethasone' THEN 'CIP4'
    WHEN medication_name = 'Ampicillin Sodium' THEN 'AMP'
    WHEN medication_name = 'Isoniazid' THEN 'ISO'
    WHEN medication_name = 'Bactrim' THEN 'BAC1'
    WHEN medication_name = 'Fidaxomicin' THEN 'FID'
    WHEN medication_name = 'Aztreonam In' THEN 'AZT'
    WHEN medication_name = 'Ethambutol' THEN 'ETH'
    WHEN medication_name = 'Tobramycin Sulfate' THEN 'TOB'
    WHEN medication_name = 'Cefepime In' THEN 'CEF9'
    WHEN medication_name = 'Ampicillin' THEN 'AMP1'
    WHEN medication_name = 'Minocycline' THEN 'MIN'
    WHEN medication_name = 'Ceftazidime-Dextrose' THEN 'CEF10'
    WHEN medication_name = 'Aztreonam' THEN 'AZT1'
    WHEN medication_name = 'Xifaxan' THEN 'XIF'
    WHEN medication_name = 'Erythromycin Ethylsuccinate' THEN 'ERY1'
    WHEN medication_name = 'Gentamicin In Nacl' THEN 'GEN2'
    WHEN medication_name = 'Meropenem' THEN 'MER'
    WHEN medication_name = 'Gatifloxacin' THEN 'GAT'
    WHEN medication_name = 'Flagyl' THEN 'FLA'
    WHEN medication_name = 'Macrodantin' THEN 'MAC1'
    WHEN medication_name = 'Amikacin' THEN 'AMI'
    WHEN medication_name = 'Trimethoprim' THEN 'TRI'
    WHEN medication_name = 'Tobramycin-Dexamethasone' THEN 'TOB1'
    WHEN medication_name = 'Dicloxacillin' THEN 'DIC'
    WHEN medication_name = 'Moxifloxacin-Sod.Chloride(Iso)' THEN 'MOX1'
    WHEN medication_name = 'Hiprex' THEN 'HIP'
    WHEN medication_name = 'Ceftazidime' THEN 'CEF11'
    WHEN medication_name = 'Zyvox' THEN 'ZYV'
    WHEN medication_name = 'Methenamine Mandelate' THEN 'MET3'
    WHEN medication_name = 'Rifabutin' THEN 'RIF2'
    WHEN medication_name = 'Tedizolid' THEN 'TED'
    end as medication_category
    FROM 
        cleaned_medications pae
   where pae.medication_name in (select distinct antibiotic_name from `som-nero-phi-jonc101.antimicrobial_stewardship.temp_antibiotics`) # this is problematic
)

select c.*,
        pae.medication_name,
        pae.medication_time,
        pae.medication_time_to_cultureTime,
        pae.medication_category
from base_cohort c
left join valid_antibiotics pae
using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc)
group by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc, medication_time,medication_time_to_cultureTime,medication_name,medication_category
order by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc, medication_time_to_cultureTime,medication_time,medication_name,medication_category

In [7]:
%%bigquery --use_rest_api microbiology_urine_cultures_prior_med_augmented
select * from `som-nero-phi-jonc101.antimicrobial_stewardship_sandy_refactor.microbiology_urine_cultures_prior_med_augmented`


Query is running:   0%|          |

Downloading:   0%|          |

# Part 1

# microbiology_urine_cultures_prior_med_augmented, what we been exploring recently

### Let's do a demo analysis on this cohort

In [26]:
microbiology_urine_cultures_prior_med_augmented

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc,medication_name,medication_time,medication_time_to_cultureTime,medication_category
0,JC1005213,131020439517,406688461,2012-09-16 16:54:00+00:00,2012-09-18 18:07:00+00:00,Amikacin,2017-11-12 08:00:00+00:00,-1882,AMI
1,JC1011062,131267776141,624393734,2019-07-18 20:20:00+00:00,2019-07-20 19:16:00+00:00,Linezolid,2020-10-26 07:00:00+00:00,-465,LIN
2,JC1011062,131298109295,691632647,2020-10-14 18:56:00+00:00,2020-10-16 14:47:00+00:00,Linezolid,2020-10-26 07:00:00+00:00,-11,LIN
3,JC1011062,131312158352,730034167,2021-05-13 15:34:00+00:00,2021-05-15 15:58:00+00:00,Linezolid,2020-10-26 07:00:00+00:00,199,LIN
4,JC1012926,131260710141,588017762,2018-12-13 16:18:00+00:00,2018-12-15 14:33:00+00:00,Linezolid,2018-11-21 08:00:00+00:00,22,LIN
...,...,...,...,...,...,...,...,...,...
1041190,JC993259,131258025537,578644003,2018-09-28 03:06:00+00:00,2018-09-30 00:13:00+00:00,Vancomycin,2022-10-17 07:00:00+00:00,-1480,VAN
1041191,JC994004,131237828673,534847064,2017-09-21 09:14:00+00:00,2017-09-25 20:27:00+00:00,Vancomycin,2017-09-22 07:00:00+00:00,0,VAN
1041192,JC994717,131015698606,390421763,2011-11-06 19:31:00+00:00,2011-11-08 16:19:00+00:00,Vancomycin,2016-08-13 07:00:00+00:00,-1741,VAN
1041193,JC994717,131189127911,499607574,2016-08-14 05:25:00+00:00,2016-08-16 15:17:00+00:00,Vancomycin,2016-08-13 07:00:00+00:00,0,VAN


### Total number of unique urine culture order is 423185

In [24]:
find_unique_orders(microbiology_urine_cultures_prior_med_augmented)

423185

### Step 1: Total number of unique ED orders is 111862

In [15]:
ED_order = df_hosp_ward_info[df_hosp_ward_info['hosp_ward_ER'] == 1]
ED_order

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,hosp_ward_IP,hosp_ward_OP,hosp_ward_ER,hosp_ward_ICU
311323,JC1783941,131355397449,879930149,2023-06-02 20:17:00+00:00,0,1,1,0
311324,JC3243957,131288047970,662720316,2020-06-04 22:01:00+00:00,0,1,1,1
311325,JC561062,131152772437,480348716,2015-10-31 05:52:00+00:00,1,0,1,0
311326,JC1426476,131076943795,459620731,2015-03-30 04:46:00+00:00,1,0,1,0
311327,JC689618,131289176218,674136233,2020-07-11 01:41:00+00:00,1,0,1,1
...,...,...,...,...,...,...,...,...
423180,JC2511105,131186021383,493795751,2016-05-31 04:09:00+00:00,1,0,1,0
423181,JC1266055,131032973898,439244514,2014-05-23 10:46:00+00:00,1,0,1,0
423182,JC815723,131366627809,920688268,2023-10-26 23:26:00+00:00,1,0,1,0
423183,JC573303,131331405981,790075838,2022-04-18 20:01:00+00:00,1,0,1,0


### Step 2: merge ED order with microbiology_urine_cultures_prior_med_augmented

In [16]:
ED_culture = starting_cohort.merge(ED_order, on=['anon_id', 'pat_enc_csn_id_coded', 'order_proc_id_coded', 'order_time_jittered_utc']).\
    drop(columns=["hosp_ward_IP", "hosp_ward_OP", "hosp_ward_ER", "hosp_ward_ICU"])
assert find_unique_orders(ED_culture) == len(ED_order) # this confirms that all ED orders are included in the ED culture dataframe
ED_culture

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc,ordering_mode,culture_description,was_positive,organism,antibiotic,susceptibility
0,JC865576,131004065645,355748550,2009-08-22 02:15:00+00:00,2009-08-24 00:07:00+00:00,Inpatient,URINE,0,,,
1,JC1219600,131004074764,355815166,2009-09-05 04:01:00+00:00,2009-09-07 02:27:00+00:00,Inpatient,URINE,0,,,
2,JC1263064,131006585638,355857316,2009-09-19 02:02:00+00:00,2009-09-21 00:01:00+00:00,Inpatient,URINE,0,,,
3,JC574462,131006629494,356049733,2009-09-16 05:39:00+00:00,2009-09-18 01:10:00+00:00,Inpatient,URINE,0,,,
4,JC971272,131006666735,356201366,2009-09-06 21:22:00+00:00,2009-09-08 19:39:00+00:00,Inpatient,URINE,0,,,
...,...,...,...,...,...,...,...,...,...,...,...
767856,JC728639,131368437417,927624571,2024-01-01 08:25:00+00:00,2024-01-03 15:27:00+00:00,Inpatient,URINE,0,,,
767857,JC6457401,131368515977,927935197,2024-01-03 19:36:00+00:00,2024-01-05 15:18:00+00:00,Inpatient,URINE,0,,,
767858,JC6110624,131368544703,928054501,2023-12-19 02:03:00+00:00,2023-12-20 21:45:00+00:00,Inpatient,URINE,0,,,
767859,JC2875927,131368529510,928062971,2023-12-18 03:28:00+00:00,2023-12-19 18:03:00+00:00,Inpatient,URINE,0,,,


In [17]:
ED_culture_merged_med = ED_culture.merge(microbiology_urine_cultures_prior_med_augmented, 
                                         on =['anon_id', 'pat_enc_csn_id_coded', 'order_proc_id_coded', "order_time_jittered_utc"] )
ED_culture_merged_med = ED_culture_merged_med.drop(columns=['result_time_jittered_utc_y'])
ED_culture_merged_med = ED_culture_merged_med.rename(columns={'result_time_jittered_utc_x': 'result_time_jittered_utc'})
assert find_unique_orders(ED_culture_merged_med) == len(ED_order) # this confirms that all ED cultures are included in the ED culture dataframe
ED_culture_merged_med

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc,ordering_mode,culture_description,was_positive,organism,antibiotic,susceptibility,medication_name,medication_time,medication_time_to_cultureTime,medication_category
0,JC865576,131004065645,355748550,2009-08-22 02:15:00+00:00,2009-08-24 00:07:00+00:00,Inpatient,URINE,0,,,,Levofloxacin,2014-08-22 07:00:00+00:00,-1826,LEV
1,JC865576,131004065645,355748550,2009-08-22 02:15:00+00:00,2009-08-24 00:07:00+00:00,Inpatient,URINE,0,,,,Levofloxacin,2008-09-04 07:00:00+00:00,351,LEV
2,JC865576,131004065645,355748550,2009-08-22 02:15:00+00:00,2009-08-24 00:07:00+00:00,Inpatient,URINE,0,,,,Levofloxacin,2008-09-03 07:00:00+00:00,352,LEV
3,JC865576,131004065645,355748550,2009-08-22 02:15:00+00:00,2009-08-24 00:07:00+00:00,Inpatient,URINE,0,,,,Levofloxacin,2010-07-14 07:00:00+00:00,-326,LEV
4,JC865576,131004065645,355748550,2009-08-22 02:15:00+00:00,2009-08-24 00:07:00+00:00,Inpatient,URINE,0,,,,Levofloxacin,2010-07-13 07:00:00+00:00,-325,LEV
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2796930,JC6457401,131368515977,927935197,2024-01-03 19:36:00+00:00,2024-01-05 15:18:00+00:00,Inpatient,URINE,0,,,,,NaT,,
2796931,JC6110624,131368544703,928054501,2023-12-19 02:03:00+00:00,2023-12-20 21:45:00+00:00,Inpatient,URINE,0,,,,,NaT,,
2796932,JC2875927,131368529510,928062971,2023-12-18 03:28:00+00:00,2023-12-19 18:03:00+00:00,Inpatient,URINE,0,,,,,NaT,,
2796933,JC2050507,131368734203,928668178,2024-01-25 05:37:00+00:00,2024-01-27 15:23:00+00:00,Inpatient,URINE,0,,,,Cefazolin,2023-05-13 07:00:00+00:00,256,CEF


### Step 3: Filter culture order with Med history: 63845 orders have **current** + **prior** med history
the problem is: some orders might noe prior med but not current med

### 

In [18]:
order_with_med = ED_culture_merged_med[~ED_culture_merged_med["medication_time"].isnull()]
find_unique_orders(order_with_med)

63845

### Step 4: Filter for orders with emprical antibiotic prescription
### Filter condition:
1. medication_time is greater than culture order time but smaller than result time, OR
2. medication_time is within 6 hours before the culture order time

In [19]:
# Group by the specified columns
grouped = ED_culture_merged_med.groupby(['anon_id', 'pat_enc_csn_id_coded', 'order_proc_id_coded', 'order_time_jittered_utc'])

# Function to filter each group
def filter_group_exclusion(group):
    # Check if any medication_time_to_cultureTime is between 6 hours and 720 hours (30 days)
    # time_diff_hours = (group['medication_time'] - group['order_time_jittered_utc']).dt.total_seconds() / 3600  # Convert to hours
    # if any((time_diff_hours > 6) & (time_diff_hours < 720)):
    #     return None  # Disregard the whole group
   
        # Keep rows where:
        # 1. medication_time is greater than culture order time but smaller than result time, OR
        # 2. medication_time is within 6 hours before the culture order time
        condition = (
            ((group['medication_time'] > group['order_time_jittered_utc']) & 
                (group['medication_time'] < group['result_time_jittered_utc'])) | 
            ((group['medication_time'] >= (group['order_time_jittered_utc'] - pd.Timedelta(hours=6))) & 
                (group['medication_time'] <= group['order_time_jittered_utc'])
        ))
        return group[condition]

# Apply the filter to each group
filtered_groups_exclusion = [filter_group_exclusion(group) for _, group in grouped]

# Combine the filtered groups into a new DataFrame
filtered_df_exclusion = pd.concat([group for group in filtered_groups_exclusion if group is not None])

# Reset the index
filtered_df_exclusion = filtered_df_exclusion.reset_index(drop=True)


### The unique order with empirical order is 8288 (only current med)

In [33]:
find_unique_orders(filtered_df_exclusion)

8288

### Step 5: Filter for orders with emprical antibiotic prescription and without prior antibiotic exposure 
### filter criteria is: Check if any medication_time_to_cultureTime is between 6 hours and 720 hours (30 days)

In [20]:
# Group by the specified columns
grouped = ED_culture_merged_med.groupby(['anon_id', 'pat_enc_csn_id_coded', 'order_proc_id_coded', 'order_time_jittered_utc'])

# Function to filter each group
def filter_group(group):
    # Check if any medication_time_to_cultureTime is between 6 hours and 720 hours (30 days)
    time_diff_hours = (group['medication_time'] - group['order_time_jittered_utc']).dt.total_seconds() / 3600  # Convert to hours
    if any((time_diff_hours > 6) & (time_diff_hours < 720)):
        return None  # Disregard the whole group
    else:
        # Keep rows where:
        # 1. medication_time is greater than culture order time but smaller than result time, OR
        # 2. medication_time is within 6 hours before the culture order time
        condition = (
            ((group['medication_time'] > group['order_time_jittered_utc']) & 
             (group['medication_time'] < group['result_time_jittered_utc'])) | 
            ((group['medication_time'] >= (group['order_time_jittered_utc'] - pd.Timedelta(hours=6))) & 
             (group['medication_time'] <= group['order_time_jittered_utc'])
        ))
        return group[condition]

# Apply the filter to each group
filtered_groups = [filter_group(group) for _, group in grouped]

# Combine the filtered groups into a new DataFrame
filtered_df = pd.concat([group for group in filtered_groups if group is not None])

# Reset the index
filtered_df = filtered_df.reset_index(drop=True)


### The unique order with empirical order is 3099 (only current med)

In [35]:
find_unique_orders(filtered_df)

3099

# Part 2

## Problems

### Problem 1
### The number jumps from 63845(step 3) to 8288(step4) is because of we mixed up the current med and prior med, so we should focus on current med now

### Problem 2
### THe number of unique antibiotic prescribed is too few: only 21

In [36]:
abx_lst = microbiology_urine_cultures_prior_med_augmented["medication_name"].unique()
print(abx_lst)
print("the number of unique antibiotics is: {}".format(len(abx_lst)))

['Amikacin' 'Linezolid' 'Colistin' 'Ceftazidime' 'Penicillin' 'Cipro'
 'Levaquin' 'Cefoxitin' 'Isoniazid' None 'Ampicillin' 'Aztreonam'
 'Cefazolin' 'Ceftriaxone' 'Cefepime' 'Ciprofloxacin' 'Ertapenem'
 'Gentamicin' 'Levofloxacin' 'Metronidazole' 'Vancomycin']
the number of unique antibiotics is: 21


## So I removed the second part of the code that filters out antibiotic and instead kept only current medication info

### Refactor Fateme's prior_med_augmented code to include only current medication and keep all medication

In [None]:
%%bigquery --use_rest_api current_med
##############################################################################################################################################################
# Goal:Creating the microbiology_cultures_prior_med_augmented Table. This table indicates of a patient having been treated with a specific antibiotic before specimen collection.
##############################################################################################################################################################
# CREATE OR REPLACE TABLE `som-nero-phi-jonc101.antimicrobial_stewardship.microbiology_cultures_prior_med_augmented` AS (
WITH base_c AS (
    SELECT DISTINCT
        anon_id,
        pat_enc_csn_id_coded,
        order_proc_id_coded,
        order_time_jittered_utc,
        result_time_jittered_utc
    FROM 
        `som-nero-phi-jonc101.antimicrobial_stewardship_sandy_refactor.microbiology_urine_cultures_cohort`
),
cleaned_medications AS (
    SELECT 
        c.anon_id,
        c.pat_enc_csn_id_coded,
        c.order_proc_id_coded,
        c.order_time_jittered_utc,
        c.result_time_jittered_utc,
        mo.ordering_date_jittered_utc as medication_time,
        INITCAP(
            REGEXP_REPLACE(
                REGEXP_REPLACE(
                    TRIM(
                        REGEXP_REPLACE(
                            REGEXP_REPLACE(
                                REGEXP_REPLACE(
                                    LOWER(mm.name),  -- Convert to lowercase
                                    '\\s*\\d+(\\.\\d+)?\\s*(mg|mcg|gram|ml|%)', ''  -- Remove dosages or concentrations
                                ),
                                '\\(.*?\\)', ''  -- Remove text in parentheses
                            ),
                            ' in.*$|tablet|capsule|intravenous|piggyback|' ||
                            'solution|suspension|oral|sodium|chloride|' ||
                            'injection|citrate|soln|dextrose|iv|' ||
                            'macrocrystals|macrocrystal|axetil|potassium|packet|' ||
                            'monohydrate|ethylsuccinate|powder|mandelate|' ||
                            'hyclate|hcl|hippurate|tromethamine|' ||
                            'million|unit|syrup|chewable|delayed|mphase|' ||
                            'release|benzathine|syringe|dispersible|' ||
                            'sulfate|procaine|blue|hyos|sod*phos|' ||
                            'susp|and|fosamil|extended|succinate|granules|' ||
                            'delay|pot|ext|rel|cyam|salicylate|salicyl|' ||
                            'sodphos|methylene|stearate|synergy', ''  -- Remove pharmacy filler words and "synergy"
                        )
                    ),
                    '\\d|\\sfor\\s*|\\ser\\s*|\\shr\\s*|/ml\\s*|' ||
                    '\\sml\\s*|\\sv\\s*|\\sg\\s*|\\sim\\s*', ''  -- General cleaning for non-relevant patterns
                ),
                '\\s|\\/|\\.|-$', ''  -- Remove extra characters like spaces, slashes, dots, etc.
            )
        ) AS medication_name,
        TIMESTAMP_DIFF(c.order_time_jittered_utc,mo.ordering_date_jittered_utc,day) as medication_time_to_cultureTime,       
    FROM 
        base_c c
    LEFT JOIN 
        `som-nero-phi-jonc101.shc_core_2023.order_med` mo
        using(anon_id, pat_enc_csn_id_coded) # i added the pat_enc_csn_id_coded here to make sure we are only getting meds for the same encounter
    LEFT JOIN 
        `som-nero-phi-jonc101.shc_core_2023.mapped_meds` mm
    ON mo.med_description = mm.name # this is problematic 
)

select * 
from cleaned_medications 
group by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc, medication_time,medication_time_to_cultureTime,medication_name
order by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc, medication_time_to_cultureTime,medication_time,medication_name

In [8]:
%%bigquery --use_rest_api current_med
select * from `som-nero-phi-jonc101.antimicrobial_stewardship_sandy_refactor.microbiology_cultures_current_med`

Query is running:   0%|          |

Downloading:   0%|          |

### Total number of unique urine culture order is still 423185

In [37]:
find_unique_orders(current_med)

423185

###  so this line of code is only checking for exact match and leaves out some other matches:   
... where pae.medication_name in (select distinct antibiotic_name from `som-nero-phi-jonc101.antimicrobial_stewardship.temp_antibiotics`) # this is problematic

In [25]:
# examples
# Ampicillin,Levaquin, Levofloxacin,
current_med[current_med["medication_name"].fillna("").str.contains("Levofloxacin")]

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc,medication_time,medication_name,medication_time_to_cultureTime
142,JC1479886,131021647430,412484035,2013-01-08 08:40:00+00:00,2013-01-10 00:05:00+00:00,2013-01-17 08:00:00+00:00,Levofloxacinpotabs,-8
457,JC1480655,131024107342,418623643,2013-03-24 08:51:00+00:00,2013-03-26 14:29:00+00:00,2013-03-18 07:00:00+00:00,Levofloxacin,6
673,JC1480658,131023712725,423731503,2013-08-10 08:22:00+00:00,2013-08-12 14:59:00+00:00,2013-05-09 07:00:00+00:00,Levofloxacin,93
994,JC1481750,131018903932,402804701,2012-05-20 02:54:00+00:00,2012-05-26 20:30:00+00:00,2012-05-08 07:00:00+00:00,Levofloxacin,11
1302,JC1482746,131022024415,412555072,2013-01-25 00:28:00+00:00,2013-01-27 00:13:00+00:00,2013-02-01 08:00:00+00:00,Levofloxacin,-7
...,...,...,...,...,...,...,...,...
6192909,JC544863,131185056473,493823133,2016-04-11 14:42:00+00:00,2016-04-13 16:48:00+00:00,2016-04-24 07:00:00+00:00,Levofloxacinpotabs,-12
6193543,JC545499,131282123042,648321785,2019-12-28 06:35:00+00:00,2019-12-30 17:16:00+00:00,2019-12-31 08:00:00+00:00,Levofloxacinpotabs,-3
6193724,JC545625,17043453,338405416,2008-11-08 14:32:00+00:00,2008-11-11 16:50:00+00:00,2008-11-09 08:00:00+00:00,Levofloxacinpotabs,0
6193845,JC545843,131024759261,420279581,2013-05-28 04:00:00+00:00,2013-05-31 01:27:00+00:00,2013-05-27 07:00:00+00:00,Levofloxacin,0


# Now let's check orders with any current medication

## Step 1: check orders with any current medication including non-antibiotic medication
 423185 -> 216783

In [648]:
# medication time of null indicautes no antibiotic prescription 
find_unique_orders(current_med[~current_med["medication_time"].isnull()])

216783

#### Problem again

In [653]:
# a little side exploration
# we should expect medication_name to be null when medication_time is null because there is no medication prescribed. But apparaently there are some cases where medication_name is not null but medication_time is null.
# check big query for example, and this is the line that causes the results:
"""    LEFT JOIN 
        `som-nero-phi-jonc101.shc_core_2023.order_med` mo
        using(anon_id, pat_enc_csn_id_coded) # i added the pat_enc_csn_id_coded here to make sure we are only getting meds for the same encounter
    LEFT JOIN 
        `som-nero-phi-jonc101.shc_core_2023.mapped_meds` mm
    ON mo.med_description = mm.name # this is problematic """
find_unique_orders(current_med[~current_med["medication_name"].isnull()])
assert find_unique_orders(current_med[~current_med["medication_name"].isnull()]) == find_unique_orders(current_med[~current_med["medication_time"].isnull()]), "this is unexpected, there are some cases where medication_name is not null but medication_time is null"

AssertionError: this is unexpected, there are some cases where medication_name is not null but medication_time is null

In [None]:
# %%bigquery --use_rest_api current_med_original
# ##############################################################################################################################################################
# # Goal:Creating the microbiology_cultures_prior_med_augmented Table. This table indicates of a patient having been treated with a specific antibiotic before specimen collection.
# ##############################################################################################################################################################
# # CREATE OR REPLACE TABLE `som-nero-phi-jonc101.antimicrobial_stewardship.microbiology_cultures_prior_med_augmented` AS (
# WITH base_c AS (
#     SELECT DISTINCT
#         anon_id,
#         pat_enc_csn_id_coded,
#         order_proc_id_coded,
#         order_time_jittered_utc,
#         result_time_jittered_utc
#     FROM 
#         `som-nero-phi-jonc101.antimicrobial_stewardship_sandy_refactor.microbiology_urine_cultures_cohort`
# ),
# cleaned_medications AS (
#     SELECT 
#         c.anon_id,
#         c.pat_enc_csn_id_coded,
#         c.order_proc_id_coded,
#         c.order_time_jittered_utc,
#         c.result_time_jittered_utc,
#         mo.ordering_date_jittered_utc as medication_time,
#         mm.name AS medication_name,
#         # INITCAP(
#         #     REGEXP_REPLACE(
#         #         REGEXP_REPLACE(
#         #             TRIM(
#         #                 REGEXP_REPLACE(
#         #                     REGEXP_REPLACE(
#         #                         REGEXP_REPLACE(
#         #                             LOWER(mm.name),  -- Convert to lowercase
#         #                             '\\s*\\d+(\\.\\d+)?\\s*(mg|mcg|gram|ml|%)', ''  -- Remove dosages or concentrations
#         #                         ),
#         #                         '\\(.*?\\)', ''  -- Remove text in parentheses
#         #                     ),
#         #                     ' in.*$|tablet|capsule|intravenous|piggyback|' ||
#         #                     'solution|suspension|oral|sodium|chloride|' ||
#         #                     'injection|citrate|soln|dextrose|iv|' ||
#         #                     'macrocrystals|macrocrystal|axetil|potassium|packet|' ||
#         #                     'monohydrate|ethylsuccinate|powder|mandelate|' ||
#         #                     'hyclate|hcl|hippurate|tromethamine|' ||
#         #                     'million|unit|syrup|chewable|delayed|mphase|' ||
#         #                     'release|benzathine|syringe|dispersible|' ||
#         #                     'sulfate|procaine|blue|hyos|sod*phos|' ||
#         #                     'susp|and|fosamil|extended|succinate|granules|' ||
#         #                     'delay|pot|ext|rel|cyam|salicylate|salicyl|' ||
#         #                     'sodphos|methylene|stearate|synergy', ''  -- Remove pharmacy filler words and "synergy"
#         #                 )
#         #             ),
#         #             '\\d|\\sfor\\s*|\\ser\\s*|\\shr\\s*|/ml\\s*|' ||
#         #             '\\sml\\s*|\\sv\\s*|\\sg\\s*|\\sim\\s*', ''  -- General cleaning for non-relevant patterns
#         #         ),
#         #         '\\s|\\/|\\.|-$', ''  -- Remove extra characters like spaces, slashes, dots, etc.
#         #     )
#         # ) AS medication_name,
#         TIMESTAMP_DIFF(c.order_time_jittered_utc,mo.ordering_date_jittered_utc,day) as medication_time_to_cultureTime,       
#     FROM 
#         base_c c
#     LEFT JOIN 
#         `som-nero-phi-jonc101.shc_core_2023.order_med` mo
#         using(anon_id, pat_enc_csn_id_coded)
#     LEFT JOIN 
#         `som-nero-phi-jonc101.shc_core_2023.mapped_meds` mm
#     ON mo.med_description = mm.name
# )

# select * 
# from cleaned_medications 
# group by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc, medication_time,medication_time_to_cultureTime,medication_name
# order by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc, medication_time_to_cultureTime,medication_time,medication_name

In [553]:
# %%bigquery --use_rest_api current_med_original_med_id
# ##############################################################################################################################################################
# # Goal:Creating the microbiology_cultures_prior_med_augmented Table. This table indicates of a patient having been treated with a specific antibiotic before specimen collection.
# ##############################################################################################################################################################
# # CREATE OR REPLACE TABLE `som-nero-phi-jonc101.antimicrobial_stewardship.microbiology_cultures_prior_med_augmented` AS (
# WITH base_c AS (
#     SELECT DISTINCT
#         anon_id,
#         pat_enc_csn_id_coded,
#         order_proc_id_coded,
#         order_time_jittered_utc,
#         result_time_jittered_utc
#     FROM 
#         `som-nero-phi-jonc101.antimicrobial_stewardship_sandy_refactor.microbiology_urine_cultures_cohort`
# ),
# cleaned_medications AS (
#     SELECT 
#         c.anon_id,
#         c.pat_enc_csn_id_coded,
#         c.order_proc_id_coded,
#         c.order_time_jittered_utc,
#         c.result_time_jittered_utc,
#         mo.ordering_date_jittered_utc as medication_time,
#         mm.name AS medication_name,
#         mm2.name AS medication_name_with_id,
#         # INITCAP(
#         #     REGEXP_REPLACE(
#         #         REGEXP_REPLACE(
#         #             TRIM(
#         #                 REGEXP_REPLACE(
#         #                     REGEXP_REPLACE(
#         #                         REGEXP_REPLACE(
#         #                             LOWER(mm.name),  -- Convert to lowercase
#         #                             '\\s*\\d+(\\.\\d+)?\\s*(mg|mcg|gram|ml|%)', ''  -- Remove dosages or concentrations
#         #                         ),
#         #                         '\\(.*?\\)', ''  -- Remove text in parentheses
#         #                     ),
#         #                     ' in.*$|tablet|capsule|intravenous|piggyback|' ||
#         #                     'solution|suspension|oral|sodium|chloride|' ||
#         #                     'injection|citrate|soln|dextrose|iv|' ||
#         #                     'macrocrystals|macrocrystal|axetil|potassium|packet|' ||
#         #                     'monohydrate|ethylsuccinate|powder|mandelate|' ||
#         #                     'hyclate|hcl|hippurate|tromethamine|' ||
#         #                     'million|unit|syrup|chewable|delayed|mphase|' ||
#         #                     'release|benzathine|syringe|dispersible|' ||
#         #                     'sulfate|procaine|blue|hyos|sod*phos|' ||
#         #                     'susp|and|fosamil|extended|succinate|granules|' ||
#         #                     'delay|pot|ext|rel|cyam|salicylate|salicyl|' ||
#         #                     'sodphos|methylene|stearate|synergy', ''  -- Remove pharmacy filler words and "synergy"
#         #                 )
#         #             ),
#         #             '\\d|\\sfor\\s*|\\ser\\s*|\\shr\\s*|/ml\\s*|' ||
#         #             '\\sml\\s*|\\sv\\s*|\\sg\\s*|\\sim\\s*', ''  -- General cleaning for non-relevant patterns
#         #         ),
#         #         '\\s|\\/|\\.|-$', ''  -- Remove extra characters like spaces, slashes, dots, etc.
#         #     )
#         # ) AS medication_name,
#         TIMESTAMP_DIFF(c.order_time_jittered_utc,mo.ordering_date_jittered_utc,day) as medication_time_to_cultureTime,       
#     FROM 
#         base_c c
#     LEFT JOIN 
#         `som-nero-phi-jonc101.shc_core_2023.order_med` mo
#         using(anon_id, pat_enc_csn_id_coded)
#     LEFT JOIN 
#         `som-nero-phi-jonc101.shc_core_2023.mapped_meds` mm
#     ON mo.med_description = mm.name
#     LEFT JOIN 
#         `som-nero-phi-jonc101.shc_core_2023.mapped_meds` mm2
#     ON mo.medication_id = mm2.medication_id
# )

# select * 
# from cleaned_medications 
# group by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc, medication_time,medication_time_to_cultureTime,medication_name,medication_name_with_id
# order by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc, medication_time_to_cultureTime,medication_time,medication_name,medication_name_with_id

Query is running:   0%|          |

Downloading:   0%|          |

# Part 3

## Because the med mapping leaves out some medication name and the regex expression extraction messes up the medicaition name, let's use original med description and check for antibiotic drug only

In [561]:
%%bigquery --use_rest_api current_med_original_no_mapped
##############################################################################################################################################################
# Goal:Creating the microbiology_cultures_prior_med_augmented Table. This table indicates of a patient having been treated with a specific antibiotic before specimen collection.
##############################################################################################################################################################
# CREATE OR REPLACE TABLE `som-nero-phi-jonc101.antimicrobial_stewardship.microbiology_cultures_prior_med_augmented` AS (
WITH base_c AS (
    SELECT DISTINCT
        anon_id,
        pat_enc_csn_id_coded,
        order_proc_id_coded,
        order_time_jittered_utc,
        result_time_jittered_utc
    FROM 
        `som-nero-phi-jonc101.antimicrobial_stewardship_sandy_refactor.microbiology_urine_cultures_cohort`
),
cleaned_medications AS (
    SELECT 
        c.anon_id,
        c.pat_enc_csn_id_coded,
        c.order_proc_id_coded,
        c.order_time_jittered_utc,
        c.result_time_jittered_utc,
        mo.ordering_date_jittered_utc as medication_time,
        mo.med_description AS medication_name,
        # INITCAP(
        #     REGEXP_REPLACE(
        #         REGEXP_REPLACE(
        #             TRIM(
        #                 REGEXP_REPLACE(
        #                     REGEXP_REPLACE(
        #                         REGEXP_REPLACE(
        #                             LOWER(mm.name),  -- Convert to lowercase
        #                             '\\s*\\d+(\\.\\d+)?\\s*(mg|mcg|gram|ml|%)', ''  -- Remove dosages or concentrations
        #                         ),
        #                         '\\(.*?\\)', ''  -- Remove text in parentheses
        #                     ),
        #                     ' in.*$|tablet|capsule|intravenous|piggyback|' ||
        #                     'solution|suspension|oral|sodium|chloride|' ||
        #                     'injection|citrate|soln|dextrose|iv|' ||
        #                     'macrocrystals|macrocrystal|axetil|potassium|packet|' ||
        #                     'monohydrate|ethylsuccinate|powder|mandelate|' ||
        #                     'hyclate|hcl|hippurate|tromethamine|' ||
        #                     'million|unit|syrup|chewable|delayed|mphase|' ||
        #                     'release|benzathine|syringe|dispersible|' ||
        #                     'sulfate|procaine|blue|hyos|sod*phos|' ||
        #                     'susp|and|fosamil|extended|succinate|granules|' ||
        #                     'delay|pot|ext|rel|cyam|salicylate|salicyl|' ||
        #                     'sodphos|methylene|stearate|synergy', ''  -- Remove pharmacy filler words and "synergy"
        #                 )
        #             ),
        #             '\\d|\\sfor\\s*|\\ser\\s*|\\shr\\s*|/ml\\s*|' ||
        #             '\\sml\\s*|\\sv\\s*|\\sg\\s*|\\sim\\s*', ''  -- General cleaning for non-relevant patterns
        #         ),
        #         '\\s|\\/|\\.|-$', ''  -- Remove extra characters like spaces, slashes, dots, etc.
        #     )
        # ) AS medication_name,
        # TIMESTAMP_DIFF(c.order_time_jittered_utc,mo.ordering_date_jittered_utc,day) as medication_time_to_cultureTime,       
    FROM 
        base_c c
    LEFT JOIN 
        `som-nero-phi-jonc101.shc_core_2023.order_med` mo
        using(anon_id, pat_enc_csn_id_coded)
)

select * 
from cleaned_medications 
group by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc, medication_time,medication_name
order by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc, medication_time,medication_name

Query is running:   0%|          |

Downloading:   0%|          |

In [9]:
%%bigquery --use_rest_api current_med_original_no_mapped
select * from `som-nero-phi-jonc101.antimicrobial_stewardship_sandy_refactor.current_med_original_no_mapped`

Query is running:   0%|          |

Downloading:   0%|          |

## Confirm that the number of unique orders in the new table is the same as the original table

In [24]:
find_unique_orders(current_med_original_no_mapped)

423185

### Step 1: I added  clean_antibiotic column, no match means no antibiotic, None mean no antibiotic prescribed, the list contanis potential match

In [11]:
current_med_original_no_mapped["cleaned_antibiotic"] = current_med_original_no_mapped["medication_name"].apply(
    lambda x: find_antibiotics(x, antibiotic_list)
)

In [39]:
current_med_original_no_mapped

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc,medication_time,medication_name,cleaned_antibiotic
0,JC1782205,131295201460,683773486,2020-10-23 10:16:00+00:00,2020-10-25 15:57:00+00:00,2020-10-23 07:00:00+00:00,PHYTONADIONE (VITAMIN K) IVPB,No Match
1,JC1782205,131295201460,683773486,2020-10-23 10:16:00+00:00,2020-10-25 15:57:00+00:00,2020-10-24 07:00:00+00:00,SODIUM PHOSPHATE 10 MMOL IN 100 ML D5W IVPB (VC),No Match
2,JC1782205,131295201460,683773486,2020-10-23 10:16:00+00:00,2020-10-25 15:57:00+00:00,2020-10-25 07:00:00+00:00,SODIUM PHOSPHATE 5 MMOL IN 100 ML D5W IVPB (VC),No Match
3,JC1782205,131295201460,683773486,2020-10-23 10:16:00+00:00,2020-10-25 15:57:00+00:00,2020-10-27 07:00:00+00:00,DIGOXIN 250 MCG/ML (0.25 MG/ML) INJ SOLN,No Match
4,JC1782205,131295201460,683773486,2020-10-23 10:16:00+00:00,2020-10-25 15:57:00+00:00,2020-10-27 07:00:00+00:00,HYDRALAZINE 20 MG/ML INJ SOLN,No Match
...,...,...,...,...,...,...,...,...
7797124,JC6241803,131327847353,785239703,2022-04-24 06:32:00+00:00,2022-04-26 14:45:00+00:00,2022-07-16 07:00:00+00:00,FENTANYL CITRATE (PF) 50 MCG/ML INJ SOLN,No Match
7797125,JC6241803,131327847353,785239703,2022-04-24 06:32:00+00:00,2022-04-26 14:45:00+00:00,2022-07-21 07:00:00+00:00,HEPARIN IV INFUSION (MD TO SPECIFY PARAMETERS),No Match
7797126,JC6241803,131327847353,785239703,2022-04-24 06:32:00+00:00,2022-04-26 14:45:00+00:00,2022-07-28 07:00:00+00:00,"HEPARIN (PORCINE) 1,000 UNIT/ML INJ SOLN",No Match
7797127,JC6241803,131327847353,785239703,2022-04-24 06:32:00+00:00,2022-04-26 14:45:00+00:00,2022-07-28 07:00:00+00:00,MIDODRINE 5 MG PO TABS,No Match


### 423185 -> 216783, still conform to previous number of the unique order with any med includign non-antibiotic

In [40]:
find_unique_orders(current_med_original_no_mapped[(current_med_original_no_mapped["cleaned_antibiotic"].notnull())])

216783

### The number of unique order with antibiotic medication is 156314

In [26]:
current_med_original_no_mapped

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc,medication_time,medication_name,cleaned_antibiotic
0,JC1782205,131295201460,683773486,2020-10-23 10:16:00+00:00,2020-10-25 15:57:00+00:00,2020-10-23 07:00:00+00:00,PHYTONADIONE (VITAMIN K) IVPB,No Match
1,JC1782205,131295201460,683773486,2020-10-23 10:16:00+00:00,2020-10-25 15:57:00+00:00,2020-10-24 07:00:00+00:00,SODIUM PHOSPHATE 10 MMOL IN 100 ML D5W IVPB (VC),No Match
2,JC1782205,131295201460,683773486,2020-10-23 10:16:00+00:00,2020-10-25 15:57:00+00:00,2020-10-25 07:00:00+00:00,SODIUM PHOSPHATE 5 MMOL IN 100 ML D5W IVPB (VC),No Match
3,JC1782205,131295201460,683773486,2020-10-23 10:16:00+00:00,2020-10-25 15:57:00+00:00,2020-10-27 07:00:00+00:00,DIGOXIN 250 MCG/ML (0.25 MG/ML) INJ SOLN,No Match
4,JC1782205,131295201460,683773486,2020-10-23 10:16:00+00:00,2020-10-25 15:57:00+00:00,2020-10-27 07:00:00+00:00,HYDRALAZINE 20 MG/ML INJ SOLN,No Match
...,...,...,...,...,...,...,...,...
7797124,JC6241803,131327847353,785239703,2022-04-24 06:32:00+00:00,2022-04-26 14:45:00+00:00,2022-07-16 07:00:00+00:00,FENTANYL CITRATE (PF) 50 MCG/ML INJ SOLN,No Match
7797125,JC6241803,131327847353,785239703,2022-04-24 06:32:00+00:00,2022-04-26 14:45:00+00:00,2022-07-21 07:00:00+00:00,HEPARIN IV INFUSION (MD TO SPECIFY PARAMETERS),No Match
7797126,JC6241803,131327847353,785239703,2022-04-24 06:32:00+00:00,2022-04-26 14:45:00+00:00,2022-07-28 07:00:00+00:00,"HEPARIN (PORCINE) 1,000 UNIT/ML INJ SOLN",No Match
7797127,JC6241803,131327847353,785239703,2022-04-24 06:32:00+00:00,2022-04-26 14:45:00+00:00,2022-07-28 07:00:00+00:00,MIDODRINE 5 MG PO TABS,No Match


In [41]:
find_unique_orders([(~current_med_original_no_mapped["cleaned_antibiotic"].isnull()) & (current_med_original_no_mapped["cleaned_antibiotic"] != 'No Match')])current_med_original_no_mapped

156314

In [692]:
156314 - 111862

44452

In [676]:
216783 - 156314

60469

In [61]:
current_med_original_no_mapped[""]

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc,medication_time,medication_name,cleaned_antibiotic
0,JC1782205,131295201460,683773486,2020-10-23 10:16:00+00:00,2020-10-25 15:57:00+00:00,2020-10-23 07:00:00+00:00,PHYTONADIONE (VITAMIN K) IVPB,No Match
1,JC1782205,131295201460,683773486,2020-10-23 10:16:00+00:00,2020-10-25 15:57:00+00:00,2020-10-24 07:00:00+00:00,SODIUM PHOSPHATE 10 MMOL IN 100 ML D5W IVPB (VC),No Match
2,JC1782205,131295201460,683773486,2020-10-23 10:16:00+00:00,2020-10-25 15:57:00+00:00,2020-10-25 07:00:00+00:00,SODIUM PHOSPHATE 5 MMOL IN 100 ML D5W IVPB (VC),No Match
3,JC1782205,131295201460,683773486,2020-10-23 10:16:00+00:00,2020-10-25 15:57:00+00:00,2020-10-27 07:00:00+00:00,DIGOXIN 250 MCG/ML (0.25 MG/ML) INJ SOLN,No Match
4,JC1782205,131295201460,683773486,2020-10-23 10:16:00+00:00,2020-10-25 15:57:00+00:00,2020-10-27 07:00:00+00:00,HYDRALAZINE 20 MG/ML INJ SOLN,No Match
...,...,...,...,...,...,...,...,...
7797124,JC6241803,131327847353,785239703,2022-04-24 06:32:00+00:00,2022-04-26 14:45:00+00:00,2022-07-16 07:00:00+00:00,FENTANYL CITRATE (PF) 50 MCG/ML INJ SOLN,No Match
7797125,JC6241803,131327847353,785239703,2022-04-24 06:32:00+00:00,2022-04-26 14:45:00+00:00,2022-07-21 07:00:00+00:00,HEPARIN IV INFUSION (MD TO SPECIFY PARAMETERS),No Match
7797126,JC6241803,131327847353,785239703,2022-04-24 06:32:00+00:00,2022-04-26 14:45:00+00:00,2022-07-28 07:00:00+00:00,"HEPARIN (PORCINE) 1,000 UNIT/ML INJ SOLN",No Match
7797127,JC6241803,131327847353,785239703,2022-04-24 06:32:00+00:00,2022-04-26 14:45:00+00:00,2022-07-28 07:00:00+00:00,MIDODRINE 5 MG PO TABS,No Match


In [60]:
current_med_original_no_mapped.groupby(['anon_id', 'pat_enc_csn_id_coded', 'order_proc_id_coded', 'order_time_jittered_utc'])["cleaned_antibiotic"].apply(lambda x: len(x) >1)

anon_id    pat_enc_csn_id_coded  order_proc_id_coded  order_time_jittered_utc  
JC1000010  15404221              325204620            2008-05-12 03:14:00+00:00    False
JC1000013  15174722              302914865            2008-04-11 23:45:00+00:00    False
JC1000021  32992898              351382857            2009-06-06 23:12:00+00:00    False
           131009159044          366795029            2010-05-29 23:22:00+00:00    False
JC1000022  15142402              323829550            2008-03-13 03:50:00+00:00    False
                                                                                   ...  
JC999925   131027103283          427190306            2013-09-16 18:42:00+00:00     True
           131107270857          471989931            2015-08-09 09:12:00+00:00     True
JC999935   14537303              312599852            2005-07-23 19:47:00+00:00    False
JC999992   131189547541          497201103            2016-06-06 03:57:00+00:00    False
JC999998   15725437           

In [80]:
antibiotic_order = current_med_original_no_mapped[(~current_med_original_no_mapped["cleaned_antibiotic"].isnull()) & (current_med_original_no_mapped["cleaned_antibiotic"] != 'No Match')]

In [65]:
find_unique_orders(antibiotic_order)

156314

In [92]:
# Count number of rows per group
group_counts = antibiotic_order.groupby(
    ['anon_id', 'pat_enc_csn_id_coded', 'order_proc_id_coded', 'order_time_jittered_utc']
)['cleaned_antibiotic'].transform('count')

# Filter rows where group count is greater than 1
filtered_df = antibiotic_order[group_counts > 5]
sorted_df = filtered_df.sort_values(by=['anon_id', 'pat_enc_csn_id_coded', 'order_proc_id_coded', 'order_time_jittered_utc'])

In [93]:
sorted_df

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc,medication_time,medication_name,cleaned_antibiotic
29215,JC1000129,131354606122,877003103,2023-04-24 01:28:00+00:00,2023-04-26 01:57:00+00:00,2023-04-23 07:00:00+00:00,VANCOMYCIN PER PHARMACY PROTOCOL (ED USE ONLY),[Vancomycin]
29219,JC1000129,131354606122,877003103,2023-04-24 01:28:00+00:00,2023-04-26 01:57:00+00:00,2023-04-24 07:00:00+00:00,MEROPENEM 1 GRAM/50 ML NS MINIBAG PLUS,[Meropenem]
2427438,JC1000129,131354606122,877003103,2023-04-24 01:28:00+00:00,2023-04-26 01:57:00+00:00,2023-04-23 07:00:00+00:00,CEFEPIME 2 GRAM/20 ML IV PUSH,[Cefepime]
2427441,JC1000129,131354606122,877003103,2023-04-24 01:28:00+00:00,2023-04-26 01:57:00+00:00,2023-04-27 07:00:00+00:00,ERTAPENEM IV PUSH,[Ertapenem]
3026702,JC1000129,131354606122,877003103,2023-04-24 01:28:00+00:00,2023-04-26 01:57:00+00:00,2023-04-23 07:00:00+00:00,VANCOMYCIN-DILUENT COMBO NO.1 1.25 GRAM/250 ML...,"[Vancomycin, Vancomycin-Diluent Combo]"
...,...,...,...,...,...,...,...,...
5336020,JC999842,131192863784,500208763,2016-07-16 14:51:00+00:00,2016-07-18 16:13:00+00:00,2016-07-20 07:00:00+00:00,VANCOMYCIN IVPB (CUSTOM DOSE),[Vancomycin]
5936077,JC999842,131192863784,500208763,2016-07-16 14:51:00+00:00,2016-07-18 16:13:00+00:00,2016-07-20 07:00:00+00:00,PIPERACILLIN-TAZOBACTAM-DEXTRS 3.375 GRAM/50 M...,"[Piperacillin-Tazobactam-Dextrs, Piperacillin-..."
6536187,JC999842,131192863784,500208763,2016-07-16 14:51:00+00:00,2016-07-18 16:13:00+00:00,2016-07-15 07:00:00+00:00,SULFAMETHOXAZOLE-TRIMETHOPRIM 800-160 MG PO TABS,"[Sulfamethoxazole-Trimethoprim, Trimethoprim]"
7735372,JC999842,131192863784,500208763,2016-07-16 14:51:00+00:00,2016-07-18 16:13:00+00:00,2016-07-18 07:00:00+00:00,AMOXICILLIN-POT CLAVULANATE 500-125 MG PO TABS,"[Amoxicillin-Pot Clavulanate, Amoxicillin]"


In [86]:
filtered_df.head(50)

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc,medication_time,medication_name,cleaned_antibiotic
28,JC1782224,131346827315,845694287,2022-12-31 00:31:00+00:00,2023-01-01 15:51:00+00:00,2022-12-30 08:00:00+00:00,AZITHROMYCIN 250 MG PO TABS,[Azithromycin]
34,JC1782258,131279449835,639673721,2019-11-11 03:54:00+00:00,2019-11-12 18:43:00+00:00,2019-11-10 08:00:00+00:00,CEPHALEXIN 500 MG PO CAPS,[Cephalexin]
55,JC1782343,131315438146,740526740,2021-07-14 09:13:00+00:00,2021-07-16 17:52:00+00:00,2021-07-15 07:00:00+00:00,AMOXICILLIN-POT CLAVULANATE 500-125 MG PO TABS,"[Amoxicillin-Pot Clavulanate, Amoxicillin]"
65,JC1782347,131256884695,577330817,2018-09-15 20:24:00+00:00,2018-09-17 16:29:00+00:00,2018-09-15 07:00:00+00:00,CIPROFLOXACIN HCL 250 MG PO TABS,"[Ciprofloxacin Hcl, Cipro, Ciprofloxacin, Oflo..."
70,JC1782395,131353567881,875896521,2023-05-16 20:54:00+00:00,2023-05-19 15:08:00+00:00,2023-05-04 07:00:00+00:00,VANCOMYCIN PER PHARMACY PROTOCOL TV,[Vancomycin]
79,JC1782408,131320539499,755957170,2021-11-25 04:34:00+00:00,2021-11-26 16:35:00+00:00,2021-11-24 08:00:00+00:00,CIPROFLOXACIN HCL 500 MG PO TABS,"[Ciprofloxacin Hcl, Cipro, Ciprofloxacin, Oflo..."
93,JC1782418,131275173055,627816574,2019-09-13 22:39:00+00:00,2019-09-15 15:09:00+00:00,2019-09-13 07:00:00+00:00,CEPHALEXIN 500 MG PO CAPS,[Cephalexin]
172,JC1782604,131360324027,898030538,2023-09-17 03:53:00+00:00,2023-09-19 16:38:00+00:00,2023-09-19 07:00:00+00:00,CIPROFLOXACIN HCL 250 MG PO TABS,"[Ciprofloxacin Hcl, Cipro, Ciprofloxacin, Oflo..."
183,JC1782659,131293108810,677684166,2020-08-13 15:37:00+00:00,2020-08-16 18:05:00+00:00,2020-08-13 07:00:00+00:00,PIPERACILLIN-TAZOBACTAM-DEXTRS 3.375 GRAM/50 M...,"[Piperacillin-Tazobactam-Dextrs, Piperacillin-..."
185,JC1782659,131293108810,677684166,2020-08-13 15:37:00+00:00,2020-08-16 18:05:00+00:00,2020-08-19 07:00:00+00:00,PIPERACILLIN-TAZOBACTAM-DEXTRS 3.375 GRAM/50 M...,"[Piperacillin-Tazobactam-Dextrs, Piperacillin-..."


In [66]:
find_unique_orders(antibiotic_order.drop_duplicates(subset=['anon_id', 'pat_enc_csn_id_coded', 'order_proc_id_coded', 'order_time_jittered_utc'], keep='first'))

156314

In [52]:
find_unique_orders(current_med_original_no_mapped)

423185

In [42]:
ED_cur_med = ED_order.merge(current_med_original_no_mapped, 
                                      on =['anon_id', 'pat_enc_csn_id_coded', 'order_proc_id_coded', "order_time_jittered_utc"] )
ED_cur_med

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,hosp_ward_IP,hosp_ward_OP,hosp_ward_ER,hosp_ward_ICU,result_time_jittered_utc,medication_time,medication_name,cleaned_antibiotic
0,JC1783941,131355397449,879930149,2023-06-02 20:17:00+00:00,0,1,1,0,2023-06-04 15:54:00+00:00,2023-06-02 07:00:00+00:00,NITROFURANTOIN MONOHYD/M-CRYST 100 MG PO CAPS,[Nitrofurantoin]
1,JC3243957,131288047970,662720316,2020-06-04 22:01:00+00:00,0,1,1,1,2020-06-07 04:12:00+00:00,2020-05-31 07:00:00+00:00,BISACODYL 10 MG PR SUPP,No Match
2,JC3243957,131288047970,662720316,2020-06-04 22:01:00+00:00,0,1,1,1,2020-06-07 04:12:00+00:00,2020-05-31 07:00:00+00:00,HYDROMORPHONE 1 MG/ML INJ SYRG,No Match
3,JC3243957,131288047970,662720316,2020-06-04 22:01:00+00:00,0,1,1,1,2020-06-07 04:12:00+00:00,2020-05-31 07:00:00+00:00,SENNOSIDES 8.6 MG PO TABS,No Match
4,JC3243957,131288047970,662720316,2020-06-04 22:01:00+00:00,0,1,1,1,2020-06-07 04:12:00+00:00,2020-05-31 07:00:00+00:00,VASOPRESSIN IV INFUSION,No Match
...,...,...,...,...,...,...,...,...,...,...,...,...
3685484,JC2248974,131286880465,659124622,2020-03-20 18:06:00+00:00,1,0,1,0,2020-03-22 23:41:00+00:00,2020-03-20 07:00:00+00:00,MELATONIN 3 MG PO TABS,No Match
3685485,JC2248974,131286880465,659124622,2020-03-20 18:06:00+00:00,1,0,1,0,2020-03-22 23:41:00+00:00,2020-03-20 07:00:00+00:00,POLYETHYLENE GLYCOL 3350 17 GRAM PO PWPK,No Match
3685486,JC2248974,131286880465,659124622,2020-03-20 18:06:00+00:00,1,0,1,0,2020-03-22 23:41:00+00:00,2020-03-21 07:00:00+00:00,IBUPROFEN 600 MG PO TABS,No Match
3685487,JC2248974,131286880465,659124622,2020-03-20 18:06:00+00:00,1,0,1,0,2020-03-22 23:41:00+00:00,2020-03-21 07:00:00+00:00,METOCLOPRAMIDE HCL 10 MG PO TABS,No Match


In [53]:
less_than_2015_cur_med = current_med_original_no_mapped[current_med_original_no_mapped["order_time_jittered_utc"] <  pd.to_datetime("2015").tz_localize("UTC")]


In [54]:
find_unique_orders(less_than_2015_cur_med)

122804

In [55]:
less_than_2015_cur_med[(~less_than_2015_cur_med["cleaned_antibiotic"].isnull()) & (less_than_2015_cur_med["cleaned_antibiotic"] != 'No Match')]

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc,medication_time,medication_name,cleaned_antibiotic
29210,JC1000083,131008798135,364762539,2010-03-29 21:10:00+00:00,2010-03-31 19:00:00+00:00,2010-03-30 07:00:00+00:00,TRIMETHOPRIM-SULFAMETHOXAZOLE 160-800 MG PO TABS,[Trimethoprim]
29312,JC1001034,131012582298,379655816,2011-02-03 12:13:00+00:00,2011-02-07 19:06:00+00:00,2011-03-03 08:00:00+00:00,VANCOMYCIN 125 MG PO CAPS,[Vancomycin]
29366,JC1001764,16880403,332221328,2008-10-22 02:58:00+00:00,2008-10-23 21:18:00+00:00,2008-11-02 07:00:00+00:00,METRONIDAZOLE IN NACL (ISO-OS) 500 MG/100 ML I...,"[Metronidazole In Nacl, Metronidazole]"
29374,JC1001821,33104680,352041530,2009-06-16 15:28:00+00:00,2009-06-18 14:54:00+00:00,2009-06-23 07:00:00+00:00,LINEZOLID 600 MG/300 ML IV SOLP,[Linezolid]
29423,JC1002167,131018946170,402348326,2012-06-17 17:36:00+00:00,2012-06-19 15:12:00+00:00,2012-06-03 07:00:00+00:00,ERYTHROMYCIN IVPB (CUSTOM DOSE),[Erythromycin]
...,...,...,...,...,...,...,...,...
7796610,JC623848,131008035713,361324764,2009-12-26 07:06:00+00:00,2009-12-28 14:57:00+00:00,2010-01-01 08:00:00+00:00,MOXIFLOXACIN 400 MG PO TABS,[Moxifloxacin]
7796805,JC623908,131017760585,398052070,2012-04-11 05:14:00+00:00,2012-04-12 23:41:00+00:00,2012-04-09 07:00:00+00:00,CIPROFLOXACIN IN D5W 400 MG/200 ML IV PGBK,"[Ciprofloxacin In, Cipro, Ciprofloxacin, Oflox..."
7796813,JC623916,131002174338,353629943,2009-06-27 00:35:00+00:00,2009-06-28 18:19:00+00:00,2009-06-26 07:00:00+00:00,TRIMETHOPRIM-SULFAMETHOXAZOLE 160-800 MG PO TABS,[Trimethoprim]
7796832,JC623930,131014781370,388108605,2011-08-01 22:23:00+00:00,2011-08-03 20:17:00+00:00,2011-08-01 07:00:00+00:00,DOXYCYCLINE HYCLATE 100 MG PO CAPS,[Doxycycline Hyclate]


In [56]:
find_unique_orders(less_than_2015_cur_med[(~less_than_2015_cur_med["cleaned_antibiotic"].isnull()) & (less_than_2015_cur_med["cleaned_antibiotic"] != 'No Match')])

45129

In [57]:
45129/122804

0.3674880297058728

In [43]:
find_unique_orders(ED_cur_med)

111862

In [688]:
find_unique_orders(ED_cur_med[(~ED_cur_med["cleaned_antibiotic"].isnull()) & (ED_cur_med["cleaned_antibiotic"] != 'No Match')])

79916

In [690]:
111862-79916

31946

## question: for example:this patient, should we count 3 times or one time of cefepime: JC3243957	131288047970	662720316	2020-06-04 

In [689]:
ED_cur_med[(~ED_cur_med["cleaned_antibiotic"].isnull()) & (ED_cur_med["cleaned_antibiotic"] != 'No Match')]

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,hosp_ward_IP,hosp_ward_OP,hosp_ward_ER,hosp_ward_ICU,result_time_jittered_utc,medication_time,medication_name,cleaned_antibiotic
4,JC3243957,131288047970,662720316,2020-06-04 22:01:00+00:00,0,1,1,1,2020-06-07 04:12:00+00:00,2020-05-31 07:00:00+00:00,AZITHROMYCIN 250 MG PO TABS,[Azithromycin]
5,JC3243957,131288047970,662720316,2020-06-04 22:01:00+00:00,0,1,1,1,2020-06-07 04:12:00+00:00,2020-05-31 07:00:00+00:00,AZITHROMYCIN 500 MG IN D5W 250 ML IVPB,[Azithromycin]
7,JC3243957,131288047970,662720316,2020-06-04 22:01:00+00:00,0,1,1,1,2020-06-07 04:12:00+00:00,2020-05-31 07:00:00+00:00,CEFEPIME 1 GRAM/50 ML NS EXTENDED INFUSION MIN...,[Cefepime]
8,JC3243957,131288047970,662720316,2020-06-04 22:01:00+00:00,0,1,1,1,2020-06-07 04:12:00+00:00,2020-05-31 07:00:00+00:00,CEFEPIME 2 GRAM/20 ML IV PUSH,[Cefepime]
9,JC3243957,131288047970,662720316,2020-06-04 22:01:00+00:00,0,1,1,1,2020-06-07 04:12:00+00:00,2020-05-31 07:00:00+00:00,CEFEPIME 2 GRAM/50 ML NS EXTENDED INFUSION MIN...,[Cefepime]
...,...,...,...,...,...,...,...,...,...,...,...,...
3685400,JC561653,131025361630,421988991,2013-07-08 23:32:00+00:00,1,0,1,0,2013-07-10 17:27:00+00:00,2013-07-08 07:00:00+00:00,CIPROFLOXACIN IN D5W 400 MG/200 ML IV PGBK,"[Ciprofloxacin In, Cipro, Ciprofloxacin, Oflox..."
3685408,JC561653,131025361630,421988991,2013-07-08 23:32:00+00:00,1,0,1,0,2013-07-10 17:27:00+00:00,2013-07-08 07:00:00+00:00,METRONIDAZOLE IN NACL (ISO-OS) 500 MG/100 ML I...,"[Metronidazole In Nacl, Metronidazole]"
3685412,JC561653,131025361630,421988991,2013-07-08 23:32:00+00:00,1,0,1,0,2013-07-10 17:27:00+00:00,2013-07-08 07:00:00+00:00,PIPERACILLIN-TAZOBACTAM-DEXTRS 3.375 GRAM/50 M...,"[Piperacillin-Tazobactam-Dextrs, Piperacillin-..."
3685413,JC561653,131025361630,421988991,2013-07-08 23:32:00+00:00,1,0,1,0,2013-07-10 17:27:00+00:00,2013-07-08 07:00:00+00:00,PIPERACILLIN-TAZOBACTAM-DEXTRS 4.5 GRAM/100 ML...,"[Piperacillin-Tazobactam-Dextrs, Piperacillin-..."


In [685]:
find_unique_orders(ED_cur_med[~ED_cur_med["medication_name"].isnull()])

108299

In [693]:
# Group by the specified columns
grouped = ED_cur_med.groupby(['anon_id', 'pat_enc_csn_id_coded', 'order_proc_id_coded', 'order_time_jittered_utc'])

# Function to filter each group
def filter_group_exclusion(group):
    # Check if any medication_time_to_cultureTime is between 6 hours and 720 hours (30 days)
    # time_diff_hours = (group['medication_time'] - group['order_time_jittered_utc']).dt.total_seconds() / 3600  # Convert to hours
    # if any((time_diff_hours > 6) & (time_diff_hours < 720)):
    #     return None  # Disregard the whole group
   
        # Keep rows where:
        # 1. medication_time is greater than culture order time but smaller than result time, OR
        # 2. medication_time is within 6 hours before the culture order time
        condition = (
            ((group['medication_time'] > group['order_time_jittered_utc']) & 
                (group['medication_time'] < group['result_time_jittered_utc'])) | 
            ((group['medication_time'] >= (group['order_time_jittered_utc'] - pd.Timedelta(hours=6))) & 
                (group['medication_time'] <= group['order_time_jittered_utc'])
        ))
        return group[condition]

# Apply the filter to each group
filtered_groups_exclusion = [filter_group_exclusion(group) for _, group in grouped]

# Combine the filtered groups into a new DataFrame
filtered_df_exclusion = pd.concat([group for group in filtered_groups_exclusion if group is not None])

# Reset the index
filtered_df_exclusion = filtered_df_exclusion.reset_index(drop=True)
filtered_df_exclusion


Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,hosp_ward_IP,hosp_ward_OP,hosp_ward_ER,hosp_ward_ICU,result_time_jittered_utc,medication_time,medication_name,cleaned_antibiotic
0,JC1000083,131008798135,364762539,2010-03-29 21:10:00+00:00,1,0,1,0,2010-03-31 19:00:00+00:00,2010-03-30 07:00:00+00:00,TRIMETHOPRIM-SULFAMETHOXAZOLE 160-800 MG PO TABS,[Trimethoprim]
1,JC1000129,131354606122,877003103,2023-04-24 01:28:00+00:00,1,0,1,1,2023-04-26 01:57:00+00:00,2023-04-24 07:00:00+00:00,"ACETAMINOPHEN 1,000 MG/100 ML (10 MG/ML) IV SOLN",No Match
2,JC1000129,131354606122,877003103,2023-04-24 01:28:00+00:00,1,0,1,1,2023-04-26 01:57:00+00:00,2023-04-24 07:00:00+00:00,ASCORBIC ACID (VITAMIN C) 500 MG PO TABS,No Match
3,JC1000129,131354606122,877003103,2023-04-24 01:28:00+00:00,1,0,1,1,2023-04-26 01:57:00+00:00,2023-04-24 07:00:00+00:00,CALCIUM CHLORIDE 1 GRAM (13.6 MEQ)/50 ML NS IVPB,No Match
4,JC1000129,131354606122,877003103,2023-04-24 01:28:00+00:00,1,0,1,1,2023-04-26 01:57:00+00:00,2023-04-24 07:00:00+00:00,"CALCIUM GLUC IN NACL, ISO-OSM 1 GRAM/50 ML IV ...",No Match
...,...,...,...,...,...,...,...,...,...,...,...,...
968839,JC999925,131107270857,471989931,2015-08-09 09:12:00+00:00,1,0,1,0,2015-08-11 14:59:00+00:00,2015-08-10 07:00:00+00:00,IBUPROFEN 600 MG PO TABS,No Match
968840,JC999925,131107270857,471989931,2015-08-09 09:12:00+00:00,1,0,1,0,2015-08-11 14:59:00+00:00,2015-08-10 07:00:00+00:00,ONDANSETRON 4 MG PO TBDL,No Match
968841,JC999925,131107270857,471989931,2015-08-09 09:12:00+00:00,1,0,1,0,2015-08-11 14:59:00+00:00,2015-08-10 07:00:00+00:00,ONDANSETRON 8 MG PO TBDL,No Match
968842,JC999925,131107270857,471989931,2015-08-09 09:12:00+00:00,1,0,1,0,2015-08-11 14:59:00+00:00,2015-08-11 07:00:00+00:00,ALPRAZOLAM 0.25 MG PO TABS,No Match


In [694]:
find_unique_orders(filtered_df_exclusion[filtered_df_exclusion["cleaned_antibiotic"] != 'No Match'])

43414

In [None]:
sorted_df = filtered_df.sort_values(by=['anon_id', 'pat_enc_csn_id_coded', 'order_proc_id_coded', 'order_time_jittered_utc'])

In [695]:
# Group by the specified columns
grouped = ED_cur_med.groupby(['anon_id', 'pat_enc_csn_id_coded', 'order_proc_id_coded', 'order_time_jittered_utc'])

# Function to filter each group
def filter_group(group):
    # Check if any medication_time_to_cultureTime is between 6 hours and 720 hours (30 days)
    time_diff_hours = (group['medication_time'] - group['order_time_jittered_utc']).dt.total_seconds() / 3600  # Convert to hours
    if any((time_diff_hours > 6) & (time_diff_hours < 720)):
        return None  # Disregard the whole group
    else:
        # Keep rows where:
        # 1. medication_time is greater than culture order time but smaller than result time, OR
        # 2. medication_time is within 6 hours before the culture order time
        condition = (
            ((group['medication_time'] > group['order_time_jittered_utc']) & 
             (group['medication_time'] < group['result_time_jittered_utc'])) | 
            ((group['medication_time'] >= (group['order_time_jittered_utc'] - pd.Timedelta(hours=6))) & 
             (group['medication_time'] <= group['order_time_jittered_utc'])
        ))
        return group[condition]

# Apply the filter to each group
filtered_groups = [filter_group(group) for _, group in grouped]

# Combine the filtered groups into a new DataFrame
filtered_df = pd.concat([group for group in filtered_groups if group is not None])

# Reset the index
filtered_df = filtered_df.reset_index(drop=True)


question: one order could have multiple medication prescribed, how do we evaluate this order then?

In [698]:
filtered_df[filtered_df["cleaned_antibiotic"] != 'No Match']

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,hosp_ward_IP,hosp_ward_OP,hosp_ward_ER,hosp_ward_ICU,result_time_jittered_utc,medication_time,medication_name,cleaned_antibiotic
0,JC1000441,131191612156,499016548,2016-07-09 06:48:00+00:00,1,0,1,0,2016-07-11 16:17:00+00:00,2016-07-09 07:00:00+00:00,CEFTRIAXONE 1 GRAM INJ SOLR,[Ceftriaxone]
1,JC1000441,131191612156,499016548,2016-07-09 06:48:00+00:00,1,0,1,0,2016-07-11 16:17:00+00:00,2016-07-09 07:00:00+00:00,CEPHALEXIN 500 MG PO CAPS,[Cephalexin]
2,JC1000441,131191612156,499016548,2016-07-09 06:48:00+00:00,1,0,1,0,2016-07-11 16:17:00+00:00,2016-07-09 07:00:00+00:00,NITROFURANTOIN MONOHYD/M-CRYST 100 MG PO CAPS,[Nitrofurantoin]
7,JC1001429,131220834647,519979399,2017-02-14 09:10:00+00:00,1,0,1,0,2017-02-16 18:10:00+00:00,2017-02-14 08:00:00+00:00,SULFAMETHOXAZOLE-TRIMETHOPRIM 800-160 MG PO TABS,"[Sulfamethoxazole-Trimethoprim, Trimethoprim]"
39,JC1003289,131333657400,797227064,2022-07-03 02:29:00+00:00,1,0,1,0,2022-07-04 17:22:00+00:00,2022-07-03 07:00:00+00:00,NITROFURANTOIN MONOHYD/M-CRYST 100 MG PO CAPS,[Nitrofurantoin]
...,...,...,...,...,...,...,...,...,...,...,...,...
97216,JC997774,131359464813,894993996,2023-09-02 08:19:00+00:00,1,0,1,0,2023-09-04 16:44:00+00:00,2023-09-02 07:00:00+00:00,CEFTRIAXONE 1 GRAM/10 ML IV PUSH (VIAL ONLY),[Ceftriaxone]
97217,JC997774,131359464813,894993996,2023-09-02 08:19:00+00:00,1,0,1,0,2023-09-04 16:44:00+00:00,2023-09-02 07:00:00+00:00,CEPHALEXIN 500 MG PO CAPS,[Cephalexin]
97224,JC999190,131153320458,480462224,2015-12-19 09:12:00+00:00,1,0,1,0,2015-12-21 15:10:00+00:00,2015-12-19 08:00:00+00:00,CEPHALEXIN 250 MG PO CAPS,[Cephalexin]
97229,JC999190,131257518115,577313529,2018-10-08 12:58:00+00:00,1,0,1,0,2018-10-10 13:53:00+00:00,2018-10-08 07:00:00+00:00,CEPHALEXIN 500 MG PO CAPS,[Cephalexin]


In [697]:
find_unique_orders(filtered_df[filtered_df["cleaned_antibiotic"] != 'No Match'])

10119

In [686]:
# ED_culture_merged_cur_med_no_mapped = ED_culture.merge(current_med_original_no_mapped, 
#                                       on =['anon_id', 'pat_enc_csn_id_coded', 'order_proc_id_coded', "order_time_jittered_utc"] )
# ED_culture_merged_cur_med_no_mapped = ED_culture_merged_cur_med_no_mapped.drop(columns=['result_time_jittered_utc_y'])
# ED_culture_merged_cur_med_no_mapped = ED_culture_merged_cur_med_no_mapped.rename(columns={'result_time_jittered_utc_x': 'result_time_jittered_utc'})
# assert find_unique_orders(ED_culture_merged_cur_med_no_mapped) == len(ED_order) # this confirms that all ED cultures are included in the ED culture dataframe
# ED_culture_merged_cur_med_no_mapped

In [672]:
current_med_original_no_mapped[current_med_original_no_mapped["medication_name"].fillna("").str.contains("Gentamicin".upper()) & current_med_original_no_mapped["medication_name"].fillna("").str.contains("Cefazolin".upper())]

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc,medication_time,medication_name,cleaned_antibiotic
294140,JC1118369,131331875871,796488344,2022-06-29 01:16:00+00:00,2022-07-04 01:59:00+00:00,2022-06-28 07:00:00+00:00,NS W/CEFAZOLIN + GENTAMICIN MIXTURE (SHC OR),"[Cefazolin, Gentamicin]"
2230701,JC1554669,131025663099,423368506,2013-08-07 13:10:00+00:00,2013-08-09 14:20:00+00:00,2013-07-29 07:00:00+00:00,NS W/CEFAZOLIN + GENTAMICIN MIXTURE (SHC OR),"[Cefazolin, Gentamicin]"
2318198,JC1573928,131358158974,890540955,2023-06-24 04:24:00+00:00,2023-06-26 21:42:00+00:00,2023-06-27 07:00:00+00:00,NS W/ CEFAZOLIN + GENTAMICIN + VANCOMYCIN (SHC...,"[Cefazolin, Vancomycin, Gentamicin]"
4137093,JC2434525,131343876874,831586542,2022-10-24 05:31:00+00:00,2022-10-26 17:10:00+00:00,2022-10-24 07:00:00+00:00,NS W/CEFAZOLIN + GENTAMICIN MIXTURE (SHC OR),"[Cefazolin, Gentamicin]"
4137944,JC2434525,131343876874,850889861,2022-12-29 00:32:00+00:00,2022-12-31 01:18:00+00:00,2022-10-24 07:00:00+00:00,NS W/CEFAZOLIN + GENTAMICIN MIXTURE (SHC OR),"[Cefazolin, Gentamicin]"
4837337,JC2949644,131363299802,912585759,2023-10-05 03:10:00+00:00,2023-10-11 18:52:00+00:00,2023-10-15 07:00:00+00:00,NS W/ CEFAZOLIN + GENTAMICIN + VANCOMYCIN (SHC...,"[Cefazolin, Vancomycin, Gentamicin]"
6686710,JC6488197,131358295097,890580901,2023-06-24 10:04:00+00:00,2023-06-26 23:30:00+00:00,2023-06-27 07:00:00+00:00,NS W/ CEFAZOLIN + GENTAMICIN + VANCOMYCIN (SHC...,"[Cefazolin, Vancomycin, Gentamicin]"
6756494,JC6527468,131361542246,908600474,2023-09-19 01:35:00+00:00,2023-09-21 00:30:00+00:00,2023-11-26 08:00:00+00:00,NS W/ CEFAZOLIN + GENTAMICIN + VANCOMYCIN (SHC...,"[Cefazolin, Vancomycin, Gentamicin]"
6756535,JC6527468,131361542246,908600474,2023-09-19 01:35:00+00:00,2023-09-21 00:30:00+00:00,2023-11-30 08:00:00+00:00,NS W/ CEFAZOLIN + GENTAMICIN + VANCOMYCIN (SHC...,"[Cefazolin, Vancomycin, Gentamicin]"
6757239,JC6527468,131361542246,912635672,2023-10-05 13:02:00+00:00,2023-10-08 19:29:00+00:00,2023-11-26 08:00:00+00:00,NS W/ CEFAZOLIN + GENTAMICIN + VANCOMYCIN (SHC...,"[Cefazolin, Vancomycin, Gentamicin]"


In [659]:
current_med_original_no_mapped["cleaned_antibiotic"].value_counts()

cleaned_antibiotic
No Match                                                     6957617
[Vancomycin]                                                   87117
[Ceftriaxone]                                                  53470
[Piperacillin-Tazobactam-Dextrs, Piperacillin-Tazobactam]      41046
[Cefepime]                                                     33182
                                                              ...   
[Hiprex]                                                          35
[Zyvox]                                                           20
[Cefazolin, Vancomycin, Gentamicin]                               10
[Cefazolin, Gentamicin]                                            4
[Levofloxacin, Vancomycin, Ofloxacin]                              1
Name: count, Length: 94, dtype: int64

In [528]:
for i in current_med_original["medication_name"].unique():
    if i is not None:
        if "Nitrofurantoin".lower() in i.lower():
            print(i)

NITROFURANTOIN MONOHYD/M-CRYST 100 MG PO CAPS
NITROFURANTOIN MACROCRYSTAL 50 MG PO CAPS
NITROFURANTOIN MACROCRYSTAL 100 MG PO CAPS
NITROFURANTOIN 25 MG/5 ML PO SUSP
NITROFURANTOIN 100 MG PO CAPS
NITROFURANTOIN 50 MG PO CAPS
NITROFURANTOIN MACROCRYSTAL 25 MG PO CAPS


In [701]:
%%bigquery --use_rest_api past_med
##############################################################################################################################################################
# Goal:Creating the microbiology_cultures_prior_med_augmented Table. This table indicates of a patient having been treated with a specific antibiotic before specimen collection.
##############################################################################################################################################################
# CREATE OR REPLACE TABLE `som-nero-phi-jonc101.antimicrobial_stewardship.microbiology_cultures_prior_med_augmented` AS (
WITH base_c AS (
    SELECT DISTINCT
        anon_id,
        pat_enc_csn_id_coded,
        order_proc_id_coded,
        order_time_jittered_utc,
        result_time_jittered_utc
    FROM 
        `som-nero-phi-jonc101.antimicrobial_stewardship_sandy_refactor.microbiology_urine_cultures_cohort`
),
cleaned_medications AS (
    SELECT 
        c.anon_id,
        c.pat_enc_csn_id_coded,
        c.order_proc_id_coded,
        c.order_time_jittered_utc,
        c.result_time_jittered_utc,
        mo.ordering_date_jittered_utc as medication_time,
        mo.med_description AS medication_name,
        mo.pat_enc_csn_id_coded AS medication_encounter_id,
        # INITCAP(
        #     REGEXP_REPLACE(
        #         REGEXP_REPLACE(
        #             TRIM(
        #                 REGEXP_REPLACE(
        #                     REGEXP_REPLACE(
        #                         REGEXP_REPLACE(
        #                             LOWER(mm.name),  -- Convert to lowercase
        #                             '\\s*\\d+(\\.\\d+)?\\s*(mg|mcg|gram|ml|%)', ''  -- Remove dosages or concentrations
        #                         ),
        #                         '\\(.*?\\)', ''  -- Remove text in parentheses
        #                     ),
        #                     ' in.*$|tablet|capsule|intravenous|piggyback|' ||
        #                     'solution|suspension|oral|sodium|chloride|' ||
        #                     'injection|citrate|soln|dextrose|iv|' ||
        #                     'macrocrystals|macrocrystal|axetil|potassium|packet|' ||
        #                     'monohydrate|ethylsuccinate|powder|mandelate|' ||
        #                     'hyclate|hcl|hippurate|tromethamine|' ||
        #                     'million|unit|syrup|chewable|delayed|mphase|' ||
        #                     'release|benzathine|syringe|dispersible|' ||
        #                     'sulfate|procaine|blue|hyos|sod*phos|' ||
        #                     'susp|and|fosamil|extended|succinate|granules|' ||
        #                     'delay|pot|ext|rel|cyam|salicylate|salicyl|' ||
        #                     'sodphos|methylene|stearate|synergy', ''  -- Remove pharmacy filler words and "synergy"
        #                 )
        #             ),
        #             '\\d|\\sfor\\s*|\\ser\\s*|\\shr\\s*|/ml\\s*|' ||
        #             '\\sml\\s*|\\sv\\s*|\\sg\\s*|\\sim\\s*', ''  -- General cleaning for non-relevant patterns
        #         ),
        #         '\\s|\\/|\\.|-$', ''  -- Remove extra characters like spaces, slashes, dots, etc.
        #     )
        # ) AS medication_name,
        # TIMESTAMP_DIFF(c.order_time_jittered_utc,mo.ordering_date_jittered_utc,day) as medication_time_to_cultureTime,       
    FROM 
        base_c c
    LEFT JOIN 
        `som-nero-phi-jonc101.shc_core_2023.order_med` mo
        using(anon_id)
)

select * 
from cleaned_medications 
group by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc, medication_time,medication_name,medication_encounter_id
order by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,order_time_jittered_utc,result_time_jittered_utc, medication_time,medication_name,medication_encounter_id

Query is running:   0%|          |

Downloading:   0%|          |

423185 -> 216783 -->

In [10]:
%%bigquery --use_rest_api past_med
SELECT * FROM `som-nero-phi-jonc101.antimicrobial_stewardship_sandy_refactor.all_med` where ARRAY_LENGTH(cleaned_antibiotic) > 0

Query is running:   0%|          |

Downloading:   0%|          |

In [21]:
79916 - 43414

36502

In [23]:
43414 - 13760

29654

In [36]:
13760/423185

0.03251533017474627

In [None]:
# 2020 -2023 ED
7204 urine cultures 

In [19]:
43414 -16392

27022