##### Project: Opioid Exposed Infant Covariates
##### Investigator: Stephen Patrick, Sarah Loch
##### Programmers: Sander Su, Chris Guardo
##### Date Created: 01/17/23
##### Last Modified: 09/30/25

#### Notes:
Using the drug-related codes from the maternal health record, this notebook filters for evidence of present/positive/non-zero laboratory toxicology results indicative of illicit substance use, recorded from 30 days prior to delivery up through and including the birth hospitalization.


In [0]:
%run "../Project_modules"

##### Maternal illicit substance use:
##### use values from 'Sheet 12 mat illicit toxicology' in table 'measurement'
##### cohort: the numbers in population 40k (all moms of mom-baby pairs after 2010 that passed the exclusion criteria)
##### lab value: positive, present or reactive
##### time window updated: 30 days before birth date through end of birth hospitalization stay

In [0]:
# Location of the phenotype table; the placeholder must be replaced with a real
# table reference before this cell can run.
phenotype_table_location = " ***Insert file location*** "
phenotype_table=spark.sql(f"SELECT * FROM {phenotype_table_location}")
# get_phenotype_cohort is not defined in this notebook — presumably it is
# provided by the %run of Project_modules; confirm there.
phenotype_cohort=get_phenotype_cohort(phenotype_table)
# Expose the cohort to Spark SQL under the temp-view name `phenotype_cohort`.
phenotype_cohort.createOrReplaceTempView("phenotype_cohort")

# Lookup sheet of toxicology search terms; search_str_df reads this
# notebook-level name when building its query.
sheet_name="phenotyping.mprint_sheet_12_mat_illicit_toxicology" ### combine main and extra list into one sheet

##### mom_baby_step1_matopioidtoxicology_main

In [0]:
def search_str_df(col_name):
    """Match maternal measurements against toxicology search terms and keep positive results.

    Reads ``global_temp.mom_meas_data`` and the lookup sheet named by the
    notebook-level ``sheet_name``. A measurement matches a term when its
    ``measurement_source_value`` contains the lower-cased value of
    ``col_name`` from the lookup sheet.

    Args:
        col_name: Name of the search-term column in the lookup sheet (the
            notebook calls this with 'short_name' and 'long_name'). The value
            is interpolated into the SQL text, so pass trusted column names only.

    Returns:
        Spark DataFrame of the matched measurement rows joined with the lookup
        columns (search_term, drug_type_grouping, search_term_type, source),
        restricted to rows whose ``value_source_value`` contains 'positive',
        'present' or 'reactive', or whose ``value_as_number`` is non-zero.
    """
    measurements = spark.sql("select * from global_temp.mom_meas_data")
    # Blank terms are excluded: contains('') is true for every string, so a single
    # empty cell in the sheet would otherwise match every measurement row.
    search_terms = spark.sql(f"""select lower({col_name}) as search_term, drug_type_grouping, '{col_name}' as search_term_type , source 
                  from {sheet_name} where {col_name} is not null and trim({col_name}) <> ''""")
    # The term list is broadcast so the substring (non-equi) join does not shuffle
    # the measurement table.
    matched = measurements.join(
        F.broadcast(search_terms),
        measurements.measurement_source_value.contains(search_terms["search_term"]),
        "inner",
    )
    # Numeric results count only when non-zero; a NULL value_as_number makes the
    # comparison NULL, so such rows pass only via the text patterns.
    # NOTE(review): '%reactive%' also matches 'non-reactive'/'nonreactive' and
    # '%present%' matches 'not present' — confirm such values do not occur.
    return matched.filter(
        "(value_source_value like '%positive%' or value_source_value like '%present%' "
        "or value_source_value like '%reactive%') or value_as_number <> 0"
    )

In [0]:
# Step 1: pull every measurement row whose person_id is a mother in the
# 2010+ mom-baby pair table.
sql=f"""
          select * from {meas_table} where person_id in 
          (select mom_person_id from global_temp.mom_baby_step1_2010_mombabypair);
     """
# Normalise the text columns once (trim + lower-case the measurement name,
# lower-case the result value) so the later substring / LIKE matching works
# against lower-case text.
# NOTE(review): `trim` is used unqualified — presumably brought into scope by
# Project_modules; confirm there.
mom_meas_df = (
    spark.sql(sql)
    .withColumn(
        "measurement_source_value",
        F.lower(trim(F.col("measurement_source_value"))),
    )
    .withColumn("value_source_value", F.lower(F.col("value_source_value")))
)
# The .name attribute is set before registration; later queries read
# global_temp.mom_meas_data, so the helper presumably registers under this name.
mom_meas_df.name = "mom_meas_data"
register_parquet_global_view(mom_meas_df)

# Step 2: run the term search once per lookup column, then stack the two
# result sets and drop exact duplicate rows.
short_name_df = search_str_df('short_name')
long_name_df = search_str_df('long_name')
meas_df = short_name_df.union(long_name_df).distinct()
meas_df.name = "illicit_substance_meas_df"
register_parquet_global_view(meas_df)

# Step 3: attach the mom-baby pair columns to each matched measurement.
sql = "select a.*,b.* from global_temp.illicit_substance_meas_df as a, global_temp.mom_baby_step1_2010_mombabypair as b where a.person_id = b.mom_person_id;"

mom_baby_step1_2010_mombabypair_illicit_substance = spark.sql(sql)
mom_baby_step1_2010_mombabypair_illicit_substance.name = 'mom_baby_step1_2010_mombabypair_illicit_substance'
register_parquet_global_view(mom_baby_step1_2010_mombabypair_illicit_substance)

##### Validation

In [0]:
# Validation checkpoint on the joined measurement / mom-baby pair view.
# NOTE(review): df_inspection and the meaning of its "all" argument are defined
# outside this notebook.
df_inspection("global_temp.mom_baby_step1_2010_mombabypair_illicit_substance","all")

##### Remove unwanted items

In [0]:

# Drop rows whose measurement name contains 'streptococcus' (an unwanted match,
# per the SQL comment below). The pattern is lower-case because
# measurement_source_value was lower-cased when mom_meas_data was built.
sql="""
     select * from global_temp.mom_baby_step1_2010_mombabypair_illicit_substance
     where measurement_source_value not like '%streptococcus%';   
     --remove 'GROUP B STREPTOCOCCUS VAGINAL CULTURE - EXTERNAL MANUAL' from the result list
    """

# Set .name and register the cleaned result; later cells read it as
# global_temp.mom_baby_step1_2010_mombabypair_mom_illicitsubstance_all.
mom_baby_step1_2010_mombabypair_mom_illicitsubstance_all = spark.sql(sql)
mom_baby_step1_2010_mombabypair_mom_illicitsubstance_all.name="mom_baby_step1_2010_mombabypair_mom_illicitsubstance_all"
register_parquet_global_view(mom_baby_step1_2010_mombabypair_mom_illicitsubstance_all)

In [0]:
# Validation checkpoint after the streptococcus rows were removed.
# NOTE(review): df_inspection and the meaning of its "all" argument are defined
# outside this notebook.
df_inspection("global_temp.mom_baby_step1_2010_mombabypair_mom_illicitsubstance_all","all")


##### time window updated: 30 days before birth date through end of birth hospitalization stay

In [0]:
# Keep only measurements dated from 30 days before the baby's birth through the
# end of the first (birth) visit; both bounds are inclusive.
# NOTE(review): rows with a NULL first_visit_end_date fail the <= comparison and
# are dropped — confirm that is intended.
sql="""
     select * from global_temp.mom_baby_step1_2010_mombabypair_mom_illicitsubstance_all a
     inner join global_temp.mom_baby_step1_baby1stvisit_all b
     using (baby_person_id)
     where measurement_date >= date_sub(baby_birth_datetime, 30) and measurement_date <= first_visit_end_date;
    """

# Unlike the earlier stages, this result is registered as a plain temp view
# (createOrReplaceTempView), so later cells reference it without the
# global_temp prefix.
mom_baby_step1_2010_mombabypair_mom_illicitsubstance_all_30bh = spark.sql(sql)
mom_baby_step1_2010_mombabypair_mom_illicitsubstance_all_30bh.createOrReplaceTempView("mom_baby_step1_2010_mombabypair_mom_illicitsubstance_all_30bh")

In [0]:
# Validation checkpoint on the time-windowed temp view (no global_temp prefix:
# it was registered with createOrReplaceTempView).
# NOTE(review): df_inspection and the meaning of its "all" argument are defined
# outside this notebook.
df_inspection("mom_baby_step1_2010_mombabypair_mom_illicitsubstance_all_30bh","all")


##### Cohort:
##### Calculate the numbers in population 40k (all moms of mom-baby pairs after 2010 that passed the exclusion criteria)
##### population: gestational_age_w33_or_uncertain = 1 and live_birth_code=1 and critical_illness_4cpt = 0 and respiratory_procedure_code = 0 and fetal_anomalies_code =0

In [0]:
# Restrict the time-windowed toxicology rows to mom-baby pairs present in the
# phenotype cohort (inner join on both mom_person_id and baby_person_id) and
# project the measurement fields alongside the cohort's covariate columns.
sql="""
     select a.mom_person_id,a.baby_person_id,a.mom_person_source_value as MOM_MRN,MEASUREMENT_ID,MEASUREMENT_DATE,MEASUREMENT_DATETIME,
     VALUE_AS_NUMBER,RANGE_LOW,RANGE_HIGH,VISIT_OCCURRENCE_ID,MEASUREMENT_SOURCE_VALUE,UNIT_SOURCE_VALUE,VALUE_SOURCE_VALUE,
     drug_type_grouping, search_term,search_term_type,first_visit_start_date,
     first_visit_end_date,cohort.baby_person_source_value as BABY_MRN,cohort.BABY_BIRTH_DATETIME,cohort.BABY_GENDER,
     cohort.BABY_RACE,LIVE_BIRTH_CODE,PREGNANCY_CODE,GESTATIONAL_AGE_W33_OR_UNCERTAIN,CRITICAL_ILLNESS_4CPT,
     RESPIRATORY_PROCEDURE_CODE,FETAL_ANOMALIES_CODE,GESTATIONAL_AGE_UNCERTAIN,NOWS_BABY_CODE,INFANT_TOX_LAB,
     MOM_OUD,MOM_OUD_INPATIENT,MOM_OUD_OUTPATIENT,MOM_DRUG,MOM_DRUG_IN_NOTE,mom_opioid_tox,cohort.BABY_1ST_VISIT_PROBLEM
     from mom_baby_step1_2010_mombabypair_mom_illicitsubstance_all_30bh a

     inner join (select * from phenotype_cohort) cohort
     on a.mom_person_id = cohort.mom_person_id and 
     a.baby_person_id = cohort.baby_person_id
     order by mom_person_id,baby_person_id
    """

# Exact duplicate rows are removed after the query runs.
# NOTE(review): the ORDER BY is applied before .distinct(); Spark does not
# guarantee that distinct() preserves row order, so the sort may not be
# reflected in the stored result.
mom_illicitsubstance_cohort = spark.sql(sql).distinct()
mom_illicitsubstance_cohort.name="mom_illicitsubstance_cohort"
register_parquet_global_view(mom_illicitsubstance_cohort)

In [0]:
# Validation checkpoint on the final cohort-restricted toxicology view.
# NOTE(review): df_inspection and the meaning of its "all" argument are defined
# outside this notebook.
df_inspection("global_temp.mom_illicitsubstance_cohort","all")


### Save Output for future use

In [0]:
# Persist the final cohort as a managed table; "overwrite" mode replaces any
# previous contents of covariate_output.mom_illicitsubstance_cohort.
(
    mom_illicitsubstance_cohort.write
    .mode("overwrite")
    .saveAsTable("covariate_output.mom_illicitsubstance_cohort")
)