##### Project: Opioid Exposed Infant Covariates
##### Investigator: Stephen Patrick, Sarah Loch
##### Programmers: Sander Su, Chris Guardo
##### Date Created: 01/17/23
##### Last Modified: 09/30/25

#### Notes: 
Using the Drug Related codes from the maternal health record, this notebook filters for the use of opioids in the patient’s medical record 


In [0]:
%run "../Project_modules"

- Time window: 90 days to 2 days prior to birth OR (ETOC date through 1 year post-partum - 03/09/23)
- cohort: phenotype cohort (41713) or whole phenotype cohort (56777) from phenotype V2

##### Mom Non-opioid-drug
- non-opioid drug list: Sheet 9 - non opioid drugs

##### Mom Opioid-Drug
- Opioid drug list: Sheet 7 - Opioid Search Terms

##### The difference of MOM OPIOID-DRUG between covar and phenotype (step 4): 
- cohort: opioid exposed mom vs. (whole) phenotype cohort from phenotype V2
- op drug exposure vs. all records
- 90 days to 2 days prior to birth or ETOC date through 1 year post-partum (for phenotype: 30 days to 2 days prior to birth)

In [0]:
%sql
--- Default bin size for range join optimization for all datetime comparisons
--- (set once here, before any of the date-window joins below are executed)
SET spark.databricks.optimizer.rangeJoin.binSize=90

In [0]:
# Name/location of the phenotype table. The value below is a placeholder and
# must be replaced with the real table location before this cell is run.
phenotype_table_location = " ***Insert file location*** "
# Load every column of the phenotype table; the cohort helpers below take this DataFrame as input.
phenotype_table=spark.sql(f"SELECT * FROM {phenotype_table_location}")

In [0]:
# The study period is chosen through a notebook widget. It selects both the
# cohort built here and the time-window function used in later cells.
dbutils.widgets.dropdown("study_period", "ETOC_to_1yr_postpartum", ["90_days_to_2_days_prior_birth", "ETOC_to_1yr_postpartum"],"Study period")
study_period=dbutils.widgets.get("study_period")

# non_opioid_table="phenotyping.mprint_sheet_9_non_opioid_drug"
opioid_table="phenotyping.mprint_mom_drug_searchterm_v2"

if (study_period=="90_days_to_2_days_prior_birth"):
   phenotype_cohort = get_phenotype_cohort(phenotype_table) ###original cohort with criteria (41713)
elif (study_period=="ETOC_to_1yr_postpartum"):
   phenotype_cohort = get_whole_phenotype_cohort(phenotype_table) ###Andrew requested to check whole phenotype cohort (56777)
else:
   # Fail fast: without this branch an unexpected value would either raise a
   # NameError below or silently reuse a phenotype_cohort left over from a
   # previous run of this cell with a different study period.
   raise ValueError(f"Unsupported study_period: {study_period!r}")

# Expose the selected cohort to SQL; get_cohort() and the result queries read this view.
phenotype_cohort.createOrReplaceTempView("phenotype_cohort")

##### Sheet9: non_opioid drug list


In [0]:
#can be removed later
# Pull the drug-exposure rows for every mom in the mom/baby pair table, with
# drug_source_value trimmed and lower-cased. drug_exp_table is not defined in
# this notebook; presumably it is provided by Project_modules (confirm).
sql=f"select drug_exposure_id,person_id,drug_exposure_start_date,drug_exposure_end_date, lower(trim(drug_source_value)) as drug_source_value,drug_source_concept_id from {drug_exp_table} where person_id in (select mom_person_id from global_temp.mom_baby_step1_2010_mombabypair)"
drugexp_mombabypair_df=spark.sql(sql)

# The name set here matches the global view read back later as
# global_temp.drug_exp_mom_df, so the helper presumably registers the
# DataFrame under its .name attribute (confirm in Project_modules).
drugexp_mombabypair_df.name='drug_exp_mom_df'
register_parquet_global_view(drugexp_mombabypair_df)

In [0]:
# Build one list of non-opioid search terms from three lookup tables
# (drug name, generic name, trade name). UNION removes duplicate
# (drug_name, type_code) rows, and the outer select lower-cases each term.
sql="""
       select lower(drug_name) as search_term,type_code from (
       
          select drug_name,type_code from phenotyping.mprint_non_opioid_drug_name
          union
          select generic as drug_name,type_code from phenotyping.mprint_non_opioid_generic
          union
          select trade as drug_name,type_code from phenotyping.mprint_non_opioid_trade
       ) as drugs
      
    """
drug_name_list=spark.sql(sql)
# Registered as a temp view so the matching cell can select from it by name.
drug_name_list.createOrReplaceTempView("drug_name_list")

In [0]:
# Mom drug exposures (drug_source_value selected through lower(trim(...)) when
# the view was built) and the lower-cased non-opioid search terms.
mom_drug_exposures = spark.sql("select * from global_temp.drug_exp_mom_df")
non_opioid_terms = spark.sql("select search_term,type_code from drug_name_list")

# A row matches when the search term occurs anywhere inside drug_source_value.
# NOTE(review): an empty search_term would match every exposure row - confirm
# the lookup tables contain no blank names.
term_in_source_value = mom_drug_exposures["drug_source_value"].contains(non_opioid_terms["search_term"])

# The term list is broadcast to the join; rows whose source value mentions
# 'orapred' are then removed (reason not recorded in this notebook).
result = (
    mom_drug_exposures
    .join(F.broadcast(non_opioid_terms), term_in_source_value, "inner")
    .filter("drug_source_value not like '%orapred%'")
)

result.name = 'mprint_mom_non_opioid_drug'
register_parquet_global_view(result)

##### Validation

In [0]:
# Sanity check on the matched non-opioid rows: total row count and number of
# distinct moms (person_id) in the registered global view.
insp_df = spark.sql("""
    select count(*) as total, count(distinct person_id) as unique_mom from global_temp.mprint_mom_non_opioid_drug;
    """)
insp_df.display()

##### Time window: 90 days to 2 days prior to birth

In [0]:
def mom_drug_dob90days(drug_df_name,time_win_lower,time_win_upper):
    """Keep drug-exposure rows that start within a window of days before birth.

    Rows of ``drug_df_name`` are joined to the mom/baby pairs in
    ``global_temp.mom_baby_step1_2010_mombabypair`` on
    ``person_id = mom_person_id``. A row is kept when its
    ``drug_exposure_start_date`` is on or after ``time_win_upper`` days before
    ``baby_birth_datetime`` and strictly before ``time_win_lower`` days before
    it. Although the join is a LEFT JOIN, the WHERE clause filters on the
    pair-table columns, so exposures without a matching pair are dropped.

    Args:
        drug_df_name: Name of the table/view holding the drug-exposure rows.
        time_win_lower: Number of days before birth at which the window ends
            (exclusive); interpolated into the SQL as-is.
        time_win_upper: Number of days before birth at which the window starts
            (inclusive); interpolated into the SQL as-is.

    Returns:
        The DataFrame produced by ``spark.sql`` for the windowed query.
    """
    window_query=f"""
       select * from {drug_df_name} a
       left join (select mom_person_id, baby_person_id, baby_birth_datetime 
       from global_temp.mom_baby_step1_2010_mombabypair) b
       on a.person_id = b.mom_person_id
       where 
       date_sub(baby_birth_datetime, {time_win_upper}) <= drug_exposure_start_date 
       and 
       drug_exposure_start_date <  date_sub(baby_birth_datetime, {time_win_lower});
    """
    return spark.sql(window_query)
    

##### Time window: ETOC date through 1 year post-partum

In [0]:
def mom_drug_etoc_1yr_postpartum(drug_df_name):
    """Keep drug-exposure rows that start between ETOC and 365 days after birth.

    Rows of ``drug_df_name`` are joined to
    ``global_temp.ega_w33_or_uncertain_gestation_date`` on
    ``person_id = mom_person_id``; that view's ``start_gestation_date`` is
    exposed as ``ETOC`` and its ``baby_dob`` as ``baby_birth_datetime``. A row
    is kept when ``ETOC <= drug_exposure_start_date`` and the start date is
    strictly before ``date_add(baby_birth_datetime, 365)``. Although the join
    is a LEFT JOIN, the WHERE clause filters on the joined columns, so
    exposures without a matching pair are dropped.

    Args:
        drug_df_name: Name of the table/view holding the drug-exposure rows.

    Returns:
        The DataFrame produced by ``spark.sql`` for the windowed query.
    """
    window_query=f"""
       select * from {drug_df_name} a
       left join (select mom_person_id, baby_person_id, start_gestation_date as ETOC, baby_dob as baby_birth_datetime
       from global_temp.ega_w33_or_uncertain_gestation_date) b
       on a.person_id = b.mom_person_id
       where 
       ETOC <= drug_exposure_start_date 
       and 
       drug_exposure_start_date <  date_add(baby_birth_datetime, 365);
    """
    return spark.sql(window_query)

In [0]:
# Apply the time window that corresponds to the selected study period to the
# matched non-opioid exposures.
if (study_period=="90_days_to_2_days_prior_birth"):
    mom_non_opioid_drug_main=mom_drug_dob90days("global_temp.mprint_mom_non_opioid_drug","2","90")
elif (study_period=="ETOC_to_1yr_postpartum"):
    mom_non_opioid_drug_main=mom_drug_etoc_1yr_postpartum("global_temp.mprint_mom_non_opioid_drug")
else:
    # Fail fast instead of hitting a NameError below or reusing a
    # mom_non_opioid_drug_main left over from an earlier run.
    raise ValueError(f"Unsupported study_period: {study_period!r}")

mom_non_opioid_drug_main.createOrReplaceTempView("mom_non_opioid_drug_main")

#### Validation

In [0]:
# Inspect the time-window-filtered non-opioid rows. df_inspection is not
# defined in this notebook; presumably it comes from Project_modules
# (confirm what the "all" argument selects).
df_inspection("mom_non_opioid_drug_main","all")

##### Add x_drug_exposure info (including doc_type)

In [0]:
def drug_extra_table_addition(drug_df_name):
    """Attach the extra drug-exposure columns to a set of drug-exposure rows.

    Inner-joins ``drug_df_name`` to the table named by the global
    ``drug_exp_table_extra`` on ``(person_id, drug_exposure_id)``, so rows with
    no counterpart in the extra table are dropped.

    Args:
        drug_df_name: Name of the table/view holding the drug-exposure rows.

    Returns:
        The DataFrame produced by ``spark.sql`` for the join.
    """
    join_query=f"""
        select * from {drug_df_name} a
        inner join {drug_exp_table_extra} b
        using (person_id,drug_exposure_id);
      """
    return spark.sql(join_query)

In [0]:
# Join the windowed non-opioid exposures to the extra drug-exposure table and
# expose the result to SQL as the temp view "mom_non_opioid_drug_all".
mom_non_opioid_drug_all=drug_extra_table_addition("mom_non_opioid_drug_main")
mom_non_opioid_drug_all.createOrReplaceTempView("mom_non_opioid_drug_all")

##### Validation

In [0]:
#df_inspection("mom_non_opioid_drug_all","all")


##### cohort: all mom in mom_baby_2010_mombabypair_all_version1_11042022_05232022 (v2)

In [0]:
def get_cohort(drug_df_name):
    """Restrict drug rows to mom/baby pairs present in the phenotype cohort.

    Selects the distinct rows of ``drug_df_name`` whose
    ``(mom_person_id, baby_person_id)`` pair appears in the
    ``phenotype_cohort`` temp view.

    Args:
        drug_df_name: Name of the table/view to filter; it must expose
            ``mom_person_id`` and ``baby_person_id`` columns.

    Returns:
        The DataFrame produced by ``spark.sql`` for the filtered query.
    """
    cohort_query=f"""
       select distinct *
       from {drug_df_name} 
       where (mom_person_id,baby_person_id) in
       (select mom_person_id,baby_person_id from phenotype_cohort);
       """
    return spark.sql(cohort_query)

In [0]:
# Keep only non-opioid rows whose mom/baby pair is in the phenotype_cohort
# view, and expose them to SQL as "mom_non_opioid_drug_cohort".
mom_non_opioid_drug_cohort= get_cohort("mom_non_opioid_drug_all")
mom_non_opioid_drug_cohort.createOrReplaceTempView("mom_non_opioid_drug_cohort")

##### Create results table for review

In [0]:
# Add the cohort's source-value identifiers as mom_mrn / baby_mrn to every
# non-opioid cohort row (joined on mom, baby and birth datetime).
non_opioid_review_query = """
      select b.mom_person_source_value as mom_mrn,b.baby_person_source_value as baby_mrn,* 
      from mom_non_opioid_drug_cohort a left join
      (select * from phenotype_cohort) b
      using(mom_person_id,baby_person_id,baby_birth_datetime);

   """

# Internal ids and the un-aliased source-value columns are removed once the
# MRN aliases exist; the remaining column names are then converted with
# change_colname_case(..., 'upper').
non_opioid_id_columns = ("mom_person_id","baby_person_id","person_id","mom_person_source_value","baby_person_source_value")
mom_non_opioid_drug_covar_data = change_colname_case(
    spark.sql(non_opioid_review_query).drop(*non_opioid_id_columns),
    'upper',
)

mom_non_opioid_drug_covar_data.name = 'mom_non_opioid_drug_covar_data'
register_parquet_global_view(mom_non_opioid_drug_covar_data)

In [0]:
%sql
-- Preview the non-opioid covariate rows from the global view for review.
select * from global_temp.mom_non_opioid_drug_covar_data;

##### Validation

In [0]:
%sql
-- Row count plus distinct mom and baby MRNs in the non-opioid covariate output.
select count(*) AS total, count(distinct mom_mrn) AS unique_mom, count(distinct baby_mrn) AS unique_baby from global_temp.mom_non_opioid_drug_covar_data;


##### Output for review

In [0]:
# Persist the non-opioid covariate data as a table suffixed with the study
# period; an existing table of the same name is overwritten.
non_opioid_output_table = f"phenotype_output.mom_non_opioid_drug_covar_data_{study_period}"
(
    spark.sql("select * from global_temp.mom_non_opioid_drug_covar_data")
    .write
    .mode("overwrite")
    .saveAsTable(non_opioid_output_table)
)

##### Search the OPIOID drug list in table 'DRUG_EXPOSURE'
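- NOTE: We used the drug_mom cohort from step4_momdrug.JN (read below as `global_temp.mom_drug_info`). This notebook does not create that view, so it must already be registered before these cells run.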

##### Time window: 90 days to 2 days prior to birth or ETOC date through 1 year post-partum

In [0]:
# Apply the time window that corresponds to the selected study period to the
# opioid exposures in global_temp.mom_drug_info (a view this notebook reads
# but does not create).
if (study_period=="90_days_to_2_days_prior_birth"):
    mom_opioid_drug_main=mom_drug_dob90days("global_temp.mom_drug_info","2","90")
elif (study_period=="ETOC_to_1yr_postpartum"):
    mom_opioid_drug_main=mom_drug_etoc_1yr_postpartum("global_temp.mom_drug_info")
else:
    # Fail fast instead of hitting a NameError below or reusing a
    # mom_opioid_drug_main left over from an earlier run.
    raise ValueError(f"Unsupported study_period: {study_period!r}")

mom_opioid_drug_main.createOrReplaceTempView("mom_opioid_drug_main")

In [0]:
# Join the windowed opioid exposures to the extra drug-exposure table and
# expose the result to SQL as the temp view "mom_opioid_drug_all".
mom_opioid_drug_all=drug_extra_table_addition("mom_opioid_drug_main")
mom_opioid_drug_all.createOrReplaceTempView("mom_opioid_drug_all")

In [0]:
# Keep only opioid rows whose mom/baby pair is in the phenotype_cohort view,
# and expose them to SQL as "mom_opioid_drug_cohort".
mom_opioid_drug_cohort= get_cohort("mom_opioid_drug_all")
mom_opioid_drug_cohort.createOrReplaceTempView("mom_opioid_drug_cohort")

##### Create result for review

In [0]:
# Add the cohort's source-value identifiers as mom_mrn / baby_mrn to every
# opioid cohort row and tag each row with the constant TYPE_CODE 'OPIOIDS'.
opioid_review_query = """
    select b.mom_person_source_value as mom_mrn,b.baby_person_source_value as baby_mrn,*,'OPIOIDS' as TYPE_CODE 
    from mom_opioid_drug_cohort a left join
    (select * from phenotype_cohort) b
    using(mom_person_id,baby_person_id,baby_birth_datetime);
   """

# Order matters: drop the id columns, rename "generic" to "search_term" (the
# column name used by the non-opioid output), then convert the column names
# with change_colname_case(..., 'upper').
opioid_id_columns = ("mom_person_id","baby_person_id","person_id","mom_person_source_value","baby_person_source_value")
mom_opioid_drug_covar_data = change_colname_case(
    spark.sql(opioid_review_query)
    .drop(*opioid_id_columns)
    .withColumnRenamed("generic","search_term"),
    'upper',
)

mom_opioid_drug_covar_data.name = 'mom_opioid_drug_covar_data'
register_parquet_global_view(mom_opioid_drug_covar_data)

In [0]:
%sql
-- Preview the opioid covariate rows from the global view for review.
select * from global_temp.mom_opioid_drug_covar_data;

In [0]:
%sql
-- Row count plus distinct mom and baby MRNs in the opioid covariate output.
select count(*) AS total, count(distinct mom_mrn) AS unique_mom, count(distinct baby_mrn) AS unique_baby from global_temp.mom_opioid_drug_covar_data;


### Save Output for future use

In [0]:
# Persist the opioid covariate data as a table suffixed with the study period;
# an existing table of the same name is overwritten.
# The original target "covariate_output..mom_opioid_drug_covar_data_..." had a
# doubled dot, which is not a valid schema.table identifier.
# NOTE(review): confirm covariate_output is the intended schema (the
# non-opioid output above is written to phenotype_output).
spark.sql("select * from global_temp.mom_opioid_drug_covar_data").write.mode("overwrite").saveAsTable(f"covariate_output.mom_opioid_drug_covar_data_{study_period}")