_Author_ = "Sevda Molani"

_copyright_ = "2022 Sevda Molani"

_License_ = "Institute for Systems Biology"

_Version_ = "1.0"

In [0]:
from pyspark.sql.types import *
from pyspark.sql.functions import *
import numpy as np
import pandas as pd
# Show every column, every row and the full cell contents when pandas frames are displayed.
# The option keys are fully qualified on purpose: pandas resolves option names by pattern
# matching, and the bare pattern 'max_columns' matches more than one option in recent
# pandas releases (display.max_columns and styler.render.max_columns), which raises
# OptionError instead of setting anything.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

In [0]:
# Load the baseline rows from the sandbox table and expose them to later
# SQL cells under the temporary view name 'Baseline'.
baseline_query = """SELECT 
pat_id,
instance,
pat_enc_csn_id,
contact_date,
admissiondatetime,
dischargedatetime,
orderingdatetime,
observationdatetime,
result_short,
onehourbase 
FROM 
rdp_phi_sandbox.sm_positive_delta_results_feb"""

df = spark.sql(baseline_query)

# The view name is referenced by the Vasso_table and Immuno_table queries.
df.createOrReplaceTempView('Baseline')

In [0]:
%sql
-- Rebuild the lookup view of medication ids (instance 1000) whose RxNorm code is
-- one of the listed codes. DISTINCT yields one row per
-- (medication_id, instance, rxnormcode) combination.
DROP VIEW IF EXISTS rdp_phi_sandbox.SM_Vasopressors;
CREATE VIEW rdp_phi_sandbox.SM_Vasopressors AS
SELECT DISTINCT
  rx.medication_id,
  rx.instance,
  rx.rxnormcode
FROM
  rdp_phi.medicationrxnorm AS rx
WHERE
  rx.instance = 1000
AND
  rx.rxnormcode IN ('3616','3628','3966','3992','6963','7512','8163','11149')

In [0]:
%sql
-- Rebuild the view of medication orders for instance 1000.
-- patient_id is the instance concatenated with pat_id; DISTINCT yields one row per
-- (pat_id, instance, pat_enc_csn_id, orderingdatetime, medication_id, start_date, end_date).
DROP VIEW IF EXISTS rdp_phi_sandbox.SM_patt_med;
CREATE VIEW rdp_phi_sandbox.SM_patt_med AS
SELECT DISTINCT
  CONCAT(mo.instance, mo.pat_id) AS patient_id,
  mo.pat_id,
  mo.instance,
  mo.pat_enc_csn_id,
  mo.orderingdatetime,
  mo.medication_id,
  mo.start_date,
  mo.end_date
FROM
  rdp_phi.medicationorders AS mo
WHERE
  mo.instance = 1000

In [0]:
### Baseline rows that have a medication order listed in SM_Vasopressors, ordered
### between onehourbase and onehourbase + 1 hour (both bounds inclusive).
### The result holds one row per baseline key and RxNorm code.
### NOTE(review): a baseline row with several distinct codes in that hour yields
### several rows here - confirm that is intended before joining on the baseline keys.
vasso_query = """SELECT
  Baseline.pat_id,
  Baseline.pat_enc_csn_id,
  Baseline.instance,
  Baseline.onehourbase,
  SM_Vasopressors.rxnormcode AS Vasso 
  FROM Baseline
  INNER JOIN rdp_phi_sandbox.SM_patt_med ON SM_patt_med.pat_id = Baseline.pat_id AND SM_patt_med.instance = Baseline.instance AND SM_patt_med.pat_enc_csn_id = Baseline.pat_enc_csn_id
  INNER JOIN rdp_phi_sandbox.SM_Vasopressors ON SM_Vasopressors.medication_id = SM_patt_med.medication_id
  WHERE SM_patt_med.orderingdatetime >= Baseline.onehourbase AND SM_patt_med.orderingdatetime <= Baseline.onehourbase + Interval '1' hour
  GROUP BY 
  Baseline.pat_id,
  Baseline.pat_enc_csn_id,
  Baseline.instance,
  Baseline.onehourbase,
  rxnormcode"""

Vasso_table = spark.sql(vasso_query).dropDuplicates()

In [0]:
%sql
-- Rebuild the view of immunization records (instance 1000) restricted to the CVX
-- codes that the vaccination-status cell classifies: 207, 208, 212 and 217.
-- '217' is included here because the downstream classification has explicit
-- branches for it; with the previous filter ('207','208','212') those branches
-- could never match and every 217 record was silently excluded.
-- NOTE(review): this widens the cohort - confirm 217 should be counted.
-- GROUP BY on every selected column keeps one row per distinct record.
DROP VIEW IF EXISTS rdp_phi_sandbox.SM_Immunization;
CREATE VIEW rdp_phi_sandbox.SM_Immunization AS
SELECT
  immunization.pat_id,
  immunization.instance,
  immunization.immunizationdate,
  immunization.immunzatn_id,
  immunizationcode.cvxcode,
  immunizationstatus
FROM
  rdp_phi.immunization
  INNER JOIN rdp_phi.immunizationcode ON immunization.immunzatn_id=immunizationcode.immunzatn_id AND immunization.instance=immunizationcode.instance
WHERE
  immunizationcode.cvxcode in ('207','208','212','217')
AND 
  immunization.instance = 1000
GROUP BY
  immunization.pat_id,
  immunization.instance,
  immunization.immunizationdate,
  immunization.immunzatn_id,
  immunizationcode.cvxcode,
  immunizationstatus

In [0]:
### Pair every Baseline row with the patient's records from SM_Immunization that are
### dated at least 21 days before that row's onehourbase. The join is on pat_id and
### instance only, so a record can pair with several baseline rows of one patient.
### The GROUP BY over all selected columns already removes exact duplicate rows.
immuno_query = """SELECT
  Baseline.pat_id,
  Baseline.instance,
  Baseline.pat_enc_csn_id,
  Baseline.onehourbase,
  immunizationdate,
  immunizationstatus,
  cvxcode,
  SM_Immunization.immunzatn_id AS Vaccine_code
  FROM Baseline
  INNER JOIN rdp_phi_sandbox.SM_Immunization ON SM_Immunization.pat_id = Baseline.pat_id AND SM_Immunization.instance = Baseline.instance
  WHERE SM_Immunization.immunizationdate <= Baseline.onehourbase - Interval '21' day
  GROUP BY 
  Baseline.pat_id,
  Baseline.instance,
  Baseline.pat_enc_csn_id,
  Baseline.onehourbase,
  immunizationdate,
  immunizationstatus,
  cvxcode,
  immunzatn_id"""

Immuno_table = spark.sql(immuno_query)

In [0]:
from pyspark.sql.window import Window
###Filter the vaccines that were given for sure
Immuno_table = Immuno_table.filter(Immuno_table.immunizationstatus == "Given")

### Work per baseline row, not per patient: Immuno_table is later left-joined onto df
### on exactly these four keys. De-duplicating on (pat_id, instance) only would keep the
### vaccination data on one arbitrary baseline row of a patient and leave the patient's
### other baseline rows without a match.
baseline_keys = ['pat_id', 'instance', 'pat_enc_csn_id', 'onehourbase']

### One row per baseline row and immunization date (same-day duplicates collapse to one dose)
Immuno_table = Immuno_table.dropDuplicates(baseline_keys + ['immunizationdate'])

### Count the number of doses and find the most recent dose date for each baseline row.
### The frame spans the whole partition so every row carries the same totals.
w = Window.partitionBy(baseline_keys).orderBy("immunizationdate").rowsBetween(Window.unboundedPreceding,Window.unboundedFollowing)
Immuno_table = Immuno_table.withColumn("Administered_dose_count",count(col("immunizationdate")).over(w))
Immuno_table = Immuno_table.withColumn("lastdose_date",last(col("immunizationdate")).over(w))

### Remove the patients who got vaccinated within 14 days of Today's date
### NOTE(review): the upstream query already requires each dose to be at least 21 days
### before onehourbase; confirm whether this cut-off should use onehourbase instead.
Immuno_table = Immuno_table.filter(datediff(current_date(),Immuno_table.lastdose_date)>=14)

### Keep exactly one row per baseline row, chosen deterministically: the earliest dose.
### dropDuplicates on the keys alone would keep an arbitrary dose, so the retained
### cvxcode (and the status derived from it) could change from run to run.
### NOTE(review): the earliest dose is used as the product of the primary series -
### confirm this is the product the status rules should be keyed on.
dose_order = Window.partitionBy(baseline_keys).orderBy("immunizationdate")
Immuno_table = Immuno_table.withColumn("_dose_rank", row_number().over(dose_order))
Immuno_table = Immuno_table.filter(col("_dose_rank") == 1).drop("_dose_rank")

In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import rank, col

### Derive Vaccination_status from the product code and the administered dose count:
###   - product 212 is complete after 1 dose, products 207/208/217 after 2 doses ("Fully")
###   - more doses than that is "Booster"
###   - everything else is "Half"
one_dose_product = col('cvxcode') == "212"
two_dose_product = col('cvxcode').isin("207", "208", "217")
dose_count = col('Administered_dose_count')

fully_vaccinated = (one_dose_product & (dose_count == 1)) | (two_dose_product & (dose_count == 2))
boosted = (one_dose_product & (dose_count > 1)) | (two_dose_product & (dose_count > 2))

Immuno_table = Immuno_table.withColumn(
    "Vaccination_status",
    when(fully_vaccinated, "Fully").when(boosted, "Booster").otherwise("Half"),
)

### Human-readable product name for each code; 208 and 217 share the same name
cvx_name = (
    when(col('cvxcode') == "212", "Janssen COVID-19 Vaccine")
    .when(col('cvxcode') == "207", "Moderna COVID-19 Vaccine")
    .when(col('cvxcode').isin("208", "217"), "Pfizer-BioNTech COVID-19 Vaccine")
    .otherwise("Unknown")
)
Immuno_table = Immuno_table.withColumn("CVX_name", cvx_name)

### Drop rows that are identical in every column
Immuno_table = Immuno_table.dropDuplicates()

In [0]:
# Left join: keep every baseline row and attach the immunization columns where the
# four key columns match; unmatched rows get nulls in the immunization columns.
final_df = df.join(
    Immuno_table,
    on=['pat_id', 'instance', 'pat_enc_csn_id', 'onehourbase'],
    how='left',
)

In [0]:
# Rows with no cvxcode after the left join had no matching immunization row:
# label both descriptive columns "NotVaccinated" for them, leave the others as they are.
for status_col in ("Vaccination_status", "CVX_name"):
    final_df = final_df.withColumn(
        status_col,
        when(col("cvxcode").isNull(), "NotVaccinated").otherwise(col(status_col)),
    )

In [0]:
# Left join: keep every row of final_df and attach the Vasso column where the four
# key columns match a row of Vasso_table.
final_df1 = final_df.join(
    Vasso_table,
    on=['pat_id', 'instance', 'pat_enc_csn_id', 'onehourbase'],
    how='left',
)

In [0]:
# Vasso_use is "yes" when the left join found a Vasso value for the row, otherwise "no".
final_df1 = final_df1.withColumn(
    'Vasso_use',
    when(col('Vasso').isNotNull(), "yes").otherwise("no"),
)

# Rows without immunization data have no dose count; report 0 for them.
final_df1 = final_df1.withColumn(
    'Administered_dose_count',
    when(col('Administered_dose_count').isNotNull(), col('Administered_dose_count')).otherwise(0),
)

In [0]:
%sql
-- Remove the previous output table so the next cell can recreate it.
-- IF EXISTS keeps this cell from failing on a first run (or after a manual drop),
-- matching the DROP VIEW IF EXISTS statements used for the views in this notebook.
DROP TABLE IF EXISTS rdp_phi_sandbox.sm_patt_delta_vasso_Immuno_feb;

In [0]:
# Persist the final frame as a table in the sandbox schema.
# No save mode is set, so the write relies on the table having been dropped beforehand.
table_name = "rdp_phi_sandbox.sm_patt_delta_vasso_Immuno_feb"
final_df1.write.saveAsTable(name=table_name)

In [0]:
%sql
-- Refresh the cached metadata/data of the table that was just written, then preview it.
-- A %sql cell does not see Python variables, so the table must be named explicitly:
-- "REFRESH table_name" is parsed as a refresh of the literal resource path "table_name"
-- and never touches the saved table.
REFRESH TABLE rdp_phi_sandbox.sm_patt_delta_vasso_Immuno_feb;
SELECT * FROM rdp_phi_sandbox.sm_patt_delta_vasso_Immuno_feb;