In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pulp import *
import pandas as pd
import os, glob
import seaborn as sns
from scipy.stats import kruskal
import scikit_posthocs as sp
from scipy.stats import mannwhitneyu
from dotenv import load_dotenv
load_dotenv() 

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = str(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
os.environ['GCLOUD_PROJECT'] = str(os.getenv("GCLOUD_PROJECT"))
%load_ext google.cloud.bigquery

from google.cloud import bigquery
client=bigquery.Client()
from google.cloud import bigquery_storage_v1

# Notes

In [None]:
%%bigquery df_notes
-- Rebuilds blood_culture_stewardship.Notes: one distinct row per
-- (cohort blood-culture order, clinician-authored note) pair that falls inside
-- the time window defined in the final WHERE clause.
CREATE OR REPLACE TABLE `som-nero-phi-jonc101.blood_culture_stewardship.Notes` AS
WITH cohort AS (
    SELECT * FROM `som-nero-phi-jonc101.blood_culture_stewardship.cohort`
),
-- Notes whose author maps to one of the listed provider types.
-- The first character of author_prov_map_id is dropped before matching it
-- against prov_map.shc_prov_id.
all_relevant_notes AS (
    SELECT note.*
    FROM `som-nero-phi-jonc101.Deid_Notes_JChen.Deid_Notes_SHC_JChen` AS note
    INNER JOIN `som-nero-phi-jonc101.shc_core_2023.prov_map` AS m
        ON m.shc_prov_id = CAST(SUBSTR(note.author_prov_map_id, 2) AS STRING)
    WHERE m.prov_type IN (
        'STANFORD REFERRING PHYSICIAN', 'PHYSICIAN', 'RESIDENT', 'MEDICAL STUDENT',
        'NURSE PRACTITIONER', 'FELLOW', 'NP STUDENT', 'PHYSICIAN ASSISTANT', 'PA STUDENT'
    )
),
-- Cohort rows enriched with the admission time of their own encounter.
-- Rows without a matching encounter or without an admission time are dropped,
-- so this is an INNER JOIN (the earlier LEFT JOIN + IS NOT NULL was equivalent).
inpatients_admit_time AS (
    SELECT
        auc.*,
        en.hosp_admsn_time_jittered_utc
    FROM cohort AS auc
    INNER JOIN `som-nero-phi-jonc101.shc_core_2023.encounter` AS en
        ON auc.anon_id = en.anon_id
        AND auc.pat_enc_csn_id_coded = en.pat_enc_csn_id_coded
    WHERE en.hosp_admsn_time_jittered_utc IS NOT NULL
)
-- SELECT DISTINCT replaces the former GROUP BY over every output column.
-- The former trailing ORDER BY (which also sorted on the full note text) is
-- removed: it only added sort cost to a CREATE TABLE ... AS statement, and
-- consumers that need a particular order should ORDER BY when reading the table.
SELECT DISTINCT
    inp.anon_id,
    inp.pat_enc_csn_id_coded,
    inp.order_proc_id_coded,
    note.jittered_note_date_utc AS notedatetime,
    note.deid_note_text,
    note.note_type,
    note.note_type_desc
FROM inpatients_admit_time AS inp
-- Notes are matched on patient only (anon_id), not on encounter.
INNER JOIN all_relevant_notes AS note
    ON inp.anon_id = note.anon_id
WHERE
    -- (order time - note time) >= -24 hours: keep notes dated no later than
    -- about 24 hours after the blood culture order.
    TIMESTAMP_DIFF(inp.blood_culture_order_datetime, TIMESTAMP(note.jittered_note_date_utc), HOUR) >= -24
    -- (note time - admission time) >= -14 days: keep notes dated no earlier than
    -- about 14 days before the hospital admission.
    AND TIMESTAMP_DIFF(note.jittered_note_date_utc, inp.hosp_admsn_time_jittered_utc, DAY) >= -14

In [None]:
# Preview the first five rows of the frame captured by the %%bigquery cell.
# NOTE(review): that cell runs a CREATE OR REPLACE TABLE statement, so confirm
# df_notes actually holds note rows rather than an empty DDL result.
df_notes.head(n=5)

# ED notes with Physician Provider

In [None]:
%%bigquery df_notes
-- Rebuilds blood_culture_stewardship.EDProviderNotes: cohort blood-culture
-- orders paired with physician-authored 'ED Provider Notes' dated within
-- roughly 24 hours before or after the order.
CREATE OR REPLACE TABLE som-nero-phi-jonc101.blood_culture_stewardship.EDProviderNotes AS
WITH cohort AS (
    SELECT * FROM `som-nero-phi-jonc101.blood_culture_stewardship.cohort`
),
-- Notes restricted by note type and by the author's provider type.
-- The first character of author_prov_map_id is dropped before matching it
-- against prov_map.shc_prov_id.
note as(
 SELECT note.*,
  FROM `som-nero-phi-jonc101.Deid_Notes_JChen.Deid_Notes_SHC_JChen` note
  INNER JOIN `som-nero-phi-jonc101.shc_core_2023.prov_map` m  ON m.shc_prov_id = CAST(SUBSTR(note.author_prov_map_id, 2) AS STRING)
WHERE 
-- LIKE without a wildcard: this is an exact match on the description.
note.note_type_desc like 'ED Provider Notes'
AND m.prov_type in ('PHYSICIAN')
)
select cohort.anon_id,
cohort.pat_enc_csn_id_coded,
cohort.order_proc_id_coded,
note.jittered_note_date_utc as notedatetime,
note.deid_note_text,
note.note_type,
note.note_type_desc,
from cohort 
-- Notes are matched on patient only (anon_id), not on encounter.
inner join note using(anon_id)
where 
-- (order time - note time) must lie in [-24, 24] hours, i.e. the note is dated
-- no more than about 24 hours after and no more than about 24 hours before the order.
-- NOTE(review): the trailing "#0" markers look like a previously used bound of 0 -- confirm.
TIMESTAMP_DIFF(cohort.blood_culture_order_datetime, TIMESTAMP(note.jittered_note_date_utc), Hour) >= -24 #0
and 
TIMESTAMP_DIFF(cohort.blood_culture_order_datetime, TIMESTAMP(note.jittered_note_date_utc), Hour) <= 24 #0