In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os 
from google.cloud import bigquery

# Point the Google client libraries at the gcloud Application Default
# Credentials file. The path is built from the current user's home directory
# instead of a hard-coded /Users/<name>/... location, and a value that is
# already exported in the environment is left untouched, so the notebook can
# run on another machine without editing this cell.
os.environ.setdefault(
    'GOOGLE_APPLICATION_CREDENTIALS',
    os.path.expanduser('~/.config/gcloud/application_default_credentials.json'),
)
# Project used by the BigQuery client created below.
os.environ['GCLOUD_PROJECT'] = 'som-nero-phi-jonc101'
client = bigquery.Client()

In [None]:
# Build and run the cohort query, then load the result into `df`.
#
# NOTE(review): the body of the `cohort_bugs` CTE is missing from this cell --
# only the fragment "ed)," remains -- so the SQL is not valid as written.
# Restore the cohort definition before running.
#
# `adt_dep` keeps, for each pat_enc_csn_id_coded, the department_name of the
# ADT row with the earliest effective_time_jittered_utc (FIRST_VALUE over the
# time-ordered partition); SELECT DISTINCT collapses the window output.
# The final SELECT returns every cohort_bugs column plus that department_name,
# keeping only encounters present in both CTEs (INNER JOIN).
query="""
WITH cohort_bugs AS (
ed), 

adt_dep as (
    SELECT DISTINCT
      adt.pat_enc_csn_id_coded, 
      FIRST_VALUE(dm.department_name) OVER 
      (PARTITION BY adt.pat_enc_csn_id_coded ORDER BY adt.effective_time_jittered_utc) department_name,
    FROM 
      `shc_core.adt` adt
    INNER JOIN
      `som-nero-phi-jonc101.shc_core.dep_map` dm
    USING
      (department_id)
)

SELECT 
    c.*, a.department_name
FROM
    cohort_bugs c
INNER JOIN
    adt_dep a
USING
    (pat_enc_csn_id_coded)

"""
# Submit the query, block until it finishes, and materialise the rows.
query_job = client.query(query)
df = query_job.result().to_dataframe()
df.head()

In [None]:
# Number of distinct encounters per coarse culture type.
# Descriptions containing "BLOOD" or "URINE" get their own label; everything
# else falls into the catch-all "Csf or Fluid Culture" bucket. Matching is
# case-insensitive so the counts stay correct even when `df.description` has
# already been replaced by the title-case labels ("Blood Culture", ...).
(df
    .assign(description=lambda x: np.select(
        [x.description.str.contains("BLOOD", case=False, regex=False),
         x.description.str.contains("URINE", case=False, regex=False)],
        ["Blood Culture", "Urine Culture"],
        default="Csf or Fluid Culture"))
    .groupby('description')
    .agg(num_csns=('pat_enc_csn_id_coded', 'nunique'))
)

In [None]:
### Filter out coag neg staph and rename MRSA to Staph Aureus
# Steps, in order:
#   1. Collapse the raw culture description into three labels. The match is
#      case-insensitive, so re-running this cell on the already-relabelled
#      frame maps "Blood Culture" -> "Blood Culture" instead of dumping every
#      row into the catch-all bucket.
#   2. Drop coagulase-negative staph rows.
#   3. Fold MRSA into Staph aureus and normalise the Group B strep label
#      (exact-value replacement, not regex).
#   4. Lower-case organism names.
df = (df
    .assign(description=lambda x: np.select(
        [x.description.str.contains("BLOOD", case=False, regex=False),
         x.description.str.contains("URINE", case=False, regex=False)],
        ["Blood Culture", "Urine Culture"],
        default="Csf or Fluid Culture"))
    .query("organism != 'COAG NEGATIVE STAPHYLOCOCCUS'")
    .assign(organism=lambda x: x.organism
        .replace({
            'STAPH AUREUS {MRSA}': 'STAPHYLOCOCCUS AUREUS',
            'STREPTOCOCCUS AGALACTIAE {GROUP B}': 'STREPTOCOCCUS AGALACTIAE (GROUP B)',
        })
        .str.lower())
)

In [None]:
### Get Top 5 bugs for each culture type by department where we count by number of encounters bug grew. 
def top_organisms(frame, culture_type, valley_care, n=5):
    """Return the `n` most frequent organisms for one culture type at one site.

    Parameters
    ----------
    frame : DataFrame with `department_name`, `description`, `organism`
        and `pat_enc_csn_id_coded` columns.
    culture_type : value of `description` to keep.
    valley_care : if True keep rows whose department_name contains "VCP",
        otherwise keep the rows that do not.
    n : number of organisms to return.

    Returns
    -------
    DataFrame with columns `Organism`, `Count` (distinct encounters the
    organism appears in) and `Culture Type`, sorted by `Count` descending.
    """
    in_vcp = frame.department_name.str.contains("VCP")
    site_rows = in_vcp if valley_care else ~in_vcp
    return (frame[site_rows & (frame.description == culture_type)]
        .groupby('organism')
        .agg({'pat_enc_csn_id_coded' : 'nunique'})
        .reset_index()
        .rename(columns={'organism' : 'Organism', 'pat_enc_csn_id_coded' : "Count"})
        .sort_values('Count', ascending=False)
        .head(n)
        .assign(**{'Culture Type': culture_type})
    )


culture_types = ['Blood Culture', 'Urine Culture', 'Csf or Fluid Culture']

# One concat per site instead of growing a frame inside the loop; the
# per-culture-type row labels are kept as-is (no ignore_index).
df_stanford = pd.concat(
    [top_organisms(df, culture_type, valley_care=False) for culture_type in culture_types]
)
df_valley_care = pd.concat(
    [top_organisms(df, culture_type, valley_care=True) for culture_type in culture_types]
)

In [None]:
# Top organisms per culture type for encounters whose department is not "VCP".
df_stanford

In [None]:
# Top organisms per culture type for encounters whose department contains "VCP".
df_valley_care

In [None]:
# Tag each per-site table with its institution label, then stack the two
# tables into one long frame (Stanford rows first).
for site_frame, site_label in ((df_stanford, "Stanford ED"),
                               (df_valley_care, "Valley Care ED")):
    site_frame['Institution'] = [site_label] * len(site_frame)

df_final = pd.concat([df_stanford, df_valley_care])
df_final.head()

In [None]:
# Put the columns in presentation order and capitalise organism names.
# `.assign` builds a new frame, which avoids the SettingWithCopyWarning that
# comes from writing a column into a column-subset of `df_final`.
df_final = (df_final[['Institution', 'Culture Type', 'Organism', 'Count']]
    .assign(Organism=lambda x: x['Organism'].str.capitalize())
)
# The row index is written too (pandas default).
df_final.to_csv('Table 2: Organism by culture type long.csv')

In [None]:
# Build the wide table: Valley Care and Stanford columns side by side, one row
# per (culture type, rank) position.
#
# The per-site frames are no longer modified here, so the cell can be re-run
# (previously the second run failed because "Culture Type" had already been
# dropped from df_stanford / df_valley_care).
#
# NOTE(review): rows are paired purely by position and labelled with Stanford's
# culture types, so this assumes both sites list the same number of organisms
# for each culture type -- confirm.
culture_type = df_stanford['Culture Type'].values


def _under_banner(frame, banner):
    """Drop "Culture Type", renumber rows from 0, and nest the remaining
    columns under a top-level `banner` column label."""
    side = frame.drop('Culture Type', axis=1).reset_index(drop=True)
    side.columns = pd.MultiIndex.from_product([[banner], side.columns])
    return side


df_bugs = pd.concat(
    [_under_banner(df_valley_care, 'Valley Care ED'),
     _under_banner(df_stanford, 'Stanford ED')],
    axis=1,
)
df_bugs['Culture Type'] = culture_type

In [None]:
# Reorder the top-level column groups: culture type first, then Stanford,
# then Valley Care.
column_order = ['Culture Type', 'Stanford ED', 'Valley Care ED']
df_bugs = df_bugs[column_order]
df_bugs

In [None]:
# Write the wide table as HTML without the row index. `index` is documented
# as a bool, so pass False explicitly rather than relying on None being falsy.
df_bugs.to_html('table2_top_bugs.html', index=False)

In [None]:
# Write the wide table as CSV without the row index. `index` is documented
# as a bool, so pass False explicitly rather than relying on None being falsy.
df_bugs.to_csv('table2_top_bugs_by_culture.csv', index=False)

In [None]:
# NOTE(review): `df_bugs_test` is not defined in any cell shown in this
# notebook, so this raises NameError on Restart & Run All. Looks like leftover
# scratch work -- define the frame first or remove the cell. TODO confirm.
df_bugs_test['test'] = ['test' for i in range(len(df_bugs_test))]

In [None]:
# NOTE(review): depends on `df_bugs_test`, which no visible cell defines. The
# frame with "Culture Type" dropped is only displayed, not assigned.
# Presumably scratch work -- TODO confirm or remove.
df_bugs_test.drop("Culture Type", axis=1)