In [None]:
import pandas as pd
import numpy as np
import altair as alt
from ipywidgets import interact
from os.path import join

from constants import COLUMNS
from utils import read_latest_diagnoses_df, read_icd_df

In [None]:
# Read the diagnoses table through the project helper
df = read_latest_diagnoses_df()

# Short aliases for the column names declared in constants.COLUMNS
siteid, icd_code, icd_version, num_patients = (
    COLUMNS.SITE_ID,
    COLUMNS.ICD_CODE,
    COLUMNS.ICD_VERSION,
    COLUMNS.NUM_PATIENTS,
)

# Distinct site identifiers, used later to populate the site selectors
SITE_IDS = df[siteid].unique().tolist()

# df  # Keep this line commented out so that no data preview ends up in the GitHub repo

In [None]:
# Attach the ICD lookup table (columns such as `ICDcode`) to each diagnosis row.
icd_df = read_icd_df()

# Guard the join so that re-running this cell is harmless: merging a second
# time would make pandas suffix the overlapping columns (`ICDcode_x`,
# `ICDcode_y`, ...) and the charts below would no longer find them.
if "ICDcode" not in df.columns:
    # Left join keeps every diagnosis row even when no ICD entry matches.
    # The left key uses the `icd_code` alias instead of a hard-coded name.
    df = df.merge(icd_df, how="left", left_on=icd_code, right_on="ICDcode")
# df.head() # Keep this line commented out so that no data preview ends up in the GitHub repo

## Summarization Chart
##### Select a site ID to show only the data from that site

In [None]:
# Shared look-and-feel so that every chart is styled consistently
def apply_theme(base):
    """Return `base` with the common axis, title and legend configuration applied.

    `base` must expose `configure_axis`, `configure_title` and
    `configure_legend`; the three calls are chained in that order and the
    result of the last one is returned.
    """
    axis_options = dict(
        labelFontSize=14,
        labelFontWeight=300,
        titleFontSize=18,
        titleFontWeight=300,
        labelLimit=1000,
    )
    title_options = dict(fontSize=18, fontWeight=400, anchor="middle")
    legend_options = dict(
        titleFontSize=18,
        titleFontWeight=400,
        labelFontSize=16,
        labelFontWeight=300,
        padding=10,
        cornerRadius=0,
        orient='bottom-right',
    )

    themed = base.configure_axis(**axis_options)
    themed = themed.configure_title(**title_options)
    return themed.configure_legend(**legend_options)

In [None]:
# Three-colour palette handed to the colour encoding of the bar chart below
bar_scale = alt.Scale(
    range=[
        "#CA2026",
        "#377FB8",
        "#60B75D",
    ]
)

def diagnoses_chart(SiteID, Normalize, YAxis):
    """Build the stacked diagnoses bar chart, coloured by site.

    Parameters
    ----------
    SiteID
        Site to restrict the chart to, or "All Sites" to keep every site.
        Non-string identifiers are accepted; the value is only formatted
        into the title and compared against the site column.
    Normalize
        "Yes" (any letter case) stacks each bar as fractions of its total;
        every other value shows the summed patient counts.
    YAxis
        "ICD Description" labels the bars with the `ICDdescription` column;
        any other value labels them with the ICD code column.

    Returns
    -------
    The themed chart with the hover selection attached, made interactive
    via `.interactive()`.
    """
    # Use the column alias rather than a hard-coded "icd_code" literal
    yfield = "ICDdescription" if YAxis == "ICD Description" else icd_code

    # Case-insensitive so that "yes"/"Yes" behave the same for direct callers
    normalize = str(Normalize).lower() == "yes"

    # Build the x encoding once instead of encoding it and overriding it later
    patient_sum = f"sum({num_patients})"
    if normalize:
        x_encoding = alt.X(
            f"{patient_sum}:Q",
            title="Fraction of Patients",
            axis=alt.Axis(tickCount=5),
            stack="normalize",
        )
    else:
        x_encoding = alt.X(
            f"{patient_sum}:Q",
            title="Number of patients",
            axis=alt.Axis(tickCount=5),
        )

    # Mouseover effect
    mouseover = alt.selection(type='single', on='mouseover', fields=[icd_code], nearest=False)

    # Base bar chart
    base = alt.Chart(df).mark_bar().encode(
        x=x_encoding,
        y=alt.Y(f"{yfield}:N", title=None, sort="-x"),
        color=alt.Color(f"{siteid}:N", scale=bar_scale, title=None),
        # Bars matching the hover selection are fully opaque, the rest are dimmed
        opacity=alt.condition(mouseover, alt.value(1), alt.value(0.5)),
        tooltip=[siteid, icd_code, "ICDdescription", patient_sum],
        # Sort stacked segments by number of patients, largest first
        order=alt.Order(f"{num_patients}:Q", sort='descending'),
    ).transform_filter(
        # Do not show uncertain data: keep only rows with a patient count >= 0
        alt.FieldGTEPredicate(field=num_patients, gte=0)
    ).properties(
        # f-string formatting also works when the site ID is not a string
        title=f"Diagnoses starting 7 days before positive test ({SiteID})",
        width=600,
        height=400
    )

    if SiteID != "All Sites":
        base = base.transform_filter(
            alt.FieldEqualPredicate(field=siteid, equal=SiteID)
        )

    chart = apply_theme(base).add_selection(mouseover)

    return chart.interactive()

# One widget per keyword: site selector, normalisation toggle and y-axis labelling
interact(
    diagnoses_chart,
    SiteID=["All Sites"] + SITE_IDS,
    Normalize=["No", "Yes"],
    YAxis=["ICD Description", "ICD Code"],
)

In [None]:
# Fixed colours for the two ICD versions: "9" -> #69A6CE, "10" -> #EB6F63
icd_color_scale = alt.Scale(
    domain=["9", "10"],
    range=["#69A6CE", "#EB6F63"],
)

def apply_theme(base):
    """Return `base` with axis and title configuration applied (title centred).

    Note: defining this function rebinds the name `apply_theme`, so code run
    afterwards uses this variant, which sets no legend options.
    """
    axis_options = {
        "labelFontSize": 14,
        "labelFontWeight": 300,
        "titleFontSize": 18,
        "titleFontWeight": 300,
    }
    with_axis = base.configure_axis(**axis_options)
    return with_axis.configure_title(fontSize=18, fontWeight=400, anchor="middle")
    
def diagnoses_chart(SiteID):
    """Build the bar chart of total patients per ICD code, coloured by ICD version.

    `SiteID` is either "All Sites" (no site filter) or a site identifier
    to restrict the data to; it is also concatenated into the chart title.
    Returns the themed, layered chart (bars plus white count labels) with
    the hover selection attached and `.interactive()` applied.
    """
    # Hover selection keyed on the ICD code
    hover = alt.selection(type='single', on='mouseover', fields=[icd_code], nearest=False)

    patient_sum = f"sum({num_patients})"
    heading = "All Diagnoses Patients Have Starting 7 Days Before Positive Test (" + SiteID + ")"

    # Bars: one per ICD code, sorted by descending x value
    bars = alt.Chart(df).mark_bar().encode(
        x=alt.X(f"{patient_sum}:Q", title="Total Number of Patients", axis=alt.Axis(tickCount=5)),
        y=alt.Y(f"{icd_code}:N", title="ICD Code", sort="-x"),
        color=alt.Color(icd_version + ":N", scale=icd_color_scale, title="ICD Version"),
        # Rows outside the hover selection get opacity 1, rows inside it 0.8
        opacity=alt.condition(~hover, alt.value(1), alt.value(0.8)),
        tooltip=[icd_code, patient_sum],
    )
    # Do not show uncertain data: keep only rows with a patient count >= 0
    bars = bars.transform_filter(alt.FieldGTEPredicate(field=num_patients, gte=0))
    bars = bars.properties(title=heading, width=500, height=300)

    # Optional restriction to a single site
    if SiteID != "All Sites":
        site_only = alt.FieldEqualPredicate(field=siteid, equal=SiteID)
        bars = bars.transform_filter(site_only)

    # White, bold count labels placed just inside the right end of each bar
    labels = bars.mark_text(
        align='right',
        baseline='middle',
        dx=-4,
        fontWeight="bold",
    ).encode(
        text=f"{patient_sum}:Q",
        color=alt.value("white"),
    )

    layered = bars + labels
    return apply_theme(layered).add_selection(hover).interactive()

# Site selector: "All Sites" followed by every site ID found in the data
interact(
    diagnoses_chart,
    SiteID=["All Sites"] + SITE_IDS,
)

In [None]:
def apply_theme(base):
    """Return `base` with axis and title configuration applied (title anchored at the start).

    Note: defining this function rebinds the name `apply_theme` for all
    code executed afterwards.
    """
    themed = base.configure_axis(
        labelFontSize=14,
        labelFontWeight=300,
        titleFontSize=18,
        titleFontWeight=300,
    )
    title_options = dict(fontSize=18, fontWeight=400, anchor="start")
    return themed.configure_title(**title_options)
    
def diagnoses_chart(SiteID, Normalize):
    """Build the stacked bar chart of patients per ICD code, coloured by site.

    Parameters
    ----------
    SiteID
        Site to restrict the chart to, or "All Sites" to keep every site.
        Non-string identifiers are accepted; the value is only formatted
        into the title and compared against the site column.
    Normalize
        "no" (any letter case) shows the summed patient counts; every other
        value stacks each bar as fractions of its total.

    Returns
    -------
    The themed chart with the hover selection attached, made interactive
    via `.interactive()`.
    """
    # Case-insensitive so that "No" is not silently treated as "normalise"
    normalize = str(Normalize).lower() != "no"

    # Build the x encoding once instead of encoding it and overriding it later
    patient_sum = f"sum({num_patients})"
    if normalize:
        x_encoding = alt.X(
            f"{patient_sum}:Q",
            title="Fraction of Patients",
            axis=alt.Axis(tickCount=5),
            stack="normalize",
        )
    else:
        x_encoding = alt.X(
            f"{patient_sum}:Q",
            title="Number of Patients",
            axis=alt.Axis(tickCount=5),
        )

    # Mouseover effect
    mouseover = alt.selection(type='single', on='mouseover', fields=[icd_code], nearest=False)

    # Base bar chart (column names come from the aliases, not hard-coded literals)
    base = alt.Chart(df).mark_bar().encode(
        x=x_encoding,
        y=alt.Y(f"{icd_code}:N", title="ICD Code", sort="-x"),
        color=alt.Color(f"{siteid}:N", scale=alt.Scale(scheme="category10"), title="Site"),
        # Bars matching the hover selection are fully opaque, the rest are dimmed
        opacity=alt.condition(mouseover, alt.value(1), alt.value(0.5)),
        tooltip=[icd_code, "ICDdescription:N", patient_sum, siteid],
        # Sort stacked segments by number of patients, largest first
        order=alt.Order(f"{num_patients}:Q", sort='descending'),
    ).transform_filter(
        # Do not show uncertain data: keep only rows with a patient count >= 0
        alt.FieldGTEPredicate(field=num_patients, gte=0)
    ).properties(
        # f-string formatting also works when the site ID is not a string
        title=f"Diagnoses Starting 7 Days Before Positive Test ({SiteID})",
        width=500,
        height=500
    )

    if SiteID != "All Sites":
        base = base.transform_filter(
            alt.FieldEqualPredicate(field=siteid, equal=SiteID)
        )

    chart = apply_theme(base).add_selection(mouseover)

    return chart.interactive()

# Site selector plus a yes/no normalisation toggle ("yes" is listed first)
interact(
    diagnoses_chart,
    SiteID=["All Sites"] + SITE_IDS,
    Normalize=["yes", "no"],
)

In [None]:
def apply_theme(base):
    """Apply the axis font settings and a start-anchored title style to `base`.

    Returns whatever `configure_title` returns when chained after
    `configure_axis`. Defining this function rebinds `apply_theme` for all
    code executed afterwards.
    """
    axis_fonts = dict(
        labelFontSize=14,
        labelFontWeight=300,
        titleFontSize=18,
        titleFontWeight=300,
    )
    title_style = dict(fontSize=18, fontWeight=400, anchor="start")

    configured = base.configure_axis(**axis_fonts)
    configured = configured.configure_title(**title_style)
    return configured
    
def diagnoses_chart(SiteID, Normalize):
    """Build the stacked bar chart of patients per site, coloured by ICD code.

    Parameters
    ----------
    SiteID
        Site to restrict the chart to, or "All Sites" to keep every site.
        Non-string identifiers are accepted; the value is only formatted
        into the title and compared against the site column.
    Normalize
        "no" (any letter case) shows the summed patient counts; every other
        value stacks each bar as fractions of its total.

    Returns
    -------
    The themed chart with the hover selection attached, made interactive
    via `.interactive()`.
    """
    # Case-insensitive so that "No" is not silently treated as "normalise"
    normalize = str(Normalize).lower() != "no"

    # Build the x encoding once instead of encoding it and overriding it later
    patient_sum = f"sum({num_patients})"
    if normalize:
        x_encoding = alt.X(
            f"{patient_sum}:Q",
            title="Fraction of Patients",
            axis=alt.Axis(tickCount=5),
            stack="normalize",
        )
    else:
        x_encoding = alt.X(
            f"{patient_sum}:Q",
            title="Number of Patients",
            axis=alt.Axis(tickCount=5),
        )

    # Mouseover effect
    mouseover = alt.selection(type='single', on='mouseover', fields=[icd_code], nearest=False)

    # Base bar chart (column names come from the aliases, not hard-coded literals)
    base = alt.Chart(df).mark_bar().encode(
        x=x_encoding,
        y=alt.Y(f"{siteid}:N", title="Site", sort="-x"),
        color=alt.Color(f"{icd_code}:N", scale=alt.Scale(scheme="category20"), title="ICD Code"),
        # Bars matching the hover selection are fully opaque, the rest are dimmed
        opacity=alt.condition(mouseover, alt.value(1), alt.value(0.5)),
        tooltip=[icd_code, "ICDdescription:N", patient_sum, siteid],
        # Sort stacked segments by number of patients, largest first
        order=alt.Order(f"{num_patients}:Q", sort='descending'),
    ).transform_filter(
        # Do not show uncertain data: keep only rows with a patient count >= 0
        alt.FieldGTEPredicate(field=num_patients, gte=0)
    ).properties(
        # f-string formatting also works when the site ID is not a string
        title=f"Diagnoses Starting 7 Days Before Positive Test ({SiteID})",
        width=500,
        height=300
    )

    if SiteID != "All Sites":
        base = base.transform_filter(
            alt.FieldEqualPredicate(field=siteid, equal=SiteID)
        )

    chart = apply_theme(base).add_selection(mouseover)

    return chart.interactive()

# Site selector plus a yes/no normalisation toggle ("yes" is listed first)
interact(
    diagnoses_chart,
    SiteID=["All Sites"] + SITE_IDS,
    Normalize=["yes", "no"],
)

## One-to-N Comparison Chart
##### Select a site ID from the dropdown menu to highlight that site's data

In [None]:
def diagnoses_chart_hl(HighlightBy):
    """Overlay one site's diagnoses on top of the grey totals for all sites.

    `HighlightBy` is the site identifier to highlight; it is concatenated
    into the chart title and used as the equality filter for the coloured
    layer. Returns the themed four-layer chart (grey bars, their labels,
    highlighted bars, their labels) with `.interactive()` applied.
    """
    patient_sum = f"sum({num_patients}):Q"
    heading = "All Diagnoses Patients Have Starting 7 Days Before Positive Test (" + HighlightBy + ")"

    # Light-grey background bars computed over every site
    background = alt.Chart(df).mark_bar().encode(
        x=alt.X(patient_sum, title="Total Number of Patients", axis=alt.Axis(tickCount=5)),
        y=alt.Y(f"{icd_code}:N", title="ICD Code", sort="-x"),
        color=alt.value("lightgray"),
    )
    # Do not show uncertain data: keep only rows with a patient count >= 0
    background = background.transform_filter(
        alt.FieldGTEPredicate(field=num_patients, gte=0)
    )
    background = background.properties(title=heading, width=500, height=300)

    # Same bars restricted to the chosen site and coloured by ICD version
    version_color = alt.Color(icd_version + ":N", scale=icd_color_scale, title="ICD Version")
    highlighted = background.encode(color=version_color).transform_filter(
        alt.FieldEqualPredicate(field=siteid, equal=HighlightBy)
    )

    # White labels inside the grey bars (right-aligned, shifted 4px left)
    background_labels = background.mark_text(
        align='right', baseline='middle', dx=-4, fontWeight="bold"
    ).encode(
        text=patient_sum,
        color=alt.value("white"),
    )

    # Labels for the highlighted bars (left-aligned, shifted 4px right)
    highlighted_labels = highlighted.mark_text(
        align='left', baseline='middle', dx=4, fontWeight="bold"
    ).encode(text=patient_sum)

    layered = background + background_labels + highlighted + highlighted_labels
    return apply_theme(layered).interactive()

# Selector listing every site ID; the chosen one is highlighted
interact(
    diagnoses_chart_hl,
    HighlightBy=SITE_IDS,
)

In [None]:
# Linked view: hovering a site bar on the right highlights that site's
# contribution to each ICD code on the left.

# Mouseover selection keyed on the site ID column
mouseover = alt.selection(type='single', on='mouseover', fields=[siteid], nearest=False)

# Base bar chart: light-grey totals per ICD code
base = alt.Chart(df).mark_bar().encode(
    x=alt.X(f"sum({num_patients}):Q", title="Total Number of Patients", axis=alt.Axis(tickCount=5)),
    y=alt.Y(f"{icd_code}:N", title="ICD Code"),
    color=alt.value("lightgray")
).transform_filter(
    # Do not show uncertain data: keep only rows with a patient count >= 0
    alt.FieldGTEPredicate(field=num_patients, gte=0)
).properties(
    title="Diagnoses Patients Have Starting 7 Days Before Positive Test",
    width=350,
    height=300
)

# Text label: white, bold, right-aligned inside the grey bars
text = base.mark_text(align='right', baseline='middle', dx=-4, fontWeight="bold").encode(
    text=f"sum({num_patients}):Q",
    color=alt.value("white")
)

# Highlight selected site id: same bars, coloured by ICD version and
# filtered down to the rows matched by the hover selection
hl = base.encode(
    color=alt.Color(icd_version + ":N", scale=icd_color_scale, title="ICD Version")
).transform_filter(mouseover)

# Labels for the highlighted bars, left-aligned
text_hl = hl.mark_text(align='left', baseline='middle', dx=4, fontWeight="bold").encode(
    text=f"sum({num_patients}):Q"
)

# Chart for total by site ID; this is the chart the hover selection is attached to
chart_by_siteid = alt.Chart(df).mark_bar().encode(
    y=alt.Y(f"sum({num_patients}):Q", title="Total Number of Patients", axis=alt.Axis(tickCount=5)),
    x=alt.X(f"{siteid}:N", title="Site", sort="-y"),
    # Rows outside the selection use the first colour, rows inside it the second
    color=alt.condition(~mouseover, alt.value("rgb(244, 208, 75)"), alt.value("#E48236"))
).transform_filter(
    # Do not show uncertain data: keep only rows with a patient count >= 0
    alt.FieldGTEPredicate(field=num_patients, gte=0)
).properties(
    title="Diagnoses By Site",
    width=350,
    height=300
).add_selection(
    mouseover
)

# Name the channel explicitly: a positional "independent" is bound to whichever
# channel happens to be the first parameter of the resolve map in the installed
# Altair version, so the keyword form is the stable spelling.
chart = apply_theme(
    ((base + text + hl + text_hl) | chart_by_siteid)
).resolve_scale(color="independent")

chart

In [None]:
def apply_theme_sm(base):
    """Return `base` with the small-multiples styling: smaller axis fonts, centred title."""
    axis_options = dict(
        labelFontSize=10,
        labelFontWeight=300,
        titleFontSize=14,
        titleFontWeight=300,
    )
    title_options = dict(fontSize=20, fontWeight=400, anchor="middle")

    with_axis = base.configure_axis(**axis_options)
    return with_axis.configure_title(**title_options)

# Hover selection keyed on the ICD code.
# NOTE(review): it is defined here but not attached to the chart built below.
mouseover = alt.selection(type='single', on='mouseover', fields=[icd_code], nearest=False)

# Small multiples: one panel per site, five panels per row
base = alt.Chart(df).mark_bar()
base = base.encode(
    x=alt.X(f"{icd_code}:N", title="ICD Code"),
    y=alt.Y(f"sum({num_patients}):Q", title="Total Number of Patients"),
    color=alt.Color(icd_version + ":N", scale=icd_color_scale, title="ICD Version"),
    facet=alt.Facet(f"{siteid}:N", columns=5, title=""),
    tooltip=[icd_code, f"sum({num_patients})"],
)
# Do not show uncertain data: keep only rows with a patient count >= 0
base = base.transform_filter(alt.FieldGTEPredicate(field=num_patients, gte=0))
# Size of each individual panel
base = base.properties(width=150, height=150)

# Apply the small theme first, then set the overall title
chart = apply_theme_sm(base)
chart = chart.properties(
    title="All Diagnoses Patients Have Starting 7 Days Before Positive Test"
)

chart.interactive()