In [2]:
import pandas as pd
import numpy as np
import altair as alt
from ipywidgets import interact
from os.path import join

from constants import COLUMNS, SITE_IDS_IN_DIR
from utils import read_full_diagnoses_df

In [5]:
# Earlier hard-coded list of site IDs, kept for reference:
# SITE_IDS = ['FWN','FXL','FZT','FMT','FFG','FOW','FSZ','FMA','FEQ','FVX','FKQ','FBL','FDQ','FKN','FBD','FKL','FUU','FZU','FZM','FUN']
# The site IDs now come from SITE_IDS_IN_DIR.DIAGNOSES (imported from constants).
SITE_IDS = SITE_IDS_IN_DIR.DIAGNOSES

['FKN',
 'FEQ',
 'FKL',
 'FDQ',
 'FSZ',
 'FXL',
 'FBD',
 'FZU',
 'FZT',
 'FOW',
 'FUN',
 'FUU',
 'FZM',
 'FVX',
 'FBL',
 'FMT',
 'FMA',
 'FWN',
 'FKQ',
 'FFG']

In [65]:
# Load the diagnoses table for the selected sites.
# NOTE(review): where read_full_diagnoses_df reads from is not visible in this notebook.
df = read_full_diagnoses_df(SITE_IDS)

# Columns
# Column-name aliases taken from COLUMNS; the cells below use them to build
# Altair encodings and filter predicates.
siteid = COLUMNS.SITE_ID
icd_code = COLUMNS.ICD_CODE
icd_version = COLUMNS.ICD_VERSION
num_patients = COLUMNS.NUM_PATIENTS

# Preview the first rows of the loaded table
df.head()

Unnamed: 0,siteid,icd_code,icd_version,num_patients
0,FWN,54,9,3
1,FWN,103,9,9
2,FWN,37,9,11
3,FWN,J40,10,4
4,FWN,J22,10,8


## Summarization Chart
##### Select a site ID to see the data for that specific site

In [287]:
# Color scale shared by the charts below: the ICD version values "9" and "10"
# are mapped to two fixed colors.
icd_color_scale = alt.Scale(domain=["9", "10"], range=["#69A6CE", "#EB6F63"])

def apply_theme(base, label_font_size=14, title_font_size=18, chart_title_font_size=18):
    """Apply the notebook's axis and title styling to a chart.

    Parameters
    ----------
    base
        Any object exposing ``configure_axis`` and ``configure_title``
        (for example an Altair chart).
    label_font_size : int, optional
        Font size of the axis tick labels. Defaults to 14.
    title_font_size : int, optional
        Font size of the axis titles. Defaults to 18.
    chart_title_font_size : int, optional
        Font size of the chart title. Defaults to 18.

    Returns
    -------
    Whatever ``configure_axis(...).configure_title(...)`` returns for ``base``.

    Notes
    -----
    The defaults reproduce the previously hard-coded sizes, so existing
    ``apply_theme(chart)`` calls are unaffected. Font weights (300 for axes,
    400 for the chart title) and the centered title anchor stay fixed.
    """
    return base.configure_axis(
        labelFontSize=label_font_size,
        labelFontWeight=300,
        titleFontSize=title_font_size,
        titleFontWeight=300
    ).configure_title(fontSize=chart_title_font_size, fontWeight=400, anchor="middle")
    
def diagnoses_chart(SiteID):
    """Build a horizontal bar chart of the summed patient count per ICD code.

    Parameters
    ----------
    SiteID : str
        ``"All Sites"`` to chart every row, or a site ID to keep only the
        rows whose site column equals it. The value is also appended to the
        chart title.

    Returns
    -------
    The themed two-layer chart (bars plus white value labels) with the hover
    selection attached, after calling ``.interactive()`` on it.
    """
    # Hover selection keyed on the ICD code. ``empty='none'`` makes an empty
    # selection match nothing, so all bars rest at full opacity and only the
    # bar under the pointer is dimmed. With the default, an empty selection
    # matches everything and every bar would rest at the dimmed opacity.
    mouseover = alt.selection(
        type='single', on='mouseover', fields=[icd_code], nearest=False, empty='none'
    )

    # Base bar chart: one bar per ICD code, sorted by descending total
    base = alt.Chart(df).mark_bar().encode(
        x=alt.X(f"sum({num_patients}):Q", title="Total Number of Patients", axis=alt.Axis(tickCount=5)),
        y=alt.Y(f"{icd_code}:N", title="ICD Code", sort="-x"),
        color=alt.Color(icd_version + ":N", scale=icd_color_scale, title="ICD Version"),
        # Mouse hover visual effect: non-hovered bars at 1, hovered bar at 0.8
        opacity=alt.condition(~mouseover, alt.value(1), alt.value(0.8)),
        tooltip=[icd_code, f"sum({num_patients})"]
    ).transform_filter(
        # Do not show uncertain data: keep only non-negative patient counts
        alt.FieldGTEPredicate(field=num_patients, gte=0)
    ).properties(
        title="All Diagnoses Patients Have Starting 7 Days Before Positive Test (" + SiteID + ")",
        width=500,
        height=300
    )

    # Restrict to a single site unless the "All Sites" entry was chosen
    if SiteID != "All Sites":
        base = base.transform_filter(
            alt.FieldEqualPredicate(field=siteid, equal=SiteID)
        )

    # Text label: white totals right-aligned just inside the end of each bar
    text = base.mark_text(align='right', baseline='middle', dx=-4, fontWeight="bold").encode(
        text=f"sum({num_patients}):Q",
        color=alt.value("white")
    )

    chart = apply_theme((base + text)).add_selection(mouseover)

    return chart.interactive()

# Render a SiteID dropdown ("All Sites" plus every site ID) that redraws the chart.
# The trailing semicolon stops the cell from also echoing interact()'s return value.
interact(diagnoses_chart, SiteID=["All Sites"] + SITE_IDS);

interactive(children=(Dropdown(description='SiteID', options=('All Sites', 'FWN', 'FXL', 'FZT', 'FMT', 'FFG', …

<function __main__.diagnoses_chart(SiteID)>

## One-to-N Comparison Chart
##### Select a site ID from the dropdown menu to highlight that site

In [280]:
def diagnoses_chart_hl(HighlightBy):
    """Bar chart of patients per ICD code with one site's share highlighted.

    Light-gray bars show the summed patient count per ICD code over all rows.
    A second bar layer, colored by ICD version, is restricted to the rows
    whose site column equals ``HighlightBy``. Each bar layer gets its own
    text label layer. ``HighlightBy`` is also appended to the chart title.

    Returns the themed four-layer chart after calling ``.interactive()``.
    """
    patients_total = f"sum({num_patients}):Q"
    chart_title = "All Diagnoses Patients Have Starting 7 Days Before Positive Test (" + HighlightBy + ")"

    # Gray background bars (all sites), sorted by descending total
    all_sites_bars = alt.Chart(df).mark_bar().encode(
        x=alt.X(patients_total, title="Total Number of Patients", axis=alt.Axis(tickCount=5)),
        y=alt.Y(f"{icd_code}:N", title="ICD Code", sort="-x"),
        color=alt.value("lightgray"),
    )
    # Do not show uncertain data
    all_sites_bars = all_sites_bars.transform_filter(
        alt.FieldGTEPredicate(field=num_patients, gte=0)
    )
    all_sites_bars = all_sites_bars.properties(title=chart_title, width=500, height=300)

    # Bars for the selected site only, colored by ICD version
    version_color = alt.Color(icd_version + ":N", scale=icd_color_scale, title="ICD Version")
    only_selected_site = alt.FieldEqualPredicate(field=siteid, equal=HighlightBy)
    selected_bars = all_sites_bars.encode(color=version_color).transform_filter(only_selected_site)

    # White totals inside the end of the gray bars
    all_sites_labels = all_sites_bars.mark_text(
        align='right', baseline='middle', dx=-4, fontWeight="bold"
    ).encode(text=patients_total, color=alt.value("white"))

    # Totals placed just past the end of the highlighted bars
    selected_labels = selected_bars.mark_text(
        align='left', baseline='middle', dx=4, fontWeight="bold"
    ).encode(text=patients_total)

    layered = all_sites_bars + all_sites_labels + selected_bars + selected_labels
    return apply_theme(layered).interactive()

# Render a HighlightBy dropdown of site IDs that redraws the comparison chart.
# The trailing semicolon stops the cell from also echoing interact()'s return value.
interact(diagnoses_chart_hl, HighlightBy=SITE_IDS);

interactive(children=(Dropdown(description='HighlightBy', options=('FWN', 'FXL', 'FZT', 'FMT', 'FFG', 'FOW', '…

<function __main__.diagnoses_chart_hl(HighlightBy)>

In [315]:
# Linked two-panel view: the right panel shows totals per site, the left panel
# shows totals per ICD code. Hovering a site bar on the right drives the
# highlighted layer on the left through the shared `mouseover` selection.

# Mouseover effect (selection keyed on the site ID column)
mouseover = alt.selection(type='single', on='mouseover', fields=[siteid], nearest=False)

# Base bar chart: light-gray totals per ICD code over all rows
base = alt.Chart(df).mark_bar().encode(
    x=alt.X(f"sum({num_patients}):Q", title="Total Number of Patients", axis=alt.Axis(tickCount=5)),
    y=alt.Y(f"{icd_code}:N", title="ICD Code"),
    color=alt.value("lightgray")
).transform_filter(
    # Do not show uncertain data (keeps only rows with a patient count >= 0)
    alt.FieldGTEPredicate(field=num_patients, gte=0)
).properties(
    title="Diagnoses Patients Have Starting 7 Days Before Positive Test",
    width=350,
    height=300
)

# Text label: white totals right-aligned at the end of the gray bars
text = base.mark_text(align='right', baseline='middle', dx=-4, fontWeight="bold").encode(
    text=f"sum({num_patients}):Q",
    color=alt.value("white")
)

# Highlight selected site id: same bars, colored by ICD version and filtered
# by the hover selection
hl = base.encode(
    color=alt.Color(icd_version + ":N", scale=icd_color_scale, title="ICD Version")
).transform_filter(mouseover)

# Totals for the highlighted layer, left-aligned just past the bar end
text_hl = hl.mark_text(align='left', baseline='middle', dx=4, fontWeight="bold").encode(
    text=f"sum({num_patients}):Q"
)

# Chart for total by site ID; this is the panel the hover selection is attached to
chart_by_siteid = alt.Chart(df).mark_bar().encode(
    y=alt.Y(f"sum({num_patients}):Q", title="Total Number of Patients", axis=alt.Axis(tickCount=5)),
    x=alt.X(f"{siteid}:N", title="Site", sort="-y"),
    # Bars outside the selection get the first color, bars inside it the second
    color=alt.condition(~mouseover, alt.value("rgb(244, 208, 75)"), alt.value("#E48236"))
).transform_filter(
    # Do not show uncertain data (keeps only rows with a patient count >= 0)
    alt.FieldGTEPredicate(field=num_patients, gte=0)
).properties(
    title="Diagnoses By Site",
    width=350,
    height=300
).add_selection(
    mouseover
)

# Layer the four ICD-code layers, place the per-site chart beside them, then theme.
# NOTE(review): resolve_scale receives a bare positional "independent"; which
# channel that binds to is not evident from this cell — consider naming the
# channel explicitly (e.g. color="independent"). TODO confirm intent.
chart = apply_theme(
    ((base + text + hl + text_hl) | chart_by_siteid)
).resolve_scale("independent")

# Display the combined chart as the cell output
chart

In [298]:
def apply_theme_sm(base):
    """Apply the smaller-axis styling variant to a chart.

    Sets axis labels to size 10 and axis titles to size 14 (both weight 300),
    and the chart title to size 20, weight 400, anchored in the middle.
    Returns the result of chaining ``configure_axis`` and ``configure_title``
    on ``base``.
    """
    axis_options = {
        "labelFontSize": 10,
        "labelFontWeight": 300,
        "titleFontSize": 14,
        "titleFontWeight": 300,
    }
    title_options = {"fontSize": 20, "fontWeight": 400, "anchor": "middle"}

    with_axis_style = base.configure_axis(**axis_options)
    return with_axis_style.configure_title(**title_options)

# Mouseover effect
# NOTE(review): this selection is not referenced by the faceted chart built
# below (no encoding condition uses it and it is never passed to
# add_selection) — confirm whether it is still needed.
mouseover = alt.selection(type='single', on='mouseover', fields=[icd_code], nearest=False)

# Small multiples: one bar-chart panel per site ID, laid out five panels per
# row, with bars colored by ICD version and a tooltip showing the ICD code
# and summed patient count.
base = alt.Chart(df).mark_bar().encode(
    x=alt.X(f"{icd_code}:N", title="ICD Code"),
    y=alt.Y(f"sum({num_patients}):Q", title="Total Number of Patients"),
    color=alt.Color(icd_version + ":N", scale=icd_color_scale, title="ICD Version"),
    facet=alt.Facet(f"{siteid}:N", columns=5, title=""),
    tooltip=[icd_code, f"sum({num_patients})"]
).transform_filter(
    # Do not show uncertain data (keeps only rows with a patient count >= 0)
    alt.FieldGTEPredicate(field=num_patients, gte=0)
).properties(
    width=150,
    height=150
)

# Apply the smaller-axis theme and set the overall title
chart = apply_theme_sm(base).properties(
    title="All Diagnoses Patients Have Starting 7 Days Before Positive Test"
)

# Display the chart with .interactive() applied
chart.interactive()