In [1]:
import pandas as pd
import numpy as np
import altair as alt
from altair import datum
from ipywidgets import interact
from os.path import join
from datetime import date, datetime, timedelta

from constants import COLUMNS
from utils import read_latest_daily_counts_df
from utils_data_align import add_aligned_time_steps_column

In [2]:
# Load the latest daily-counts table and collect the distinct site ids that
# feed the SiteID dropdowns of the charts below.
df = read_latest_daily_counts_df()
SITE_IDS = df[COLUMNS.SITE_ID].unique().tolist()

# Columns
# NOTE: `date` rebinds the name brought in by `from datetime import date` in
# the import cell; from here on `date` is the date column name, not the class.
siteid = COLUMNS.SITE_ID
date = COLUMNS.DATE
new_positive_cases = COLUMNS.NEW_POSITIVE_CASES
patients_in_icu = COLUMNS.PATIENTS_IN_ICU
new_deaths = COLUMNS.NEW_DEATHS

# Convert data for viz: negative counts are replaced with 0 (in place on `df`).
for count_column in (new_positive_cases, patients_in_icu, new_deaths):
    df.loc[df[count_column] < 0, count_column] = 0

# Range of some columns
# The date column holds "%Y-%m-%d" strings, so the string min/max are also the
# chronological first/last days. Each bound is parsed once and reused.
DATE_FORMAT = "%Y-%m-%d"
start_date = df[date].min()
end_date = df[date].max()
first_day = datetime.strptime(start_date, DATE_FORMAT)
last_day = datetime.strptime(end_date, DATE_FORMAT)
duration = (last_day - first_day).days

# One-day padding on each side, used as the x-axis domain of the date charts.
start_date_margin = (first_day - timedelta(days=1)).strftime(DATE_FORMAT)
end_date_margin = (last_day + timedelta(days=1)).strftime(DATE_FORMAT)

print(start_date, end_date)

df

2020-03-16 2020-03-31


Unnamed: 0,siteid,date,new_positive_cases,patients_in_icu,new_deaths
0,FZU,2020-03-20,5,0,0
1,FZU,2020-03-21,8,0,0
2,FZU,2020-03-22,6,0,0
3,FZU,2020-03-23,10,0,0
4,FZU,2020-03-24,8,0,0
...,...,...,...,...,...
274,FBD,2020-03-27,11,1,0
275,FBD,2020-03-28,11,1,0
276,FBD,2020-03-29,10,3,1
277,FBD,2020-03-30,8,2,1


In [3]:
def apply_theme(base):
    """Return `base` with the shared axis, title and legend styling applied.

    `base` is any object exposing chained `configure_axis`, `configure_title`
    and `configure_legend` methods; they are called in that order and the
    result of the last call is returned.
    """
    axis_style = dict(
        labelFontSize=14,
        labelFontWeight=300,
        titleFontSize=18,
        titleFontWeight=300,
    )
    title_style = dict(fontSize=18, fontWeight=400, anchor="middle")
    legend_style = dict(
        titleFontSize=18,
        titleFontWeight=400,
        labelFontSize=14,
        labelFontWeight=300,
    )

    themed = base.configure_axis(**axis_style)
    themed = themed.configure_title(**title_style)
    return themed.configure_legend(**legend_style)

## Total Daily Counts

In [4]:
def daily_counts_chart(SiteID):
    """Build the layered daily-counts chart for one site or for all rows.

    The three count columns are folded into (category, value) rows, summed per
    date and category, and drawn as lines with circle markers on top of three
    light-gray dashed guideline segments.

    Parameters
    ----------
    SiteID : str
        "All" to chart every row of `df`, or a site id to keep only the rows
        whose site-id column equals it.

    Returns
    -------
    The themed layered chart (guidelines, lines, circles) with
    `.interactive()` applied.
    """
    category = "category"
    value = "value"

    # Mouseover effect
    mouseover = alt.selection_single(on="mouseover", fields=[category])  # nearest=True does not look working

    base = alt.Chart(df).transform_fold(
        # Fold three quantitative fields, making three rows from one original row.
        fold=[new_positive_cases, patients_in_icu, new_deaths],
        as_=[category, value]
    ).encode(
        x=alt.X(
            f"{date}:T", axis=alt.Axis(tickCount=7),
            scale=alt.Scale(domain=[start_date_margin, end_date_margin]),
            title=None,
        ),
        y=alt.Y(
            f"sum({value}):Q", axis=alt.Axis(tickCount=5),
            # Fixed y range, independent of the selected site.
            scale=alt.Scale(domain=[-2, 400]),
            title="Number of Patients"
        ),
        color=alt.Color(f"{category}:N", scale=alt.Scale(range=["#5C63A2", "#EC7176", "#F4AB32"])),
        # mouse hover visual effect
        size=alt.condition(~mouseover, alt.value(4), alt.value(6)),
        tooltip=[f"{date}:T", f"{category}:N", f"sum({value}):Q"],
    )

    if SiteID != "All":
        base = base.transform_filter(
            alt.FieldEqualPredicate(field=siteid, equal=SiteID)
        )

    line = base.mark_line(size=4).properties(
        width=750, height=400, title="Number of Positive Cases, Patients in ICU, and Deaths"
    ).add_selection(mouseover)

    circle = base.mark_circle(size=70).encode(
        size=alt.condition(~mouseover, alt.value(70), alt.value(100))
    )

    # Uncomment below to show label at the end of lines
    # text = base.mark_text(align='left', fontWeight=400, dx=10, dy=1).encode(
    #     text=f"{category}:N",
    #     size=alt.condition(~mouseover, alt.value(20), alt.value(24))
    # ).transform_window(
    #     sort=[alt.SortField(date, order="descending")],
    #     rank="rank(date)"
    # ).transform_filter(alt.datum.rank == 1)

    # Guidelines
    def guideline(factor):
        """Dashed segment from (start_date, 0) to (end_date_margin, duration * factor)."""
        endpoints = pd.DataFrame({
            "date": [start_date, end_date_margin],
            "value": [0, duration * factor]
        })
        return alt.Chart(endpoints).mark_line(
            color="lightgray", strokeDash=[10, 1], size=3
        ).encode(
            x="date:T",
            y="value:Q"
        )

    g, g10, g20 = (guideline(factor) for factor in (1, 10, 20))

    return apply_theme(g + g10 + g20 + line + circle).interactive()

interact(daily_counts_chart, SiteID=["All"] + SITE_IDS)

interactive(children=(Dropdown(description='SiteID', options=('All', 'FZU', 'FZT', 'FZM', 'FXL', 'FWN', 'FVX',…

<function __main__.daily_counts_chart(SiteID)>

## Align Date - "Since N Positive Cases"

In [5]:
# Align data and make new dataframe
# (the charts below read a `timestep` column from the result; presumably it is
# added by this helper and counts days since 10 positive cases - confirm in
# utils_data_align)
df_aligned = add_aligned_time_steps_column(df, new_positive_cases, date, 10)

def daily_counts_chart(SiteID):
    """Build the aligned daily-counts chart for one site or for all rows.

    Same folding and styling as the date-based chart, but the x axis is the
    `timestep` column of `df_aligned`, restricted to time steps 0 through 8.

    NOTE(review): this reuses the name `daily_counts_chart` already defined in
    the "Total Daily Counts" cell, so running this cell rebinds that name.

    Parameters
    ----------
    SiteID : str
        "All" to chart every row of `df_aligned`, or a site id to keep only
        the rows whose site-id column equals it.

    Returns
    -------
    The themed layered chart (lines, circles) with `.interactive()` applied.
    """
    category = "category"
    value = "value"

    # Mouseover effect
    mouseover = alt.selection_single(on="mouseover", fields=[category])  # nearest=True does not look working

    base = alt.Chart(df_aligned).transform_fold(
        # Fold three quantitative fields, making three rows from one original row.
        fold=[new_positive_cases, patients_in_icu, new_deaths],
        as_=[category, value]
    ).transform_filter(
        # Keep time steps 0..8 only: at later steps fewer institutions
        # contribute, so the summed counts drop and would be misleading.
        (0 <= alt.datum.timestep) & (alt.datum.timestep <= 8)
    ).encode(
        x=alt.X(
            "timestep", axis=alt.Axis(tickCount=7),
            title="Number of days since 10th positive cases",
            scale=alt.Scale(domain=[0, 8])
        ),
        y=alt.Y(
            f"sum({value}):Q", axis=alt.Axis(tickCount=5),
            title="Number of Patients"
        ),
        color=alt.Color(f"{category}:N", scale=alt.Scale(range=["#5C63A2", "#EC7176", "#F4AB32"])),
        # mouse hover visual effect
        size=alt.condition(~mouseover, alt.value(4), alt.value(6)),
        tooltip=["timestep", f"{category}:N", f"sum({value}):Q"],
    )

    if SiteID != "All":
        base = base.transform_filter(
            alt.FieldEqualPredicate(field=siteid, equal=SiteID)
        )

    line = base.mark_line(size=4).properties(
        width=750, height=400, title="Number of Positive Cases, Patients in ICU, and Deaths"
    ).add_selection(mouseover)

    circle = base.mark_circle(size=70).encode(
        size=alt.condition(~mouseover, alt.value(70), alt.value(100))
    )

    # Uncomment below to show label at the end of lines
    # text = base.mark_text(align='left', fontWeight=400, dx=10, dy=1).encode(
    #     text=f"{category}:N",
    #     size=alt.condition(~mouseover, alt.value(20), alt.value(24))
    # ).transform_window(
    #     sort=[alt.SortField(date, order="descending")],
    #     rank="rank(date)"
    # ).transform_filter(alt.datum.rank == 1)

    return apply_theme(line + circle).interactive()

interact(daily_counts_chart, SiteID=["All"] + SITE_IDS)

interactive(children=(Dropdown(description='SiteID', options=('All', 'FZU', 'FZT', 'FZM', 'FXL', 'FWN', 'FVX',…

<function __main__.daily_counts_chart(SiteID)>

## By Individual Sites

In [6]:
def daily_counts_site_chart(SiteID):
    """Build the per-site positive-cases chart, optionally highlighting one site.

    Every site is drawn as a line with circle markers over the date axis.

    Parameters
    ----------
    SiteID : str
        "All" colors each site by its id and shows a "Site" legend. Any other
        value draws all sites in light gray without a legend and overlays the
        rows whose site-id column equals `SiteID` in steel blue, with a text
        label at that site's latest date.

    Returns
    -------
    The themed layered chart with `.interactive()` applied.
    """
    base = alt.Chart(df).encode(
        x=alt.X(
            f"{date}:T",
            axis=alt.Axis(tickCount=7),
            title="",
            scale=alt.Scale(domain=[start_date_margin, end_date_margin])
        ),
        y=alt.Y(
            f"{new_positive_cases}:Q",
            axis=alt.Axis(tickCount=5),
            title=None
        )
    ).properties(width=700, height=500, title="Number of Positive Cases")

    # Color encoding
    if SiteID == "All":
        base = base.encode(color=alt.Color(siteid, title="Site"))
    else:
        base = base.encode(
            color=alt.Color(siteid, scale=alt.Scale(range=["lightgray"]), legend=None)
        )

    line = base.mark_line(size=4)
    circle = line.mark_circle(size=80)

    # Only when seeing individual data.
    # The highlight layers are always built; with SiteID == "All" the filter
    # presumably matches no rows, leaving them empty.
    # The filter goes through the `siteid` column alias, like the predicate
    # used by the total-counts charts, instead of a hard-coded field name.
    hl_line = base.mark_line(size=4).encode(
        color=alt.value("steelblue")
    ).transform_filter(
        alt.FieldEqualPredicate(field=siteid, equal=SiteID)
    )
    hl_circle = hl_line.mark_circle(size=80)

    # Label the highlighted site at its most recent date: rank rows by date
    # in descending order and keep rank 1.
    text = hl_line.mark_text(align='left', fontWeight=400, dx=10, dy=1).encode(
        text=f"{siteid}:N",
        size=alt.value(20),
    ).transform_window(
        sort=[alt.SortField(date, order="descending")],
        rank=f"rank({date})"
    ).transform_filter(alt.datum.rank == 1)

    return apply_theme(line + text + hl_line + circle + hl_circle).interactive()

interact(daily_counts_site_chart, SiteID=["All"] + SITE_IDS)

interactive(children=(Dropdown(description='SiteID', options=('All', 'FZU', 'FZT', 'FZM', 'FXL', 'FWN', 'FVX',…

<function __main__.daily_counts_site_chart(SiteID)>

## Align Date - "Since N Positive Cases"

In [7]:
# Align data and make new dataframe
# NOTE(review): same call as in the earlier aligned total-counts cell; it is
# kept here so this cell does not rely on that one having been run.
df_aligned = add_aligned_time_steps_column(df, new_positive_cases, date, 10)

def daily_counts_site_chart(SiteID):
    """Build the aligned per-site positive-cases chart.

    Same layers as the date-based per-site chart, but the x axis is the
    `timestep` column of `df_aligned`, restricted to time steps >= 0.

    NOTE(review): this reuses the name `daily_counts_site_chart` already
    defined in the "By Individual Sites" cell, so running this cell rebinds
    that name.

    Parameters
    ----------
    SiteID : str
        "All" colors each site by its id and shows a "Site" legend. Any other
        value draws all sites in light gray without a legend and overlays the
        rows whose site-id column equals `SiteID` in steel blue, with a text
        label at that site's latest date.

    Returns
    -------
    The themed layered chart with `.interactive()` applied.
    """
    base = alt.Chart(df_aligned).encode(
        x=alt.X(
            "timestep",
            axis=alt.Axis(tickCount=7),
            title="Number of days since 10th positive cases",
        ),
        y=alt.Y(
            f"{new_positive_cases}:Q",
            axis=alt.Axis(tickCount=5),
            title=None
        )
    ).transform_filter(
        alt.datum.timestep >= 0
    ).properties(width=700, height=500, title="Number of Positive Cases")

    # Color encoding
    if SiteID == "All":
        base = base.encode(color=alt.Color(siteid, title="Site"))
    else:
        base = base.encode(
            color=alt.Color(siteid, scale=alt.Scale(range=["lightgray"]), legend=None)
        )

    line = base.mark_line(size=4)
    circle = line.mark_circle(size=80)

    # Only when seeing individual data.
    # The highlight layers are always built; with SiteID == "All" the filter
    # presumably matches no rows, leaving them empty.
    # The filter goes through the `siteid` column alias, like the predicate
    # used by the total-counts charts, instead of a hard-coded field name.
    hl_line = base.mark_line(size=4).encode(
        color=alt.value("steelblue")
    ).transform_filter(
        alt.FieldEqualPredicate(field=siteid, equal=SiteID)
    )
    hl_circle = hl_line.mark_circle(size=80)

    # Label the highlighted site at its most recent date: rank rows by date
    # in descending order and keep rank 1.
    text = hl_line.mark_text(align='left', fontWeight=400, dx=10, dy=1).encode(
        text=f"{siteid}:N",
        size=alt.value(20),
    ).transform_window(
        sort=[alt.SortField(date, order="descending")],
        rank=f"rank({date})"
    ).transform_filter(alt.datum.rank == 1)

    return apply_theme(line + text + hl_line + circle + hl_circle).interactive()

interact(daily_counts_site_chart, SiteID=["All"] + SITE_IDS)

interactive(children=(Dropdown(description='SiteID', options=('All', 'FZU', 'FZT', 'FZM', 'FXL', 'FWN', 'FVX',…

<function __main__.daily_counts_site_chart(SiteID)>