In [None]:
# Auto-reload frequently changed files
%load_ext autoreload
%autoreload 2
%aimport utils

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
from os.path import join
from web import for_website

from constants import COLUMNS, DATA_AGGREGATE_TYPES
from utils import (
    read_combined_daily_counts_df, read_combined_by_country_daily_counts_df, read_combined_by_site_daily_counts_df,
    apply_theme
)

In [None]:
# Common settings that must be defined before rendering any visualization.

# Titles
# Number of distinct site IDs found in the combined by-site data file. This is
# computed directly (without binding the IDs to a name) because `SITES` below
# is the hard-coded roster used for anonymization and coloring.
NUM_SITES = len(read_combined_by_site_daily_counts_df()[COLUMNS.SITE_ID].unique())
DATA_DATE = "2020-04-07"
VIS_DATE = "2020-04-09"
NUM_PATIENTS = "15,427"
SUBTITLE = f"Data as of {DATA_DATE} | {NUM_SITES} Sites | Plots generated on {VIS_DATE}"

SAVE_DIR = join("..", "output") # Where to save visualization *.PNG files

# Colors
COMBINED = "All countries"
COMBINED_COLOR = "#444444"

# COUNTRIES and COUNTRY_COLOR are parallel lists: the i-th color belongs to the i-th country.
COUNTRIES = ["France", "Germany", "Italy", "Singapore", "USA"]
COUNTRY_COLOR = ["#0072B2", "#E69F00", "#009E73", "#CC79A7", "#D55E00"]
assert len(COUNTRIES) == len(COUNTRY_COLOR), "every country needs exactly one color"
COLOR_BY_COUNTRY = dict(zip(COUNTRIES, COUNTRY_COLOR))

COUNTRIES_AND_COMBINED = [COMBINED] + COUNTRIES
COUNTRY_AND_COMBINED_COLOR = [COMBINED_COLOR] + COUNTRY_COLOR
COLOR_BY_COUNTRY_AND_COMBINED = dict(zip(COUNTRIES_AND_COMBINED, COUNTRY_AND_COMBINED_COLOR))

# Site-level colors
# SITES and SITES_COUNTRY are parallel lists: the i-th entry of SITES_COUNTRY is
# the country of the i-th site. Anonymous labels are assigned by list position.
SITES = ['APHP', 'FRBDX', 'UKER', 'UKFR', 'ICSM1', 'ICSM20', 'ICSM5', 'POLIMI', 'BCH', 'BIDMC', 'CHOP', 'KUMC', 'MAYOC', 'MGB', 'MUSC', 'UCLA', 'UMICH', 'UPenn', 'UTSW']
SITES_ANONYMOUS = [f"SITE {position:02d}" for position in range(1, len(SITES) + 1)]
SITES_TO_ANONYMOUS = dict(zip(SITES, SITES_ANONYMOUS))
SITES_COUNTRY = ["France", "France", "Germany", "Germany", "Italy", "Italy", "Italy", "Italy", "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA"]
assert len(SITES) == len(SITES_COUNTRY), "every site needs exactly one country"
# Each site inherits the color of its country
SITE_COLOR = [COLOR_BY_COUNTRY[site_country] for site_country in SITES_COUNTRY]
COLOR_BY_SITE = dict(zip(SITES, SITE_COLOR))

# Generic 20-color categorical palette
COLOR20 = [
    "#3366cc", "#dc3912", "#ff9900", "#109618", "#990099", "#0099c6", 
    "#dd4477", "#66aa00", "#b82e2e", "#316395", "#994499", "#22aa99", 
    "#aaaa11", "#6633cc", "#e67300", "#8b0707", "#651067", "#329262", "#5574a6", "#3b3eac"
]

# Required Setups
- All combined datasets should be placed in `../data/combined` (e.g., `../data/combined/DailyCounts-Combinedyymmdd.csv` for the DailyCounts file).
- To save PNG files for visualizations, a folder named "output" should be present (i.e., `../output/`).

# Data preprocessing

In [None]:
# Column name given to the melted "variable" column by preprocess_daily_df;
# the charts below filter on this column to pick a measure.
CATEGORY = "category"

def preprocess_daily_df(df_dc):
    """
    Reshape a wide daily-counts frame into long form, one row per
    (site, date, category).

    For each of the three categories (new positive cases, patients in ICU,
    new deaths) the rows of that category receive:
      - "under": the reported count,
      - "upper": the reported count plus the masked upper bound,
      - COLUMNS.NUM_PATIENTS: the reported count plus half the masked upper bound,
      - COLUMNS.NUM_SITES: unmasked sites plus masked sites.

    The per-category helper columns are dropped from the returned frame.
    """
    measures = [COLUMNS.NEW_POSITIVE_CASES, COLUMNS.PATIENTS_IN_ICU, COLUMNS.NEW_DEATHS]

    # Helper columns carried through the melt and removed at the end
    helper_columns = [
        COLUMNS.MASKED_UPPER_BOUND_NEW_POSITIVE_CASES,
        COLUMNS.MASKED_UPPER_BOUND_PATIENTS_IN_ICU,
        COLUMNS.MASKED_UPPER_BOUND_NEW_DEATHS,
        COLUMNS.UNMASKED_SITES_NEW_POSITIVE_CASES,
        COLUMNS.UNMASKED_SITES_PATIENTS_IN_ICU,
        COLUMNS.UNMASKED_SITES_NEW_DEATHS,
        COLUMNS.MASKED_SITES_NEW_POSITIVE_CASES,
        COLUMNS.MASKED_SITES_PATIENTS_IN_ICU,
        COLUMNS.MASKED_SITES_NEW_DEATHS,
    ]

    # Wide to long: every remaining column becomes a (category, value) pair
    long_df = pd.melt(df_dc, id_vars=[COLUMNS.SITE_ID, COLUMNS.DATE] + helper_columns)
    long_df = long_df.rename(columns={"variable": CATEGORY, "value": COLUMNS.NUM_PATIENTS})

    for measure in measures:
        rows = long_df[CATEGORY] == measure

        # Snapshot the inputs before NUM_PATIENTS is overwritten below
        reported = long_df.loc[rows, COLUMNS.NUM_PATIENTS]
        masked_bound = long_df.loc[rows, f"masked_upper_bound_{measure}"]

        long_df.loc[rows, "upper"] = reported + masked_bound
        long_df.loc[rows, "under"] = reported
        long_df.loc[rows, COLUMNS.NUM_PATIENTS] = reported + masked_bound / 2.0

        # Number of sites; the assignment aligns on the index, so only
        # the rows of this measure are written
        site_total = long_df[f"unmasked_sites_{measure}"] + long_df[f"masked_sites_{measure}"]
        long_df.loc[rows, COLUMNS.NUM_SITES] = site_total

    # Drop the helper columns, which are no longer needed
    return long_df.drop(columns=helper_columns)

# Read the site-level and all-countries files and convert each to long form
df_dc_site_level = preprocess_daily_df(read_combined_by_site_daily_counts_df())
df_dc_combined = preprocess_daily_df(read_combined_daily_counts_df())

# Stack the by-country rows on top of the all-countries rows
df_dc = pd.concat([
    preprocess_daily_df(read_combined_by_country_daily_counts_df()),
    df_dc_combined
])

# Relabel the all-countries rows with the display name used by the charts
df_dc.loc[df_dc[COLUMNS.SITE_ID].eq("Combined"), COLUMNS.SITE_ID] = COMBINED

# Anonymize: swap every known site ID for its "SITE nn" label
df_dc_site_level[COLUMNS.SITE_ID] = df_dc_site_level[COLUMNS.SITE_ID].replace(SITES_TO_ANONYMOUS)

df_dc

# Visualizations

In [None]:
# Measures offered in the chart dropdown and iterated over for the heatmaps
CATEGORIES = [COLUMNS.NEW_POSITIVE_CASES, COLUMNS.NEW_DEATHS, COLUMNS.PATIENTS_IN_ICU]

# Human-readable label of each measure, used in chart titles
TITLE_BY_CATEGORY = {
    COLUMNS.NEW_POSITIVE_CASES: "positive cases",
    COLUMNS.NEW_DEATHS: "deaths",
    COLUMNS.PATIENTS_IN_ICU: "ICU admissions"
}

def dailycount_by_date(df=df_dc, is_cum=True, is_only_combined=False, is_site_level=False):
    """
    Build an interactive chart of daily counts over time, with a bar chart of
    the number of contributing sites underneath.

    The measure shown is chosen through a dropdown bound to CATEGORY (initially
    COLUMNS.NEW_POSITIVE_CASES); series can be toggled through the legend.

    Parameters
    ----------
    df : DataFrame
        Long-format frame as returned by `preprocess_daily_df`. The columns
        referenced here are COLUMNS.SITE_ID, COLUMNS.DATE, CATEGORY,
        COLUMNS.NUM_PATIENTS, COLUMNS.NUM_SITES, "upper" and "under".
        The default is the `df_dc` object that exists when this function is defined.
    is_cum : bool
        If True, plot running sums (per COLUMNS.SITE_ID, ordered by date) of the
        patient count and of the upper/under bounds instead of the daily values.
    is_only_combined : bool
        If True, keep only rows whose COLUMNS.SITE_ID equals COMBINED;
        otherwise those rows are excluded.
    is_site_level : bool
        If True, use the site color scale, label the series "Site" in the
        tooltip, and title the chart "... by site" instead of "... by country".

    Returns
    -------
    The result of `apply_theme` on the vertically concatenated charts, with the
    shared x scale, title and the three selections attached.
    """

    # Selections
    nearest = alt.selection(type="single", nearest=True, on="mouseover", fields=[COLUMNS.DATE], empty='none', clear="mouseout")
    dailycount_dropdown = alt.binding_select(options=CATEGORIES)
    dailycount_selection = alt.selection_single(fields=[CATEGORY], bind=dailycount_dropdown, name="Value", init={CATEGORY: COLUMNS.NEW_POSITIVE_CASES})
    legend_selection = alt.selection_multi(fields=[COLUMNS.SITE_ID], bind="legend")

    # Vertical rule drawn at the date nearest to the mouse pointer
    nearest_rule = alt.Chart(df).mark_rule(color="black").encode(
        x=f"{COLUMNS.DATE}:T",
        size=alt.value(0.5)
    ).transform_filter(
        nearest
    )

    # Color scale: countries by default; later flags override earlier ones
    color_scale = alt.Scale(domain=COUNTRIES, range=COUNTRY_COLOR)
    if is_only_combined: 
        color_scale = alt.Scale(domain=[COMBINED], range=[COMBINED_COLOR])
    if is_site_level:
        # NOTE(review): both an explicit `range` and a `scheme` are passed here;
        # only one of them can take effect -- confirm which one is intended.
        color_scale = alt.Scale(domain=SITES_ANONYMOUS, range=SITE_COLOR, scheme="category20")

    # Filter by the dropdown measure and by the legend selection
    filtered_chart = alt.Chart(df).transform_filter(
        dailycount_selection
    ).transform_filter(
        legend_selection
    )

    # Show either only the combined series or everything except it
    if is_only_combined:
        filtered_chart = filtered_chart.transform_filter(
            alt.datum[COLUMNS.SITE_ID] == COMBINED
        )
    else:
        filtered_chart = filtered_chart.transform_filter(
            alt.datum[COLUMNS.SITE_ID] != COMBINED
        )

    # The series are sites on site-level charts and countries otherwise
    series_label = "Site" if is_site_level else "Country"
    tooltip = [
        alt.Tooltip(COLUMNS.SITE_ID, title=series_label),
        alt.Tooltip(COLUMNS.NUM_SITES, title="# of sites"),
        alt.Tooltip(COLUMNS.NUM_PATIENTS, title="# of patients"),
    ]

    # Calculate cumulative values
    y_field = COLUMNS.NUM_PATIENTS
    upper = "upper"
    under = "under"
    if is_cum:
        # One window transform yields all three running sums, since they share
        # the same ordering (by date) and grouping (per series)
        filtered_chart = filtered_chart.transform_window(
            cum_num_patients=f"sum({COLUMNS.NUM_PATIENTS})",
            cum_upper="sum(upper)",
            cum_under="sum(under)",
            sort=[{"field": COLUMNS.DATE}],
            groupby=[COLUMNS.SITE_ID]
        )
        upper = "cum_upper"
        under = "cum_under"
        y_field = "cum_num_patients"
        tooltip += [alt.Tooltip("cum_num_patients:Q", title="Cumulative # of patients")]

    # Render
    line = filtered_chart.mark_line(size=3).encode(
        x=alt.X(
            f"{COLUMNS.DATE}:T", 
            axis=alt.Axis(tickCount=7, grid=True, labels=False, ticks=False, domain=True), 
            title=None
        ),
        y=alt.Y(f"{y_field}:Q", axis=alt.Axis(tickCount=5), title="Number of patients"),
        color=alt.Color(f"{COLUMNS.SITE_ID}:N", scale=color_scale, legend=alt.Legend(title=None)),
        tooltip=tooltip
    )
    # Points on the line; the point nearest to the pointer is drawn larger
    circle = line.mark_circle(size=30).encode(
        size=alt.condition(~nearest, alt.value(30), alt.value(60))
    )
    # Band between the "under" and "upper" values
    errorband = filtered_chart.mark_errorband().encode(
        x=alt.X(f"{COLUMNS.DATE}:T", axis=alt.Axis(tickCount=7), title=None),
        y=alt.Y(f"{upper}:Q", title=""), 
        y2=f"{under}:Q",
        color=alt.Color(f"{COLUMNS.SITE_ID}:N", scale=color_scale, legend=alt.Legend(title=None)),
        tooltip=tooltip
    )

    top_line = (circle + line + errorband + nearest_rule).resolve_scale(color="shared").properties(width=750, height=400).interactive()

    # Number of sites contributing on each date
    bottom_bar = filtered_chart.mark_bar(size=5).encode(
        x=alt.X(f"{COLUMNS.DATE}:T", axis=alt.Axis(tickCount=7), title=None),
        y=alt.Y(f"{COLUMNS.NUM_SITES}:Q", title="# of sites"),
        color=alt.Color(f"{COLUMNS.SITE_ID}:N", scale=color_scale, legend=alt.Legend(title=None)),
        tooltip=tooltip
    ).properties(height=60)
    
    bottom_bar = (bottom_bar + nearest_rule).interactive()

    # Title: "[Cumulative ]daily counts by site|country", first letter capitalized
    title = "daily counts"
    title = f"Cumulative {title}" if is_cum else f"{title}"
    title = f"{title} by site" if is_site_level else f"{title} by country"
    title = title.capitalize()

    # Apply Theme
    result_vis = apply_theme(top_line & bottom_bar).resolve_scale(x="shared").properties(title={
        "text": title, 
        "subtitle": SUBTITLE,
        "subtitleColor": "gray",
        "dx": 60
    }).add_selection(
        legend_selection
    ).add_selection(
        nearest
    ).add_selection(
        dailycount_selection
    )
    
    return result_vis

In [None]:
# Cumulative chart restricted to the single combined ("All countries") series,
# built from the default data frame of dailycount_by_date
dailycount = dailycount_by_date(is_cum=True, is_only_combined=True)

# NOTE(review): for_website is a project helper; presumably it exports the chart for the website -- confirm
for_website(dailycount, "Daily Count", "Daily counts") # TODO: Remove this before deploying notebook
# NOTE: every section of this notebook uses this same output filename; choose a distinct one before uncommenting
# save(dailycount, join(SAVE_DIR, "dailycount_by_date.png")) # Uncomment this to save *.png files

# Last expression of the cell: renders the chart as the cell output
dailycount

## Daily counts by country

In [None]:
# Non-cumulative chart with one series per country (the combined series is excluded)
dailycount = dailycount_by_date(is_cum=False)

# NOTE(review): for_website is a project helper; presumably it exports the chart for the website -- confirm
for_website(dailycount, "Daily Count", "Daily counts by country") # TODO: Remove this before deploying notebook
# NOTE: every section of this notebook uses this same output filename; choose a distinct one before uncommenting
# save(dailycount, join(SAVE_DIR, "dailycount_by_date.png")) # Uncomment this to save *.png files

# Last expression of the cell: renders the chart as the cell output
dailycount

## Cumulative daily counts by country

In [None]:
# Cumulative chart with one series per country (the combined series is excluded)
dailycount = dailycount_by_date(is_cum=True)

# NOTE(review): for_website is a project helper; presumably it exports the chart for the website -- confirm
for_website(dailycount, "Daily Count", "Cumulative daily counts by country") # TODO: Remove this before deploying notebook
# NOTE: every section of this notebook uses this same output filename; choose a distinct one before uncommenting
# save(dailycount, join(SAVE_DIR, "dailycount_by_date.png")) # Uncomment this to save *.png files

# Last expression of the cell: renders the chart as the cell output
dailycount

## Daily counts by site

In [None]:
# Non-cumulative chart with one series per anonymized site, built from the site-level frame
dailycount = dailycount_by_date(df=df_dc_site_level, is_cum=False, is_site_level=True)

# NOTE(review): for_website is a project helper; presumably it exports the chart for the website -- confirm
for_website(dailycount, "Daily Count", "Daily counts by site") # TODO: Remove this before deploying notebook
# NOTE: every section of this notebook uses this same output filename; choose a distinct one before uncommenting
# save(dailycount, join(SAVE_DIR, "dailycount_by_date.png")) # Uncomment this to save *.png files

# Last expression of the cell: renders the chart as the cell output
dailycount

## Cumulative daily counts by site

In [None]:
# Cumulative chart with one series per anonymized site, built from the site-level frame
dailycount = dailycount_by_date(df=df_dc_site_level, is_cum=True, is_site_level=True)

# NOTE(review): for_website is a project helper; presumably it exports the chart for the website -- confirm
for_website(dailycount, "Daily Count", "Cumulative daily counts by site") # TODO: Remove this before deploying notebook
# NOTE: every section of this notebook uses this same output filename; choose a distinct one before uncommenting
# save(dailycount, join(SAVE_DIR, "dailycount_by_date.png")) # Uncomment this to save *.png files

# Last expression of the cell: renders the chart as the cell output
dailycount

## Values by the day of the week

In [None]:
import datetime

# Add a "week" column: the ISO week number parsed from each 'YYYY-MM-DD' date string
df_dc["week"] = df_dc["date"].map(
    lambda day: datetime.datetime.strptime(day, '%Y-%m-%d').isocalendar()[1]
)

def dailycount_by_day_and_week(country, category):
    """
    Build a heatmap of summed patient counts for one series and one measure.

    Rows of the module-level `df_dc` are filtered to the given `category`
    (matched against CATEGORY) and `country` (matched against COLUMNS.SITE_ID).
    The x axis is the "week" column, the y axis the day of the week of "date",
    and the cell color the sum of "num_patients".

    `country` must be a key of COLOR_BY_COUNTRY_AND_COMBINED and `category` a
    key of TITLE_BY_CATEGORY; both are looked up for the chart title.
    """
    # Keep only the rows of the requested measure and series
    matches_category = alt.datum[CATEGORY] == category
    matches_country = alt.datum[COLUMNS.SITE_ID] == country
    base = alt.Chart(df_dc).transform_filter(matches_category).transform_filter(matches_country)

    # One rectangle per (week, weekday) cell
    heatmap = base.mark_rect().encode(
        y=alt.Y("day(date):O",title="Day of the week"),
        x=alt.X('week:O', title="Week of the year"),
        color=alt.Color('sum(num_patients):Q', title=None, scale=alt.Scale(scheme="lightorange"))
    )

    # Title text is tinted with the color of the series
    title_spec = {
        "text": f"New {TITLE_BY_CATEGORY[category]} ({country})",
        "subtitle": SUBTITLE,
        "color": COLOR_BY_COUNTRY_AND_COMBINED[country],
        "subtitleColor": "gray",
        "dx": 60
    }
    return heatmap.properties(height=220, width=380, title=title_spec)

# Set to True to also write each stacked figure to SAVE_DIR as a PNG
is_save = False

# One figure per measure: the heatmaps of all series stacked vertically
for category in CATEGORIES:
    stacked = alt.vconcat()
    for country in COUNTRIES_AND_COMBINED:
        stacked &= dailycount_by_day_and_week(country=country, category=category)

    # Apply Theme; each heatmap keeps its own color scale, the x scale is shared
    theme_options = dict(legend_orient="right", legend_stroke_color="white", legend_padding=0)
    result_vis = apply_theme(stacked, **theme_options).resolve_scale(color="independent", x="shared")

    # Display and save
    result_vis.display()
    if not is_save:
        continue
    save(result_vis, join(SAVE_DIR, f"dailycount_by_day_and_week_{category}.png"))