In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils

import altair as alt
import pandas as pd
import numpy as np
from utils import read_combined_by_site_demographics_df
from constants import COLUMNS
from vega_datasets import data
from utils import apply_theme
from web import for_website

In [None]:
# Title components reused by the charts in this notebook.
DATA_DATE = "2020-04-11"

# Count the distinct site identifiers present in the combined demographics table.
site_id_column = read_combined_by_site_demographics_df()[COLUMNS.SITE_ID]
NUM_SITES = len(site_id_column.unique())

SUBTITLE = f"Data as of {DATA_DATE} | {NUM_SITES} Sites"

In [None]:
# Load the site table; the first row of the file supplies the column names.
sites = pd.read_csv('../data/sites.csv', header=[0])

# Preview the first rows as this cell's output.
sites.head()

In [None]:
# Country outlines taken from the vega_datasets world-110m TopoJSON file.
countries = alt.topo_feature(data.world_110m.url, 'countries')

# Parallel lists: position i in each list describes the same country.
COUNTRIES = ["France", "Germany", "Italy", "Singapore", "USA"]
COUNTRY_ID = [250, 276, 380, 702, 840]  # matched against the `id` field of the map features
COUNTRY_COLOR = ["#0072B2", "#E69F00", "#009E73", "#CC79A7", "#D55E00"]

# Lookup tables and Altair scales derived from the lists above.
COLOR_BY_COUNTRY = dict(zip(COUNTRIES, COUNTRY_COLOR))
COLOR_BY_ID = dict(zip(COUNTRY_ID, COUNTRY_COLOR))
color_scale = alt.Scale(domain=COUNTRIES, range=COUNTRY_COLOR)
color_id_scale = alt.Scale(domain=COUNTRY_ID, range=COUNTRY_COLOR)

# Base layer: world map, coloured by feature id through `color_id_scale`.
# NOTE(review): `background` and `locations` both encode `color`, each with its
# own scale; layered charts share scales unless resolved otherwise -- confirm
# the rendered colours and legend are as intended.
background = (
    alt.Chart(countries)
    .mark_geoshape(fill="lightgray", stroke="white")
    .encode(color=alt.Color("id:N", scale=color_id_scale))
)

# One circle per (City, Country) group: mean coordinates, summed hospital
# count, and the number of rows (sites) in the group.
site_aggregates = dict(
    Latitude_Mean="mean(Latitude)",
    Longitude_Mean="mean(Longitude)",
    Hospitals_Total="sum(Hospitals)",
    Sites_Total="count()",
)
locations = (
    alt.Chart(sites)
    .transform_aggregate(groupby=["City", "Country"], **site_aggregates)
    .mark_circle()
    .encode(
        longitude="Longitude_Mean:Q",
        latitude="Latitude_Mean:Q",
        size=alt.Size("Hospitals_Total:Q", title="Number of Hospitals"),
        color=alt.Color("Country:N", scale=color_scale),
        tooltip=["City:N", "Sites_Total:Q", "Hospitals_Total:Q"],
    )
)

# City names drawn just below each circle, in fixed-size black text.
labels = locations.mark_text(align="center", baseline="top", dy=7).encode(
    text="City",
    size=alt.value(10),
    color=alt.value("black"),
)

# Projection settings for the USA view.
usa_projection = dict(
    type="mercator",
    scale=380,                        # magnification
    center=[-95, 40],                 # [lon, lat]
    clipExtent=[[0, 0], [400, 300]],  # [[left, top], [right, bottom]]
)
north_america_map = (
    (background + locations + labels)
    .project(**usa_projection)
    .properties(title="Sites in USA", width=400, height=300)
)

north_america_map

In [None]:
# Europe view: the same background / locations / labels layers, re-projected.
europe_projection = {
    "type": "mercator",
    "scale": 800,                        # magnification
    "center": [7, 47],                   # [lon, lat]
    "clipExtent": [[0, 0], [400, 300]],  # [[left, top], [right, bottom]]
}
europe_layers = background + locations + labels
europe_map = europe_layers.project(**europe_projection).properties(
    title="Sites in Europe",
    width=400,
    height=300,
)

europe_map

In [None]:
# Asia view: the shared layers with a Mercator projection centred on
# longitude 100, latitude 0.
asia_map = (
    (background + locations + labels)
    .properties(title="Sites in Asia", width=400, height=300)
    .project(
        type="mercator",
        scale=400,                        # magnification
        center=[100, 0],                  # [lon, lat]
        clipExtent=[[0, 0], [400, 300]],  # [[left, top], [right, bottom]]
    )
)

asia_map

In [None]:
# Display names for the lab identifiers found in the "Lab" column.
consistent_loinc = {
    "alanine aminotransferase (ALT)": "Alanine aminotransferase (U/L)",
    "albumin": "Albumin (g/dL)",
    "aspartate aminotransferase (AST)": "Aspartate aminotransferase (U/L)",
    "total bilirubin": "Total bilirubin (mg/dL)",
    "C-reactive protein (CRP)": "C-reactive protein (mg/dL)",
    "creatinine": "Creatinine (mg/dL)",
    "lactate dehydrogenase (LDH)": "Lactate dehydrogenase (U/L)",
    "cardiac troponin": "Cardiac troponin (ng/mL)",
    "prothrombin time (PT)": "Prothrombin time (s)",
    "white blood cell count (Leukocytes)": "White blood cell count (10*3/uL)",
    "lymphocyte count": "Lymphocyte count (10*3/uL)",
    "neutrophil count": "Neutrophil count (10*3/uL)",
    "D-dimer": "D-dimer",
    "procalcitonin": "Procalcitonin (ng/mL)",
}

# Read the lab variation table (first row is the header) and swap the raw
# category codes for the labels shown in the chart. Each replace matches whole
# cell values anywhere in the frame.
labs = (
    pd.read_csv('../data/Lab_VariationByCountry.csv', header=[0])
    .replace('SITE_AVE', 'Within site variation')
    .replace('ALL', 'All countries')
    .replace('US', 'USA')
)

# Rename every lab through the lookup table; a lab missing from the table
# raises KeyError.
labs["Lab"] = labs["Lab"].map(lambda lab_name: consistent_loinc[lab_name])

labs.head()

In [None]:
# Interactive controls: a dropdown that picks which lab is shown, and a
# legend-bound multi-selection that toggles countries.
input_dropdown = alt.binding_select(options=np.unique(labs.Lab))
selection = alt.selection_single(
    fields=['Lab'],
    bind=input_dropdown,
    name='Lab ',
    # .iloc takes the first row by position, so this does not depend on the
    # frame's index containing the label 0.
    init={'Lab': labs.Lab.iloc[0]},
)
legend_selection = alt.selection_multi(fields=["Country"], bind="legend")

# Categories and colours for this chart. These rebind the COUNTRIES,
# COUNTRY_COLOR and color_scale names that the map cells defined earlier.
COUNTRIES = ["All countries", "France", "Germany", "Italy", "USA", "Within site variation"] # Removed "Singapore"
COUNTRY_COLOR = ["#444444", "#0072B2", "#E69F00", "#009E73", "#D55E00", "#444444"] # Removed "#CC79A7"
color_scale = alt.Scale(domain=COUNTRIES, range=COUNTRY_COLOR)
tick_size = 40  # shared width of the bars and of the mean ticks

# Bar per category spanning `y2`..`y` (mean -/+ one standard deviation, both
# computed by the transform_calculate step below).
base = alt.Chart(labs).mark_bar(size=tick_size).encode(
    # Each channel uses its own channel class (X for x, Stroke for stroke),
    # matching the `mean` layer below.
    x=alt.X('Country:N'),
    color=alt.Color('Country:N', scale=color_scale, title=None),
    stroke=alt.Stroke('Country:N', scale=color_scale),
    strokeWidth=alt.value(1),
    y=alt.Y('y:Q', title='Mean (SD)'),
    y2=alt.Y2('y2:Q'),
    tooltip=[
        alt.Tooltip("Country", title="Category"),
        alt.Tooltip("mean_val", title="Mean", format=".2f"),
        alt.Tooltip("stdev_val", title="Standard deviation", format=".2f"),
        alt.Tooltip("days_since_positive", title="Days since positive")
    ]
)

# White tick drawn over each bar at the mean value.
mean = alt.Chart(labs).mark_tick(size=tick_size, thickness=2).encode(
    x=alt.X('Country:N', title=None),
    opacity=alt.value(1),
    color=alt.value('white'),
    y=alt.Y('mean_val:Q')
)

plot = alt.layer(
    base, mean
).add_selection(
    selection
).add_selection(
    legend_selection
).transform_filter(
    selection                   # keep only the lab chosen in the dropdown
).transform_filter(
    legend_selection            # keep only the countries picked in the legend
).transform_filter(
    alt.FieldOneOfPredicate(field='Country', oneOf=COUNTRIES)
).transform_filter(
    alt.datum["days_since_positive"] == 0   # day 0 only, as stated in the title
).transform_calculate(
    y='datum.mean_val + datum.stdev_val',
    y2='datum.mean_val - datum.stdev_val',
).properties(
    title={
        "text": ["Lab Variation Across Sites on Day 0"], 
        "dx": 50,
        "subtitle": SUBTITLE,
        "subtitleColor": "gray",
        "anchor": "start",
    },
    width=350, height=400
)

# Apply the project's shared chart theme via the `apply_theme` helper.
plot = apply_theme(plot, legend_orient="right")

for_website(plot, "Labs", "Lab variation across sites on Day 0") # To use in website

plot