In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils

import altair as alt
import pandas as pd
import requests
import io

from constants import COLUMNS
from utils import apply_theme, get_visualization_subtitle
from web import for_website

# Data Preprocessing

## Participating Sites from Figshare

In [None]:
# Download the participating-sites table (CSV) from Figshare.
figshare_url = "https://ndownloader.figshare.com/files/22300929"

# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as CSV.
response = requests.get(figshare_url, timeout=30)
response.raise_for_status()

sites = pd.read_csv(io.StringIO(response.content.decode('utf-8')))

sites.head()

In [None]:
# Alternative to data.world_110m.url: this TopoJSON file is used because it includes Singapore
alternative_world_url = "https://raw.githubusercontent.com/danutzthe/topojson_world_map/master/world-110m.json"

countries = alt.topo_feature(alternative_world_url, 'countries')

# Participating countries. The three lists are parallel: position i of each
# list describes the same country. COUNTRY_IDS holds the feature ids used to
# look the countries up in the TopoJSON file.
COUNTRIES = ["France", "Germany", "Italy", "Singapore", "USA"]
COUNTRY_COLOR = ["#0072B2", "#E69F00", "#009E73", "#CC79A7", "#D55E00"]
COUNTRY_IDS = [250, 276, 380, 702, 840]
color_scale = alt.Scale(domain=COUNTRIES, range=COUNTRY_COLOR)

# Get codes for countries (one row per country id), adding a row for Singapore
code_df = pd.read_json(
    "https://raw.githubusercontent.com/alisle/world-110m-country-codes/master/world-110m-country-codes.json"
)
singapore_df = pd.DataFrame(data=[{"code": "Singapore", "id": 702, "name": "Singapore"}])
code_df = pd.concat([code_df, singapore_df]).set_index("id")

# Display attributes of the participating countries, keyed by country id.
# Scalars ("fill", "opacity") are broadcast to every row.
participating_df = pd.DataFrame({
    "id": COUNTRY_IDS,
    "color": COUNTRY_COLOR,
    "fill": "lightgray",
    "opacity": 1,
    "Country": COUNTRIES,
}).set_index("id")

# Left join: every country keeps a row; non-participating ones get NaN attributes
df = code_df.join(participating_df).reset_index()

# Set default encodings
# TODO: Add color below if needed
# Countries without an explicit opacity (NaN after the join) are dimmed
df.loc[df["opacity"] != 1, "opacity"] = 0.2

# Attach the site rows to their country. `sites` itself is left untouched so
# that this cell can be re-run without reloading the data.
df = df.set_index("Country").join(sites.set_index("Country")).reset_index()

# World map in light gray; per-country opacity is looked up from `df` by id
background = alt.Chart(countries).mark_geoshape(
    fill="lightgray",
    stroke='lightgray'
).encode(
    opacity=alt.Opacity("opacity:Q", legend=None)
).transform_lookup(
    lookup="id",
    from_=alt.LookupData(df, "id", ["opacity", "color"])
)

# Get lists of sites: "Sites" holds the comma-separated unique healthcare
# systems of the row's city (NaN for rows without a city)
UNIQUE_CITIES = sites["City"].unique().tolist()
sites_by_city = sites.groupby("City")["Healthcare System"].agg(
    lambda systems: ", ".join(systems.unique())
)
df["Sites"] = df["City"].map(sites_by_city)

In [None]:
# Fields shown in the tooltip of each city marker
city_tooltip = [
    alt.Tooltip('Country:N'),
    alt.Tooltip('City:N'),
    alt.Tooltip('Sites:N', title="Participating sites"),
    alt.Tooltip('Sites_Total:Q', title="Number of sites"),
    alt.Tooltip('Hospitals_Total:Q', title="Number of hospitals"),
]

# City markers drawn on top of the background map: rows of `df` are aggregated
# per city (mean coordinates, summed hospitals, row count) and rows without a
# city are filtered out afterwards.
locations = (
    alt.Chart(df)
    .transform_aggregate(
        Latitude_Mean='mean(Latitude)',
        Longitude_Mean='mean(Longitude)',
        Hospitals_Total='sum(Hospitals)',
        Sites_Total='count()',
        # Sites should contain the list of sites in each city
        groupby=['City', 'Country', 'Sites'],
    )
    .transform_filter(alt.datum["City"] != None)
    .mark_circle()
    .encode(
        longitude='Longitude_Mean:Q',
        latitude='Latitude_Mean:Q',
        size=alt.Size(
            'Hospitals_Total:Q',
            title='Number of Hospitals',
            scale=alt.Scale(domain=[1, 40], range=[60, 400]),
            legend=None,
        ),
        color=alt.Color('Country:N', scale=color_scale, legend=None),
        tooltip=city_tooltip,
    )
)

# City name labels: same data and transforms as `locations`, drawn as black
# text of fixed size placed below each marker.
labels = (
    locations
    .mark_text(align='center', baseline='top', dy=7)
    .encode(
        longitude='Longitude_Mean:Q',
        latitude='Latitude_Mean:Q',
        text='City',
        size=alt.value(8),
        color=alt.value('black'),
    )
)

In [None]:
# Title block for the North America panel
usa_title = {
    "text": 'Sites in North America',
    "subtitle": get_visualization_subtitle(12),
    "subtitleColor": "gray"
}

# Background map, city markers and labels under a mercator projection
usa_layers = background + locations + labels
usa = usa_layers.project(
    type='mercator',
    scale=280,
    center=[-126, 59],
    translate=[0, 0],
).properties(title=usa_title, width=300, height=300)

usa

In [None]:
# Custom dx and dy: text offsets for the cities whose label needs to be moved
# on the Europe map. Adding or removing a city here is all that is needed;
# the label layers below are generated from this mapping.
city_offsets = {
    "Paris": {"dx": 0, "dy": 16},
    "Lumezzane/Brescia": {"dx": 45, "dy": 0},
    "Milan": {"dx": -20, "dy": 0},
    "Bergamo": {"dx": 0, "dy": -10},
    "Erlangen": {"dx": 8, "dy": 10},
}

# Predicate selecting every city that has no custom offset
# (City != a AND City != b AND ...)
without_custom_offset = None
for city in city_offsets:
    is_other_city = alt.datum["City"] != city
    if without_custom_offset is None:
        without_custom_offset = is_other_city
    else:
        without_custom_offset = without_custom_offset & is_other_city

# First layer: default labels for all remaining cities
if without_custom_offset is None:
    labels_with_offset = labels
else:
    labels_with_offset = labels.transform_filter(without_custom_offset)

# One extra layer per customised city.
# NOTE(review): mark_text() replaces the mark definition, so these layers
# appear not to inherit align/baseline/dy from `labels`; the offsets above
# were presumably tuned with that in mind.
for city, offset in city_offsets.items():
    labels_with_offset = labels_with_offset + labels.transform_filter(
        alt.datum["City"] == city
    ).mark_text(**offset)

# Europe panel: background map, city markers and the adjusted labels
eu = ( background + locations + labels_with_offset ).project(
    type='mercator',
    scale=820,
    center=[-3, 53],
    translate=[0, 0],
).properties(
    title={
        "text": 'Sites in Europe',
        "subtitle": get_visualization_subtitle(6),
        "subtitleColor": "gray"
    },
    width=300, height=300
)

eu

In [None]:
# Title block for the Asia panel
asia_title = {
    "text": 'Sites in Asia',
    "subtitle": get_visualization_subtitle(1),
    "subtitleColor": "gray"
}

# Background map, city markers and labels under a mercator projection
asia_layers = background + locations + labels
asia = asia_layers.project(
    type='mercator',
    scale=900,
    center=[94, 10],
    translate=[0, 0],
).properties(title=asia_title, width=300, height=300)

asia

In [None]:
# Horizontal layout: the three continent panels side by side, themed and
# rendered on a transparent background
maps_in_a_row = alt.hconcat(usa, eu, asia, spacing=10)
h = apply_theme(maps_in_a_row).properties(background="transparent")

# Pass the chart to the website helper
for_website(h, "Map", "Sites by continent horizontal")

h

In [None]:
# Vertical layout: the three continent panels stacked, themed and rendered on
# a transparent background
maps_in_a_column = alt.vconcat(usa, eu, asia, spacing=10)
v = apply_theme(maps_in_a_column).properties(background="transparent")

# Pass the chart to the website helper
for_website(v, "Map", "Sites by continent vertical")

v