In [111]:
import altair as alt
import numpy as np
import pandas as pd
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

alt.data_transformers.disable_max_rows()

def days_between(d1, d2):
    if (isinstance(d1, str) and isinstance(d2, str)):
        d1 = datetime.strptime(d1, "%m-%d-%Y")
        d2 = datetime.strptime(d2, "%m-%d-%Y")
        return int((d2 - d1).days)
    return None;

In [112]:
total_cases_df       = pd.read_csv("jhu-data.csv",index_col=0)
total_cases_df       = total_cases_df.reset_index()
empty_rows           = total_cases_df[(total_cases_df["Confirmed"]==0)&(total_cases_df["Recovered"]==0)&(total_cases_df["Active"]==0)&(total_cases_df["Deaths"]==0)].index
total_cases_df       = total_cases_df.drop(empty_rows, axis=0)


# Tabulate the top K countries with the highest total number of Confirmed cases
K              = 20  
topKcountries  = list(total_cases_df.groupby("Country_Region").sum()["Confirmed"].nlargest(20).index)
cases_df       = total_cases_df[total_cases_df["Country_Region"].isin(topKcountries)]

# Compute days since N = 50 confirmed cases; drop those with no N confirmed cases
N              = 50
days_since_N                   = cases_df[cases_df["Confirmed"]>N].groupby("Country_Region")["Date"].min().to_dict()
cases_df['Date N Confirmed']   = cases_df.apply  (lambda x: days_since_N.get(x['Country_Region']),axis=1)
cases_df                       = cases_df.dropna (subset=['Date N Confirmed'])
cases_df['Days_from_N']        = cases_df.apply  (lambda x: days_between(x["Date N Confirmed"], x["Date"]), axis = 1)


# Adding in information about lockdowns & possibly drop rows without lockdowns
quarantine_df   = pd.read_csv("quarantine-activity.csv",index_col=0).set_index('Country_Region')
full_lockdown   = quarantine_df[quarantine_df['Lockdown Type'] == 'Full'].groupby("Country_Region")['Date Enacted'].min().to_dict()

# Add date of full lockdown
cases_df['Date of Full Lockdown']     = cases_df.apply  (lambda x: full_lockdown.get(x['Country_Region']),axis=1)
cases_df['lockdown_day_from_N']       = cases_df.apply  (lambda x: days_between(x["Date N Confirmed"], x["Date of Full Lockdown"]), axis = 1)

# Drop rows for those without a full lockdown 
#cases_df                              = cases_df.dropna (subset=['Date of Full Lockdown'])

# The overall US trends are the ones with Province_State is NaN
overall_us_df = cases_df[(cases_df["Country_Region"]=="United States")&(cases_df["Province_State"].isna())]
cases_df = cases_df[cases_df["Country_Region"]!="United States"] #drop everything from the US
cases_df = pd.concat([overall_us_df,cases_df]) #combine the US overall df with the df without the US 
cases_df = cases_df.drop(columns="Province_State")


In [113]:
track_lockdown_value = {}
track_y_intercept = {}
track_lockdown_x = {}
def check_if_lockdown(c, lockdown, current, conf):
    if (lockdown >= current and current >= 0):
        # lockdown is in the future
        if (c in track_lockdown_x): 
            if (track_lockdown_x.get(c) <= current):
                # updating last known value prior to lockdown
                track_lockdown_value[c] = conf
                track_lockdown_x[c] = current
        else:
            # same logic: updating last known value prior to lockdown 
            track_lockdown_value[c] = conf
            track_lockdown_x[c] = current
        if (current == 0):
            # track y intercept
            track_y_intercept[c] = conf
    return True;
# Compute lockdown value, lockdown x, and y intercept
cases_df.apply  (lambda x: check_if_lockdown (x['Country_Region'], x['lockdown_day_from_N'], x['Days_from_N'], x['Confirmed']), axis=1)
cases_df['Intercept']                 = cases_df.apply  (lambda x: track_y_intercept.get(x['Country_Region']),axis=1)
cases_df['Lockdown_x']                = cases_df.apply  (lambda x: track_lockdown_x.get(x['Country_Region']),axis=1)
cases_df['Lockdown_value']            = cases_df.apply  (lambda x: track_lockdown_value.get(x['Country_Region']),axis=1)
# Intercept * a^Lockdown_x == Lockdown_value ==> log(a) = log(Lockdown_value/Intercept)/Lockdown_x
cases_df['Lockdown_slope'] = np.exp(np.log(cases_df.Lockdown_value / cases_df.Intercept) / cases_df.Lockdown_x)

In [117]:
# Growth rate of cumulative cases over time, averaged over the previous week
def computeAvrgDailyChange(x,window=7):
    pctChange = x.diff()/x*100
    return pctChange.rolling(window=window).mean()

cases_df["Average Daily Change in Total Confirmed Cases"] = cases_df.groupby("Country_Region")["Confirmed"].transform(computeAvrgDailyChange)

In [118]:
# Countries: Logarithmic Axis (Stephen's prettier version)
chart_df = cases_df.loc[(cases_df.Days_from_N >= 0) * (cases_df.Days_from_N <= 32)]
for country in full_lockdown:
    if country not in days_since_N:
        continue
    val_to_insert = days_between(days_since_N[country], full_lockdown[country])
    # insert some dummy rows w/ Days_from_N == lockdown_day_from_N to get tooltip_rules w/ mouseover to work properly
    new_row = pd.Series({'Country_Region': country,
                         'lockdown_day_from_N': val_to_insert,
                         'Days_from_N': val_to_insert,
                        })
    chart_df = chart_df.append(new_row, ignore_index=True)

chart_df['x'] = chart_df.Days_from_N
chart_df['y'] = chart_df.Confirmed

nearest = alt.selection(type='single', nearest=True, on='mouseover',
                        fields=['x'], empty='none')

legend_selection = alt.selection_multi(fields=['Country_Region'], bind='legend')

brush = alt.selection_interval(name="brush", encodings=['x'])




shared_encodings = dict(
    x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 50 Confirmed"),
    y=alt.Y("y:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(100,100000))),
    color=alt.Color("Country_Region"),
)

chart = alt.Chart(chart_df, width=700, height=500)

def _add_model_transformation_fields(base):
    return base.transform_calculate(
        model_y='datum.Lockdown_value * pow(datum.Lockdown_slope, datum.x - datum.Lockdown_x)'
    ).transform_filter(
        'datum.x >= datum.Lockdown_x'
    ).transform_filter(
        'datum.Confirmed !== null'
    ).transform_filter(
        'datum.model_y <= 100000'
    ).transform_filter (
        'datum.Lockdown_x !== null'
    )

lines = chart.mark_line(size=3).encode(
    x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 50 Confirmed"),
    y=alt.Y("y:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(100,100000))),
    color=alt.Color("Country_Region"),
    opacity=alt.condition(legend_selection, alt.value(3), alt.value(0.1)),
).transform_filter(
    'datum.Confirmed !== null'
).add_selection(legend_selection).add_selection(brush)

model_lines = _add_model_transformation_fields(
    chart.mark_line(size=4, strokeDash=[1,1]).encode(
        x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 50 Confirmed"),
        y=alt.Y("model_y:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(100,100000))),
        color=alt.Color("Country_Region"),
        opacity=alt.condition(legend_selection, alt.value(3), alt.value(0)),
    )
)


points = chart.mark_point(size=90, filled=True).encode(
    x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 50 Confirmed"),
    y=alt.Y("y:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(100,100000))),
    color=alt.Color("Country_Region"),
    opacity=alt.condition(legend_selection, alt.value(.4), alt.value(0)),
).transform_filter(
    'datum.Confirmed !== null'
)

# Transparent selectors across the chart. This is what tells us
# the x-value of the cursor
selectors = chart.mark_point().encode(
    x='x:Q',
    opacity=alt.value(0),
).add_selection(nearest)

# Draw points on the line, and highlight based on selection
tooltip_points = points.mark_point(filled=True).encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)
# Draw text labels near the points, and highlight based on selection
tooltip_text = points.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(nearest, 'tooltip_text:N', alt.value(' ')),
    opacity=alt.condition(legend_selection, alt.value(3), alt.value(0.1)),
).transform_calculate(
    tooltip_text='datum.Country_Region + ": " + datum.y'
)
# Draw a rule at the location of the selection
tooltip_rules = chart.mark_rule(color='gray').encode(
    x='x:Q',
).transform_filter(
    nearest
)

brush_rule = chart.mark_rule(color='red').encode(
    x='brush_mid:Q'
).transform_filter(
    'isDefined(brush.x)'
)

brush_text = brush_rule.mark_text(align='center', dx=5, dy=-255).encode(
    text=alt.value('Line of no intervention'),
    color=alt.value('red')
).transform_filter(
    'isDefined(brush.x)'
)

lockdown_rules = chart.mark_rule(size = 3, strokeDash=[7,3]).encode(
        x='x:Q',
        color=alt.Color("Country_Region"),
        opacity=alt.condition(legend_selection, alt.value(1), alt.value(0)),
).transform_filter(
    'datum.x == datum.lockdown_day_from_N'
)


#new lockdown text

# lockdown_text = chart.mark_text().encode(
#         x='x:Q',
#         color=alt.Color("Country_Region"),
#         text="HELLO"
#    #     opacity=alt.condition(legend_selection, alt.value(1), alt.value(0)),
# ).transform_filter(
#     'datum.x == datum.lockdown_day_from_N'
# )


lockdown_tooltip=lockdown_rules.mark_text(align='left', dx=5, dy=-220).encode(
    text=alt.condition(nearest, 'lockdown_tooltip_text:N', alt.value(' '))
).transform_calculate(
    lockdown_tooltip_text='datum.Country_Region + " locked down"'
)
alt.layer(
    lines,
    model_lines,
    selectors,
#    tooltip_points,
#    tooltip_text,
#     tooltip_rules,
#    brush_rule,
#    brush_text,
    lockdown_rules,
    lockdown_tooltip,
    points
).transform_calculate(
    brush_mid='isDefined(brush.x) ? 0.5 * (brush.x[0] + brush.x[1]) : 100'
)

In [116]:
# Countries: Rate of Change (Stephen's prettier version)
chart_df = cases_df.loc[(cases_df.Days_from_N >= 0) * (cases_df.Days_from_N <= 32)]
for country in full_lockdown:
    if country not in days_since_N:
        continue
    val_to_insert = days_between(days_since_N[country], full_lockdown[country])
    # insert some dummy rows w/ Days_from_N == lockdown_day_from_N to get tooltip_rules w/ mouseover to work properly
    new_row = pd.Series({'Country_Region': country,
                         'lockdown_day_from_N': val_to_insert,
                         'Days_from_N': val_to_insert,
                        })
    chart_df = chart_df.append(new_row, ignore_index=True)

chart_df['x'] = chart_df.Days_from_N
chart_df['y'] = chart_df["Average Daily Change in Total Confirmed Cases"]

nearest = alt.selection(type='single', nearest=True, on='mouseover',
                        fields=['x'], empty='none')

legend_selection = alt.selection_multi(fields=['Country_Region'], bind='legend')

brush = alt.selection_interval(name="brush", encodings=['x'])

shared_encodings = dict(
    x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 50 Confirmed"),
    y=alt.Y("y:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(100,100000))),
    color=alt.Color("Country_Region"),
)

chart = alt.Chart(chart_df, width=700, height=500)

def _add_model_transformation_fields(base):
    return base.transform_calculate(
        model_y='datum.Lockdown_value * pow(datum.Lockdown_slope, datum.x - datum.Lockdown_x)'
    ).transform_filter(
        'datum.x >= datum.Lockdown_x'
    ).transform_filter(
        'datum.Confirmed !== null'
    ).transform_filter(
        'datum.model_y <= 100000'
    )

lines = chart.mark_line(size=3).encode(
    x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 50 Confirmed"),
    y=alt.Y("y:Q", title="Average Daily Change in Total Confirmed Cases (%)"),#, scale=alt.Scale(type='log',domain=(100,100000))),
    color=alt.Color("Country_Region"),
    opacity=alt.condition(legend_selection, alt.value(3), alt.value(0.1)),
).transform_filter(
    'datum.Confirmed !== null'
).add_selection(legend_selection).add_selection(brush)

model_lines = _add_model_transformation_fields(
    chart.mark_line(size=3, strokeDash=[1,1]).encode(
        x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 50 Confirmed"),
        y=alt.Y("model_y:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(100,100000))),
        color=alt.Color("Country_Region"),
        opacity=alt.condition(legend_selection, alt.value(5), alt.value(0)),
    )
)


points = chart.mark_point(size=90, filled=True).encode(
    x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 50 Confirmed"),
    y=alt.Y("y:Q", title="Average Daily Change in Total Confirmed Cases (%)"),#, scale=alt.Scale(type='log',domain=(100,100000))),
    color=alt.Color("Country_Region"),
    opacity=alt.condition(legend_selection, alt.value(.4), alt.value(0)),
).transform_filter(
    'datum.Confirmed !== null'
)

# Transparent selectors across the chart. This is what tells us
# the x-value of the cursor
selectors = chart.mark_point().encode(
    x='x:Q',
    opacity=alt.value(0),
).add_selection(nearest)

# Draw points on the line, and highlight based on selection
tooltip_points = points.mark_point(filled=True).encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)
# Draw text labels near the points, and highlight based on selection
tooltip_text = points.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(nearest, 'tooltip_text:N', alt.value(' ')),
    opacity=alt.condition(legend_selection, alt.value(3), alt.value(0.1)),
).transform_calculate(
    tooltip_text='datum.Country_Region + ": " + datum.y'
)
# Draw a rule at the location of the selection
tooltip_rules = chart.mark_rule(color='gray').encode(
    x='x:Q',
).transform_filter(
    nearest
)

brush_rule = chart.mark_rule(color='red').encode(
    x='brush_mid:Q'
).transform_filter(
    'isDefined(brush.x)'
)

brush_text = brush_rule.mark_text(align='center', dx=5, dy=-255).encode(
    text=alt.value('Line of no intervention'),
    color=alt.value('red')
).transform_filter(
    'isDefined(brush.x)'
)

lockdown_rules = chart.mark_rule(size = 3, strokeDash=[7,3]).encode(
        x='x:Q',
        color=alt.Color("Country_Region"),
        opacity=alt.condition(legend_selection, alt.value(1), alt.value(0)),
).transform_filter(
    'datum.x == datum.lockdown_day_from_N'
)

lockdown_tooltip=lockdown_rules.mark_text(align='left', dx=5, dy=-220).encode(
    text=alt.condition(nearest, 'lockdown_tooltip_text:N', alt.value(' '))
).transform_calculate(
    lockdown_tooltip_text='datum.Country_Region + " locked down"'
)
alt.layer(
    lines,
#     model_lines,
    selectors,
    tooltip_points,
    tooltip_text,
#     tooltip_rules,
    brush_rule,
    brush_text,
    lockdown_rules,
    lockdown_tooltip,
    points
).transform_calculate(
    brush_mid='isDefined(brush.x) ? 0.5 * (brush.x[0] + brush.x[1]) : 100'
)

# Trying to look at USA States

In [119]:
cases_usa      = total_cases_df.dropna (subset=['Province_State'])

# Tabulate the top L states with the highest total number of Confirmed cases
L              = 30  
topLstates     = list(cases_usa.groupby("Province_State").sum()["Confirmed"].nlargest(L).index)
cases_usa      = cases_usa[cases_usa["Province_State"].isin(topLstates)]


# Compute days since M = 20 confirmed cases; drop those with no M confirmed cases
M              = 20
days_since_M                   = cases_usa[cases_usa["Confirmed"]>M].groupby("Province_State")["Date"].min().to_dict()
cases_usa['Date M Confirmed']  = cases_usa.apply  (lambda x: days_since_M.get(x['Province_State']),axis=1)
cases_usa                      = cases_usa.dropna (subset=['Date M Confirmed'])
cases_usa['Days_from_M']       = cases_usa.apply  (lambda x: days_between(x["Date M Confirmed"], x["Date"]), axis = 1)

cases_usa["Average Daily Change in Total Confirmed Cases"] = cases_usa.groupby("Province_State")["Confirmed"].transform(lambda x: computeAvrgDailyChange(x,window=5))


quarantine_usa      = pd.read_csv("quarantine-activity-US.csv",index_col=0).set_index('Province_State')
full_lockdown_usa   = quarantine_usa[quarantine_usa['Lockdown Type'] == 'Stay at home'].groupby("Province_State")['Date Enacted'].min().to_dict()

# Add date of full lockdown
cases_usa['Date of Full Lockdown']     = cases_usa.apply  (lambda x: full_lockdown_usa.get(x['Province_State']),axis=1)
cases_usa['lockdown_day_from_M']       = cases_usa.apply  (lambda x: days_between(x["Date M Confirmed"], x["Date of Full Lockdown"]), axis = 1)

# Drop rows for those without a full lockdown 
#cases_usa                              = cases_usa.dropna (subset=['Date of Full Lockdown'])

cases_usa



Unnamed: 0,Country_Region,Province_State,Confirmed,Recovered,Active,Deaths,Date,Date M Confirmed,Days_from_M,Average Daily Change in Total Confirmed Cases,Date of Full Lockdown,lockdown_day_from_M
0,United States,Washington,1,0,1,0,01-22-2020,03-03-2020,-41,,03-23-2020,20.0
9,United States,Washington,1,0,1,0,01-23-2020,03-03-2020,-40,,03-23-2020,20.0
27,United States,Washington,1,0,1,0,01-24-2020,03-03-2020,-39,,03-23-2020,20.0
39,United States,Illinois,1,0,1,0,01-25-2020,03-11-2020,-46,,03-21-2020,10.0
40,United States,Washington,1,0,1,0,01-25-2020,03-03-2020,-38,,03-23-2020,20.0
...,...,...,...,...,...,...,...,...,...,...,...,...
6406,United States,Texas,2792,0,0,37,03-29-2020,03-11-2020,18,19.228383,,
6407,United States,Utah,720,0,0,2,03-29-2020,03-15-2020,14,16.115925,,
6410,United States,Virginia,890,0,0,20,03-29-2020,03-13-2020,16,19.817492,,
6411,United States,Washington,4465,0,0,198,03-29-2020,03-03-2020,26,12.117669,03-23-2020,20.0


In [120]:
# States: Logarithmic Axis (Stephen's prettier version)


chart_df = cases_usa.loc[(cases_usa.Days_from_M >= 0) * (cases_usa.Days_from_M <= 32)]
# for country in full_lockdown:
#     if country not in days_since_N:
#         continue
#     val_to_insert = days_between(days_since_N[country], full_lockdown[country])
#     # insert some dummy rows w/ Days_from_N == lockdown_day_from_N to get tooltip_rules w/ mouseover to work properly
#     new_row = pd.Series({'Country_Region': country,
#                          'lockdown_day_from_N': val_to_insert,
#                          'Days_from_N': val_to_insert,
#                         })
#     chart_df = chart_df.append(new_row, ignore_index=True)

chart_df['x'] = chart_df.Days_from_M
chart_df['y'] = chart_df.Confirmed

nearest = alt.selection(type='single', nearest=True, on='mouseover',
                        fields=['x'], empty='none')

legend_selection = alt.selection_multi(fields=['Province_State'], bind='legend')

brush = alt.selection_interval(name="brush", encodings=['x'])




shared_encodings = dict(
    x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 30 Confirmed"),
    y=alt.Y("y:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(10,100000))),
    color=alt.Color("Province_State"),
)

chart = alt.Chart(chart_df, width=700, height=500)


lines = chart.mark_line(size=3).encode(
    x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 30 Confirmed"),
    y=alt.Y("y:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(10,100000))),
    color=alt.Color("Province_State"),
    opacity=alt.condition(legend_selection, alt.value(3), alt.value(0.1)),
).transform_filter(
    'datum.Confirmed !== null'
).add_selection(legend_selection).add_selection(brush)

# model_lines = _add_model_transformation_fields(
#     chart.mark_line(size=4, strokeDash=[1,1]).encode(
#         x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 50 Confirmed"),
#         y=alt.Y("model_y:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(100,100000))),
#         color=alt.Color("Country_Region"),
#         opacity=alt.condition(legend_selection, alt.value(3), alt.value(0)),
#     )
# )


points = chart.mark_point(size=90, filled=True).encode(
    x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 30 Confirmed"),
    y=alt.Y("y:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(10,100000))),
    color=alt.Color("Province_State"),
    opacity=alt.condition(legend_selection, alt.value(.4), alt.value(0)),
).transform_filter(
    'datum.Confirmed !== null'
)

# Transparent selectors across the chart. This is what tells us
# the x-value of the cursor
selectors = chart.mark_point().encode(
    x='x:Q',
    opacity=alt.value(0),
).add_selection(nearest)

# Draw points on the line, and highlight based on selection
tooltip_points = points.mark_point(filled=True).encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)
# Draw text labels near the points, and highlight based on selection
tooltip_text = points.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(nearest, 'tooltip_text:N', alt.value(' ')),
    opacity=alt.condition(legend_selection, alt.value(3), alt.value(0.1)),
).transform_calculate(
    tooltip_text='datum.Province_State + ": " + datum.y'
)
# Draw a rule at the location of the selection
tooltip_rules = chart.mark_rule(color='gray').encode(
    x='x:Q',
).transform_filter(
    nearest
)

brush_rule = chart.mark_rule(color='red').encode(
    x='brush_mid:Q'
).transform_filter(
    'isDefined(brush.x)'
)

brush_text = brush_rule.mark_text(align='center', dx=5, dy=-255).encode(
    text=alt.value('Line of no intervention'),
    color=alt.value('red')
).transform_filter(
    'isDefined(brush.x)'
)

lockdown_rules = chart.mark_rule(size = 3, strokeDash=[7,3]).encode(
        x='x:Q',
        color=alt.Color("Province_State"),
        opacity=alt.condition(legend_selection, alt.value(1), alt.value(0)),
).transform_filter(
    'datum.x == datum.lockdown_day_from_M'
)


#new lockdown text

# lockdown_text = chart.mark_text().encode(
#         x='x:Q',
#         color=alt.Color("Country_Region"),
#         text="HELLO"
#    #     opacity=alt.condition(legend_selection, alt.value(1), alt.value(0)),
# ).transform_filter(
#     'datum.x == datum.lockdown_day_from_N'
# )


lockdown_tooltip=lockdown_rules.mark_text(align='left', dx=5, dy=-220).encode(
    text=alt.condition(nearest, 'lockdown_tooltip_text:N', alt.value(' '))
).transform_calculate(
    lockdown_tooltip_text='datum.Province_State + " locked down"'
)
alt.layer(
    lines,
#    model_lines,
    selectors,
#    tooltip_points,
#    tooltip_text,
#     tooltip_rules,
#    brush_rule,
#    brush_text,
    lockdown_rules,
    lockdown_tooltip,
    points
).transform_calculate(
    brush_mid='isDefined(brush.x) ? 0.5 * (brush.x[0] + brush.x[1]) : 100'
)

In [121]:
# Countries: Rate of Change (Stephen's prettier version)
chart_usa = cases_usa.loc[(cases_usa.Days_from_M >= 0) * (cases_usa.Days_from_M <= 32)]
for state in full_lockdown_usa:
    if state not in days_since_M:
        continue
    val_to_insert = days_between(days_since_M[state], full_lockdown_usa[state])
    # insert some dummy rows w/ Days_from_N == lockdown_day_from_N to get tooltip_rules w/ mouseover to work properly
    new_row = pd.Series({'Province_State': state,
                         'lockdown_day_from_M': val_to_insert,
                         'Days_from_M': val_to_insert,
                        })
    chart_df = chart_df.append(new_row, ignore_index=True)

chart_df['x'] = chart_df.Days_from_M
chart_df['y'] = chart_df["Average Daily Change in Total Confirmed Cases"]

nearest = alt.selection(type='single', nearest=True, on='mouseover',
                        fields=['x'], empty='none')

legend_selection = alt.selection_multi(fields=['Province_State'], bind='legend')

brush = alt.selection_interval(name="brush", encodings=['x'])

shared_encodings = dict(
    x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 30 Confirmed"),
    y=alt.Y("y:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(20,100000))),
    color=alt.Color("Province_State"),
)

chart = alt.Chart(chart_df, width=700, height=500)

def _add_model_transformation_fields(base):
    return base.transform_calculate(
        model_y='datum.Lockdown_value * pow(datum.Lockdown_slope, datum.x - datum.Lockdown_x)'
    ).transform_filter(
        'datum.x >= datum.Lockdown_x'
    ).transform_filter(
        'datum.Confirmed !== null'
    ).transform_filter(
        'datum.model_y <= 100000'
    )

lines = chart.mark_line(size=3).encode(
    x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 30 Confirmed"),
    y=alt.Y("y:Q", title="Average Daily Change in Total Confirmed Cases (%)"),#, scale=alt.Scale(type='log',domain=(100,100000))),
    color=alt.Color("Province_State"),
    opacity=alt.condition(legend_selection, alt.value(3), alt.value(0.1)),
).transform_filter(
    'datum.Confirmed !== null'
).add_selection(legend_selection).add_selection(brush)

model_lines = _add_model_transformation_fields(
    chart.mark_line(size=3, strokeDash=[1,1]).encode(
        x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 30 Confirmed"),
        y=alt.Y("model_y:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(100,100000))),
        color=alt.Color("Province_State"),
        opacity=alt.condition(legend_selection, alt.value(5), alt.value(0)),
    )
)


points = chart.mark_point(size=90, filled=True).encode(
    x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 30 Confirmed"),
    y=alt.Y("y:Q", title="Average Daily Change in Total Confirmed Cases (%)"),#, scale=alt.Scale(type='log',domain=(100,100000))),
    color=alt.Color("Province_State"),
    opacity=alt.condition(legend_selection, alt.value(.4), alt.value(0)),
).transform_filter(
    'datum.Confirmed !== null'
)

# Transparent selectors across the chart. This is what tells us
# the x-value of the cursor
selectors = chart.mark_point().encode(
    x='x:Q',
    opacity=alt.value(0),
).add_selection(nearest)

# Draw points on the line, and highlight based on selection
tooltip_points = points.mark_point(filled=True).encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)
# Draw text labels near the points, and highlight based on selection
tooltip_text = points.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(nearest, 'tooltip_text:N', alt.value(' ')),
    opacity=alt.condition(legend_selection, alt.value(3), alt.value(0.1)),
).transform_calculate(
    tooltip_text='datum.Province_State + ": " + datum.y'
)
# Draw a rule at the location of the selection
tooltip_rules = chart.mark_rule(color='gray').encode(
    x='x:Q',
).transform_filter(
    nearest
)

brush_rule = chart.mark_rule(color='red').encode(
    x='brush_mid:Q'
).transform_filter(
    'isDefined(brush.x)'
)

brush_text = brush_rule.mark_text(align='center', dx=5, dy=-255).encode(
    text=alt.value('Line of no intervention'),
    color=alt.value('red')
).transform_filter(
    'isDefined(brush.x)'
)

lockdown_rules = chart.mark_rule(size = 3, strokeDash=[7,3]).encode(
        x='x:Q',
        color=alt.Color("Province_State"),
        opacity=alt.condition(legend_selection, alt.value(1), alt.value(0)),
).transform_filter(
    'datum.x == datum.lockdown_day_from_M'
)

lockdown_tooltip=lockdown_rules.mark_text(align='left', dx=5, dy=-220).encode(
    text=alt.condition(nearest, 'lockdown_tooltip_text:N', alt.value(' '))
).transform_calculate(
    lockdown_tooltip_text='datum.Province_State + " locked down"'
)
alt.layer(
    lines,
#     model_lines,
    selectors,
    tooltip_points,
    tooltip_text,
#     tooltip_rules,
    brush_rule,
    brush_text,
    lockdown_rules,
    lockdown_tooltip,
    points
).transform_calculate(
    brush_mid='isDefined(brush.x) ? 0.5 * (brush.x[0] + brush.x[1]) : 100'
)

# Adding population information

In [13]:
total_cases_df       = pd.read_csv("../91-DIVOC/pages/covid-visualization/jhu-data.csv",index_col=0)
empty_rows           = total_cases_df[(total_cases_df["Confirmed"]==0)&(total_cases_df["Recovered"]==0)&(total_cases_df["Active"]==0)&(total_cases_df["Deaths"]==0)].index
total_cases_df       = total_cases_df.drop(empty_rows, axis=0)

In [14]:
len(total_cases_df)

3547

In [15]:
len(total_cases_df["Country_Region"].unique())

194

In [16]:
#downloaded from https://datahub.io/JohnSnowLabs/population-figures-by-country
pop = pd.read_csv("population-figures-by-country-csv_csv.csv")[["Country","Year_2016"]]

In [17]:
pop = pop.rename(columns={"Year_2016":"Population","Country":"Country_Region"})

In [18]:
len(pop.Country_Region.unique())

263

In [19]:
total_cases_df = total_cases_df.merge(pop,how="left")

In [20]:
# TODO: These countries need manual entry for Population
# total_cases_df[total_cases_df.Population.isnull()]["Country_Region"].unique()

In [21]:
len(total_cases_df)

3547

In [22]:
# pop[pop.Country_Region.str.contains("Korea")]

In [23]:
total_cases_df["Confirmed Cases Per Thousand Ppl"] = total_cases_df["Confirmed"]/total_cases_df["Population"]*1000

In [24]:
total_cases_df[total_cases_df["Confirmed Cases Per Thousand Ppl"]>1]

Unnamed: 0,Country_Region,Province_State,Confirmed,Recovered,Active,Deaths,Date,Population,Confirmed Cases Per Thousand Ppl
1722,San Marino,,36.0,0.0,35.0,1.0,03-08-2020,33203.0,1.084239
1949,San Marino,,51.0,0.0,49.0,2.0,03-10-2020,33203.0,1.536006
2047,San Marino,,62.0,0.0,60.0,2.0,03-11-2020,33203.0,1.867301
2146,San Marino,,69.0,0.0,66.0,3.0,03-12-2020,33203.0,2.078125
2249,San Marino,,80.0,0.0,75.0,5.0,03-13-2020,33203.0,2.409421
2358,San Marino,,80.0,4.0,71.0,5.0,03-14-2020,33203.0,2.409421
2591,San Marino,,109.0,4.0,98.0,7.0,03-16-2020,33203.0,3.282836
2842,San Marino,,119.0,4.0,104.0,11.0,03-18-2020,33203.0,3.584013
2972,San Marino,,119.0,4.0,104.0,11.0,03-19-2020,33203.0,3.584013
3071,Iceland,,409.0,5.0,404.0,0.0,03-20-2020,334252.0,1.223628


This measure seems to bias towards countries with low population, not sure if this is a great measure, like in the NY times article for states.
https://www.nytimes.com/interactive/2020/03/27/upshot/coronavirus-new-york-comparison.html?action=click&module=Spotlight&pgtype=Homepage