In [90]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import datetime
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import re
from tqdm.notebook import tqdm
from pathlib import Path
from bokeh.layouts import row, column, gridplot
from bokeh.models import Title, ColumnDataSource, HoverTool, LinearAxis, LabelSet, Div, SingleIntervalTicker
from bokeh.palettes import Category10
from bokeh.io import output_file, show
from bokeh.models import BasicTickFormatter
from bokeh.plotting import figure
from bokeh.models import Span
from bokeh.models import Label
from bokeh.models import LogScale
from bokeh.io import show
import bokeh
bokeh.__version__

'2.0.0'

In [91]:
# Base directory: three levels above the current working directory, made absolute.
covidPath = Path('../../..').resolve()
# CSV with the lockdown levels (read in a later cell).
all_states_path = covidPath / 'Lockdown' / 'output' / 'all_states.csv'
# CSV with the case reports; note it sits two further levels above `covidPath`.
reports_path = covidPath / '..' / '..' / 'Covid_Reports' / 'Reports.csv'
# Width and height handed to every bokeh figure below.
PLOT_SIZE_X, PLOT_SIZE_Y = 800, 500

In [92]:
# Line colour of the vertical lockdown marker, indexed by lockdown level
# (the plotting functions look up COLORS_FOR_LOCKDOWN_LEVEL[level] and skip level 0).
COLORS_FOR_LOCKDOWN_LEVEL = ["purple", "blue", "green", "orange", "red"]

In [93]:
# Case-report table; every helper below filters it by country / state / source.
data = pd.read_csv(reports_path)
# Distinct country names found in the reports (both names point at the same array).
countries = data["country"].unique()
available_countries = countries
# print(sorted(available_countries))

In [94]:
# Quick look at the report rows of a single country.
data.loc[data['country'] == 'Israel']

Unnamed: 0,country,state,county,city,population,update_time,db_source_name,db_source_url,db_source_time,confirmed,...,new_positive_cases,note_it,note_en,suspected cases,unknown,cases/100k pop.,hospitalized,percent,tests,quarantine
23585,Israel,,,,8299706.0,2020-03-29 23:08:13+00:00,Johns Hopkins CSSE,github.com/CSSEGISandData/COVID-19.git,2020-03-29 00:00:00+00:00,4247.0,...,,,,,,,,,,
27012,Israel,,,,8299706.0,2020-03-28 23:05:25+00:00,Johns Hopkins CSSE,github.com/CSSEGISandData/COVID-19.git,2020-03-28 00:00:00+00:00,3619.0,...,,,,,,,,,,
30439,Israel,,,,8299706.0,2020-03-27 23:23:03+00:00,Johns Hopkins CSSE,github.com/CSSEGISandData/COVID-19.git,2020-03-27 00:00:00+00:00,3035.0,...,,,,,,,,,,
33858,Israel,,,,8299706.0,2020-03-26 23:48:18+00:00,Johns Hopkins CSSE,github.com/CSSEGISandData/COVID-19.git,2020-03-26 00:00:00+00:00,2693.0,...,,,,,,,,,,
37278,Israel,,,,8299706.0,2020-03-25 23:33:04+00:00,Johns Hopkins CSSE,github.com/CSSEGISandData/COVID-19.git,2020-03-25 00:00:00+00:00,2369.0,...,,,,,,,,,,
40696,Israel,,,,8299706.0,2020-03-24 23:37:15+00:00,Johns Hopkins CSSE,github.com/CSSEGISandData/COVID-19.git,2020-03-24 00:00:00+00:00,1930.0,...,,,,,,,,,,
44111,Israel,,,,8299706.0,2020-03-23 23:19:21+00:00,Johns Hopkins CSSE,github.com/CSSEGISandData/COVID-19.git,2020-03-23 00:00:00+00:00,1442.0,...,,,,,,,,,,
47452,Israel,,,,8299706.0,2020-03-22 23:45:00+00:00,Johns Hopkins CSSE,github.com/CSSEGISandData/COVID-19.git,2020-03-22 00:00:00+00:00,1071.0,...,,,,,,,,,,
47643,Israel,,,,8299706.0,2020-03-21 11:43:06+00:00,Johns Hopkins CSSE,github.com/CSSEGISandData/COVID-19.git,2020-03-21 00:00:00+00:00,883.0,...,,,,,,,,,,
47944,Israel,,,,8299706.0,2020-03-20 14:43:04+00:00,Johns Hopkins CSSE,github.com/CSSEGISandData/COVID-19.git,2020-03-20 00:00:00+00:00,705.0,...,,,,,,,,,,


In [95]:
# Lockdown table as stored on disk.
lockdown_dataframe = pd.read_csv(all_states_path)
# NOTE: this rebinds `countries` / `available_countries`, replacing the values
# taken from the case reports in the earlier cell.
countries = lockdown_dataframe["country"].unique()
available_countries = countries
# for now we'll focus only on these columns; `province` is renamed to `state`
# so the column name matches the case-report table.
lockdown_dataframe = lockdown_dataframe.loc[
    :, ["lockdown_level", "country", "province", "start_date"]
].rename(columns={'province': 'state'})

In [96]:
# First five rows of the trimmed lockdown table.
lockdown_dataframe.head(5)

Unnamed: 0,lockdown_level,country,state,start_date
0,3,Canada,Alberta,2020-03-30 00:00:00
1,2,Canada,British Columbia,2020-03-30 00:00:00
2,2,Canada,"Calgary, Alberta",2020-03-30 00:00:00
3,2,Canada,"Edmonton, Alberta",2020-03-30 00:00:00
4,2,Canada,"London, ON",2020-03-30 00:00:00


In [97]:
# verifying that each state is updated once per day means that we don't need to worry about double counting
# a state twice in a single day
def verify_each_state_is_updated_at_most_once_per_day(reports=None):
    """Print every state that has more than one row for the same ``db_source_time``.

    The previous implementation compared ``len(state_data["db_source_time"])``
    with ``len(state_data)``; both are the row count of the same frame, so the
    check could never fire. This version reports a state as soon as one of its
    ``db_source_time`` values occurs more than once.

    Parameters
    ----------
    reports : pandas.DataFrame, optional
        Table with ``state`` and ``db_source_time`` columns. Defaults to the
        notebook-level ``data`` frame, which keeps the original zero-argument
        call working.

    Returns
    -------
    None
        Offending state names are printed, one per line.
    """
    if reports is None:
        reports = data  # notebook-level case-report table

    # groupby skips rows whose state is NaN; the original equality filter
    # (`== NaN` is never true) produced an empty selection for them as well.
    for state, state_data in reports.groupby("state", sort=False):
        if state_data["db_source_time"].duplicated().any():
            print(state)
verify_each_state_is_updated_at_most_once_per_day()

In [98]:
def get_data_for_country(data, country, db_source=None, state=None):
    """Return the report rows of one country, optionally narrowed by state and source.

    Filtering steps, in order:

    1. keep rows whose ``country`` equals ``country``;
    2. for Italy only, drop rows whose ``state`` is missing (per the original
       note, those rows hold country-wide sums next to the per-state rows);
    3. drop rows whose ``state`` is the literal ``'sum'``;
    4. if ``state`` is given, keep only that state;
    5. if ``db_source`` is given, keep only rows whose ``db_source_url`` matches.

    A short progress line is printed after each optional filter and once at the end.

    Parameters
    ----------
    data : pandas.DataFrame
        Report table with at least ``country``, ``state`` and ``db_source_url``.
    country : str
    db_source : str, optional
    state : str, optional

    Returns
    -------
    pandas.DataFrame
        An independent copy of the selected rows. Callers add a
        ``days_since_patient_zero`` column to the result; returning a copy makes
        that assignment well-defined instead of writing to a slice of ``data``.
    """
    country_data = data[data["country"] == country]
    # some data is dirty in that it sums all the cases and sets state to be NAN (LOOK AT ITALY!!)

    # we need to see how we want to handle the nan states (sometimes its fine, sometimes its not good - Italy...)
    if country == "Italy":
        country_data = country_data[country_data['state'].notna()]
    country_data = country_data[country_data['state'] != 'sum']
    if state is not None:
        country_data = country_data[country_data["state"] == state]
        print("Found {} entries for country {} from state {}".format(len(country_data), country, state))
    if db_source is not None:
        country_data = country_data[country_data["db_source_url"] == db_source]
        print("Found {} entries for country {} from source {}".format(len(country_data), country, db_source))
    print("Found {} entries for country {}".format(len(country_data), country))
    return country_data.copy()

In [99]:
def get_lockdown_data_for_country(data, country, state=None):
    """Return the lockdown rows of one country, optionally for a single state.

    Parameters
    ----------
    data : pandas.DataFrame
        Lockdown table with at least ``country`` and ``state`` columns.
    country : str
    state : str, optional
        When omitted, the rows of all states of the country are returned.
        (An earlier variant restricted this case to rows with a missing state;
        that filter is intentionally not applied.)

    Returns
    -------
    pandas.DataFrame
        An independent copy of the selected rows. Callers add a
        ``days_since_patient_zero`` column to the result, so a copy avoids
        writing to a slice of the shared lockdown table.
    """
    country_data = data[data["country"] == country]
    # if no state is given, we will return the data for all the states
    if state is not None:
        country_data = country_data[country_data["state"] == state]
        print("Found {} entries for country {} from state {}".format(len(country_data), country, state))
    else:
        print("Found {} entries for country {} from all states".format(len(country_data), country))
    return country_data.copy()

In [100]:
def get_unique_dates_for_country(country_data, country):
    """Return the sorted, distinct ``db_source_time`` values with at least one confirmed case.

    The previous version returned one entry per row, so the result contained
    duplicates whenever several rows shared a timestamp. The first element,
    which is the only one the callers in this notebook read, is unchanged.

    Parameters
    ----------
    country_data : pandas.DataFrame
        Rows with ``confirmed`` and ``db_source_time`` columns.
    country : str
        Unused; kept so existing calls stay valid.

    Returns
    -------
    list
        Distinct ``db_source_time`` values in ascending order; empty when no
        row has ``confirmed >= 1``.
    """
    rows_with_cases = country_data.loc[country_data["confirmed"] >= 1, "db_source_time"]
    return sorted(rows_with_cases.unique())

In [101]:
def string_date_to_datetime_day(date):
    """Convert a timestamp string such as ``'2020-03-29 00:00:00+00:00'`` to a ``datetime.date``.

    The string is split on ``-`` and ``:``; the first two pieces are the year
    and month, and the first two characters of the third piece are the day
    (anything after them, e.g. the time of day, is ignored).
    """
    pieces = re.split('-|:', date)[:3]
    year = int(pieces[0])
    month = int(pieces[1])
    day_of_month = int(pieces[2][:2])
    return datetime.date(year, month, day_of_month)

In [102]:
def compute_days_since_patient_zero(country_data, country_unique_dates):
    """Add a ``days_since_patient_zero`` column to ``country_data`` (in place).

    The reference day is the first entry of ``country_unique_dates``; each
    row's ``db_source_time`` is converted to a calendar day and the column
    holds the whole-day difference to that reference. Nothing is returned.
    """
    first_day = string_date_to_datetime_day(country_unique_dates[0])
    country_data["days_since_patient_zero"] = [
        (string_date_to_datetime_day(stamp) - first_day).days
        for stamp in country_data["db_source_time"]
    ]

In [103]:
def compute_days_since_patient_zero_for_lockdown_data(country_data, country_lockdown_data, country_unique_dates):
    """Add a ``days_since_patient_zero`` column to ``country_lockdown_data`` (in place).

    The reference day is the first entry of ``country_unique_dates``; each
    lockdown ``start_date`` is expressed as whole days after that reference.
    Negative differences are clamped to 0 (original author's note: level-0
    lockdown rows carry a placeholder date of 1/1/2019, which would otherwise
    land before patient zero). ``country_data`` is accepted but not used.
    """
    first_day = string_date_to_datetime_day(country_unique_dates[0])
    country_lockdown_data["days_since_patient_zero"] = [
        max(0, (string_date_to_datetime_day(start) - first_day).days)
        for start in country_lockdown_data["start_date"]
    ]

In [104]:
def get_total_growth_rate_per_day(country_data):
    """Return cumulative confirmed totals and their day-over-day increments.

    The result has one entry per ROW of ``country_data`` (days are taken from
    the sorted, non-deduplicated ``days_since_patient_zero`` column). When
    several rows share a day, that day's total is repeated and the repeated
    entries get an increment of 0.

    Returns
    -------
    (list of int, list of int)
        ``total_cases`` and ``new_cases_per_day``, aligned with
        ``sorted(country_data["days_since_patient_zero"])``.
    """
    day_column = country_data["days_since_patient_zero"]
    cumulative_totals = []
    increments = []
    running_total = 0
    for current_day in sorted(day_column):
        # Sum over every row reported for this day (e.g. one row per state).
        confirmed_today = np.sum(country_data.loc[day_column == current_day, "confirmed"])
        cumulative_totals.append(int(confirmed_today))
        increments.append(int(confirmed_today - running_total))
        running_total = confirmed_today
    return cumulative_totals, increments

In [261]:
def get_total_death_rate_per_day(country_data):
    """Return the cumulative ``deaths + intensive_care`` total for every row's day.

    The result has one entry per ROW of ``country_data`` (days come from the
    sorted, non-deduplicated ``days_since_patient_zero`` column), so a day
    that appears in several rows is repeated; callers pair the list with
    ``sorted(country_data["days_since_patient_zero"])``.

    The earlier implementation also built a per-day increment list, but it
    updated ``previous_number_of_cases`` instead of ``previous_number_of_death``
    (so the increments were wrong) and never returned it. That dead
    computation has been removed; the returned totals are unchanged.

    Returns
    -------
    list of int
    """
    day_column = country_data["days_since_patient_zero"]
    total_death = []
    for days_since_0 in sorted(day_column):
        data_for_day = country_data[day_column == days_since_0]
        # Deaths and ICU patients are plotted as a single combined series.
        total_death_hosp = np.sum(data_for_day["deaths"]) + np.sum(data_for_day["intensive_care"])
        total_death.append(int(total_death_hosp))
    return total_death

In [262]:
def get_R_rate_per_day(country_data, regr=False):
    """Return the day-over-day growth factor of confirmed cases, one value per distinct day.

    Without regression the factor of a day is ``total(day) / total(previous day)``;
    the first day is divided by 1, so its factor equals its total.

    With ``regr=True`` the factor at position ``k`` (``k >= 5``) is replaced by
    ``2 ** slope``, where ``slope`` comes from a least-squares line fitted to
    ``log2(total)`` over the five preceding days ``k-5 .. k-1``; the first five
    positions are set to 1. Series shorter than five days used to raise
    ``IndexError`` in this mode; they now simply come back as all ones.

    Parameters
    ----------
    country_data : pandas.DataFrame
        Needs ``days_since_patient_zero`` and ``confirmed`` columns.
    regr : bool, optional
        Use the windowed log-linear fit instead of the raw ratio.

    Returns
    -------
    list of float
        Aligned with ``sorted(country_data["days_since_patient_zero"].unique())``.
    """
    window = 5  # days per regression fit, and number of leading values forced to 1

    day_values = sorted(country_data["days_since_patient_zero"].unique())
    totals = []
    ratio_per_day = []
    previous_total = 1
    for day in day_values:
        rows_for_day = country_data[country_data["days_since_patient_zero"] == day]
        # Kept as a numpy scalar: dividing by a zero total then yields inf/nan
        # instead of raising ZeroDivisionError.
        total = np.sum(rows_for_day["confirmed"])
        totals.append(float(total))
        ratio_per_day.append(float(total / previous_total))
        previous_total = total

    if regr:
        # Design matrix [day, 1] for fitting log2(total) = slope * day + intercept.
        design = np.column_stack((np.asarray(day_values, dtype=float),
                                  np.ones(len(day_values))))
        log_totals = np.log2(np.asarray(totals, dtype=float))
        for start in range(len(day_values) - window):
            coefficients = np.linalg.lstsq(design[start:start + window],
                                           log_totals[start:start + window],
                                           rcond=None)[0]
            ratio_per_day[start + window] = np.exp2(coefficients[0])
        print('** Using regression **')
        # No full window precedes the first days; clamp to the series length.
        for i in range(min(window, len(ratio_per_day))):
            ratio_per_day[i] = 1
    return ratio_per_day

In [284]:
def get_R_rate_death(country_data, regr=False):
    """Return the day-over-day growth factor of ``deaths + intensive_care``, one value per distinct day.

    Without regression the factor of a day is ``total(day) / total(previous day)``;
    the first day is divided by 1, so its factor equals its total. A previous
    total of zero gives ``inf``/``nan`` rather than an exception.

    With ``regr=True`` the factor at position ``k`` (``k >= 5``) is replaced by
    ``2 ** slope``, where ``slope`` comes from a least-squares line fitted to
    ``log2(total)`` over the five preceding days ``k-5 .. k-1``; the first five
    positions are set to 1. Series shorter than five days used to raise
    ``IndexError`` in this mode; they now simply come back as all ones.

    Parameters
    ----------
    country_data : pandas.DataFrame
        Needs ``days_since_patient_zero``, ``deaths`` and ``intensive_care`` columns.
    regr : bool, optional
        Use the windowed log-linear fit instead of the raw ratio.

    Returns
    -------
    list of float
        Aligned with ``sorted(country_data["days_since_patient_zero"].unique())``.
    """
    window = 5  # days per regression fit, and number of leading values forced to 1

    day_values = sorted(country_data["days_since_patient_zero"].unique())
    totals = []
    ratio_per_day = []
    previous_total = 1
    for day in day_values:
        rows_for_day = country_data[country_data["days_since_patient_zero"] == day]
        # Kept as a numpy scalar so that a zero denominator does not raise.
        total = np.sum(rows_for_day["deaths"]) + np.sum(rows_for_day["intensive_care"])
        totals.append(float(total))
        ratio_per_day.append(float(total / previous_total))
        previous_total = total

    if regr:
        # Design matrix [day, 1] for fitting log2(total) = slope * day + intercept.
        design = np.column_stack((np.asarray(day_values, dtype=float),
                                  np.ones(len(day_values))))
        log_totals = np.log2(np.asarray(totals, dtype=float))
        for start in range(len(day_values) - window):
            coefficients = np.linalg.lstsq(design[start:start + window],
                                           log_totals[start:start + window],
                                           rcond=None)[0]
            ratio_per_day[start + window] = np.exp2(coefficients[0])
        print('** Using regression **')
        # No full window precedes the first days; clamp to the series length.
        for i in range(min(window, len(ratio_per_day))):
            ratio_per_day[i] = 1

    return ratio_per_day

In [285]:
def plot_total_cumulative_cases(countries, states, db_sources, use_log_scale=False):
    """Scatter-plot cumulative confirmed cases and mark lockdown levels.

    Position ``i`` of ``countries``, ``states`` and ``db_sources`` together
    describes one plotted series (the sequences are zipped). Case rows come
    from the notebook-level ``data`` frame, lockdown rows from
    ``lockdown_dataframe``. For every lockdown level other than 0 a dashed
    vertical line is drawn at the earliest day that level appears, with a text
    label next to it. The figure is displayed with ``show``; nothing is returned.

    Parameters
    ----------
    countries : sequence of str
    states : sequence of (str or None)
        ``None`` selects all states of the country.
    db_sources : sequence of (str or None)
        Value matched against ``db_source_url``.
    use_log_scale : bool, optional
        Use a logarithmic y axis.
    """
    # Series colours cycle through the palette, so more than ten series reuse
    # colours instead of raising IndexError.
    palette = Category10[10]

    p = figure(title="Total Cases: ",
               tools='pan,wheel_zoom,box_zoom,reset,save',
               plot_width=PLOT_SIZE_X, plot_height=PLOT_SIZE_Y)

    country_text = []
    for country, state in zip(countries, states):
        print(country, state)
        if state is not None:
            country_text.append('{} - {}'.format(country, state))
        else:
            country_text.append('{}'.format(country))

    p.add_layout(Title(text=", ".join(country_text), text_font_style='italic', text_font_size='15pt'), 'above')

    for idx, (country, db_source, state) in enumerate(zip(countries, db_sources, states)):
        series_color = palette[idx % len(palette)]

        country_data = get_data_for_country(data, country, db_source, state)
        country_unique_dates = get_unique_dates_for_country(country_data, country)
        compute_days_since_patient_zero(country_data, country_unique_dates)

        # The helper returns one total per row, so the x values are the
        # non-deduplicated sorted days to keep both columns the same length.
        total_cases, _ = get_total_growth_rate_per_day(country_data)
        country_growth_data = pd.DataFrame()
        country_growth_data["days_since_patient_zero"] = sorted(country_data["days_since_patient_zero"])
        country_growth_data["total_number_of_cases"] = total_cases
        country_growth_data["country"] = [country] * len(total_cases)
        growth_source = ColumnDataSource.from_df(country_growth_data)

        s = p.scatter('days_since_patient_zero', 'total_number_of_cases', size=5,
                      source=growth_source,
                      color=series_color, fill_color=series_color, line_color=series_color,
                      hover_fill_color=series_color,
                      legend_label='{}'.format(country_text[idx]))

        tool_tips = [("Country", "@country"),
                     ('Days Since Patient Zero', '@days_since_patient_zero'),
                     ('Total Number of Cases', '@total_number_of_cases')]
        hover = HoverTool(renderers=[s], tooltips=tool_tips)
        p.add_tools(hover)

        # here we add data for lockdowns
        country_lockdown_data = get_lockdown_data_for_country(lockdown_dataframe, country, state=state)
        compute_days_since_patient_zero_for_lockdown_data(country_data, country_lockdown_data, country_unique_dates)

        lockdown_data = pd.DataFrame()
        lockdown_data["level"] = country_lockdown_data["lockdown_level"]
        lockdown_data["days_since_patient_0"] = country_lockdown_data["days_since_patient_zero"]
        # Sorting by day before drop_duplicates keeps the earliest day of each level.
        lockdown_data = lockdown_data.sort_values('days_since_patient_0', ascending=True).drop_duplicates('level').sort_index()

        for level, day in zip(lockdown_data["level"], lockdown_data["days_since_patient_0"]):
            if level == 0:
                continue
            vline = Span(location=day, dimension='height', line_dash='dashed',
                         line_color=COLORS_FOR_LOCKDOWN_LEVEL[level], line_width=2)
            p.renderers.extend([vline])

            # Stagger labels vertically by level so they do not sit on top of each other.
            level_text = Label(x=day + 0.1,
                               y=np.max(total_cases) * (level / 4),
                               text='{} \nLevel: {}'.format(country_text[idx], level))
            p.add_layout(level_text)

    if use_log_scale:
        p.y_scale = LogScale()
    p.yaxis.formatter = BasicTickFormatter(use_scientific=False)
    p.title.text_font_size = '15pt'
    p.legend.location = "top_left"
    p.legend.click_policy = 'hide'
    p.legend.background_fill_color = '#fefefe'
    p.xaxis.axis_label = "Days Since Patient Zero"
    p.yaxis.axis_label = "Total Number of Cases"
    p.grid.grid_line_color = "gray"
    p.xaxis.axis_label_text_font_size = '14pt'
    p.yaxis.axis_label_text_font_size = '14pt'
    p.xaxis.major_label_text_font_size = '10pt'
    p.yaxis.major_label_text_font_size = '10pt'

    show(p)

In [286]:
def plot_daily_new_cases(countries, states, db_sources, use_log_scale=True):
    """Scatter-plot the number of new confirmed cases per day and mark lockdown levels.

    Position ``i`` of ``countries``, ``states`` and ``db_sources`` together
    describes one plotted series (the sequences are zipped). Case rows come
    from the notebook-level ``data`` frame, lockdown rows from
    ``lockdown_dataframe``. The figure is displayed with ``show``; nothing is
    returned.

    New cases are computed here per DISTINCT day: confirmed counts of all rows
    sharing a day are summed, and the increment is the difference to the
    previous day's sum (the first day is compared with 0). Previously the x
    column was de-duplicated while the y column had one entry per row, which
    raised a length-mismatch error as soon as several rows (e.g. one per
    state) shared a day.

    Parameters
    ----------
    countries : sequence of str
    states : sequence of (str or None)
        ``None`` selects all states of the country.
    db_sources : sequence of (str or None)
        Value matched against ``db_source_url``.
    use_log_scale : bool, optional
        Use a logarithmic y axis.
    """
    # Series colours cycle through the palette, so more than ten series reuse
    # colours instead of raising IndexError.
    palette = Category10[10]

    p = figure(title="Daily Number of New Cases: ",
               tools='pan,wheel_zoom,box_zoom,reset,save',
               plot_width=PLOT_SIZE_X, plot_height=PLOT_SIZE_Y)

    country_text = []
    for country, state in zip(countries, states):
        if state is not None:
            country_text.append('{} - {}'.format(country, state))
        else:
            country_text.append('{}'.format(country))

    p.add_layout(Title(text=", ".join(country_text), text_font_style='italic', text_font_size='15pt'), 'above')

    for idx, (country, db_source, state) in enumerate(zip(countries, db_sources, states)):
        series_color = palette[idx % len(palette)]

        country_data = get_data_for_country(data, country, db_source, state)
        country_unique_dates = get_unique_dates_for_country(country_data, country)

        compute_days_since_patient_zero(country_data, country_unique_dates)

        # One confirmed total per distinct day, in ascending day order.
        confirmed_by_day = (country_data
                            .groupby("days_since_patient_zero")["confirmed"]
                            .sum()
                            .sort_index())
        # prepend=0 makes the first day's increment equal to its total.
        daily_new_cases = [int(delta) for delta in np.diff(confirmed_by_day.to_numpy(), prepend=0)]

        country_growth_data = pd.DataFrame()
        country_growth_data["days_since_patient_zero"] = list(confirmed_by_day.index)
        country_growth_data["daily_new_cases"] = daily_new_cases
        country_growth_data["country"] = [country] * len(daily_new_cases)

        growth_source = ColumnDataSource.from_df(country_growth_data)

        s = p.scatter('days_since_patient_zero', 'daily_new_cases', size=5,
                      source=growth_source,
                      color=series_color, fill_color=series_color, line_color=series_color,
                      hover_fill_color=series_color,
                      legend_label='{}'.format(country_text[idx]))

        tool_tips = [("Country", "@country"),
                     ('Days Since Patient Zero', '@days_since_patient_zero'),
                     ('Daily Number of New Cases', '@daily_new_cases')]
        hover = HoverTool(renderers=[s], tooltips=tool_tips)
        p.add_tools(hover)

        # here we add data for lockdowns
        country_lockdown_data = get_lockdown_data_for_country(lockdown_dataframe, country, state=state)
        compute_days_since_patient_zero_for_lockdown_data(country_data, country_lockdown_data, country_unique_dates)

        lockdown_data = pd.DataFrame()
        lockdown_data["level"] = country_lockdown_data["lockdown_level"]
        lockdown_data["days_since_patient_0"] = country_lockdown_data["days_since_patient_zero"]
        # Sorting by day before drop_duplicates keeps the earliest day of each level.
        lockdown_data = lockdown_data.sort_values('days_since_patient_0', ascending=True).drop_duplicates('level').sort_index()

        for level, day in zip(lockdown_data["level"], lockdown_data["days_since_patient_0"]):
            if level == 0:
                continue
            vline = Span(location=day, dimension='height', line_dash='dashed',
                         line_color=COLORS_FOR_LOCKDOWN_LEVEL[level], line_width=2)
            p.renderers.extend([vline])

            # Stagger labels vertically by level so they do not sit on top of each other.
            level_text = Label(x=day + 0.1,
                               y=np.max(daily_new_cases) * (level / 4),
                               text='{} \nLevel: {}'.format(country_text[idx], level))
            p.add_layout(level_text)

    if use_log_scale:
        p.y_scale = LogScale()
    p.yaxis.formatter = BasicTickFormatter(use_scientific=False)
    p.title.text_font_size = '15pt'
    p.legend.location = "top_left"
    p.legend.click_policy = 'hide'
    p.legend.background_fill_color = '#fefefe'
    p.xaxis.axis_label = "Days Since Patient Zero"
    p.yaxis.axis_label = "Daily Number of New Cases"
    p.grid.grid_line_color = "gray"
    p.xaxis.axis_label_text_font_size = '14pt'
    p.yaxis.axis_label_text_font_size = '14pt'
    p.xaxis.major_label_text_font_size = '10pt'
    p.yaxis.major_label_text_font_size = '10pt'

    show(p)

In [287]:
def plot_R_ratio_daily(countries, states, db_sources, use_log_scale=True, regr=False):
    """Scatter-plot the daily increase factor of confirmed cases and mark lockdown levels.

    Position ``i`` of ``countries``, ``states`` and ``db_sources`` together
    describes one plotted series (the sequences are zipped). Case rows come
    from the notebook-level ``data`` frame, lockdown rows from
    ``lockdown_dataframe``. The y axis is fixed to the range 1..2. The figure
    is displayed with ``show``; nothing is returned.

    Lockdown labels are now placed inside the fixed y range; they used to be
    drawn at y <= 0.1, i.e. below the visible area.

    Parameters
    ----------
    countries : sequence of str
    states : sequence of (str or None)
        ``None`` selects all states of the country.
    db_sources : sequence of (str or None)
        Value matched against ``db_source_url``.
    use_log_scale : bool, optional
        Accepted for signature parity with the other plot functions but
        currently ignored (the log-scale switch is disabled for this plot).
    regr : bool, optional
        Forwarded to ``get_R_rate_per_day``.
    """
    # Fixed vertical extent of the plot; also used to position the lockdown labels.
    y_min, y_max = 1, 2
    # Series colours cycle through the palette, so more than ten series reuse
    # colours instead of raising IndexError.
    palette = Category10[10]

    p = figure(title="Daily Increase Factor: ",
               tools='pan,wheel_zoom,box_zoom,reset,save',
               plot_width=PLOT_SIZE_X, plot_height=PLOT_SIZE_Y)

    country_text = []
    for country, state in zip(countries, states):
        if state is not None:
            country_text.append('{} - {}'.format(country, state))
        else:
            country_text.append('{}'.format(country))

    p.add_layout(Title(text=", ".join(country_text), text_font_style='italic', text_font_size='15pt'), 'above')

    for idx, (country, db_source, state) in enumerate(zip(countries, db_sources, states)):
        series_color = palette[idx % len(palette)]

        country_data = get_data_for_country(data, country, db_source, state)
        country_unique_dates = get_unique_dates_for_country(country_data, country)

        compute_days_since_patient_zero(country_data, country_unique_dates)

        daily_R_ratio = get_R_rate_per_day(country_data, regr)
        country_R_data = pd.DataFrame()
        country_R_data["days_since_patient_zero"] = sorted(country_data["days_since_patient_zero"].unique())
        country_R_data["daily_R_ratio"] = daily_R_ratio
        country_R_data["country"] = [country] * len(daily_R_ratio)

        R_source = ColumnDataSource.from_df(country_R_data)

        s = p.scatter('days_since_patient_zero', 'daily_R_ratio', size=5,
                      source=R_source,
                      color=series_color, fill_color=series_color, line_color=series_color,
                      hover_fill_color=series_color,
                      legend_label='{}'.format(country_text[idx]))

        tool_tips = [("Country", "@country"),
                     ('Days Since Patient Zero', '@days_since_patient_zero'),
                     ('Daily Increase Factor', '@daily_R_ratio')]
        hover = HoverTool(renderers=[s], tooltips=tool_tips)
        p.add_tools(hover)

        # here we add data for lockdowns
        country_lockdown_data = get_lockdown_data_for_country(lockdown_dataframe, country, state=state)
        compute_days_since_patient_zero_for_lockdown_data(country_data, country_lockdown_data, country_unique_dates)

        lockdown_data = pd.DataFrame()
        lockdown_data["level"] = country_lockdown_data["lockdown_level"]
        lockdown_data["days_since_patient_0"] = country_lockdown_data["days_since_patient_zero"]
        # Sorting by day before drop_duplicates keeps the earliest day of each level.
        lockdown_data = lockdown_data.sort_values('days_since_patient_0', ascending=True).drop_duplicates('level').sort_index()

        for level, day in zip(lockdown_data["level"], lockdown_data["days_since_patient_0"]):
            if level == 0:
                continue
            vline = Span(location=day, dimension='height', line_dash='dashed',
                         line_color=COLORS_FOR_LOCKDOWN_LEVEL[level], line_width=2)
            p.renderers.extend([vline])

            # Levels 1..4 land at 20%..80% of the visible height.
            level_text = Label(x=day + 0.1,
                               y=y_min + (y_max - y_min) * (level / 5),
                               text='{} \nLevel: {}'.format(country_text[idx], level))
            p.add_layout(level_text)

    p.yaxis.formatter = BasicTickFormatter(use_scientific=False)
    p.title.text_font_size = '15pt'
    p.legend.location = "top_left"
    p.legend.click_policy = 'hide'
    p.legend.background_fill_color = '#fefefe'
    p.xaxis.axis_label = "Days Since Patient Zero"
    p.yaxis.axis_label = "Daily Increase Factor"
    p.grid.grid_line_color = "gray"
    p.xaxis.axis_label_text_font_size = '14pt'
    p.yaxis.axis_label_text_font_size = '14pt'
    p.xaxis.major_label_text_font_size = '10pt'
    p.yaxis.major_label_text_font_size = '10pt'
    p.y_range.end = y_max
    p.y_range.start = y_min

    show(p)

In [288]:
def plot_total_cumulative_death(countries, states, db_sources, use_log_scale=False):
    """Scatter-plot the cumulative deaths + ICU patients and mark lockdown levels.

    Position ``i`` of ``countries``, ``states`` and ``db_sources`` together
    describes one plotted series (the sequences are zipped). Case rows come
    from the notebook-level ``data`` frame, lockdown rows from
    ``lockdown_dataframe``. The figure is displayed with ``show``; nothing is
    returned.

    The y-axis label and the hover text now name the plotted quantity
    (deaths plus ICU patients); they previously carried labels copied from
    the case plot.

    Parameters
    ----------
    countries : sequence of str
    states : sequence of (str or None)
        ``None`` selects all states of the country.
    db_sources : sequence of (str or None)
        Value matched against ``db_source_url``.
    use_log_scale : bool, optional
        Use a logarithmic y axis.
    """
    # Series colours cycle through the palette, so more than ten series reuse
    # colours instead of raising IndexError.
    palette = Category10[10]

    p = figure(title="Total Deaths and ICU patients: ",
               tools='pan,wheel_zoom,box_zoom,reset,save',
               plot_width=PLOT_SIZE_X, plot_height=PLOT_SIZE_Y)

    country_text = []
    for country, state in zip(countries, states):
        print(country, state)
        if state is not None:
            country_text.append('{} - {}'.format(country, state))
        else:
            country_text.append('{}'.format(country))

    p.add_layout(Title(text=", ".join(country_text), text_font_style='italic', text_font_size='15pt'), 'above')

    for idx, (country, db_source, state) in enumerate(zip(countries, db_sources, states)):
        series_color = palette[idx % len(palette)]

        country_data = get_data_for_country(data, country, db_source, state)
        country_unique_dates = get_unique_dates_for_country(country_data, country)
        compute_days_since_patient_zero(country_data, country_unique_dates)

        # The helper returns one total per row, so the x values are the
        # non-deduplicated sorted days to keep both columns the same length.
        total_death = get_total_death_rate_per_day(country_data)
        country_growth_data = pd.DataFrame()
        country_growth_data["days_since_patient_zero"] = sorted(country_data["days_since_patient_zero"])
        country_growth_data["total_number_of_death"] = total_death
        country_growth_data["country"] = [country] * len(total_death)
        growth_source = ColumnDataSource.from_df(country_growth_data)

        s = p.scatter('days_since_patient_zero', 'total_number_of_death', size=5,
                      source=growth_source,
                      color=series_color, fill_color=series_color, line_color=series_color,
                      hover_fill_color=series_color,
                      legend_label='{}'.format(country_text[idx]))

        tool_tips = [("Country", "@country"),
                     ('Days Since Patient Zero', '@days_since_patient_zero'),
                     ('Total Deaths and ICU Patients', '@total_number_of_death')]
        hover = HoverTool(renderers=[s], tooltips=tool_tips)
        p.add_tools(hover)

        # here we add data for lockdowns
        country_lockdown_data = get_lockdown_data_for_country(lockdown_dataframe, country, state=state)
        compute_days_since_patient_zero_for_lockdown_data(country_data, country_lockdown_data, country_unique_dates)

        lockdown_data = pd.DataFrame()
        lockdown_data["level"] = country_lockdown_data["lockdown_level"]
        lockdown_data["days_since_patient_0"] = country_lockdown_data["days_since_patient_zero"]
        # Sorting by day before drop_duplicates keeps the earliest day of each level.
        lockdown_data = lockdown_data.sort_values('days_since_patient_0', ascending=True).drop_duplicates('level').sort_index()

        for level, day in zip(lockdown_data["level"], lockdown_data["days_since_patient_0"]):
            if level == 0:
                continue
            vline = Span(location=day, dimension='height', line_dash='dashed',
                         line_color=COLORS_FOR_LOCKDOWN_LEVEL[level], line_width=2)
            p.renderers.extend([vline])

            # Stagger labels vertically by level so they do not sit on top of each other.
            level_text = Label(x=day + 0.1,
                               y=np.max(total_death) * (level / 4),
                               text='{} \nLevel: {}'.format(country_text[idx], level))
            p.add_layout(level_text)

    if use_log_scale:
        p.y_scale = LogScale()
    p.yaxis.formatter = BasicTickFormatter(use_scientific=False)
    p.title.text_font_size = '15pt'
    p.legend.location = "top_left"
    p.legend.click_policy = 'hide'
    p.legend.background_fill_color = '#fefefe'
    p.xaxis.axis_label = "Days Since Patient Zero"
    p.yaxis.axis_label = "Total Deaths and ICU Patients"
    p.grid.grid_line_color = "gray"
    p.xaxis.axis_label_text_font_size = '14pt'
    p.yaxis.axis_label_text_font_size = '14pt'
    p.xaxis.major_label_text_font_size = '10pt'
    p.yaxis.major_label_text_font_size = '10pt'

    show(p)

In [289]:
def plot_R_ratio_death(countries, states, db_sources, use_log_scale=True, regr=False):
    """Scatter-plot the daily increase factor of deaths + ICU patients and mark lockdown levels.

    Position ``i`` of ``countries``, ``states`` and ``db_sources`` together
    describes one plotted series (the sequences are zipped). Case rows come
    from the notebook-level ``data`` frame, lockdown rows from
    ``lockdown_dataframe``. The y axis is fixed to the range 1..2. The figure
    is displayed with ``show``; nothing is returned.

    Lockdown labels are now placed inside the fixed y range; they used to be
    drawn at y <= 0.1, i.e. below the visible area.

    Parameters
    ----------
    countries : sequence of str
    states : sequence of (str or None)
        ``None`` selects all states of the country.
    db_sources : sequence of (str or None)
        Value matched against ``db_source_url``.
    use_log_scale : bool, optional
        Accepted for signature parity with the other plot functions but
        currently ignored (the log-scale switch is disabled for this plot).
    regr : bool, optional
        Forwarded to ``get_R_rate_death``.
    """
    # Fixed vertical extent of the plot; also used to position the lockdown labels.
    y_min, y_max = 1, 2
    # Series colours cycle through the palette, so more than ten series reuse
    # colours instead of raising IndexError.
    palette = Category10[10]

    p = figure(title="Daily Deaths+ ICU Increase Factor: ",
               tools='pan,wheel_zoom,box_zoom,reset,save',
               plot_width=PLOT_SIZE_X, plot_height=PLOT_SIZE_Y)

    country_text = []
    for country, state in zip(countries, states):
        if state is not None:
            country_text.append('{} - {}'.format(country, state))
        else:
            country_text.append('{}'.format(country))

    p.add_layout(Title(text=", ".join(country_text), text_font_style='italic', text_font_size='15pt'), 'above')

    for idx, (country, db_source, state) in enumerate(zip(countries, db_sources, states)):
        series_color = palette[idx % len(palette)]

        country_data = get_data_for_country(data, country, db_source, state)
        country_unique_dates = get_unique_dates_for_country(country_data, country)

        compute_days_since_patient_zero(country_data, country_unique_dates)

        daily_R_ratio = get_R_rate_death(country_data, regr=regr)
        country_R_data = pd.DataFrame()
        country_R_data["days_since_patient_zero"] = sorted(country_data["days_since_patient_zero"].unique())
        country_R_data["daily_R_ratio"] = daily_R_ratio
        country_R_data["country"] = [country] * len(daily_R_ratio)

        R_source = ColumnDataSource.from_df(country_R_data)

        s = p.scatter('days_since_patient_zero', 'daily_R_ratio', size=5,
                      source=R_source,
                      color=series_color, fill_color=series_color, line_color=series_color,
                      hover_fill_color=series_color,
                      legend_label='{}'.format(country_text[idx]))

        tool_tips = [("Country", "@country"),
                     ('Days Since Patient Zero', '@days_since_patient_zero'),
                     ('Daily Increase Factor', '@daily_R_ratio')]
        hover = HoverTool(renderers=[s], tooltips=tool_tips)
        p.add_tools(hover)

        # here we add data for lockdowns
        country_lockdown_data = get_lockdown_data_for_country(lockdown_dataframe, country, state=state)
        compute_days_since_patient_zero_for_lockdown_data(country_data, country_lockdown_data, country_unique_dates)

        lockdown_data = pd.DataFrame()
        lockdown_data["level"] = country_lockdown_data["lockdown_level"]
        lockdown_data["days_since_patient_0"] = country_lockdown_data["days_since_patient_zero"]
        # Sorting by day before drop_duplicates keeps the earliest day of each level.
        lockdown_data = lockdown_data.sort_values('days_since_patient_0', ascending=True).drop_duplicates('level').sort_index()

        for level, day in zip(lockdown_data["level"], lockdown_data["days_since_patient_0"]):
            if level == 0:
                continue
            vline = Span(location=day, dimension='height', line_dash='dashed',
                         line_color=COLORS_FOR_LOCKDOWN_LEVEL[level], line_width=2)
            p.renderers.extend([vline])

            # Levels 1..4 land at 20%..80% of the visible height.
            level_text = Label(x=day + 0.1,
                               y=y_min + (y_max - y_min) * (level / 5),
                               text='{} \nLevel: {}'.format(country_text[idx], level))
            p.add_layout(level_text)

    p.yaxis.formatter = BasicTickFormatter(use_scientific=False)
    p.title.text_font_size = '15pt'
    p.legend.location = "top_left"
    p.legend.click_policy = 'hide'
    p.legend.background_fill_color = '#fefefe'
    p.xaxis.axis_label = "Days Since Patient Zero"
    p.yaxis.axis_label = "Daily Increase Factor"
    p.grid.grid_line_color = "gray"
    p.xaxis.axis_label_text_font_size = '14pt'
    p.yaxis.axis_label_text_font_size = '14pt'
    p.xaxis.major_label_text_font_size = '10pt'
    p.yaxis.major_label_text_font_size = '10pt'
    p.y_range.end = y_max
    p.y_range.start = y_min

    show(p)

In [290]:
# Selection for the plots: position i of `countries`, `states` and `db_sources`
# together describes one series, so the three lists must line up.
# Known issue (original author's note): the plots get messy because of overlaps.

# Render bokeh figures inline in the notebook.
bokeh.io.output_notebook()

countries = ["Israel"]
states = [None]
db_sources = ["github.com/CSSEGISandData/COVID-19.git"]
plot_total_cumulative_cases(countries=countries, states=states, db_sources=db_sources, use_log_scale=True)
plot_R_ratio_daily(countries=countries, states=states, db_sources=db_sources, use_log_scale=True, regr=True)
plot_total_cumulative_death(countries=countries, states=states, db_sources=db_sources, use_log_scale=True)
plot_R_ratio_death(countries=countries, states=states, db_sources=db_sources, use_log_scale=True)

Israel None
Found 38 entries for country Israel from source github.com/CSSEGISandData/COVID-19.git
Found 38 entries for country Israel
Found 10 entries for country Israel from all states


Found 38 entries for country Israel from source github.com/CSSEGISandData/COVID-19.git
Found 38 entries for country Israel
** Using regression **
Found 10 entries for country Israel from all states


Israel None
Found 38 entries for country Israel from source github.com/CSSEGISandData/COVID-19.git
Found 38 entries for country Israel
Found 10 entries for country Israel from all states


Found 38 entries for country Israel from source github.com/CSSEGISandData/COVID-19.git
Found 38 entries for country Israel
Found 10 entries for country Israel from all states


In [293]:
# NOTE(review): the output recorded under this cell reports Italy, although the
# cell above sets countries = ["Israel"]. Its execution count (293) is higher than
# that of the Italy cell further down (292), so it appears to have been run after
# that cell. On a top-to-bottom run it would plot Israel instead — confirm which
# country is intended, or move this call below the Italy selection.
plot_R_ratio_death(countries, states, db_sources, use_log_scale=True, regr=True)

Found 735 entries for country Italy from source git://github.com/pcm-dpc/COVID-19
Found 735 entries for country Italy
** Using regression **
Found 99 entries for country Italy from all states


In [292]:
# Selection for the plots: position i of `countries`, `states` and `db_sources`
# together describes one series, so the three lists must line up.
# Known issue (original author's note): the plots get messy because of overlaps.

# Render bokeh figures inline in the notebook.
bokeh.io.output_notebook()

countries = ["Italy"]
states = [None]
db_sources = ["git://github.com/pcm-dpc/COVID-19"]
plot_total_cumulative_cases(countries=countries, states=states, db_sources=db_sources, use_log_scale=True)
plot_R_ratio_daily(countries=countries, states=states, db_sources=db_sources, use_log_scale=True, regr=True)
plot_total_cumulative_death(countries=countries, states=states, db_sources=db_sources, use_log_scale=True)
plot_R_ratio_death(countries=countries, states=states, db_sources=db_sources, use_log_scale=True, regr=True)

Italy None
Found 735 entries for country Italy from source git://github.com/pcm-dpc/COVID-19
Found 735 entries for country Italy
Found 99 entries for country Italy from all states


Found 735 entries for country Italy from source git://github.com/pcm-dpc/COVID-19
Found 735 entries for country Italy
** Using regression **
Found 99 entries for country Italy from all states


Italy None
Found 735 entries for country Italy from source git://github.com/pcm-dpc/COVID-19
Found 735 entries for country Italy
Found 99 entries for country Italy from all states


Found 735 entries for country Italy from source git://github.com/pcm-dpc/COVID-19
Found 735 entries for country Italy
** Using regression **
Found 99 entries for country Italy from all states
