## World Data charting

In [0]:
from datetime import datetime, timedelta, timezone
import dateutil.parser as du_parser
import pandas as pd
import eloader as el
import eplotter as ep

# Fetch every raw data set through the eloader helper module.
# The first loader's result is bound to a single name; the other two are unpacked
# into two and three frames respectively.
df_world_daily = el.load_opencovid19_data()
df_it_daily, df_it_regional_daily = el.load_pcmdpc_it_data()
df_us_daily, df_us_states_daily, df_us_states_latest = el.load_covidtracking_us_data()

# Hand the world, US and Italy daily frames to the fuse helper; its result is the
# frame that every chart cell below reads from.
df_fused_daily = el.fuse_daily_sources(df_world_daily, df_us_daily, df_it_daily)

### Global death rate by country

In [0]:
# Single source of truth for the death-count threshold: it selects which rows feed
# the ranking and also decides which series are drawn as secondary (grayed-out).
min_deaths = 500

# Keep only the rows reporting more than `min_deaths` deaths, then rank them by death rate.
df_over_min_deaths = df_fused_daily[df_fused_daily['Deaths'] > min_deaths]
# Legacy alias: the frame used to be called df_over_100_deaths although the filter
# was already 500. Kept so any later cell that still uses the old name keeps working.
df_over_100_deaths = df_over_min_deaths
countries_by_deaths = ep.rank_data_by_metric(df_over_min_deaths, metric='Death_rate', unique_key='CountryName')

ep.scatter_plot_by_series(
    _df=df_fused_daily,
    x_key='X', y_key='Death_rate',
    series_key='CountryName', series_names=countries_by_deaths['CountryName'],
    # A series is secondary when its last row shows a population under 5 million
    # or a death count under the threshold.
    series_is_secondary=(lambda df: df['Population'].iloc[-1] < 5E+06 or df['Deaths'].iloc[-1] < min_deaths),
    series_secondary_width=1,
    # shift_x_to_intersect_y=5,
    # y_log=True,
    bounds=[70, None, 0, 14],
    legend_decimals=1,
    data_labels="legend", data_labels_align="right",
    title="Death rate by Country, over time",
    label_x="Day of 2020", label_y="Reported deaths / Total cases (percent)",
    # The gray-out rule above tests deaths, not cases, so the stamp says so.
    stamp_1='Grayed-out: low population or death count',
)

### Global confirmed cases by country

In [0]:
# Keep only the rows with more than 1000 confirmed cases and rank them by
# confirmed count, keyed on country name.
df_selected_countries = df_fused_daily[df_fused_daily['Confirmed'] > 1000]
ranked_countries_by_cases = ep.rank_data_by_metric(df_selected_countries, metric='Confirmed', unique_key='CountryName')

# Confirmed-case count passed as shift_x_to_intersect_y (presumably each series is
# shifted so x=0 is the day it crosses this value - confirm in eplotter).
# The title AND the x label are both derived from it so they cannot drift apart.
intersection_at = 4000
ep.scatter_plot_by_series(
    _df=df_fused_daily,
    x_key='X', y_key='Confirmed',
    series_key='CountryName', series_names=ranked_countries_by_cases['CountryName'],
    # A series is secondary when its last row shows a population under 5 million
    # or fewer than twice the crossing threshold in confirmed cases.
    series_is_secondary=(lambda df: df['Population'].iloc[-1] < 5E+06 or df['Confirmed'].iloc[-1] < 2*intersection_at),
    series_secondary_width=1,
    shift_x_to_intersect_y=intersection_at,
    y_log=True,
    data_labels="series", data_labels_align="center",
    title='Confirmed cases by country, since crossing ' + str(intersection_at),
    # Previously hard-coded to "case one thousand", which contradicted the 4000 threshold.
    label_x='Days since crossing ' + str(intersection_at) + ' confirmed cases',
)

In [0]:
# Lower x bound of the chart: Feb 15, 2020 converted by the eloader date helper.
first_day_shown = el.date_to_day_of_year(datetime(2020, 2, 15))


def _is_secondary_series(df):
    """Return True when the series' last row has a population under 20 million or fewer than 5000 confirmed cases."""
    last_population = df['Population'].iloc[-1]
    return last_population < 20E+06 or df['Confirmed'].iloc[-1] < 5000


# Confirmed cases per country on a log y axis, x bounded from `first_day_shown`
# and y bounded to 100..300000.
ep.scatter_plot_by_series(
    _df=df_fused_daily,
    x_key='X',
    y_key='Confirmed',
    series_key='CountryName',
    series_names=ranked_countries_by_cases['CountryName'],
    series_is_secondary=_is_secondary_series,
    series_secondary_width=1,
    y_log=True,
    bounds=[first_day_shown, None, 100, 300000],
    legend_decimals=0,
    data_labels="series",
    data_labels_align="right",
    title='Confirmed cases by country, since Feb 15, 2020',
    label_x='Day of the year, 2020',
    stamp_1='Grayed-out: low population or case count',
)