# Charting Italian COVID-19 Data

In [0]:
from datetime import datetime, timedelta, timezone
import dateutil.parser as du_parser
import pandas as pd
import eloader as el
import eplotter as ep

# Fetch the national and the per-region daily frames from the data loader helper.
df_it_daily, df_it_regional_daily = el.load_pcmdpc_it_data()

# National frame: add the per-day differentials that the source dataset does not
# provide, then run the canonical cleanup and discard the 'dateChecked' column.
el.add_canonical_differentials(df_it_daily)
df_it_daily = (
    el.cleanup_canonical(df_it_daily)
    .drop(columns=['dateChecked'])
)

# Regional frame: same steps, with 'RegionName' passed as the daily series column.
el.add_canonical_differentials(df_it_regional_daily, daily_series_col='RegionName')
df_it_regional_daily = (
    el.cleanup_canonical(df_it_regional_daily)
    .drop(columns=['dateChecked'])
)

## Highlights

In [0]:
# National chart of the 'dInfectious' column (day-over-day change in infectious people)
ep.scatter_plot_by_series(
    df_it_daily,
    x_key='X',
    y_key='dInfectious',
    y_filter='expo',
    series_key='CountryName',
    title="Italy - daily increase in the Infected population",
    label_x="Day of 2020",
    label_y="Infectious (today) - Infectious (yesterday)",
    stamp_1="Crossing below zero means the country is Healing",
)


In [0]:
# National chart of the 'Hospitalized' column, one point per day.
# NOTE(review): the peak date in the stamp is a hard-coded string, not derived
# from the data - confirm it still matches the loaded dataset.
ep.scatter_plot_by_series(
    df_it_daily,
    x_key='X',
    y_key='Hospitalized',
    y_filter='expo',
    series_key='CountryName',
    title="Italy - Current Hospitalization by day",
    label_x="Day of 2020",
    label_y="Hospitalized people",
    stamp_1="The peak was on April 9, 2020",
)

In [0]:
# Death rate per region, limited to the most recent `use_weeks` weeks
use_weeks = 6
regions_by_death_rate = ep.rank_data_by_metric(
    df_it_regional_daily,
    metric='Death_rate',
    unique_key='RegionName',
)

# Start of the plotted window: now minus `use_weeks` weeks, converted by the loader helper.
# NOTE(review): this depends on the wall clock while the x axis is labelled
# "Day of 2020" - confirm the window is still meaningful when re-run in a later year.
death_rate_window_start = el.date_to_day_of_year(datetime.now() - timedelta(weeks=use_weeks))

ep.scatter_plot_by_series(
    df_it_regional_daily,
    x_key='X',
    y_key='Death_rate',
    series_key='RegionName',
    series_names=regions_by_death_rate['RegionName'],
    # disabled option: series_is_secondary=secondary_function, series_secondary_width=1,
    y_filter='expo',
    # presumably [x_min, x_max, y_min, y_max] - TODO confirm against eplotter
    bounds=[death_rate_window_start, None, 2, 20],
    legend_decimals=1,
    legend_suffix='%',
    data_labels="legend",
    data_labels_align='right',
    title=f"Italian Regions: death rate, in the past {use_weeks} weeks",
    label_x="Day of 2020",
    label_y="Reported deaths / Confirmed cases (%)",
)
# to inspect the ranking itself, evaluate: regions_by_death_rate

## Regional - Normalized to the population (values per 100,000 inhabitants)

In [0]:
# Per-capita regional charts: selected metrics scaled to a rate per 100,000 of 'Population'
norm_base = 100000
norm_base_text = ' per 100,000'
norm_metrics = ['Confirmed', 'Infectious', 'Deaths', 'Hospitalized', 'Tampons', 'dHospitalized', 'dDeaths']

# d1 is a second name for the regional frame (not a copy), so the columns
# added below are stored on df_it_regional_daily itself.
d1 = df_it_regional_daily
norm_cols = [metric + norm_base_text for metric in norm_metrics]
for metric, col in zip(norm_metrics, norm_cols):
    d1[col] = d1[metric] / d1['Population'] * norm_base

# One chart per normalized column, with the regions ordered by the ranking helper
for col in norm_cols:
    # columns whose name starts with a lowercase 'd' are shown as "Daily <name without the d>"
    if col.startswith('d'):
        col_name = 'Daily ' + col[1:]
    else:
        col_name = col
    auto_rank = ep.rank_data_by_metric(df_it_regional_daily, metric=col, unique_key='RegionName')
    ep.scatter_plot_by_series(
        df_it_regional_daily,
        x_key='X',
        y_key=col,
        y_filter='expo',
        series_key='RegionName',
        series_names=auto_rank['RegionName'],
        title="Italy - " + col_name,
        label_x="Day of 2020",
        label_y=col_name,
    )


## Regional - Not normalized to the population (absolute values)

In [0]:
# One regional chart per metric, using the raw (non-normalized) columns
for col in [
    'Confirmed', 'Infectious', 'Deaths', 'Recovered', 'Hospitalized', 'Tampons', 'PeopleTested',
    'dConfirmed', 'dInfectious', 'dDeaths', 'dRecovered', 'dHospitalized', 'dTampons',
]:
    # columns whose name starts with a lowercase 'd' are shown as "(new) Daily <name without the d>"
    if col.startswith('d'):
        col_name = '(new) Daily ' + col[1:]
    else:
        col_name = col
    auto_rank = ep.rank_data_by_metric(df_it_regional_daily, metric=col, unique_key='RegionName')
    ep.scatter_plot_by_series(
        df_it_regional_daily,
        x_key='X',
        y_key=col,
        y_filter='expo',
        series_key='RegionName',
        series_names=auto_rank['RegionName'],
        title="Italy - " + col_name,
        label_x="Day of 2020",
        label_y=col_name,
    )

## Nationwide

In [0]:
# One nationwide chart per metric, drawn from the national daily frame
for col in [
    'Confirmed', 'Infectious', 'Deaths', 'Recovered', 'Hospitalized', 'Tampons', 'PeopleTested',
    'dConfirmed', 'dInfectious', 'dDeaths', 'dRecovered', 'dHospitalized', 'dTampons',
]:
    # columns whose name starts with a lowercase 'd' are shown as "(new) Daily <name without the d>"
    if col.startswith('d'):
        col_name = '(new) Daily ' + col[1:]
    else:
        col_name = col
    ep.scatter_plot_by_series(
        df_it_daily,
        x_key='X',
        y_key=col,
        y_filter='expo',
        series_key='CountryName',
        title="Italy - " + col_name,
        label_x="Day of 2020",
        label_y=col_name,
    )