In [None]:
!pip install yattag

In [None]:
import math
import os
from datetime import datetime

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [None]:
# Capture the current local time once; its integer epoch-seconds value is
# interpolated into the names of the files saved by later cells.
now = datetime.now()
timestamp = int(now.timestamp())
timestamp

In [None]:
# Read the CSV from the Kaggle input directory, parsing the 'Date' column as
# datetimes, and display the resulting frame.
data = pd.read_csv(
    "/kaggle/input/corona-virus-report/covid_19_clean_complete.csv",
    parse_dates=["Date"],
)
data

In [None]:
# Remove US rows whose Province/State contains "County"
# (presumably county-level records that duplicate state-level ones -- TODO confirm).
#
# na=False must be the boolean: the string "False" is non-empty and therefore
# truthy when coerced to bool, so rows with a missing Province/State could be
# treated as county matches and dropped for the US.
is_us = data['Country/Region'] == "US"
is_county = data['Province/State'].str.contains("County", na=False)
data = data.loc[~(is_us & is_county)]

# Print the full filtered frame without pandas' row/column truncation.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(data)

In [None]:
# Pivot to one row per Date and one column per (measure, Country/Region),
# summing all rows of a country for each date.
# The string "sum" is used instead of np.sum: recent pandas versions warn when
# a NumPy callable is passed as aggfunc, and "sum" selects the same aggregation.
epi_curve_totals = pd.pivot_table(
    data,
    values=['Confirmed', 'Deaths', 'Recovered'],
    index='Date',
    columns='Country/Region',
    aggfunc="sum",
)
# Display the summed Confirmed series for the US.
epi_curve_totals["Confirmed"]["US"]

In [None]:
# Difference between each date's totals and the previous date's totals
# (the first row is NaN because it has no predecessor).
epi_curve = epi_curve_totals.diff(periods=1)
epi_curve

In [None]:
# Subplot grid geometry shared by the per-country figures:
# three plots per row, and enough rows to hold every "Confirmed" column.
columns = 3
rows = math.ceil(len(epi_curve["Confirmed"].columns) / columns)

# Overall figure size in inches: 8 wide and 5 tall per subplot.
wsize, hsize = columns * 8, rows * 5

In [None]:
# One bar chart per country of the day-over-day change in confirmed cases,
# arranged on the rows x columns grid computed earlier.
axes = epi_curve["Confirmed"].plot(
    kind='bar',
    subplots=True,
    layout=(rows, columns),
    figsize=(wsize, hsize),
    sharex=False,
)

# Bar plots place one tick per row, so a fixed list of "Mon DD" labels
# (one per date in the index) lines up with the bars.
ticklabels = list(epi_curve.index.strftime('%b %d'))
for a in axes.ravel():
    a.xaxis.set_major_formatter(ticker.FixedFormatter(ticklabels))

plt.tight_layout()
plt.savefig("all_countries_%d.png" % timestamp, transparent=False)

In [None]:
# --- Stacked bar chart of day-over-day confirmed changes, China excluded ---
ax = epi_curve["Confirmed"]\
        .drop(columns=["China"])\
        .plot(
            figsize=(15,10),
            kind='bar',
            stacked=True,
            legend=False,
            title = "Cases Outside China"
        )

# Bar plots have one tick per row, so fixed per-date labels are aligned.
ticklabels = [item.strftime('%b %d') for item in epi_curve.index]
ax.xaxis.set_major_formatter(ticker.FixedFormatter(ticklabels))

plt.savefig("stacked_outside_china_%d.png" % timestamp, transparent=False)

# Retained from the original cell; only commented-out code here used this alias.
import matplotlib.ticker as plticker

# --- Per-country line plots of the percent change of the daily differences ---
# x_compat=True makes pandas plot against matplotlib date values, so a
# DateFormatter yields correct "Mon DD" labels. The previous FixedFormatter
# assigned labels by tick ordinal (1st tick -> 1st date, 2nd tick -> 2nd date),
# which mislabels a line plot whose ticks are not one-per-row.
axes = epi_curve\
    .pct_change()["Confirmed"]\
    .plot(
        subplots=True,
        figsize=(wsize,hsize),
        layout=(rows,columns),
        sharex=False,
        x_compat=True)

for a in axes.ravel():
    a.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))
    a.grid(axis='y')

plt.tight_layout()

In [None]:
# Percent change, date over date, of the summed confirmed totals of every
# country except China.
outside_china_total = epi_curve_totals["Confirmed"].drop(columns=["China"]).sum(axis=1)
d = outside_china_total.pct_change() * 100

# Text box contents: the describe() statistics of the series, two decimals each.
summary = "mean: %(mean).2f\nmin: %(min).2f\np25: %(25%).2f\np50: %(50%).2f\np75: %(75%).2f\nmax: %(max).2f\ncount: %(count).2f\nstd: %(std).2f" % (d.describe())

ax = d.plot(
    title="Day over day percent change of total cases outside China",
    figsize=(15,10),
)

# Place the summary box 5 x-units left of the right edge, at y=65.
right_edge = ax.get_xlim()[1]
ax.text(right_edge-5, 65, summary, fontsize=10, bbox=dict(facecolor='white', alpha=1))

# Y ticks every 10 percentage points, with horizontal grid lines.
loc = ticker.MultipleLocator(base=10)
ax.yaxis.set_major_locator(loc)
ax.grid(axis='y')
ax.set_ylabel("Percent Change")

plt.savefig("day_over_day_percent_change_%d.png" % timestamp, transparent=False)

In [None]:
# Per-country percent change (date over date) of confirmed totals, China excluded.
d = epi_curve_totals["Confirmed"]\
    .drop(columns=["China"])\
    .pct_change()\
    .apply(lambda x: x*100)

# x_compat=True makes pandas plot against matplotlib date values so that a
# DateFormatter can label the ticks. The previous FixedFormatter handed out
# labels by tick ordinal, which mislabels line plots (ticks are not one-per-date).
axes=d.plot(
        subplots=True,
        figsize=(wsize,hsize),
        layout=(rows,columns),
        sharex=False,
        x_compat=True)

for a in axes.ravel():
    a.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))

plt.tight_layout()

plt.savefig("all_countries_growth_%d.png" % timestamp, transparent=False)

In [None]:
# Fractional date-over-date growth of the summed confirmed totals outside China.
growth = epi_curve_totals["Confirmed"]\
    .drop(columns=["China"])\
    .sum(axis=1)\
    .pct_change()

# Doubling time = log(2) / log(1 + growth).
# Computed vectorised: the previous element-wise math.log(2, 1 + x) raised
# ZeroDivisionError on any date with zero growth. Where the doubling time is
# undefined (zero growth -> +/-inf) the value is recorded as NaN so it is
# skipped by both the plot and the summary statistics.
d = (np.log(2) / np.log1p(growth)).replace([np.inf, -np.inf], np.nan)

summary = "mean: %(mean).2f\nmin: %(min).2f\np25: %(25%).2f\np50: %(50%).2f\np75: %(75%).2f\nmax: %(max).2f\ncount: %(count).2f\nstd: %(std).2f" % (d.describe())

ax = d\
    .plot(
        figsize=(15,10),
        title="Doubling time for cases outside of China",
)

# Y ticks every 1 day, with horizontal grid lines.
loc = ticker.MultipleLocator(base=1)
ax.yaxis.set_major_locator(loc)
ax.grid(axis='y')
ax.set_ylabel("Days")

# Summary box 5 x-units left of the right edge, at y=21.8.
ax.text(ax.get_xlim()[1]-5,21.8, summary, fontsize=10, bbox=dict(facecolor='white', alpha=1))

plt.savefig("doubling_time_%d.png" % timestamp, transparent=False)

In [None]:
# Latest (last-row) totals per country.
latest_confirmed = epi_curve_totals["Confirmed"].iloc[-1]
latest_deaths = epi_curve_totals["Deaths"].iloc[-1]

# Ratio of deaths to confirmed cases on the last date.
cfr = latest_deaths / latest_confirmed

# Keep only countries with a positive ratio. `> 0` (rather than `!= 0`) also
# drops NaN ratios produced by 0/0, which would otherwise show as empty bars.
has_deaths = cfr > 0

d = pd.DataFrame({
    "Ratio": cfr[has_deaths],
    "Confirmed": latest_confirmed[has_deaths]
})

# Ratio on the primary y axis (ticks every 0.05), Confirmed on a secondary axis.
ax = d.plot(
    secondary_y=['Confirmed'],
    kind='bar',
    figsize=(15,10),
    grid=True,
    yticks=np.arange(0, 1, step=0.05),
    title="Ratio of deaths to confirmed cases"
)

ax.grid(axis='x')

plt.savefig("deaths_confirmed_ratio_%d.png" % timestamp, transparent=False)

In [None]:
# Per date: how many countries have a day-over-day change of at least `threshold`.
threshold = 1
countries_with_cases = epi_curve["Confirmed"].ge(threshold).sum(axis=1)
countries_with_deaths = epi_curve["Deaths"].ge(threshold).sum(axis=1)

# One series per cut-off: number of countries whose day-over-day confirmed
# change is at least that cut-off.
d_dict = {
    "≥ %d Confirmed" % cutoff: (epi_curve["Confirmed"] >= cutoff).sum(axis=1)
    for cutoff in [1, 10, 100, 1000]
}
d = pd.DataFrame(d_dict)

# The y axis is capped at the maximum of the first column (the lowest cut-off).
y_top = d.max(axis=0).iloc[0]
ax = d.plot(
    title="Countries reporting cases",
    figsize=(15,10),
    ylim=(0, y_top),
    yticks=np.arange(0, y_top, 5),
)

ax.grid(axis='y')

plt.savefig("countries_with_cases_%d.png" % timestamp, transparent=False)

In [None]:
# Change, date over date, of the US day-over-day confirmed series.
epi_curve["Confirmed"]["US"].diff()

In [None]:
# Day-over-day confirmed series for the US.
epi_curve["Confirmed"].loc[:, "US"]

In [None]:
# Minimum drop (in cases) for a country to count as "decreasing" on a date.
threshold = 1

# Per date: number of countries whose day-over-day confirmed figure fell by at
# least `threshold` compared with the previous date.
# The previous condition (`diff() <= 1`) also counted countries with no change
# or with an increase of one, which contradicts the title and file name.
countries_with_cases = (epi_curve["Confirmed"].diff() <= -threshold).sum(axis=1)

ax = countries_with_cases.plot(
    figsize=(15,10),
    title="Countries with fewer cases day-over-day",
    ylim=(0, 110),
    yticks=np.arange(0,110,5)
)

ax.grid(axis='y')

plt.savefig("countries_with_decreasing_cases_%d.png" % timestamp, transparent=False)

In [None]:
# One series per cut-off: number of countries whose day-over-day deaths
# change is at least that cut-off.
d_dict = {
    "%d Deaths" % cutoff: (epi_curve["Deaths"] >= cutoff).sum(axis=1)
    for cutoff in [1, 10, 100]
}
d = pd.DataFrame(d_dict)

# The y axis is capped at the maximum of the first column (the lowest cut-off).
y_top = d.max(axis=0).iloc[0]
ax = d.plot(
    title="Countries reporting deaths",
    figsize=(15,10),
    ylim=(0, y_top),
)

ax.grid(axis='y')

plt.savefig("countries_with_deaths_%d.png" % timestamp, transparent=False)

In [None]:
# Index of the countries whose Deaths column does not sum to zero over all dates.
deaths_sum_nonzero = epi_curve_totals["Deaths"].sum() != 0
countries_with_deaths = deaths_sum_nonzero.index[deaths_sum_nonzero]

In [None]:
# Countries whose Deaths column does not sum to zero over all dates.
deaths_sum_nonzero = epi_curve_totals["Deaths"].sum() != 0
countries_with_deaths = deaths_sum_nonzero.index[deaths_sum_nonzero]

# Grid geometry for this figure: three plots per row, 8x5 inches per subplot.
columns = 3
rows = math.ceil(epi_curve["Confirmed"][countries_with_deaths].shape[1] / columns)
wsize, hsize = columns * 8, rows * 5

# Ratio of deaths totals to confirmed totals per date, for those countries only.
deaths_totals = epi_curve_totals["Deaths"][countries_with_deaths]
confirmed_totals = epi_curve_totals["Confirmed"][countries_with_deaths]
cfr_vs_time = deaths_totals / confirmed_totals

axes = cfr_vs_time.plot(
    kind='bar',
    subplots=True,
    layout=(rows, columns),
    figsize=(wsize, hsize),
    sharex=False,
    title="Ratio of Deaths to Confirmed Cases",
    yticks=np.arange(0, 1, step=0.05),
)

# Bar plots have one tick per row, so fixed per-date labels are aligned.
ticklabels = list(cfr_vs_time.index.strftime('%b %d'))
for a in axes.ravel():
    a.xaxis.set_major_formatter(ticker.FixedFormatter(ticklabels))
    a.set_ylim(0, 1)
    a.grid(axis='y')

plt.tight_layout()
plt.savefig("deaths_confirmed_ratio_vs_time_%d.png" % timestamp, transparent=False)

# Final None keeps the cell from echoing a return value.
None

In [None]:
# Ratio of deaths totals to confirmed totals per date, for Iran only.
# (The grid-geometry variables copied from the previous cell were unused here
# and have been dropped.)
cfr_vs_time = epi_curve_totals["Deaths"]["Iran"] / (epi_curve_totals["Confirmed"]["Iran"])
ax = cfr_vs_time.plot(
    figsize=(15,10),
    kind='bar',
    title = "Ratio of Deaths to Confirmed Cases in Iran",
    yticks=np.arange(0, 1, step=0.05))

# Bar plots have one tick per row, so fixed per-date labels are aligned.
ticklabels = [item.strftime('%b %d') for item in cfr_vs_time.index]
ax.xaxis.set_major_formatter(ticker.FixedFormatter(ticklabels))
ax.set_ylim(0,1)
ax.grid(axis='y')

plt.tight_layout()
# Fixed: a stray double quote after `timestamp` made this line a SyntaxError.
plt.savefig("iran_deaths_confirmed_ratio_vs_time_%d.png" % timestamp, transparent=False)

# Final None keeps the cell from echoing a return value.
None

In [None]:
# Stacked bar chart of day-over-day confirmed changes for every country but China.
confirmed_ex_china = epi_curve["Confirmed"].drop(columns=["China"])
ax = confirmed_ex_china.plot(
    kind='bar',
    stacked=True,
    legend=False,
    figsize=(15,10),
    title="Cases Outside China",
)

# Bar plots have one tick per row, so fixed per-date labels are aligned.
ticklabels = list(epi_curve.index.strftime('%b %d'))
ax.xaxis.set_major_formatter(ticker.FixedFormatter(ticklabels))

plt.savefig("stacked_outside_china_%d.png" % timestamp, transparent=False)

# Final None keeps the cell from echoing a return value.
None

In [None]:
# Bar chart of day-over-day Deaths and Confirmed changes for a single country.
country = 'US'
country_frame = pd.DataFrame({
    "Deaths": epi_curve["Deaths"][country],
    "Confirmed": epi_curve["Confirmed"][country],
})
ax = country_frame.plot(kind='bar', figsize=(15,10), title=country)

# Bar plots have one tick per row, so fixed per-date labels are aligned.
ticklabels = list(epi_curve.index.strftime('%b %d'))
ax.xaxis.set_major_formatter(ticker.FixedFormatter(ticklabels))
ax.grid(axis='y')

# Spaces in the country name become underscores in the file name.
plt.savefig("%s_%d.png" % (country.replace(" ", "_"), timestamp), transparent=False)

In [None]:
# Save one Deaths/Confirmed bar chart per country.
# The tick labels depend only on the shared date index, so build them once
# instead of on every iteration.
ticklabels = [item.strftime('%b %d') for item in epi_curve.index]

for country in epi_curve["Confirmed"].columns:
    ax = pd.DataFrame(
        {"Deaths": epi_curve["Deaths"][country],
         "Confirmed":epi_curve["Confirmed"][country],
        }).plot(
            figsize=(15,10),
            kind='bar',
            title = country)

    ax.xaxis.set_major_formatter(ticker.FixedFormatter(ticklabels))
    ax.grid(axis='y')

    # Save and close this chart's own figure explicitly, so the loop neither
    # depends on pyplot's "current figure" nor accumulates open figures.
    fig = ax.get_figure()
    fig.savefig("%s_%d.png" % (country.replace(" ", "_"), timestamp), transparent=False)
    plt.close(fig)

In [None]:
from yattag import Doc

# Summary graphs to list at the top of the index page.
other_graphs = [
    "all_countries_%d.png" % timestamp,
    "stacked_outside_china_%d.png" % timestamp,
    "deaths_confirmed_ratio_%d.png" % timestamp,
    "deaths_confirmed_ratio_vs_time_%d.png" % timestamp,
    "countries_with_cases_%d.png" % timestamp,
    "countries_with_deaths_%d.png" % timestamp,
    "countries_with_increasing_cases_%d.png" % timestamp,
    "countries_with_decreasing_cases_%d.png" % timestamp,
    "doubling_time_%d.png" % timestamp,
    "day_over_day_percent_change_%d.png" % timestamp]

# Link only images that exist on disk: a listed graph that was not saved in
# this run would otherwise appear as a broken image on the page.
available_graphs = [g for g in other_graphs if os.path.exists(g)]

doc, tag, text = Doc().tagtext()

with tag('html'):
    with tag('body'):
        # Summary graphs: each image links to itself.
        for g in available_graphs:
            with tag('div', id=g):
                with tag('a', href=g):
                    doc.stag('img', src = g)
        # One section per country, using the same file-name scheme as the
        # cell that saves the per-country charts.
        for country in epi_curve["Confirmed"].columns:
            with tag('div', id=country):
                image_name = "%s_%d.png" % (country.replace(" ", "_"), timestamp)
                doc.stag('br')
                text(country)
                doc.stag('br')
                with tag('a', href = image_name):
                    doc.stag('img', src = image_name)

# The context manager closes the file even if the write fails; UTF-8 is set
# explicitly so non-ASCII country names do not depend on the platform default.
with open("index.html", "w", encoding="utf-8") as index_file:
    index_file.write(doc.getvalue())

doc.getvalue()