In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt 
import matplotlib as mpl 
import seaborn as sns
import matplotlib.ticker as tick
from IPython.core.display import display, HTML

import os
# Print the full path of every file found beneath the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Intro

Coronavirus disease 2019 (COVID-19) time series listing confirmed cases, reported deaths and reported recoveries. Data is disaggregated by country (and sometimes subregion). Coronavirus disease (COVID-19) is caused by the Severe acute respiratory syndrome Coronavirus 2 (SARS-CoV-2) and has had a worldwide effect. On March 11 2020, the World Health Organization (WHO) declared it a pandemic, pointing to the over 118,000 cases of the Coronavirus illness in over 110 countries and territories around the world at the time.

This dataset includes time series data tracking the number of people affected by COVID-19 worldwide, including:

- confirmed tested cases of Coronavirus infection
- the number of people who have reportedly died while sick with Coronavirus
- the number of people who have reportedly recovered from it

In this notebook I'm trying to visualize the current state of COVID-19 globally, and specifically the current situation in India. I hope you like my notebook!

## Note

If you find this notebook helpful, please feel free to upvote, and if you have any criticism or feedback, please don't hesitate to comment! Cheers! 😃


In [None]:
!pip install pywaffle
from pywaffle import Waffle

# Colors

In [None]:
# Hex colour palettes reused by every chart in the notebook.
colors_blue = ['#132C33', '#264D58', '#17869E', '#51C4D3', '#B4DBE9']
colors_dark = ['#1F1F1F', '#313131', '#636363', '#AEAEAE', '#DADADA']
colors_red = ['#331313', '#582626', '#9E1717', '#D35151', '#E9B4B4']
colors_mix = [
    '#17869E', '#264D58', '#179E66', '#D35151',
    '#E9DAB4', '#E9B4B4', '#D3B651', '#6351D3',
]
colors_div = ['#132C33', '#17869E', '#DADADA', '#D35151', '#331313']

# Render one swatch strip per palette, in declaration order.
for palette in (colors_blue, colors_dark, colors_red, colors_mix, colors_div):
    sns.palplot(palette)

In [None]:
# Notebook-wide matplotlib defaults, applied in a single update call.
primary_text_color = colors_dark[0]

mpl.rcParams.update({
    # Hide the right, top and left axes spines.
    'axes.spines.right': False,
    'axes.spines.top': False,
    'axes.spines.left': False,

    # Titles and axis labels use the darkest grey.
    'axes.titlecolor': primary_text_color,
    'axes.labelcolor': primary_text_color,

    # Tick colour and label size.
    'xtick.color': primary_text_color,
    'ytick.color': primary_text_color,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,

    # Legend frame colour.
    'legend.edgecolor': primary_text_color,
})

In [None]:
def reformat_large_tick_values(tick_val, pos):
    """
    Abbreviate a numeric tick value for display on a matplotlib axis.

    Values whose magnitude reaches a thousand, a million or a billion are
    scaled, rounded to one decimal place and suffixed with K, M or B
    (4500 -> "4.5K", -2500000 -> "-2.5M"). A trailing ".0" is dropped so that
    4000 becomes "4K" rather than "4.0K". Smaller values are only rounded to
    one decimal place.

    Parameters
    ----------
    tick_val : int or float
        The tick value to format.
    pos : int
        Tick position; part of the ``FuncFormatter`` callback signature and
        not used here.

    Returns
    -------
    str
        The abbreviated label.
    """
    # Compare on the magnitude so negative ticks are abbreviated too.
    magnitude = abs(tick_val)

    if magnitude >= 1000000000:
        scaled, suffix = tick_val / 1000000000, 'B'
    elif magnitude >= 1000000:
        scaled, suffix = tick_val / 1000000, 'M'
    elif magnitude >= 1000:
        scaled, suffix = tick_val / 1000, 'K'
    else:
        # Also reached for NaN, for which every comparison above is False.
        scaled, suffix = tick_val, ''

    number_text = str(round(scaled, 1))

    # Keep 4.5 as is but turn 4.0 into 4. Only a literal trailing ".0" is
    # removed, so exponent-form strings such as "1.05e+17" are left intact.
    if number_text.endswith('.0'):
        number_text = number_text[:-2]

    return number_text + suffix

# Worldwide Analysis

In [None]:
# Load the worldwide aggregate time series and derive the daily metrics
# used by the global charts below.
data = pd.read_csv("/kaggle/input/covid-19/data/worldwide-aggregate.csv")
data['Date'] = pd.to_datetime(data['Date'])

# Row-over-row change of the cumulative counters (NaN on the first row).
data['New Case'] = data['Confirmed'].diff()
data['New Deaths'] = data['Deaths'].diff()

# 20-row moving averages. The window matches the "MA 20" column names and the
# "Moving Average 20 days" legend labels (it was 30 before). Rolling over the
# single numeric column avoids averaging the non-numeric Date column.
MA_WINDOW = 20
data['New Case MA 20'] = data['New Case'].rolling(MA_WINDOW).mean()
data['New Deaths MA 20'] = data['New Deaths'].rolling(MA_WINDOW).mean()

# Cumulative deaths as a percentage of cumulative confirmed cases.
data['Death Rate Daily'] = data['Deaths'] / data['Confirmed'] * 100

# Global Daily Cases

In [None]:
# Global daily new cases with their moving average, plus a headline block
# (title, total confirmed count, caption) drawn above the axes.
fig, ax = plt.subplots(figsize=(16, 8))

line = ax.plot(
    data.Date, data['New Case'],
    color=colors_dark[0],
    alpha=0.3,
    label='Daily Cases',
)
MA = ax.plot(
    data.Date, data['New Case MA 20'],
    linestyle='--',
    color=colors_blue[2],
    linewidth=2,
    label='Moving Average 20 days'
)

# Axis limits are captured after plotting; the headline texts are positioned
# relative to them in data coordinates.
xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()
ax.yaxis.set_label_position("right")
ax.yaxis.tick_right()
ax.yaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))

ax.grid(axis='y', alpha=0.3)
ax.set_axisbelow(True)
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1), ncol=5, borderpad=1, frameon=False, fontsize=12)
ax.set_ylabel("Cases", fontsize=14, labelpad=10, fontweight='bold', color=colors_dark[2])

ax.text(
    s="Global COVID-19 Cases",
    fontsize=24,
    color=colors_dark[1],
    x=xmin, y=ymax*1.2
)
ax.text(
    s="{:,}".format(data['Confirmed'].max()),
    fontsize=48,
    fontweight='bold',
    color=colors_blue[2],
    x=xmin, y=ymax*1.06
)

ax.text(
    s="Total Confirmed Cases",
    fontsize=18,
    color=colors_dark[2],
    x=xmin, y=ymax*1
)
ax.xaxis.set_major_formatter(mpl.dates.DateFormatter("%y-%m-%d"))

# Lay out the figure before showing it: once plt.show() has rendered the
# figure, a later plt.tight_layout() no longer affects this chart.
fig.tight_layout()
plt.show()

# Global Daily Deaths

In [None]:
# Global daily new deaths with their moving average, plus a headline block
# (title, total death count, caption) drawn above the axes.
fig, ax = plt.subplots(figsize=(16, 8))

line = ax.plot(
    data.Date, data['New Deaths'],
    color=colors_dark[0],
    alpha=0.3,
    label='Daily Deaths',
)
MA = ax.plot(
    data.Date, data['New Deaths MA 20'],
    linestyle='--',
    color=colors_red[3],
    linewidth=2,
    label='Moving Average 20 days'
)

# Axis limits are captured after plotting; the headline texts are positioned
# relative to them in data coordinates.
xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()
ax.yaxis.set_label_position("right")
ax.yaxis.tick_right()
ax.yaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))

ax.grid(axis='y', alpha=0.3)
ax.set_axisbelow(True)
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1), ncol=5, borderpad=1, frameon=False, fontsize=12)
ax.set_ylabel("Deaths", fontsize=14, labelpad=10, fontweight='bold', color=colors_dark[2])

ax.text(
    s="Global COVID-19 Deaths",
    fontsize=24,
    color=colors_dark[1],
    x=xmin, y=ymax*1.2
)
ax.text(
    s="{:,}".format(data['Deaths'].max()),
    fontsize=48,
    fontweight='bold',
    color=colors_red[3],
    x=xmin, y=ymax*1.06
)

ax.text(
    s="Total Deaths",
    fontsize=18,
    color=colors_dark[2],
    x=xmin, y=ymax*1
)
ax.xaxis.set_major_formatter(mpl.dates.DateFormatter("%y-%m-%d"))

# Lay out the figure once, before showing it: a plt.tight_layout() issued
# after plt.show() no longer affects this chart.
fig.tight_layout()
plt.show()

# Mortality Rate

In [None]:
# Headline sentence built from the last row of the worldwide frame:
# cumulative confirmed cases, cumulative deaths and the resulting death rate.
df = data.iloc[-1]
death_rate = df['Deaths'] / df['Confirmed'] * 100

# The style attribute takes a single colour. The previous template had an
# extra "{}" placeholder that rendered as the invalid CSS "color:#1F1F1F;#1F1F1F".
display(HTML(
    '''
    <h1 style='padding:50px; font-weight:400; color:{}'>
        Out of 
        <span style="color:{}; font-weight:500">{:,}</span>
        confirmed cases globaly,
        <span style="color:{}; font-weight:500">{:,}</span>
        dies<br/>
        making the death rate of
        <span style="color:{}; font-weight:500">{:.2f}%</span>.
    </h1>
    '''.format(
        colors_dark[0], 
        colors_blue[2], 
        df['Confirmed'],
        colors_red[3],
        df['Deaths'],
        colors_red[3], 
        death_rate
    )
))

In [None]:
# Waffle chart of the global death rate on a 5 x 20 grid (100 icons).
df = data.iloc[-1]
death_rate = df['Deaths'] / df['Confirmed'] * 100

# Whole-number rate used in the headline, so the text follows the data
# instead of a hard-coded "2".
deaths_per_100 = int(round(death_rate))
waffle_labels = ["Not Dead", "Dead"]

fig = plt.figure(
    FigureClass=Waffle, 
    rows=5, 
    columns=20, 
    values=[100-death_rate, death_rate],
    figsize=(16, 8),
    colors=[colors_dark[-1], colors_red[3]],
    icons='child',
    labels=waffle_labels,
    # One legend column per category (previously len(data), the row count of
    # the whole time series).
    legend={'loc': 'lower left', 'bbox_to_anchor': (0.4, -0.2), 'ncol': len(waffle_labels), 'framealpha': 0, 'fontsize': 12},
)

ax = fig.gca()
xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()
ax.text(
    s="{} out of 100 People\nDied On Covid".format(deaths_per_100),
    fontsize=32,
    fontweight='bold',
    color=colors_dark[1],
    x=xmin, y=ymax*1.25
)

ax.text(
    s="The death rate is equivalent to: {} deaths out of 100 people\nit may seems harmless, but at a higher scale it is highly deady".format(deaths_per_100),
    fontsize=14,
    color=colors_dark[2],
    x=xmin, y=ymax*1.1
)

# No plt.tight_layout() after plt.show(): by then it no longer affects this
# figure. NOTE(review): PyWaffle is understood to apply tight_layout itself by
# default (tight=True) - confirm against the installed version.
plt.show()

# Death Rate Over Time

In [None]:
# Area chart of the cumulative death rate (deaths / confirmed * 100) over time,
# with an annotation block placed towards the right of the axes.
fig, ax = plt.subplots(figsize=(16, 8))

line = ax.fill_between(
    data.Date, data['Death Rate Daily'],
    color=colors_red[2],
    label='Daily Death rate',
    alpha=0.7
)

xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()
ax.yaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))

ax.grid(axis='y', alpha=0.3)
ax.set_axisbelow(True)
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1), ncol=5, borderpad=1, frameon=False, fontsize=12)
ax.set_ylabel("Death rate", fontsize=14, labelpad=10, fontweight='bold', color=colors_dark[2])

# The annotation is anchored 250 x-axis units left of the right limit.
# NOTE(review): on a date axis these units are presumably days, so the
# placement depends on the length of the series - confirm it still fits.
ax.text(
    s="Has Covid Slowed Down?",
    fontsize=32,
    color=colors_dark[1],
    x=xmax-250, y=ymax*0.93,
    fontweight='bold'
)

ax.text(
    s="With the death rate decreasing overtime\nwe can tell that it has slowed down.\n\nBut looking from past experience, it might go up again\nif no proper handling is being done",
    fontsize=14,
    color=colors_dark[2],
    x=xmax-250, y=ymax*0.7
)
ax.xaxis.set_major_formatter(mpl.dates.DateFormatter("%y-%m-%d"))

# Lay out the figure before showing it: a plt.tight_layout() issued after
# plt.show() no longer affects this chart.
fig.tight_layout()
plt.show()

# Recent Events | India Covid Tsunami

In [None]:
# Load the per-country time series. NOTE: this rebinds `data`, replacing the
# worldwide frame used by the cells above.
data = pd.read_csv("/kaggle/input/covid-19/data/countries-aggregated.csv")
data['Date'] = pd.to_datetime(data['Date'])

# Daily new cases must be differenced within each country. A plain shift(1)
# over the stacked frame subtracts another country's total wherever adjacent
# rows belong to different countries. Sorting by date first makes the result
# independent of the file's row order; the assignment aligns on the index, so
# the order of `data` itself is unchanged.
data['New Case'] = (
    data.sort_values('Date')
        .groupby('Country')['Confirmed']
        .diff()
)

In [None]:
# Keep India's rows only, leaving out the final row of that selection.
india_rows = data.loc[data['Country'] == 'India']
df = india_rows.iloc[:-1]

In [None]:
# India daily new cases, split at 1 Apr 2021 so the spike is highlighted, with
# a headline showing the percentage growth of cumulative cases since the
# 31 Mar 2021 baseline.
fig, ax = plt.subplots(figsize=(16, 8))

spike_start = pd.Timestamp('2021-04-01')
df1 = df[df['Date'] < spike_start]
df2 = df[df['Date'] >= spike_start]

# Baseline: cumulative confirmed cases on the last day before the spike.
low = df[df['Date'] == '2021-03-31']['Confirmed'].values[0]
# Latest cumulative total. 'Confirmed' is already cumulative, so summing it
# over the period (as was done before) has no meaning.
high = df2['Confirmed'].iloc[-1]
# Percentage increase; multiplied by 100 because it is printed with a "%" sign.
rate = (high-low) / low * 100


line1 = ax.plot(
    df1.Date, df1['New Case'],
    color=colors_dark[0],
    alpha=0.3,
    label='Daily Cases',
)

# Distinct label so the two legend entries can be told apart.
line2 = ax.plot(
    df2.Date, df2['New Case'],
    color=colors_red[3],
    label='Daily Cases (since 1 Apr 2021)',
)

vline = ax.axvline(
    x=spike_start,
    color=colors_dark[3],
    linestyle='--'
)

xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()
ax.yaxis.set_label_position("right")
ax.yaxis.tick_right()
ax.yaxis.set_major_formatter(tick.FuncFormatter(reformat_large_tick_values))

ax.grid(axis='y', alpha=0.3)
ax.set_axisbelow(True)
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1), ncol=5, borderpad=1, frameon=False, fontsize=12)
ax.set_ylabel("Cases", fontsize=14, labelpad=10, fontweight='bold', color=colors_dark[2])

ax.text(
    s="India COVID-19 Tsunami",
    fontsize=24,
    color=colors_dark[1],
    x=xmin, y=ymax*1.2
)
ax.text(
    s="{:.2f}%".format(rate),
    fontsize=48,
    fontweight='bold',
    color=colors_red[3],
    x=xmin, y=ymax*1.06
)

# The caption names the actual baseline date (it previously said 30 Apr 2021).
ax.text(
    s="Case Increase Since 31 Mar 2021",
    fontsize=18,
    color=colors_dark[2],
    x=xmin, y=ymax*1
)

ax.xaxis.set_major_formatter(mpl.dates.DateFormatter("%y-%m-%d"))

# Lay out the figure before showing it: a plt.tight_layout() issued after
# plt.show() no longer affects this chart.
fig.tight_layout()
plt.show()

In [None]:
# Total new cases in India since the start of the spike. The cutoff matches
# the "01 Apr 2021" / "start of April 2021" wording below (it was 2021-03-01).
# Only the 'New Case' column is summed, so the date and country columns are
# never part of the aggregation.
new_case = df.loc[df.Date >= "2021-04-01", 'New Case'].sum()

# The style attribute takes a single colour. The previous template had an
# extra "{}" placeholder that rendered as the invalid CSS "color:#1F1F1F;#1F1F1F".
display(HTML(
    '''
    <h1 style='padding:1px 50px; font-weight:400; color:{}'>
        During the spike (01 Apr 2021 - today) daily <br/>
        cases continue to <span style='font-weight: 500'>increase exponentially</span> <br/> 
        with total of <span style="color:{}; font-weight:500">{:,}</span> new cases since the start of April 2021
    </h1>
    '''.format(
        colors_dark[0], 
        colors_red[3],
        int(new_case),

    )
))

## To be continued