### 💉 COVID-19 Cases 🦠 Visualization using Bar Chart Race

### Final result of this notebook: 
A video that displays the 10 countries with the most confirmed cases in the world over the interval 01-2020 to 05-2021.

![](https://github.com/MhmdSyd/Bar_Chart_Race_Gif/blob/main/COVID_Sub.gif?raw=true)

### This notebook is divided into 2 main parts:

> EDA

> Visualization

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt

import seaborn as sns 

from datetime import datetime

# display video of bar chart for all data
from IPython.display import Video

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import warnings
# Suppress every warning for the rest of the session (presumably to keep the
# cell output uncluttered).
# NOTE(review): this also hides deprecation notices from pandas/matplotlib that
# may point at real problems; consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

### Install bar_chart_race and ffmpeg, which are needed for the visualization

In [None]:
! pip install bar_chart_race

In [None]:
# ! pip install ffmpeg

In [None]:
# ! conda install -c conda-forge ffmpeg 

In [None]:
# import the bar_chart_race package, which is needed for the visualization.
import bar_chart_race as bcr

## EDA

In [None]:
# Load the dataset with pandas, using the "SNo" column as the row index,
# then display the first 5 rows.
covid_df = pd.read_csv(
    "../input/novel-corona-virus-2019-dataset/covid_19_data.csv",
    index_col="SNo",
)
covid_df.head()

In [None]:
# Count the rows recorded for each observation date; unequal counts indicate
# that not every country has data over the same interval.
covid_df["ObservationDate"].value_counts()

In [None]:
# Collapse all rows that share a country and an observation date into one row
# holding the summed Confirmed, Deaths and Recovered counts. The grouping keys
# stay as ordinary columns, so the result has the flat column layout
# ["Country/Region", "ObservationDate", "Confirmed", "Deaths", "Recovered"].
data = covid_df.groupby(["Country/Region", "ObservationDate"], as_index=False)[
    ["Confirmed", "Deaths", "Recovered"]
].sum()
# parse the ObservationDate strings into datetime values.
data["ObservationDate"] = pd.to_datetime(data["ObservationDate"])
# order the rows chronologically.
data = data.sort_values("ObservationDate")

In [None]:
# Show the date and confirmed-case columns for every row belonging to Egypt.
is_egypt = data["Country/Region"] == "Egypt"
data.loc[is_egypt, ["ObservationDate", "Confirmed"]]

In [None]:
# Reshape the long table (one row per country and date) into a wide one:
# ObservationDate as the index, one column per country, each cell holding that
# day's confirmed-case count. A single pivot replaces the previous loop, which
# performed one left-merge per country and copied the growing frame every time.
df_covid = data.pivot(index="ObservationDate", columns="Country/Region", values="Confirmed")

# pivot sorts the columns alphabetically; restore the order in which countries
# first appear in the date-sorted data so the column order matches the old loop.
df_covid = df_covid.reindex(columns=data["Country/Region"].unique())
df_covid.columns.name = None

# a country with no row for a given date has no value there; replace NaN by zero.
# NOTE(review): Confirmed looks cumulative, so a gap inside a country's series
# becomes a drop to 0 in the animation - confirm whether a forward-fill is wanted.
df_covid = df_covid.fillna(0)

# make sure the index is a DatetimeIndex (a no-op when it already is one).
df_covid.index = pd.to_datetime(df_covid.index)

# the "Others" column is not needed, so drop it; tolerate datasets without it
# instead of raising KeyError.
df_covid = df_covid.drop(columns="Others", errors="ignore")

# display last 5 rows from data.
df_covid.tail()

## Visualization

In [None]:
# Caption callback for the bar chart race: reports the total of all values in the current frame.
def summary(values, ranks):
    """Build the text annotation showing the summed cases for one frame.

    Args:
        values: Object exposing ``.sum()`` (e.g. a pandas Series) with the
            values of the current period.
        ranks: Unused; accepted because the callback is invoked with it.

    Returns:
        dict: Text properties with keys ``x``, ``y``, ``s``, ``ha``, ``size``
        and ``color``; ``s`` is the total rounded to the nearest hundred and
        formatted with thousands separators.
    """
    world_total = int(round(values.sum(), -2))
    caption = 'Total Cases - {:,.0f}'.format(world_total)
    text_props = dict(x=.99, y=.05, s=caption, ha='right')
    text_props['size'] = 10
    text_props['color'] = '#733f6e'
    return text_props

In [None]:
# Render a bar chart race animation; needs two parameters: the pandas data and the output file name.
def create_sub_bar_chart_race(data,file_name):
    """Render `data` as an animated bar chart race and save it to `file_name`.

    Args:
        data: Wide pandas DataFrame passed to ``bcr.bar_chart_race`` as ``df``
            (in this notebook: dates as index, one column per country).
        file_name: Output path handed to ``bcr.bar_chart_race`` as
            ``filename``; this notebook uses ``.gif`` and ``.mp4`` targets.

    Returns:
        None. The animation is written to `file_name`.
    """
    # matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*" and
    # 3.8 removed the old names; pick whichever this installation provides.
    style_name = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
    plt.style.use(style_name)

    # single source of truth for the canvas geometry used by both the figure
    # and the bar_chart_race call.
    figsize = (10, 7)
    dpi = 120

    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
    ax.set_facecolor("#f2f0f0")
    ax.set_title('COVID-19 Cases Race by Country',
                 fontdict={'family': 'Helvetica', 'size': '20', 'color': '#148585'})

    try:
        bcr.bar_chart_race(
            df=data,
            filename=file_name,
            n_bars=10,
            fig=fig,
            orientation='h',
            fixed_order=False,
            bar_size=.85,
            shared_fontdict={'family': 'Helvetica', 'weight': 'normal', 'color': '#213030'},
            period_label={'x': .97, 'y': .15, 'ha': 'right', 'va': 'center',
                          'color': "#b01296", "size": 14, "weight": "semibold"},
            period_fmt='%b %d, %Y',
            figsize=figsize,
            dpi=dpi,
            period_summary_func=summary,
            bar_label_size=8,
            tick_label_size=5,
            steps_per_period=20,
            period_length=400,
            interpolate_period=True,
            filter_column_colors=True,
            bar_kwargs={'alpha': .8, "lw": 0},
        )
    finally:
        # always release this specific figure, even when rendering fails
        # (e.g. the writer for the requested format is unavailable).
        plt.close(fig)

In [None]:
# Render a GIF of the bar chart race for a subset of the data (rows 50 to 79).
create_sub_bar_chart_race(
    data=df_covid.iloc[50:80],
    file_name="/kaggle/working/COVID_Sub.gif",
)

### COVID-19 sub bar_chart_race animator gif display:
![](./COVID_Sub.gif "COVID.gif")

In [None]:
# Record the current wall-clock time (HH:MM:SS) and print it as the start of video processing.
current_time = f"{datetime.now():%H:%M:%S}"
print("Start Time of Processing =", current_time)

In [None]:
# Render an mp4 video of the bar chart race for the whole dataset.
create_sub_bar_chart_race(
    data=df_covid,
    file_name="/kaggle/working/COVID_Full.mp4",
)

In [None]:
# Record the current wall-clock time (HH:MM:SS) and print it as the end of processing.
current_time = f"{datetime.now():%H:%M:%S}"
print("End Time of Processing =", current_time)

In [None]:
# Display the rendered COVID-19 bar chart race video inline, with a width of 600.
# The relative path points at the file written to the working directory above.
Video("./COVID_Full.mp4",width=600)