# Realtime Covid-19 Dataset Analysis

In [1]:
import pandas as pd
import numpy as np
import plotly.offline as pyo
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt

### Assigning the links for various _covid-19 datasets_

In [2]:
confirmed_cases = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
death_cases = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
recovery_cases = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"
country_cases = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv"
country_cumulative = "https://covid19.who.int/WHO-COVID-19-global-table-data.csv"

### Creating the dataframes of the datasets and displaying dimension of each dataset

In [3]:
confirmed_df = pd.read_csv(confirmed_cases)
print(confirmed_df.shape)
deaths_df = pd.read_csv(death_cases)
print(deaths_df.shape)
recovery_df = pd.read_csv(recovery_cases)
print(recovery_df.shape)
cases_country_df = pd.read_csv(country_cases)
print(cases_country_df.shape)
country_cumulative_df = pd.read_csv(country_cumulative)
print(country_cumulative_df.shape)

(289, 1055)
(289, 1055)
(274, 1055)
(201, 16)
(238, 12)


### The first 5 records of the _confirmed cases_ dataframe

In [4]:
confirmed_df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,11/28/22,11/29/22,11/30/22,12/1/22,12/2/22,12/3/22,12/4/22,12/5/22,12/6/22,12/7/22
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,205802,205830,205907,206073,206133,206145,206206,206273,206331,206414
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,333330,333338,333343,333360,333381,333391,333408,333413,333455,333472
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,271079,271082,271090,271096,271100,271102,271107,271113,271122,271128
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,46824,46824,47219,47219,47219,47219,47219,47219,47219,47446
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,104491,104491,104676,104676,104676,104676,104676,104750,104750,104808


In [5]:
cases_country_df.columns

Index(['Country_Region', 'Last_Update', 'Lat', 'Long_', 'Confirmed', 'Deaths',
       'Recovered', 'Active', 'Incident_Rate', 'People_Tested',
       'People_Hospitalized', 'Mortality_Rate', 'UID', 'ISO3', 'Cases_28_Days',
       'Deaths_28_Days'],
      dtype='object')

### Total _number of records_ in the confirmed cases dataframe

In [6]:
confirmed_df['Country/Region'].nunique()

201

In [7]:
confirmed_df = confirmed_df.replace(np.nan, '', regex=True)
deaths_df = deaths_df.replace(np.nan, '', regex=True)
recovery_df = recovery_df.replace(np.nan, '', regex=True)
cases_country_df = cases_country_df.replace(np.nan, '', regex=True)
country_cumulative_df = country_cumulative_df.replace(np.nan, '', regex=True)

In [8]:
global_data = cases_country_df.copy().drop(['Lat','Long_','Country_Region','Last_Update','Mortality_Rate','UID','ISO3','People_Tested','People_Hospitalized'], axis = 1)
global_summary = pd.DataFrame(global_data.sum()).transpose()


Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.



In [9]:
global_summary

Unnamed: 0,Confirmed,Deaths,Recovered,Active,Cases_28_Days,Deaths_28_Days
0,647326419,6647904,,,12852673,39060


In [10]:
confirmed_ts = confirmed_df.copy().drop(['Lat','Long','Country/Region','Province/State'],axis=1)
confirmed_ts_summary = confirmed_ts.sum()

In [11]:
confirmed_ts_summary

1/22/20          557
1/23/20          657
1/24/20          944
1/25/20         1437
1/26/20         2120
             ...    
12/3/22    645042169
12/4/22    645298233
12/5/22    645816104
12/6/22    646398544
12/7/22    647024269
Length: 1051, dtype: int64

## Confirmed Covid-19 Cases Vs Time (_Global_)

In [12]:
fig1 = go.Figure(data=go.Scatter(x=confirmed_ts_summary.index, y = confirmed_ts_summary.values, mode='lines+markers'))
fig1.update_layout(title='Total CoronaVirus Confirmed Cases (Globally)', yaxis_title='Confirmed Cases', xaxis_tickangle=315)
fig1.show()

In [13]:
color_arr = px.colors.qualitative.Dark24

In [14]:
def draw_plot(ts_array, ts_label, title, colors, mode_size, line_size, x_axis_title, y_axis_title, tickangle=0, y_axis_type='', additional_annotations = []):
    fig = go.Figure()
    
    for index, ts in enumerate(ts_array):
        fig.add_trace(go.Scatter(x=ts.index,
                                y=ts.values,
                                name=ts_label[index],
                                line=dict(color=colors[index], width=line_size[index]),connectgaps=True))
    
    x_axis_dict = dict(showline=True,
                      showgrid=True,
                      showticklabels=True,
                      linecolor='rgb(204,204,204)',
                      linewidth=2,
                      ticks='outside',
                      tickfont=dict(family='Arial',size=12,color='rgb(82,82,82)'))
    if x_axis_title:
        x_axis_dict['title'] = x_axis_title

    if tickangle > 0:
        x_axis_dict['tickangle'] = tickangle

    y_axis_dict = dict(showline=True,
                      showgrid=True,
                      showticklabels=True,
                      linecolor='rgb(204,204,204)',
                      linewidth=2,)
    if y_axis_type := "":
        y_axis_dict['type'] = y_axis_type
    
    if y_axis_title:
        y_axis_dict['title'] = y_axis_title

    fig.update_layout(xaxis=x_axis_dict,
                     yaxis=y_axis_dict,
                     autosize=True,
                     margin=dict(autoexpand=True,l=100,r=20,t=110,),
                     showlegend=True
                     )
    annotations = []
    
    annotations.append(dict(xref='paper',yref='paper',x=0.0,y=1.05,xanchor='left',yanchor='bottom',
                           text = title,
                           font = dict(family='Arial',size=16,color='rgb(37,37,37)'),showarrow=False))
    
    if(len(additional_annotations) > 0):
        annotations.append(additional_annotations)
    
    fig.update_layout(annotations=annotations)
    
    return fig

In [15]:
confirmed_agg_ts = confirmed_df.copy().drop(['Lat','Long','Country/Region','Province/State'],axis=1).sum()
death_agg_ts = deaths_df.copy().drop(['Lat','Long','Country/Region','Province/State'],axis=1).sum()
recovered_agg_ts = recovery_df.copy().drop(['Lat','Long','Country/Region','Province/State'],axis=1).sum()

active_agg_ts = pd.Series(
    data=np.array([x1-x2-x3 for (x1,x2,x3) in zip(confirmed_agg_ts.values,death_agg_ts.values,recovered_agg_ts.values)]),
    index=confirmed_agg_ts.index)

## Covid-19 Case studies - Comparing the relationship between the number of confirmed, active, recovered and deaths due to Covid-19

In [16]:
ts_array = [confirmed_agg_ts, active_agg_ts, recovered_agg_ts, death_agg_ts]
labels = ['Confirmed', 'Active', 'Recovered', 'Deaths']
colors = [color_arr[5], color_arr[0], color_arr[2], color_arr[3]]
mode_size = [8,8,12,8]
line_size = [2,2,4,2]

fig2 = draw_plot(ts_array = ts_array,
                ts_label = labels,
                title = 'Covid-19 Case Status',
                colors = colors, mode_size = mode_size,
                line_size = line_size,
                x_axis_title = "Date",
                y_axis_title = "Case Count",
                tickangle = 315,
                y_axis_type = '', additional_annotations = [])
fig2.show()

In [17]:
cases_country_df.copy().drop(
['Lat','Long_','Last_Update','People_Tested','People_Hospitalized','UID', 'ISO3'],axis=1).sort_values(
'Confirmed',ascending=False).reset_index(drop=True).style.bar(align="left",width=98,color='#d65f5f')

Unnamed: 0,Country_Region,Confirmed,Deaths,Recovered,Active,Incident_Rate,Mortality_Rate,Cases_28_Days,Deaths_28_Days
0,US,99269099,1083641,,,30130.275577,1.09162,1311648,9177
1,India,44674190,530647,,,3237.249858,1.187816,15120,138
2,France,38461021,160495,,,58944.235314,0.417293,1199657,1837
3,Germany,36690235,158684,,,44122.688139,0.432497,718913,3497
4,Brazil,35396191,690229,,,16652.375525,1.950009,540699,1834
5,"Korea, South",27548821,30908,,,53733.684424,0.112194,1511801,1377
6,Japan,25709940,51070,,,20327.84631,0.198639,2674070,3485
7,Italy,24488080,181733,,,40501.719531,0.742128,846069,2297
8,United Kingdom,24281031,213508,,,35767.359351,0.87932,96843,345
9,Russia,21336448,384546,,,14620.568713,1.802296,156416,1604


### The columns of the cumulative collection of country dataframe

In [18]:
print(country_cumulative_df.columns)

Index(['Name', 'WHO Region', 'Cases - cumulative total',
       'Cases - cumulative total per 100000 population',
       'Cases - newly reported in last 7 days',
       'Cases - newly reported in last 7 days per 100000 population',
       'Cases - newly reported in last 24 hours', 'Deaths - cumulative total',
       'Deaths - cumulative total per 100000 population',
       'Deaths - newly reported in last 7 days',
       'Deaths - newly reported in last 7 days per 100000 population',
       'Deaths - newly reported in last 24 hours'],
      dtype='object')


In [19]:
country_cumulative_df.head()

Unnamed: 0,Name,WHO Region,Cases - cumulative total,Cases - cumulative total per 100000 population,Cases - newly reported in last 7 days,Cases - newly reported in last 7 days per 100000 population,Cases - newly reported in last 24 hours,Deaths - cumulative total,Deaths - cumulative total per 100000 population,Deaths - newly reported in last 7 days,Deaths - newly reported in last 7 days per 100000 population,Deaths - newly reported in last 24 hours
Global,,642924560,8248.396062,2853171,36.604737,304100,6625029,84.995762,7424,0.095246,624,
United States of America,Americas,97618392,29491.725,296333,89.526,0,1071245,323.636,1744,0.527,0,
India,South-East Asia,44674190,3237.25,1552,0.112,241,530647,38.453,25,0.002,9,
France,Europe,37180489,57166.137,410382,630.975,75728,155836,239.603,504,0.775,93,
Germany,Europe,36690235,44116.492,178570,214.713,28115,158684,190.802,125,0.15,31,


### Refining the dataframe

In [20]:
country_cumulative_df = country_cumulative_df.drop(['WHO Region','Cases - cumulative total per 100000 population','Cases - newly reported in last 7 days per 100000 population','Deaths - cumulative total per 100000 population','Deaths - newly reported in last 7 days','Deaths - newly reported in last 7 days per 100000 population'],axis=1)
country_cumulative_df = country_cumulative_df.set_index('Name')
country_cumulative_df['Recovered'] = country_cumulative_df['Cases - cumulative total'].astype(int) - country_cumulative_df['Deaths - cumulative total'].astype(int)
country_cumulative_df = country_cumulative_df.drop(['Cases - cumulative total', 'Deaths - newly reported in last 24 hours'], axis=1)

ValueError: ignored

### Countries available in our dataset

In [None]:
print("Countries available in the Dataset")
country_cumulative_df.copy().drop(
['Cases - newly reported in last 7 days','Cases - newly reported in last 24 hours','Deaths - cumulative total','Recovered'],axis=1).sort_values('Name',ascending=True).reset_index(drop=False).style.bar(align="left",width=80)

### Getting the country name from the user

In [None]:
country = input("Enter country name : ").title()
is_country_exist = True
if 'Of' in country:
    country = country.replace('Of','of')
try:
    country_cumulative_df.loc[[country]]
except:
    print("Sorry, country not found!")
    is_country_exist = False

## Visualization for the number of recovered, deaths, newly reported in last 24 hours and newly reported in last 7 days due to Covid-19

In [None]:
if(is_country_exist):
    recovered = country_cumulative_df.at[country,'Recovered']
    deaths = country_cumulative_df.at[country,'Deaths - cumulative total']
    new24 = country_cumulative_df.at[country, 'Cases - newly reported in last 24 hours']
    new7 = country_cumulative_df.at[country, 'Cases - newly reported in last 7 days']
    fig3 = plt.figure(figsize=(8,8))
    font = {'family': 'Arial',
            'weight': 'bold',
            'size'  : 12
           }
    plt.rc('font', **font)
    plt.rcParams.update({'text.color' : "black", 'axes.labelcolor' : "black"})
    plt.pie([recovered, deaths, new24, new7],
            labels = ['Recovered','Deaths','New cases - 24hrs', 'New cases - 7days'],
            colors = ['lightgreen','red','pink','orange'],
            explode = [0.2,0.02,0.2,0.1],
            startangle = 180,
            autopct = '%1.1f%%'
           )
    plt.title(country)
    plt.legend()
    plt.show()
else:
    print("Please enter a valid country name!")