In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.express as px # plotly express
%matplotlib inline 
import pycountry
from geopy.geocoders import Nominatim

# Input data files are available in the "../input/" directory.

import os
# Candidate data roots, in priority order: the Kaggle input mount first, then a
# relative local folder.
file_input=['/kaggle/input','../../../datasets/extracts/']
# Use the first candidate that actually exists instead of a hard-coded index, so
# the notebook runs unchanged in either environment. If none exists, fall back to
# the last entry: os.walk() yields nothing for a missing path, leaving `files` empty.
data_root=next((path for path in file_input if os.path.isdir(path)),file_input[-1])
# Map bare file name -> full path for every file found under the chosen root.
# If the same file name occurs in several sub-directories, the last one walked wins.
files={}
for dirname, _, filenames in os.walk(data_root):
    for filename in filenames:
        files[filename]=os.path.join(dirname, filename)
        print(filename)

# Any results you write to the current directory are saved as output.

.DS_Store
countryLockdowndatesJHUMatch.csv
countryLockdowndates.csv
COVID19_open_line_list.csv
time_series_covid_19_confirmed_US.csv
time_series_covid_19_recovered.csv
time_series_covid_19_deaths_US.csv
covid_19_data.csv
COVID19_line_list_data.csv
time_series_covid_19_deaths.csv
time_series_covid_19_confirmed.csv


In [2]:
# Column names as variables for ease-of-use
country_col='Country/Region'
confirmed_col='Confirmed Cases'

# Lockdown dates: parse the day/month/year text in 'Date' into a real datetime
# column and order the rows chronologically.
lockdown_df=(
    pd.read_csv(files['countryLockdowndates.csv'])
    .assign(**{'LockDown Date': lambda frame: pd.to_datetime(frame['Date'],format='%d/%m/%Y')})
    .sort_values('LockDown Date')
)

# Confirmed-case time series (one column per reporting date).
df=pd.read_csv(files['time_series_covid_19_confirmed.csv'])

# Pre-processing to remove negative data, if exists: every column whose name
# contains '/20' is treated as a daily count and floored at zero.
daily_count_cols=df.columns[df.columns.str.contains('/20')]
df[daily_count_cols]=df[daily_count_cols].clip(lower=0)

df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/17/20,4/18/20,4/19/20,4/20/20,4/21/20,4/22/20,4/23/20,4/24/20,4/25/20,4/26/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,906,933,996,1026,1092,1176,1279,1351,1463,1531
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,539,548,562,584,609,634,663,678,712,726
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,2418,2534,2629,2718,2811,2910,3007,3127,3256,3382
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,696,704,713,717,717,723,723,731,738,738
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,19,24,24,24,24,25,25,25,25,26


In [3]:
# Getting country 3-letter ISO codes for choropleth
# Shared Nominatim geocoder client; getIsoCodes() below calls locator.reverse()
# as a fallback when a country cannot be resolved by name or 2-letter code.
locator = Nominatim(user_agent="myGeocoder")
def getIsoCodes(country_name,location):
    """Return the 3-letter ISO country code for a record, or '' if none is found.

    Resolution order:
      1. ``country_name`` matched against pycountry's ``name`` field;
      2. ``country_name`` matched against pycountry's ``alpha_2`` field;
      3. reverse geocoding of ``location`` with the module-level ``locator`` and
         mapping the returned ``country_code`` through pycountry.

    Parameters
    ----------
    country_name : str
        Country label as it appears in the dataset.
    location : str
        Query handed to ``locator.reverse``; the caller passes a "lat, long" string.

    Returns
    -------
    str
        The ``alpha_3`` code of the matched country, or an empty string when
        every lookup fails.
    """
    # Try the cheap local lookups first, performing each one only once.
    for lookup in ({'name': country_name}, {'alpha_2': country_name}):
        match = pycountry.countries.get(**lookup)
        if match is not None:
            return match.alpha_3

    # Last resort: ask the geocoder. reverse() returns None when it has no
    # result for the query, so guard before touching `.raw`.
    place = locator.reverse(location)
    if place is None:
        return ''

    country_code = place.raw.get('address', {}).get('country_code')
    if not country_code:
        return ''

    match = pycountry.countries.get(alpha_2=country_code.upper())
    return match.alpha_3 if match is not None else ''


def _iso_code_for_row(row):
    """Resolve one record's ISO code from its country label and "Lat, Long" text."""
    coordinates = row[['Lat','Long']].astype(str).values
    return getIsoCodes(row[country_col], ', '.join(coordinates))


# One lookup per row of the confirmed-cases frame.
df['iso_codes']=df[[country_col,'Lat','Long']].apply(_iso_code_for_row,axis=1)


In [18]:
# Sum the per-province rows into one row per (country, ISO code); the
# province and coordinate columns are left out before aggregating.
country_totals=(
    df[df.columns.difference(['Province/State','Lat','Long'])]
    .groupby([country_col,'iso_codes'])
    .sum()
    .reset_index()
)

# Wide -> long: one row per (country, ISO code, date).
confirmed_long=country_totals.melt(id_vars=[country_col,"iso_codes"], var_name="Date", value_name=confirmed_col)

# First non-null lockdown date listed for each country (lockdown_df is date-sorted).
first_lockdown=lockdown_df[[country_col,'LockDown Date']].groupby(country_col).first()

# Left join so countries without a lockdown entry are kept.
confirmed_df=confirmed_long.merge(first_lockdown,left_on=country_col,right_on=country_col,how='left')
confirmed_df['Date']=pd.to_datetime(confirmed_df['Date'])
confirmed_df=confirmed_df.sort_values('Date')


## Spread of Coronavirus over time

In [20]:
# World map of confirmed cases, animated with one frame per date.
# The dates are passed as strings to serve as the animation frame labels.
frame_labels=confirmed_df['Date'].astype(str)
choropleth_options=dict(
    locations='iso_codes',
    hover_name=country_col,
    animation_frame=frame_labels,
    color=confirmed_col,
    color_continuous_scale=px.colors.sequential.Rainbow,
    projection="natural earth",
    title="Confirmed Cases over the world",
)
fig=px.choropleth(confirmed_df,**choropleth_options)
fig.show()

## Spread of Coronavirus over time for top 10 infected countries

In [7]:
# top 10 countries
# Rank on country-level totals at the most recent date. `df` holds one row per
# province/state, so ranking its rows can list the same country more than once
# and under-rank countries whose cases are split across provinces; `confirmed_df`
# is already aggregated, and is summed per country here before ranking.
# Filtering on the parsed 'Date' column also avoids strftime('%-m/%-d/%y'), whose
# '%-m' / '%-d' flags are platform-specific.
latest_date=confirmed_df['Date'].max()
latest_totals=confirmed_df.loc[confirmed_df['Date']==latest_date].groupby(country_col)[confirmed_col].sum()
top_affected_countries=latest_totals.nlargest(10).index.values

# NOTE: from here on `confirmed_df` only contains the top-10 countries; the
# cells below rely on that.
confirmed_df=confirmed_df[confirmed_df[country_col].isin(top_affected_countries)].sort_values('Date')

fig=px.line(confirmed_df, 
            color=country_col, 
            x='Date',y=confirmed_col,
            title='Confirmed Case vs Date for top 10 infected countries')
fig.update_xaxes(rangeslider_visible=True)
fig.show()

## Rate of spread of Coronavirus over time for top 10 infected countries

In [8]:
# Change in confirmed cases relative to the previous row of the same country,
# scaled to percent (rows are date-sorted before this cell).
daily_pct_change=(
    confirmed_df.groupby([country_col])[confirmed_col].pct_change().rename('Percentage Change')
    *100
)
confirmed_pct_df=pd.concat([confirmed_df,daily_pct_change],axis=1)

line_options=dict(
    color=country_col,
    x='Date',
    y='Percentage Change',
    title='Percentage Change each day for top 10 infected countries',
)
fig=px.line(confirmed_pct_df,**line_options)
fig.update_layout(yaxis=dict(ticksuffix='%'))
fig.update_xaxes(rangeslider_visible=True)
fig.show()

## Did Lockdown affect the spread of coronavirus?

*To answer that, we need to evaluate the rate of spread before and after lockdown. The mean of the daily percentage change, and its standard deviation, before and after the lockdown date will tell us the effect of lockdown.*

> A lower mean and a lower standard deviation after lockdown would indicate that the lockdown was successful in reducing the spread.

In [9]:
# An infinite percentage change is replaced by the confirmed count times 100;
# every other value (including NaN) is kept as is.
raw_pct_change=confirmed_pct_df['Percentage Change']
confirmed_pct_df['Percentage Change']=raw_pct_change.where(raw_pct_change!=np.inf,confirmed_pct_df[confirmed_col]*100)

# 'True' / 'False' text flag: is the row's date strictly after the lockdown date?
confirmed_pct_df['After LockDown']=confirmed_pct_df['Date'].gt(confirmed_pct_df['LockDown Date']).astype(str)


# Mean and standard deviation of the percentage change per country, split by the flag.
Mean_Median_Confirmed_df=(
    confirmed_pct_df[[country_col,'After LockDown','Percentage Change']]
    .groupby([country_col,'After LockDown'])
    .agg(['mean','std'])
    .droplevel(0,axis=1)
    .rename(columns={'mean':'Mean','std':'Standard Deviation'})
)

In [11]:
# Move the (country, 'After LockDown') group index into ordinary columns so
# plotly can reference them by name. Guarded so that re-running this cell does
# not call reset_index() again, which would add a stray 'index' column and
# eventually raise.
if country_col not in Mean_Median_Confirmed_df.columns:
    Mean_Median_Confirmed_df=Mean_Median_Confirmed_df.reset_index()

# One grouped bar chart per statistic; both charts share the same layout.
for stat_col in ('Standard Deviation','Mean'):
    fig=px.bar(Mean_Median_Confirmed_df,
           x=country_col,
           y=stat_col,
           color='After LockDown',
           barmode='group',
           title=stat_col+' Comparison of Percentage Change Before & After Lockdown for top 10 infected countries')
    fig.show()

The above graphs show the mean and standard deviation of the daily percentage change before and after lockdown.

### **It suggests that the lockdown has helped control the spread of Coronavirus, and reduced the chances of a sudden spike in COVID cases for the top affected countries.**