In [70]:
# importing the required libraries
import numpy as np
import pandas as pd
from datetime import date
import random

# Visualisation libraries
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
%matplotlib inline
import seaborn as sns
from IPython.display import Markdown
import plotly as ply
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as py
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, plot, iplot, download_plotlyjs
import folium 
from folium import plugins
import json
from folium.plugins import HeatMap, HeatMapWithTime

from fbprophet.plot import plot_plotly
from fbprophet.plot import add_changepoints_to_plot

import pycountry

%config InlineBackend.figure_format = 'retina'
init_notebook_mode(connected=True)

# Utility Functions

'''Display markdown formatted output like bold, italic bold etc.'''
def formatted_text(string):
    display(Markdown(string))
    
'''highlight the maximum in a Series or DataFrame'''  
def highlight_max(data, color='yellow'):
    attr = 'background-color: {}'.format(color)
    if data.ndim == 1:  # Series from .apply(axis=0) or axis=1
        is_max = data == data.max()
        return [attr if v else '' for v in is_max]
    else:  # from .apply(axis=None)
        is_max = data == data.max().max()
        return pd.DataFrame(np.where(is_max, attr, ''), index=data.index, columns=data.columns)   

# Disable warnings 
import warnings
warnings.filterwarnings('ignore')

# Exploring Worldwide Data

In [2]:
df = pd.read_csv('../covidAnalysis_preetham/data/csse_daily.csv',parse_dates=['Last_Update'])
df.rename(columns={'Last_Update':'Date', 'Country_Region':'Country'}, inplace=True)

df_confirmed = pd.read_csv("../covidAnalysis_preetham/data/time_series_covid19_confirmed_global.csv")
df_recovered = pd.read_csv("../covidAnalysis_preetham/data/time_series_covid19_recovered_global.csv")
df_deaths = pd.read_csv("../covidAnalysis_preetham/data/time_series_covid19_deaths_global.csv")

df_confirmed.rename(columns={'Country/Region':'Country'}, inplace=True)
df_recovered.rename(columns={'Country/Region':'Country'}, inplace=True)
df_deaths.rename(columns={'Country/Region':'Country'}, inplace=True)

# Data pre-processing

In [3]:
formatted_text('***Covid 19 data information -***')
df.info()

***Covid 19 data information -***

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3813 entries, 0 to 3812
Data columns (total 14 columns):
FIPS                   3131 non-null float64
Admin2                 3135 non-null object
Province_State         3644 non-null object
Country                3813 non-null object
Date                   3813 non-null datetime64[ns]
Lat                    3739 non-null float64
Long_                  3739 non-null float64
Confirmed              3813 non-null int64
Deaths                 3813 non-null int64
Recovered              3813 non-null int64
Active                 3809 non-null float64
Combined_Key           3813 non-null object
Incidence_Rate         3739 non-null float64
Case-Fatality_Ratio    3762 non-null float64
dtypes: datetime64[ns](1), float64(6), int64(3), object(4)
memory usage: 417.2+ KB


In [4]:
formatted_text('***NULL values in the data -***')
df.isnull().sum()

***NULL values in the data -***

FIPS                   682
Admin2                 678
Province_State         169
Country                  0
Date                     0
Lat                     74
Long_                   74
Confirmed                0
Deaths                   0
Recovered                0
Active                   4
Combined_Key             0
Incidence_Rate          74
Case-Fatality_Ratio     51
dtype: int64

**Initial Insights**

- The **'Province/State'** column has some missing values. We could fill in these missing values with the 'Others' name. 
  As we do not know the State for these records, lets go with 'Others' for now.
   
- The column name for - **'Province/State'** and **'Country/Region'** can be simplified. Lets rename them to 'State' and  
  'Country' respectively.

In [5]:
# Convert 'Last Update' column to datetime object
df['Date'] = df['Date'].apply(pd.to_datetime)

# Fill the missing values in 'Province/State' with the 'Country' name.
df['Province_State'] = df['Province_State'].replace(np.nan, df['Country'])

# Fill the missing values (if any) in 'Confirmed', 'Deaths', 'Recovered' with the 0
df['Confirmed'] = df['Confirmed'].replace(np.nan, 0)
df['Deaths'] = df['Deaths'].replace(np.nan, 0)
df['Recovered'] = df['Recovered'].replace(np.nan, 0)

df['Lat'] = df['Lat'].replace(np.nan, 0)
df['Long_'] = df['Long_'].replace(np.nan, 0)

In [6]:
# Check for the missing values again to ensure that there are no more remaining
formatted_text('***NULL values in the data -***')
df.isnull().sum()

***NULL values in the data -***

FIPS                   682
Admin2                 678
Province_State           0
Country                  0
Date                     0
Lat                      0
Long_                    0
Confirmed                0
Deaths                   0
Recovered                0
Active                   4
Combined_Key             0
Incidence_Rate          74
Case-Fatality_Ratio     51
dtype: int64

**==>`The data looks clean now. This should be good to continue with further analysis`**.

In [7]:
df_deaths.head()

Unnamed: 0,Province/State,Country,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,7/4/20,7/5/20,7/6/20,7/7/20,7/8/20,7/9/20,7/10/20,7/11/20,7/12/20,7/13/20
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,826,864,898,920,936,957,971,994,1010,1012
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,74,76,79,81,83,83,85,89,93,95
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,946,952,959,968,978,988,996,1004,1011,1018
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,52,52,52,52,52,52,52,52,52,52
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,19,19,19,21,21,22,23,23,26,26


In [8]:
df.head()

Unnamed: 0,FIPS,Admin2,Province_State,Country,Date,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incidence_Rate,Case-Fatality_Ratio
0,45001.0,Abbeville,South Carolina,US,2020-07-13 04:43:00,34.223334,-82.461707,153,1,0,152.0,"Abbeville, South Carolina, US",623.80234,0.653595
1,22001.0,Acadia,Louisiana,US,2020-07-13 04:43:00,30.295065,-92.414197,1339,45,0,1294.0,"Acadia, Louisiana, US",2158.111048,3.360717
2,51001.0,Accomack,Virginia,US,2020-07-13 04:43:00,37.767072,-75.632346,1042,14,0,1028.0,"Accomack, Virginia, US",3224.408962,1.34357
3,16001.0,Ada,Idaho,US,2020-07-13 04:43:00,43.452658,-116.241552,4146,25,0,4121.0,"Ada, Idaho, US",860.903637,0.602991
4,19001.0,Adair,Iowa,US,2020-07-13 04:43:00,41.330756,-94.471059,17,0,0,17.0,"Adair, Iowa, US",237.695749,0.0


In [9]:
# Lets check the total #Countries affected by nCoV

formatted_text('***Affected Countries -***')
Covid_19_Countries = df['Country'].unique().tolist()
print(Covid_19_Countries)
print("\n------------------------------------------------------------------")
print("\nTotal countries affected by nCoV: ",len(Covid_19_Countries))

***Affected Countries -***

['US', 'Italy', 'Brazil', 'Russia', 'Mexico', 'Japan', 'Canada', 'Colombia', 'Peru', 'Spain', 'India', 'United Kingdom', 'China', 'Chile', 'Netherlands', 'Australia', 'Pakistan', 'Germany', 'Sweden', 'Ukraine', 'Denmark', 'France', 'Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burma', 'Burundi', 'Cabo Verde', 'Cambodia', 'Cameroon', 'Central African Republic', 'Chad', 'Comoros', 'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Diamond Princess', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'Gabon', 'Gambia', 'Georgia', 'Ghana', 'Greece

In [10]:
formatted_text('***Country and State wise grouped data -***')

covid_19_country_wise_data = df.groupby(['Country', 'Province_State'])['Confirmed', 'Deaths', 'Recovered'].max()
covid_19_country_wise_data

***Country and State wise grouped data -***

Unnamed: 0_level_0,Unnamed: 1_level_0,Confirmed,Deaths,Recovered
Country,Province_State,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,Afghanistan,34451,1010,21216
Albania,Albania,3454,93,1946
Algeria,Algeria,19195,1011,13743
Andorra,Andorra,855,52,803
Angola,Angola,506,26,118
...,...,...,...,...
West Bank and Gaza,West Bank and Gaza,6230,36,942
Western Sahara,Western Sahara,10,1,8
Yemen,Yemen,1465,417,659
Zambia,Zambia,1895,42,1412


So there are a total of 188 countries affected with Covid-19. 

One stand out from the above country list is the item - **`Others`**. Lets check what are these records.

In [11]:
# Check for India's data
df.query('Country=="India"').groupby("Date")[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()

Unnamed: 0,Date,Confirmed,Deaths,Recovered
0,2020-07-13 04:43:00,878254,23174,553471


In [12]:
strDate = df['Date'][-1:].astype('str')
year = int(strDate.values[0].split('-')[0])
month = int(strDate.values[0].split('-')[1])
day = int(strDate.values[0].split('-')[2].split()[0])

formatted_text('***Last reported case date-time***')

print("The recent date is {0}-{1}-{2}".format(day,month,year))

***Last reported case date-time***

The recent date is 7-6-2020


# Visualizing: Worldwide NCOVID-19 cases

In [13]:
all_cases_world = df.groupby('Date')['Confirmed', 'Deaths', 'Recovered','Case-Fatality_Ratio'].sum()
all_cases_world = all_cases_world.reset_index()
all_cases_world = all_cases_world.sort_values('Date', ascending=False)

fig = go.Figure()
fig.update_layout(title_text='Total number of confirmed, deaths and recovered cases in the World', 
                  xaxis_title='Period Date', yaxis_title='Total Cases', template='plotly_dark')

fig.add_trace(go.Scatter(x=all_cases_world['Date'],
                        y=all_cases_world['Confirmed'],
                        mode='lines+markers',
                        name='Global Confirmed',
                        line=dict(color='yellow', width=2)))

fig.add_trace(go.Scatter(x=all_cases_world['Date'],
                        y=all_cases_world['Deaths'],
                        mode='lines+markers',
                        name='Global Deaths',
                        line=dict(color='red', width=2)))

fig.add_trace(go.Scatter(x=all_cases_world['Date'],
                        y=all_cases_world['Recovered'],
                        mode='lines+markers',
                        name='Global Recovered',
                        line=dict(color='green', width=2)))

fig.add_trace(go.Scatter(x=all_cases_world['Date'],
                        y=all_cases_world['Case-Fatality_Ratio'],
                        mode='lines+markers',
                        name='Global Recovered',
                        line=dict(color='blue', width=2)))


fig.show()

In [14]:
fig = go.Figure()
fig.update_layout(title_text='Log plot of Total number of confirmed, deaths and recovered cases in the World', 
                  xaxis_title='Period Date', yaxis_title='Total Cases', template='plotly_dark')

fig.add_trace(go.Scatter(x=all_cases_world['Date'],
                        y=all_cases_world['Confirmed'],
                        mode='lines+markers',
                        name='Global Confirmed',
                        line=dict(color='yellow', width=2)))

fig.add_trace(go.Scatter(x=all_cases_world['Date'],
                        y=all_cases_world['Deaths'],
                        mode='lines+markers',
                        name='Global Deaths',
                        line=dict(color='red', width=2)))

fig.add_trace(go.Scatter(x=all_cases_world['Date'],
                        y=all_cases_world['Recovered'],
                        mode='lines+markers',
                        name='Global Recovered',
                        line=dict(color='green', width=2)))

fig.add_trace(go.Scatter(x=all_cases_world['Date'],
                        y=all_cases_world['Case-Fatality_Ratio'],
                        mode='lines+markers',
                        name='Global Recovered',
                        line=dict(color='blue', width=2)))

fig.update_layout(yaxis_type="log")
fig.show()

In [51]:
#Prevent division by zero
def ifNull(d):
    temp=1
    if d!=0:
        temp=d
    return temp

global_rate = df.groupby(['Date']).agg({'Confirmed':['sum'],'Deaths':['sum'], 'Recovered': ['sum']})
global_rate.columns = ['Global_Confirmed', 'Global_Deaths', 'Global_Recovered']
global_rate = global_rate.reset_index()
global_rate['Increase_New_Cases_by_Day'] = global_rate['Global_Confirmed'].diff().shift(-1)

#Calculating rates
#Lambda function
global_rate['Global_Deaths_rate_%'] = global_rate.apply(lambda row: ((row.Global_Deaths)/ifNull((row.Global_Confirmed)))*100, axis=1).round(2)
global_rate['Global_Recovered_rate_%'] = global_rate.apply(lambda row: ((row.Global_Recovered)/ifNull((row.Global_Confirmed)))*100, axis=1).round(2)
global_rate['Global_Growth_rate_%'] = global_rate.apply(lambda row: row.Increase_New_Cases_by_Day/ifNull(row.Global_Confirmed*100), axis=1).round(2)
global_rate['Global_Growth_rate_%'] = global_rate['Global_Growth_rate_%'].shift(+1)

fig = go.Figure()
fig.update_layout(title_text='Global rate of growth confirmed, deaths and recovered cases',
                 xaxis_title='Period Date', yaxis_title='Rate', template='plotly_dark')

fig.add_trace(go.Scatter(x=global_rate['Date'],
                        y=global_rate['Global_Growth_rate_%'],
                        mode='lines+markers',
                        name='Global Growth Confirmed rate %',
                        line=dict(color='yellow', width=2)))

fig.add_trace(go.Scatter(x=global_rate['Date'],
                        y=global_rate['Global_Deaths_rate_%'],
                        mode='lines+markers',
                        name='Global Deaths rate %',
                        line=dict(color='red', width=2)))

fig.add_trace(go.Scatter(x=global_rate['Date'],
                        y=global_rate['Global_Recovered_rate_%'],
                        mode='lines+markers',
                        name='Global Recovered rate %',
                        line=dict(color='green', width=2)))

fig.show()

# Chrolopleth For Confirmed ,Recovered & Deaths

In [15]:
# choropleth_map_confirmed = px.choropleth(CountryWiseData, locations='Country', 
#                     locationmode='country names', color='Confirmed', 
#                     hover_name='Country', range_color=[1,max(CountryWiseData.Confirmed)], 
#                     color_continuous_scale='reds', 
#                     title='Covid-19 Globally Confirmed Countries')

# choropleth_map_confirmed.update(layout_coloraxis_showscale=False)
# iplot(choropleth_map_confirmed)

**`China is the worst affected country with Covid-19. The virus has spread to other neighbouring countries and cases of covid-19 have been reported there. 
However the numbers are not as high as China. Some distant countries in Europe, North America & Australia have also seen cases of Covid-19. 
This could be due to some citizens would have been present in China at the time of the virus out-break and unknowingly would have carried along with them in 
their return journey back to their respective countries.`**

In [16]:
# choropleth_map_recovered = px.choropleth(CountryWiseData, locations='Country', 
#                     locationmode='country names', color='Recovered', 
#                     hover_name='Country', range_color=[1,max(CountryWiseData.Recovered)], 
#                     color_continuous_scale='reds', 
#                     title='Covid-19 Global Recovered Cases')

# choropleth_map_recovered.update(layout_coloraxis_showscale=False)
# iplot(choropleth_map_recovered)

**`The recovery rate has been a little slowerthan expected. The virus did not match any other known virus. 
This raised concern because when a virus is new, we do not know how it affects people. There were no existing medications available. 
However, A team of doctors in Thailand have seen some apparent success treating Coronavirus with drug cocktail. The doctors combined the anti-flu drug oseltamivir with lopinavir 
and ritonavir, anti-virals used to treat HIV, Kriengsak said, adding the ministry was awaiting research results to prove the findings.`**

Read more at: https://economictimes.indiatimes.com/news/international/world-news/thailand-sees-apparent-success-treating-virus-with-drug-cocktail/articleshow/73879572.cms?utm_source=contentofinterest&utm_medium=text&utm_campaign=cppst

In [17]:
# choropleth_map_recovered = px.choropleth(CountryWiseData, locations='Country', 
#                     locationmode='country names', color='Deaths', 
#                     hover_name='Country', range_color=[1,max(CountryWiseData.Recovered)], 
#                     color_continuous_scale='reds', 
#                     title='Covid-19 Global Recovered Cases')

# choropleth_map_recovered.update(layout_coloraxis_showscale=False)
# iplot(choropleth_map_recovered)

**`As China has the most reported cases, the number of deaths has also been on the higher side. The virus did not match any other known virus. 
This raised concern because when a virus is new, we do not know how it affects people. There were no existing medications available. 
Due to lack of timely medication available the number of deaths has see the higher side`**

In [18]:
def plot_pie_charts(x,y,title=''):
    c = random.choices(list(mcolors.CSS4_COLORS.values()),k = len(Covid_19_Countries))
    plt.figure(figsize=(10,15))
    plt.title(title,size = 20)
    plt.pie(y,colors= c)
    plt.legend(x,loc='best',fontsize=15)
    plt.show()

# India State Wide Distribution 

In [19]:
india_data_over_time = df[(df['Country'] == 'India')]
india_data_over_time.head()

Unnamed: 0,FIPS,Admin2,Province_State,Country,Date,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incidence_Rate,Case-Fatality_Ratio
3150,,,Andaman and Nicobar Islands,India,2020-07-13 04:43:00,11.225999,92.968178,163,0,93,70.0,"Andaman and Nicobar Islands, India",39.085355,0.0
3151,,,Andhra Pradesh,India,2020-07-13 04:43:00,15.9129,79.74,29168,328,15412,13428.0,"Andhra Pradesh, India",54.111622,1.12452
3165,,,Arunachal Pradesh,India,2020-07-13 04:43:00,27.768456,96.384277,359,2,138,219.0,"Arunachal Pradesh, India",22.859573,0.557103
3166,,,Assam,India,2020-07-13 04:43:00,26.357149,92.830441,16071,35,10426,5610.0,"Assam, India",45.134334,0.217784
3188,,,Bihar,India,2020-07-13 04:43:00,25.679658,85.60484,16642,143,11498,5001.0,"Bihar, India",13.334944,0.859272


In [21]:
india_statewise_data = india_data_over_time.groupby(['Province_State'])['Confirmed', 'Deaths', 'Recovered'].max()
india_statewise_data['Province_State'] = india_statewise_data.index
india_statewise_data.index = np.arange(1, len(india_statewise_data.Province_State.unique().tolist())+1)
india_statewise_data = india_statewise_data[['Province_State','Confirmed', 'Deaths', 'Recovered']]

formatted_text('***Country wise numbers of ''Confirmed'', ''Deaths'', ''Recovered'' Cases***')

india_statewise_data.head()

***Country wise numbers of Confirmed, Deaths, Recovered Cases***

Unnamed: 0,Province_State,Confirmed,Deaths,Recovered
1,Andaman and Nicobar Islands,163,0,93
2,Andhra Pradesh,29168,328,15412
3,Arunachal Pradesh,359,2,138
4,Assam,16071,35,10426
5,Bihar,16642,143,11498


In [22]:
# Extract the state latitude and longitude coordinates from the time series data.
india_coordinates = india_data_over_time[['Province_State','Lat','Long_']]
india_coordinates.drop_duplicates(keep='first', inplace=True)

india_coordinates.index = np.arange(1, len(india_coordinates.Province_State.unique().tolist())+1)

india_coordinates.head()

Unnamed: 0,Province_State,Lat,Long_
1,Andaman and Nicobar Islands,11.225999,92.968178
2,Andhra Pradesh,15.9129,79.74
3,Arunachal Pradesh,27.768456,96.384277
4,Assam,26.357149,92.830441
5,Bihar,25.679658,85.60484


In [23]:
india_statewise_data = pd.merge(india_coordinates, india_statewise_data, on='Province_State')

india_statewise_data.head()

Unnamed: 0,Province_State,Lat,Long_,Confirmed,Deaths,Recovered
0,Andaman and Nicobar Islands,11.225999,92.968178,163,0,93
1,Andhra Pradesh,15.9129,79.74,29168,328,15412
2,Arunachal Pradesh,27.768456,96.384277,359,2,138
3,Assam,26.357149,92.830441,16071,35,10426
4,Bihar,25.679658,85.60484,16642,143,11498


# India - Distribution on Map

In [25]:
india_lat = 20.5937
india_lon = 78.9629

formatted_text('***Click on the pin to veiw details stats***')

IndiaMap = folium.Map(location=[india_lat, india_lon], zoom_start=4, tiles='cartodbpositron')

for lat, long, confirmed, deaths, recovered, state in zip(india_statewise_data['Lat'],
                                                           india_statewise_data['Long_'],
                                                           india_statewise_data['Confirmed'],
                                                           india_statewise_data['Deaths'],
                                                           india_statewise_data['Recovered'], 
                                                           india_statewise_data['Province_State']):
    
    if (deaths == 0):
        folium.Marker(location=[lat, long]
                    , popup = ('<strong>nCov Numbers:</strong> ' + '<br>' + 
                                 '<strong>State:</strong> ' + str(state).capitalize() + '<br>'
                                 '<strong>Confirmed:</strong> ' + str(int(confirmed)) + '<br>'
                                 '<strong>Deaths:</strong> ' + str(int(deaths)) + '<br>'
                                 '<strong>Recovered:</strong> ' + str(int(recovered)) + '<br>')
                    , icon=folium.Icon(color='darkblue',icon='info-sign'), color='rgb(55, 83, 109)'
                    , tooltip = str(state).capitalize(), fill_color='rgb(55, 83, 109)').add_to(IndiaMap)
    else:
        folium.Marker(location=[lat, long]
                    , popup = ('<strong>nCov Numbers:</strong> ' + '<br>' + 
                                 '<strong>State:</strong> ' + str(state).capitalize() + '<br>'
                                 '<strong>Confirmed:</strong> ' + str(int(confirmed)) + '<br>'
                                 '<strong>Deaths:</strong> ' + str(int(deaths)) + '<br>'
                                 '<strong>Recovered:</strong> ' + str(int(recovered)) + '<br>')
                    , icon=folium.Icon(color='red', icon='info-sign'), color='rgb(26, 118, 255)'
                    , tooltip = str(state).capitalize(), fill_color='rgb(26, 118, 255)').add_to(IndiaMap)
    
    
IndiaMap

***Click on the pin to veiw details stats***

# Rest of World

In [27]:
rest_of_world = df[df['Country'] != 'India']
rest_of_world.head()

Unnamed: 0,FIPS,Admin2,Province_State,Country,Date,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incidence_Rate,Case-Fatality_Ratio
0,45001.0,Abbeville,South Carolina,US,2020-07-13 04:43:00,34.223334,-82.461707,153,1,0,152.0,"Abbeville, South Carolina, US",623.80234,0.653595
1,22001.0,Acadia,Louisiana,US,2020-07-13 04:43:00,30.295065,-92.414197,1339,45,0,1294.0,"Acadia, Louisiana, US",2158.111048,3.360717
2,51001.0,Accomack,Virginia,US,2020-07-13 04:43:00,37.767072,-75.632346,1042,14,0,1028.0,"Accomack, Virginia, US",3224.408962,1.34357
3,16001.0,Ada,Idaho,US,2020-07-13 04:43:00,43.452658,-116.241552,4146,25,0,4121.0,"Ada, Idaho, US",860.903637,0.602991
4,19001.0,Adair,Iowa,US,2020-07-13 04:43:00,41.330756,-94.471059,17,0,0,17.0,"Adair, Iowa, US",237.695749,0.0


# Rest of World - Confirmed Cases

In [28]:
rest_of_world_confirmed = px.choropleth(rest_of_world, locations='Country', 
                    locationmode='country names', color='Confirmed', 
                    hover_name='Country', range_color=[1, 10000], 
                    color_continuous_scale='Geyser', 
                    title='Covid-19 Rest of World Confirmed Cases')

iplot(rest_of_world_confirmed)

# Rest of World - Death Cases

In [None]:
rest_of_world_death = px.choropleth(rest_of_world, locations='Country', 
                    locationmode='country names', color='Deaths', 
                    hover_name='Country', range_color=[0, len(rest_of_world.Deaths)], 
                    color_continuous_scale='Picnic', 
                    title='Covid-19 Rest of World Death Cases')

iplot(rest_of_world_death)

**`There have been countable number of deaths reported outside of China.`**

# Rest of World - Recovered Cases

In [None]:
rest_of_world_recovered = px.choropleth(rest_of_world, locations='Country', 
                    locationmode='country names', color='Recovered', 
                    hover_name='Country', range_color=[1,len(rest_of_world.Recovered)], 
                    color_continuous_scale='viridis', 
                    title='Covid-19 Rest of World Recovered Cases')

iplot(rest_of_world_recovered)

**`Outside of China there hve been countries where the confirmed cases have been all recovered from the virus`**

Lets see how many such countries are there - 

In [29]:
formatted_text('***Countries withh all reported cases recovered -***')
print(rest_of_world[rest_of_world['Confirmed'] == 
                    rest_of_world['Recovered']][['Country','Confirmed', 'Recovered']].reset_index())

***Countries withh all reported cases recovered -***

    index      Country  Confirmed  Recovered
0      85           US          0          0
1     211           US          0          0
2     282           US          0          0
3     722           US          0          0
4     758           US          0          0
..    ...          ...        ...        ...
69   3704      Grenada         23         23
70   3710     Holy See         12         12
71   3727         Laos         19         19
72   3795  Timor-Leste         24         24
73   3812           US          0          0

[74 rows x 4 columns]


# TreeMaps

In [31]:
india_statewise_data["Country"] = "India" # in order to have a single root node

fig1 = px.treemap(india_statewise_data.sort_values(by='Confirmed', ascending=False).reset_index(drop=True), 
                 path=["Country", "Province_State"], values="Confirmed", title='Number of Confirmed Cases in indian Provinces',
                 color_discrete_sequence = px.colors.qualitative.Prism, hover_data=["Confirmed"])

fig1.data[0].textinfo = 'label+text+value+percent entry'
py.offline.iplot(fig1)

fig2 = px.treemap(india_statewise_data.sort_values(by='Deaths', ascending=False).reset_index(drop=True), 
                 path=["Country", "Province_State"], values="Deaths", title='Number of Deaths Reported in Indian Provinces',
                 color_discrete_sequence = px.colors.qualitative.Prism, hover_data=["Deaths"])

fig2.data[0].textinfo = 'label+text+value+percent entry'
py.offline.iplot(fig2)

**As we see from the above maps, Maharashtra,  is the worst affected state in China with a staggering 84% of the confirmed cases coming out of it and 44% of deaths happenning there**

# Global Tree

In [33]:
temp2 = pd.DataFrame(df.groupby(['Country', 'Province_State'])['Confirmed', 'Deaths', 'Recovered'].max().reset_index())
temp2['Global'] = "Global" # to have a single root

## Confirmed Cases

In [34]:
fig = px.treemap(temp2, 
                 path=["Global", "Country"], values="Confirmed", height=700,
                 title='Number of Confirmed Cases Around The Globe',
                 color_discrete_sequence = px.colors.qualitative.Prism)
fig.data[0].textinfo = 'label+text+value+percent entry'
fig.show()

fig = px.treemap(temp2, 
                 path=["Global", "Country", "Province_State"], values="Confirmed", height=700,
                 title='Number of Confirmed Cases Around The Globe',
                 color_discrete_sequence = px.colors.qualitative.Prism)
fig.data[0].textinfo = 'label+text+value+percent parent+percent entry'
fig.show()


**Globally we have more that 8 lakhs confirmed cases of the people infectedd with the Virus from countries all overthe globe. 
The reason why WHO has declared this as a Pandemic. 
From the plots above we know -**

## Reported Deaths

In [36]:
fig = px.treemap(temp2, 
                 path=["Global", "Country"], values="Deaths", height=700,
                 title='Number of Deaths reported Globally',
                 color_discrete_sequence = px.colors.qualitative.Prism)
fig.data[0].textinfo = 'label+text+value+percent entry'
fig.show()

fig = px.treemap(temp2, 
                 path=["Global", "Country", "Province_State"], values="Deaths", height=700,
                 title='Number of Deaths reported Globally',
                 color_discrete_sequence = px.colors.qualitative.Prism)
fig.data[0].textinfo = 'label+text+value+percent parent+percent entry'
fig.show()

# World Heat Map

## 1. Confimed Cases

In [39]:
conf_heatmap = folium.Map(location=[0,0], zoom_start=2)

HeatMap(data=df[['Lat', 'Long_', 'Confirmed']].groupby(['Lat', 'Long_']).sum().reset_index().values.tolist(),radius=18, max_zoom=12).add_to(conf_heatmap)

conf_heatmap

## 2. Reported Deaths

In [40]:
deaths_heatmap = folium.Map(location=[0,0], zoom_start=2)

HeatMap(data=df[['Lat', 'Long_', 'Deaths']].groupby(['Lat', 'Long_']).sum().reset_index().values.tolist(),radius=18, max_zoom=12).add_to(deaths_heatmap)

deaths_heatmap

# Prediction

In [57]:
from fbprophet import Prophet

In [61]:
confirmed = df.groupby('Date').sum()['Confirmed'].reset_index()
deaths = df.groupby('Date').sum()['Deaths'].reset_index()
recovered = df.groupby('Date').sum()['Recovered'].reset_index()

In [62]:
confirmed.columns = ['ds','y']
#confirmed['ds'] = confirmed['ds'].dt.date
confirmed['ds'] = pd.to_datetime(confirmed['ds'])

In [63]:
confirmed.tail()

Unnamed: 0,ds,y
0,2020-06-07 19:33:00,0
1,2020-07-13 04:43:00,12910356
2,2020-10-07 02:34:00,1


##  Forecasting Confirmed NCOVID-19 Cases Worldwide with Prophet (Base model)
Generating a week ahead forecast of confirmed cases of NCOVID-19 using Prophet, with 95% prediction interval by creating a base model with no tweaking of seasonality-related parameters and additional regressors.

In [68]:
m = Prophet(interval_width=0.95)
m.fit(confirmed)
future = m.make_future_dataframe(periods=50)
future.tail()

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 1.


The **predict** method will assign each row in future a predicted value which it names **yhat**. If you pass in historical dates, it will provide an in-sample fit. The **forecast object** here is a new dataframe that includes a column yhat with the forecast, as well as columns for components and uncertainty intervals.

In [65]:
#predicting the future with date, and upper and lower limit of y value
confirmed_forecast = m.predict(future)
confirmed_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
48,2020-11-22 02:34:00,1200346.0,-9392747.0,11348070.0
49,2020-11-23 02:34:00,1173373.0,-9559489.0,11403380.0
50,2020-11-24 02:34:00,1146400.0,-9701777.0,11653640.0
51,2020-11-25 02:34:00,1119428.0,-9588467.0,11891700.0
52,2020-11-26 02:34:00,1092455.0,-9955267.0,11383190.0


In [71]:
fig = plot_plotly(m, confirmed_forecast)
annotations = []
annotations.append(dict(xref='paper', yref='paper', x=0.0, y=1.10,
                       xanchor='left', yanchor='bottom',
                       text='Total predictions to Confirmed cases in the World',
                       font=dict(family='Arial',
                                size=25,
                                color='rgb(37,37,37)'),
                       showarrow=False))
fig.update_layout(annotations=annotations)
fig

## Forecasting Worldwide Deaths using Prophet (Base model)

In [78]:
deaths.columns = ['ds','y']
deaths['ds'] = pd.to_datetime(deaths['ds'])

In [79]:
m2 = Prophet(interval_width=0.95)
m2.fit(deaths)
future = m2.make_future_dataframe(periods=31)
future.tail()

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 1.


Unnamed: 0,ds
29,2020-11-03 02:34:00
30,2020-11-04 02:34:00
31,2020-11-05 02:34:00
32,2020-11-06 02:34:00
33,2020-11-07 02:34:00


In [75]:
deaths_forecast = m2.predict(future)
deaths_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
29,2020-11-03 02:34:00,75506.540311,-339446.465059,492646.942866
30,2020-11-04 02:34:00,74317.504086,-375735.134858,519188.606947
31,2020-11-05 02:34:00,73128.46786,-375478.801389,568005.7011
32,2020-11-06 02:34:00,71939.431635,-403415.230897,531818.128819
33,2020-11-07 02:34:00,70750.395409,-388650.212107,554738.825758


In [80]:
fig_deaths = plot_plotly(m2, deaths_forecast)
annotations = []
annotations.append(dict(xref='paper', yref='paper', x=0.0, y=1.10,
                       xanchor='left', yanchor='bottom',
                       text='Total predictions to Deaths in the World',
                       font=dict(family='Arial',
                                size=25,
                                color='rgb(37,37,37)'),
                       showarrow=False))
fig_deaths.update_layout(annotations=annotations)
fig_deaths

## Forecasting Worldwide Recovered Cases with Prophet (Base model)

In [81]:
recovered.columns = ['ds','y']
recovered['ds'] = pd.to_datetime(recovered['ds'])

In [82]:
m3 = Prophet(interval_width=0.95)
m3.fit(recovered)
future = m3.make_future_dataframe(periods=7)
future.tail()

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 1.


Unnamed: 0,ds
5,2020-10-10 02:34:00
6,2020-10-11 02:34:00
7,2020-10-12 02:34:00
8,2020-10-13 02:34:00
9,2020-10-14 02:34:00


In [83]:
recovered_forecast = m.predict(future)
recovered_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
5,2020-10-10 02:34:00,104043.409724,-340738.574032,566413.090139
6,2020-10-11 02:34:00,102854.373499,-366240.266149,520627.563529
7,2020-10-12 02:34:00,101665.337273,-381021.243468,520558.75906
8,2020-10-13 02:34:00,100476.301048,-353675.985492,576445.745278
9,2020-10-14 02:34:00,99287.264822,-385544.812433,561276.000266


In [84]:
fig_deaths = plot_plotly(m3, recovered_forecast)
annotations = []
annotations.append(dict(xref='paper', yref='paper', x=0.0, y=1.10,
                       xanchor='left', yanchor='bottom',
                       text='Total predictions to Deaths in the World',
                       font=dict(family='Arial',
                                size=25,
                                color='rgb(37,37,37)'),
                       showarrow=False))
fig_deaths.update_layout(annotations=annotations)
fig_deaths

# Analysis of Advancement in India

In [93]:
india_cases = df.copy()
india_cases = df.loc[df['Country']=='India']
india_cases = india_cases.groupby(['Date', 'Country']).agg({'Confirmed':['sum'], 'Deaths':['sum'], 'Recovered':['sum']}).sort_values('Date', ascending=False)
india_cases.columns = ['Confirmed', 'Deaths', 'Recovered']
india_cases = india_cases.reset_index()
india_cases['Confirmed_New_Daily_Cases'] = india_cases['Confirmed'].diff().shift(-1)
india_cases['Deaths_New_Daily_Cases'] = india_cases['Deaths'].diff().shift(-1)
india_cases['Recovered_New_Daily_Cases'] = india_cases['Recovered'].diff().shift(-1)
india_cases_confirmed = india_cases[india_cases['Confirmed']!=0]

india_cases_confirmed

Unnamed: 0,Date,Country,Confirmed,Deaths,Recovered,Confirmed_New_Daily_Cases,Deaths_New_Daily_Cases,Recovered_New_Daily_Cases
0,2020-07-13 04:43:00,India,878254,23174,553471,,,
