In [None]:
# sys.prefix is interpolated into the shell command below so that conda installs
# into the environment of the running kernel rather than some other prefix.
import sys
# Installs plotly-orca from the `plotly` channel without prompting (--yes).
# NOTE(review): presumably this is the static-image backend needed by the
# fig.write_image(...) calls further down — confirm for the plotly version in use.
!conda install --yes --prefix {sys.prefix} -c plotly plotly-orca 

In [None]:
!pip install wget
!pip install calmap
!pip install psutil requests

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

from datetime import timedelta
from IPython.display import HTML
from IPython.display import FileLink
from plotly.subplots import make_subplots
from plotly.offline import plot, iplot, init_notebook_mode
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
import warnings
import random
import calmap
import folium
import wget
import math
import os



# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

# Suppress all warnings and initialise plotly's offline notebook mode.
warnings.filterwarnings('ignore')
init_notebook_mode(connected=True)

# Create the output folders for exported figures and maps if they are missing.
for output_dir in ("images", "Maps"):
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Color palette: one hex colour per case category, shared by the charts and maps below.
Active = '#ff073a'
Recovered = '#28a745'
Confirmed = '#007bff'
Deceased = '#6c757d'

In [None]:
# remove existing files
! rm *.csv

# urls of the files
urls = ['https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv', 
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv',
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv']

# download files
for url in urls:
    filename = wget.download(url)

In [None]:
# Load the three downloaded time-series files (confirmed, deaths, recovered — in that order).
confirmed_df, deaths_df, recovered_df = [
    pd.read_csv(csv_name)
    for csv_name in (
        'time_series_covid19_confirmed_global.csv',
        'time_series_covid19_deaths_global.csv',
        'time_series_covid19_recovered_global.csv',
    )
]
# Per-country feature table read from the attached Kaggle input dataset.
cov = pd.read_csv("../input/covid19-useful-features-by-country/Countries_usefulFeatures.csv")

In [None]:
# Report (rows, columns) of each raw table: confirmed, deaths, recovered.
for raw_frame in (confirmed_df, deaths_df, recovered_df):
    print(raw_frame.shape)

In [None]:
# Preview the first rows of the raw confirmed-cases table.
confirmed_df.head()

In [None]:
# Preview the first rows of the raw deaths table.
deaths_df.head()

In [None]:
# Preview the first rows of the raw recovered table.
recovered_df.head()

In [None]:
# Every column after the first four is treated as a date column; the date list
# taken from the confirmed table is reused for all three tables.
dates = confirmed_df.columns[4:]
id_columns = ['Province/State', 'Country/Region', 'Lat', 'Long']

# Reshape each wide table (one column per date) into long form
# (one row per location and date).
confirmed_df_long = pd.melt(confirmed_df, id_vars=id_columns, value_vars=dates,
                            var_name='Date', value_name='Confirmed')
deaths_df_long = pd.melt(deaths_df, id_vars=id_columns, value_vars=dates,
                         var_name='Date', value_name='Deaths')
recovered_df_long = pd.melt(recovered_df, id_vars=id_columns, value_vars=dates,
                            var_name='Date', value_name='Recovered')

# Shapes of the three long tables.
for long_frame in (confirmed_df_long, deaths_df_long, recovered_df_long):
    print(long_frame.shape)

In [None]:
# Left-join deaths and then recovered onto the confirmed rows, matching on
# location, coordinates and date; confirmed rows without a match keep NaN.
join_keys = ['Province/State', 'Country/Region', 'Date', 'Lat', 'Long']
full_table = (
    confirmed_df_long
    .merge(deaths_df_long, how='left', on=join_keys)
    .merge(recovered_df_long, how='left', on=join_keys)
)

full_table.head()

In [None]:
# (rows, columns) of the merged table.
print(full_table.shape)

In [None]:
# Number of missing values in each column of the merged table.
full_table.isna().sum()

In [None]:
# Treat a missing recovered count as zero and store the column as integers,
# then re-check the per-column missing-value counts.
recovered_filled = full_table['Recovered'].fillna(0)
full_table['Recovered'] = recovered_filled.astype('int')
full_table.isna().sum()

In [None]:
# Distinct values of the Country/Region column.
full_table['Country/Region'].unique()

In [None]:
# Distinct values of the Province/State column.
full_table['Province/State'].unique()

In [None]:
# Use the conventional name 'South Korea' instead of the 'Korea, South' spelling.
country_renames = {'Korea, South': 'South Korea'}
full_table['Country/Region'] = full_table['Country/Region'].replace(country_renames)
full_table['Country/Region'].unique()

In [None]:
# Drop rows whose Province/State contains one of these markers.
# Rows with a missing Province/State are kept (na=False -> not a match).
for marker in ('Recovered', 'Grand Princess', 'Diamond Princess'):
    is_marked = full_table['Province/State'].str.contains(marker, na=False)
    full_table = full_table[~is_marked]
full_table['Province/State'].unique()

In [None]:
# Active cases are what remains of the confirmed count once deaths and
# recoveries are subtracted.
full_table['Active'] = (
    full_table['Confirmed']
    .sub(full_table['Deaths'])
    .sub(full_table['Recovered'])
)
# Show ten random rows as a spot check.
full_table.sample(10)

In [None]:
# Convert the Date strings to datetimes and order the table chronologically.
full_table['Date'] = pd.to_datetime(full_table['Date'])
full_table = full_table.sort_values(by=['Date'])
full_table.head(50)

In [None]:
# Write the cleaned table to the working directory, without the index column.
full_table.to_csv('covid_19_cleaned.csv', index=False)

In [None]:
# Grouped by day, country
# =======================
# Sum all rows of a country so that there is one row per (Date, Country/Region).
# Columns are selected with a list: the bare-tuple form groupby(...)['a', 'b']
# is deprecated and raises an error on pandas 2.x.
full_grouped = (
    full_table
    .groupby(['Date', 'Country/Region'])[['Confirmed', 'Deaths', 'Recovered', 'Active']]
    .sum()
    .reset_index()
)

# new cases ======================================================
# Ordering by (Country/Region, Date) puts each country's dates in consecutive
# rows, so a row-wise diff() gives the day-over-day change.
temp = full_grouped.groupby(['Country/Region', 'Date'])[['Confirmed', 'Deaths', 'Recovered']]
temp = temp.sum().diff().reset_index()

# The first row of every country would otherwise hold the difference to the
# previous country's last day, so blank it out.
mask = temp['Country/Region'] != temp['Country/Region'].shift(1)
temp.loc[mask, ['Confirmed', 'Deaths', 'Recovered']] = np.nan

# renaming columns
temp.columns = ['Country/Region', 'Date', 'No_Of_New_Cases', 'No_Of_New_Deaths', 'No_Of_New_Recovered']
# =================================================================

# merging new values
full_grouped = pd.merge(full_grouped, temp, on=['Country/Region', 'Date'])

# filling na with 0
full_grouped = full_grouped.fillna(0)

# fixing data types
cols = ['No_Of_New_Cases', 'No_Of_New_Deaths', 'No_Of_New_Recovered']
full_grouped[cols] = full_grouped[cols].astype('int')

# A negative number of new cases is reported as 0.
full_grouped['No_Of_New_Cases'] = full_grouped['No_Of_New_Cases'].clip(lower=0)

full_grouped.sample(20)

In [None]:
# Align the key column name with full_grouped, then attach each country's
# coordinates. The merge uses the default (inner) join, so only countries
# present in both tables remain.
cov = cov.rename(columns={'Country_Region': 'Country/Region'})
country_coords = cov[['Latitude', 'Longtitude', 'Country/Region']]
full_grouped = full_grouped.merge(country_coords, on='Country/Region')
full_grouped.head()

In [None]:
# Write the per-country, per-day table to the working directory, without the index column.
full_grouped.to_csv('covid_19_country_wise.csv', index=False)

In [None]:
# Totals per date, reduced to the most recent date.
# Columns are selected with a list: the bare-tuple form groupby(...)['a', 'b']
# is deprecated and raises an error on pandas 2.x.
temp = full_table.groupby('Date')[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum().reset_index()
temp = temp[temp['Date'] == temp['Date'].max()].reset_index(drop=True)

# One row per category (Active / Deaths / Recovered) for plotting.
melted_temp = temp.melt(id_vars="Date", value_vars=['Active', 'Deaths', 'Recovered'])
fig = px.treemap(melted_temp, path=["variable"], values="value", height=250, width=1200,
                 color_discrete_sequence=[Active, Recovered, Deceased])
fig.data[0].textinfo = 'label+text+value'
fig.show()
fig.write_image("images/treemap_overview.svg")

In [None]:
# Pie chart of the Active / Deaths / Recovered rows held in melted_temp.
pie_options = dict(
    values="value",
    names='variable',
    title='Covid 19',
    height=750,
    color_discrete_sequence=[Active, Recovered, Deceased],
)
fig = px.pie(melted_temp, **pie_options)
# Show label, text and value on each slice.
pie_trace = fig.data[0]
pie_trace.textinfo = 'label+text+value'
fig.show()
fig.write_image("images/piechart_overview.svg")

In [None]:
# Totals per date for the three case categories.
# Columns are selected with a list: the bare-tuple form groupby(...)['a', 'b']
# is deprecated and raises an error on pandas 2.x.
temp = full_grouped.groupby('Date')[['Recovered', 'Deaths', 'Active']].sum().reset_index()
# Long form: one row per (Date, Case) with its Count.
temp = temp.melt(id_vars="Date", value_vars=['Recovered', 'Deaths', 'Active'],
                 var_name='Case', value_name='Count')

# Stacked area chart over time with a range slider below the x axis.
fig = px.area(temp, x="Date", y="Count", color='Case', height=600,
             title='Cases over time', color_discrete_sequence = [Recovered, Deceased, Active])
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()
fig.write_image("images/area_overview.svg")

In [None]:
# Keep only the rows of the most recent date and preview them.
latest_date = max(full_grouped['Date'])
on_latest_date = full_grouped['Date'] == latest_date
temp = full_grouped.loc[on_latest_date]
temp.head()

In [None]:
# World wide
# Rows of the most recent date: one circle per country.
temp = full_grouped[full_grouped['Date'] == max(full_grouped['Date'])]

_map = folium.Map(location=[0, 0], tiles='cartodbpositron',
               min_zoom=1, max_zoom=4, zoom_start=1.5)

# iterrows() yields each row once instead of rebuilding it with temp.iloc[i]
# for every single field.
for _idx, row in temp.iterrows():
    folium.Circle(
        location=[row['Latitude'], row['Longtitude']],
        # `fill` is an on/off flag, not a colour; the previous string value only
        # worked because it was truthy.
        color=Confirmed, fill=True,
        tooltip =   '<li><bold>Country : '+str(row['Country/Region'])+
                    '<li><bold>Confirmed : '+str(row['Confirmed'])+
                    '<li><bold>Active : '+str(row['Active'])+
                    '<li><bold>Deaths : '+str(row['Deaths'])+
                    '<li><bold>New Cases : '+str(row['No_Of_New_Cases'])+
                    '<li><bold>New Death : '+str(row['No_Of_New_Deaths'])+
                    '<li><bold>New Recovered : '+str(row['No_Of_New_Recovered']),
        # Clamp at zero: a negative base raised to 1.05 yields a complex number,
        # which is not a valid radius.
        radius=max(int(row['Confirmed']), 0)**1.05).add_to(_map)

_map.save('./Maps/Confirmed.html')
_map

In [None]:
# Map of active cases: one circle per row of `temp`, sized by the Active count.
_map = folium.Map(location=[0, 0], tiles='cartodbpositron',
               min_zoom=1, max_zoom=4, zoom_start=1.5)

# iterrows() yields each row once instead of rebuilding it with temp.iloc[i]
# for every single field.
for _idx, row in temp.iterrows():
    folium.Circle(
        location=[row['Latitude'], row['Longtitude']],
        # `fill` is an on/off flag, not a colour; the previous string value only
        # worked because it was truthy.
        color=Active, fill=True,
        tooltip =   '<li><bold>Country : '+str(row['Country/Region'])+
                    '<li><bold>Confirmed : '+str(row['Confirmed'])+
                    '<li><bold>Active : '+str(row['Active'])+
                    '<li><bold>Deaths : '+str(row['Deaths'])+
                    '<li><bold>New Cases : '+str(row['No_Of_New_Cases'])+
                    '<li><bold>New Death : '+str(row['No_Of_New_Deaths'])+
                    '<li><bold>New Recovered : '+str(row['No_Of_New_Recovered']),
        # Active = Confirmed - Deaths - Recovered can be negative when the source
        # figures are inconsistent; a negative base raised to 1.05 yields a complex
        # number, which is not a valid radius, so clamp at zero.
        radius=max(int(row['Active']), 0)**1.05).add_to(_map)

_map.save('./Maps/Active.html')
_map

In [None]:
# Map of recovered cases: one circle per row of `temp`, sized by the Recovered count.
_map = folium.Map(location=[0, 0], tiles='cartodbpositron',
               min_zoom=1, max_zoom=4, zoom_start=1.5)

# iterrows() yields each row once instead of rebuilding it with temp.iloc[i]
# for every single field.
for _idx, row in temp.iterrows():
    folium.Circle(
        location=[row['Latitude'], row['Longtitude']],
        # `fill` is an on/off flag, not a colour; the previous string value only
        # worked because it was truthy.
        color=Recovered, fill=True,
        tooltip =   '<li><bold>Country : '+str(row['Country/Region'])+
                    '<li><bold>Confirmed : '+str(row['Confirmed'])+
                    '<li><bold>Active : '+str(row['Active'])+
                    '<li><bold>Deaths : '+str(row['Deaths'])+
                    '<li><bold>Recovered : '+str(row['Recovered'])+
                    '<li><bold>New Cases : '+str(row['No_Of_New_Cases'])+
                    '<li><bold>New Death : '+str(row['No_Of_New_Deaths'])+
                    '<li><bold>New Recovered : '+str(row['No_Of_New_Recovered']),
        # Clamp at zero: a negative base raised to 1.05 yields a complex number,
        # which is not a valid radius.
        radius=max(int(row['Recovered']), 0)**1.05).add_to(_map)

_map.save('./Maps/Recovered.html')
_map

In [None]:
# Map of deaths: one circle per row of `temp`, sized by the Deaths count.
_map = folium.Map(location=[0, 0], tiles='cartodbpositron',
               min_zoom=1, max_zoom=4, zoom_start=1.5)

# iterrows() yields each row once instead of rebuilding it with temp.iloc[i]
# for every single field.
for _idx, row in temp.iterrows():
    folium.Circle(
        location=[row['Latitude'], row['Longtitude']],
        # `fill` is an on/off flag, not a colour; the previous string value only
        # worked because it was truthy.
        color=Deceased, fill=True,
        tooltip =   '<li><bold>Country : '+str(row['Country/Region'])+
                    '<li><bold>Confirmed : '+str(row['Confirmed'])+
                    '<li><bold>Active : '+str(row['Active'])+
                    '<li><bold>Deaths : '+str(row['Deaths'])+
                    '<li><bold>New Cases : '+str(row['No_Of_New_Cases'])+
                    '<li><bold>New Death : '+str(row['No_Of_New_Deaths'])+
                    '<li><bold>New Recovered : '+str(row['No_Of_New_Recovered']),
        # Clamp at zero: a negative base raised to 1.05 yields a complex number,
        # which is not a valid radius.
        radius=max(int(row['Deaths']), 0)**1.05).add_to(_map)

_map.save('./Maps/Deceased.html')
_map

In [None]:
# The map has no time axis, so plot only the most recent date instead of
# passing one row per country for every date.
latest = full_grouped[full_grouped['Date'] == full_grouped['Date'].max()].reset_index(drop=True)

# Colour on a log scale. log1p on values clipped at zero keeps zero counts
# finite (plain log gives -inf for 0 and NaN for negative values).
fig = px.choropleth(latest, locations="Country/Region", locationmode='country names',
                    color=np.log1p(latest["Confirmed"].clip(lower=0)),
                    hover_name="Country/Region",
                    title='Confirmed Cases', color_continuous_scale=px.colors.sequential.Blues)
fig.update(layout_coloraxis_showscale=False)
fig.show()
fig.write_image("images/Confirmed_choropleth.svg")

In [None]:
# The map has no time axis, so plot only the most recent date instead of
# passing one row per country for every date.
latest = full_grouped[full_grouped['Date'] == full_grouped['Date'].max()].reset_index(drop=True)

# Colour on a log scale. log1p on values clipped at zero keeps zero counts
# finite (plain log gives -inf for 0 and NaN for negative values, and Active
# can be negative when the source figures are inconsistent).
fig = px.choropleth(latest, locations="Country/Region", locationmode='country names',
                    color=np.log1p(latest["Active"].clip(lower=0)),
                    hover_name="Country/Region",
                    title='Active Cases', color_continuous_scale=px.colors.sequential.Reds)
fig.update(layout_coloraxis_showscale=False)
fig.show()
fig.write_image("images/Active_choropleth.svg")

In [None]:
# The map has no time axis, so plot only the most recent date instead of
# passing one row per country for every date.
latest = full_grouped[full_grouped['Date'] == full_grouped['Date'].max()].reset_index(drop=True)

# Colour on a log scale. log1p on values clipped at zero keeps zero counts
# finite (plain log gives -inf for 0 and NaN for negative values).
fig = px.choropleth(latest, locations="Country/Region", locationmode='country names',
                    color=np.log1p(latest["Recovered"].clip(lower=0)),
                    hover_name="Country/Region",
                    title='Recovered Cases', color_continuous_scale=px.colors.sequential.Greens)
fig.update(layout_coloraxis_showscale=False)
fig.show()
fig.write_image("images/Recovered_choropleth.svg")

In [None]:
# The map has no time axis, so plot only the most recent date instead of
# passing one row per country for every date.
latest = full_grouped[full_grouped['Date'] == full_grouped['Date'].max()].reset_index(drop=True)

# Colour on a log scale. log1p on values clipped at zero keeps zero counts
# finite (plain log gives -inf for 0 and NaN for negative values).
fig = px.choropleth(latest, locations="Country/Region", locationmode='country names',
                    color=np.log1p(latest["Deaths"].clip(lower=0)),
                    hover_name="Country/Region",
                    title='Deceased Cases', color_continuous_scale=px.colors.sequential.Greys)
fig.update(layout_coloraxis_showscale=False)
fig.show()
fig.write_image("images/Deceased_choropleth.svg")

In [None]:
# Download All files
!pwd
! rm *.zip
!zip -r /kaggle/working/Files.zip /kaggle/working
FileLink(r'Files.zip')