### @Auteur Imed KERAGHEL
### Date de la dernière modification : 13/03/2020
### LIMICS INSERM_1142


# Import des Libraries

In [4]:
! pip install calmap

# libraries essentielles
import json
import random
from urllib.request import urlopen

# Pour l'analyse
import numpy as np
import pandas as pd

# Pour la visualisation
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
import calmap
import folium

# Color palette (hex RGB) shared by the plots, one color per case type
cnf = '#393e46' # dark gray - confirmed cases
dth = '#ff2e63' # red       - deaths
rec = '#21bf73' # green     - recovered cases
act = '#fe9801' # orange    - active cases

# Register the pandas date converters with matplotlib
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()   

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# deprecation warnings included - consider a narrower filter.
warnings.filterwarnings('ignore')

# html embedding
from IPython.display import Javascript
from IPython.core.display import display, HTML

Collecting calmap
  Downloading https://files.pythonhosted.org/packages/60/7a/3340f348c4826fad190a265290ade1b7fbfbb311c84e27d82fb43e12d579/calmap-0.0.7-py2.py3-none-any.whl
Installing collected packages: calmap
Successfully installed calmap-0.0.7


# Dataset

In [5]:
# Load the dataset from the working directory, parsing 'Date' as datetimes
full_table = pd.read_csv('covid_19_clean_complete.csv', 
                         parse_dates=['Date'])
full_table.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,,Thailand,15.0,101.0,2020-01-22,2,0,0
1,,Japan,36.0,138.0,2020-01-22,2,0,0
2,,Singapore,1.2833,103.8333,2020-01-22,0,0,0
3,,Nepal,28.1667,84.25,2020-01-22,0,0,0
4,,Malaysia,2.5,112.5,2020-01-22,0,0,0


In [6]:
# Dataframe summary: column dtypes and non-null counts
full_table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11960 entries, 0 to 11959
Data columns (total 8 columns):
Province/State    5876 non-null object
Country/Region    11960 non-null object
Lat               11960 non-null float64
Long              11960 non-null float64
Date              11960 non-null datetime64[ns]
Confirmed         11960 non-null int64
Deaths            11960 non-null int64
Recovered         11960 non-null int64
dtypes: datetime64[ns](1), float64(2), int64(3), object(2)
memory usage: 747.6+ KB


In [7]:
# Number of missing values in each column
full_table.isna().sum()

Province/State    6084
Country/Region       0
Lat                  0
Long                 0
Date                 0
Confirmed            0
Deaths               0
Recovered            0
dtype: int64

# Preprocessing

### Nettoyage de données

In [8]:
# Case-count columns used throughout the notebook
cases = ['Confirmed', 'Deaths', 'Recovered', 'Active']

# Use a single label for China
full_table['Country/Region'] = full_table['Country/Region'].replace('Mainland China', 'China')

# Fill the gaps BEFORE deriving 'Active': a missing raw count would otherwise
# propagate NaN into 'Active' and then be silently turned into 0 active cases.
full_table = full_table.fillna(
    {'Province/State': '', 'Confirmed': 0, 'Deaths': 0, 'Recovered': 0})

# Active cases = confirmed - deaths - recovered
full_table['Active'] = full_table['Confirmed'] - full_table['Deaths'] - full_table['Recovered']

### Les tables dérivées

In [13]:
# Rows whose province label names one of the cruise ships.
# na=False keeps the mask boolean even if a province label is missing.
# NOTE(review): only 'Grand Princess' rows show up in the ship table further
# down, so the 'Diamond Princess cruise ship' label looks stale for this
# dataset version - confirm the exact label used for that ship.
ship = full_table[full_table['Province/State'].str.contains(
    'Grand Princess|Diamond Princess cruise ship', na=False)]

# China vs. rest of the world ("row")
china = full_table[full_table['Country/Region'] == 'China']
row = full_table[full_table['Country/Region'] != 'China']

# Snapshot at the most recent date
# (reset_index() keeps the previous row labels in an extra 'index' column)
full_latest = full_table[full_table['Date'] == full_table['Date'].max()].reset_index()
china_latest = full_latest[full_latest['Country/Region'] == 'China']
row_latest = full_latest[full_latest['Country/Region'] != 'China']

# Latest snapshot aggregated per country (per province for China).
# Columns are selected with a list: selecting several group-by columns with a
# bare tuple (['a', 'b']) is deprecated and raises on pandas >= 2.0.
full_latest_grouped = full_latest.groupby('Country/Region')[cases].sum().reset_index()
china_latest_grouped = china_latest.groupby('Province/State')[cases].sum().reset_index()
row_latest_grouped = row_latest.groupby('Country/Region')[cases].sum().reset_index()

# Dernières données

### Dernières données complètes

In [14]:
# Highest value of each counter per (country, province) over all dates.
# Note: because this is a max over time, 'Active' is the peak number of active
# cases, which can differ from the value on the latest date.
# A list (not a bare tuple) selects the columns: tuple selection on a group-by
# raises on pandas >= 2.0.
temp = full_table.groupby(['Country/Region', 'Province/State'])[cases].max()
temp.style.background_gradient(cmap='Reds')

Unnamed: 0_level_0,Unnamed: 1_level_0,Confirmed,Deaths,Recovered,Active
Country/Region,Province/State,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,,7,0,0,7
Albania,,33,1,0,32
Algeria,,26,2,8,20
Andorra,,1,0,1,1
Antigua and Barbuda,,1,0,0,1
Argentina,,31,2,0,29
Armenia,,8,0,0,8
Aruba,,2,0,0,2
Australia,Australian Capital Territory,1,0,0,1
Australia,From Diamond Princess,8,0,0,8


### Dernières données condensées

In [15]:
# Worldwide totals per date, then keep only the most recent date.
# A list (not a bare tuple) selects the columns: tuple selection on a group-by
# raises on pandas >= 2.0.
temp = full_table.groupby('Date')[cases].sum().reset_index()
temp = temp[temp['Date'] == temp['Date'].max()].reset_index(drop=True)
temp.style.background_gradient(cmap='Pastel1')

Unnamed: 0,Date,Confirmed,Deaths,Recovered,Active
0,2020-03-13 00:00:00,145193,5404,70251,69538


In [16]:
# Long form of the worldwide totals: one row per case type
tm = temp.melt(
    id_vars="Date",
    value_vars=['Active', 'Deaths', 'Recovered'],
)

# Treemap with one tile per case type, sized by its count.
# NOTE(review): the palette order [rec, act, dth] does not follow the
# value_vars order above - confirm each tile gets its intended color.
fig = px.treemap(
    tm,
    path=["variable"],
    values="value",
    height=400,
    width=600,
    color_discrete_sequence=[rec, act, dth],
)
fig.show()

# Données par pays

### Dans chaque pays

In [17]:
# Countries ranked by confirmed cases, highest first
temp_f = (
    full_latest_grouped
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp_f.style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active
0,China,80945,3180,64196,13569
1,Italy,17660,1266,1439,14955
2,Iran,11364,514,2959,7891
3,"Korea, South",7979,66,510,7403
4,Spain,5232,133,193,4906
5,Germany,3675,7,46,3622
6,France,3667,79,12,3576
7,US,2179,47,12,2120
8,Switzerland,1139,11,4,1124
9,Norway,996,0,1,995


### Pays avec décès signalés

In [18]:
# Countries that reported at least one death, ranked by death count
temp_flg = temp_f.loc[temp_f['Deaths'] > 0, ['Country/Region', 'Deaths']]
(
    temp_flg
    .sort_values('Deaths', ascending=False)
    .reset_index(drop=True)
    .style.background_gradient(cmap='Reds')
)

Unnamed: 0,Country/Region,Deaths
0,China,3180
1,Italy,1266
2,Iran,514
3,Spain,133
4,France,79
5,"Korea, South",66
6,US,47
7,Japan,19
8,Switzerland,11
9,Netherlands,10


### Pays sans aucun cas guéri

In [19]:
# Countries where no recovery has been recorded yet
temp = temp_f.loc[
    temp_f['Recovered'] == 0,
    ['Country/Region', 'Confirmed', 'Deaths', 'Recovered'],
]
temp.reset_index(drop=True).style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered
0,Netherlands,804,10,0
1,Qatar,320,0,0
2,Greece,190,1,0
3,Brazil,151,0,0
4,Slovenia,141,0,0
5,Czechia,141,0,0
6,Ireland,90,1,0
7,San Marino,80,5,0
8,Estonia,79,0,0
9,Poland,68,2,0


### Pays avec tous les cas décédés

In [21]:
# Countries (outside China) where every confirmed case has died
temp = (
    row_latest_grouped
    .loc[row_latest_grouped['Confirmed'].eq(row_latest_grouped['Deaths']),
         ['Country/Region', 'Confirmed', 'Deaths']]
    .sort_values('Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Confirmed,Deaths
0,Guyana,1,1
1,Sudan,1,1


### Pays avec tous les cas récupérés

In [22]:
# Countries (outside China) where every confirmed case has recovered
temp = (
    row_latest_grouped
    .loc[row_latest_grouped['Confirmed'].eq(row_latest_grouped['Recovered']),
         ['Country/Region', 'Confirmed', 'Recovered']]
    .sort_values('Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Greens')

Unnamed: 0,Country/Region,Confirmed,Recovered
0,Jordan,1,1
1,Nepal,1,1


### Pays n'ayant plus de cas actifs

In [25]:
# Countries (outside China) with no case left open:
# confirmed == deaths + recovered
closed_cases = row_latest_grouped['Deaths'] + row_latest_grouped['Recovered']
temp = (
    row_latest_grouped
    .loc[row_latest_grouped['Confirmed'] == closed_cases,
         ['Country/Region', 'Confirmed', 'Deaths', 'Recovered']]
    .sort_values('Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Greens')

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered
0,Guyana,1,1,0
1,Jordan,1,0,1
2,Nepal,1,0,1
3,Sudan,1,1,0


# Données sur les provinces chinoises

### Dans chaque province

In [26]:
# Chinese provinces ranked by confirmed cases, highest first
temp_f = (
    china_latest_grouped[['Province/State', 'Confirmed', 'Deaths', 'Recovered']]
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp_f.style.background_gradient(cmap='Pastel1_r')

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered
0,Hubei,67786,3062,51553
1,Guangdong,1356,8,1296
2,Henan,1273,22,1249
3,Zhejiang,1215,1,1197
4,Hunan,1018,4,1005
5,Anhui,990,6,984
6,Jiangxi,935,1,934
7,Shandong,760,7,739
8,Jiangsu,631,0,630
9,Chongqing,576,6,566


### Provinces sans cas récupérés

In [27]:
# Chinese provinces where no recovery has been recorded yet
temp = (
    china_latest_grouped
    .loc[china_latest_grouped['Recovered'] == 0,
         ['Province/State', 'Confirmed', 'Deaths', 'Recovered']]
    .sort_values('Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Pastel1_r')

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered


### Provinces où tous les cas sont morts

In [None]:
# Chinese provinces where every confirmed case has died
temp = (
    china_latest_grouped
    .loc[china_latest_grouped['Confirmed'].eq(china_latest_grouped['Deaths']),
         ['Province/State', 'Confirmed', 'Deaths', 'Recovered']]
    .sort_values('Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Greens')

### Provinces où tous les cas sont guéris

In [28]:
# Chinese provinces where every confirmed case has recovered
temp = (
    china_latest_grouped
    .loc[china_latest_grouped['Confirmed'].eq(china_latest_grouped['Recovered']),
         ['Province/State', 'Confirmed', 'Deaths', 'Recovered']]
    .sort_values('Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Greens')

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered
0,Shanxi,133,0,133
1,Qinghai,18,0,18
2,Macau,10,0,10
3,Tibet,1,0,1


### Provinces n'ayant plus de cas actifs

In [29]:
# Chinese provinces with no case left open:
# confirmed == deaths + recovered
closed_cases = china_latest_grouped['Deaths'] + china_latest_grouped['Recovered']
temp = (
    china_latest_grouped
    .loc[china_latest_grouped['Confirmed'] == closed_cases,
         ['Province/State', 'Confirmed', 'Deaths', 'Recovered']]
    .sort_values('Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Greens')

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered
0,Anhui,990,6,984
1,Jiangxi,935,1,934
2,Fujian,296,1,295
3,Shanxi,133,0,133
4,Xinjiang,76,3,73
5,Qinghai,18,0,18
6,Macau,10,0,10
7,Tibet,1,0,1


# Maps

### A travers le monde

In [30]:
# World map with one circle per row of the latest snapshot
m = folium.Map(location=[0, 0], tiles='cartodbpositron',
               min_zoom=1, max_zoom=4, zoom_start=1)

# Walk the rows once instead of re-reading each row by position for every field
for i, entry in full_latest.iterrows():
    # NOTE(review): <bold> is not a standard HTML element (<b> is) - confirm
    # whether bold labels were intended in the tooltip.
    tooltip_html = ''.join([
        '<li><bold>Country : ', str(entry['Country/Region']),
        '<li><bold>Province : ', str(entry['Province/State']),
        '<li><bold>Confirmed : ', str(entry['Confirmed']),
        '<li><bold>Deaths : ', str(entry['Deaths']),
        '<li><bold>Recovered : ', str(entry['Recovered']),
    ])
    folium.Circle(
        location=[entry['Lat'], entry['Long']],
        color='crimson',
        tooltip=tooltip_html,
        # Circle radius grows slightly faster than the confirmed count
        radius=int(entry['Confirmed'])**1.1,
    ).add_to(m)
m

In [35]:
# Confirmed cases per country (choropleth)
fig = px.choropleth(
    full_latest_grouped,
    locations="Country/Region",
    locationmode='country names',
    color="Confirmed",
    hover_name="Country/Region",
    # Color scale limited to 1..7000: larger counts share the top color
    range_color=[1, 7000],
    color_continuous_scale="aggrnyl",
    title='Pays avec des cas confirmés',
)
# Hide the color bar
fig.update(layout_coloraxis_showscale=False)
fig.show()

In [34]:
# Deaths per country (choropleth), restricted to countries with at least one death
fig = px.choropleth(
    full_latest_grouped[full_latest_grouped['Deaths'] > 0],
    locations="Country/Region",
    locationmode='country names',
    color="Deaths",
    hover_name="Country/Region",
    # Color scale limited to 1..50: larger counts share the top color
    range_color=[1, 50],
    color_continuous_scale="agsunset",
    title='Pays où des décès ont été signalés',
)
# Hide the color bar
fig.update(layout_coloraxis_showscale=False)
fig.show()

In [36]:
# Country-level totals for every date: add up the province rows of each country.
# (A max over the provinces would report only a country's largest province.)
# Columns are selected with a list: tuple selection on a group-by raises on
# pandas >= 2.0.
formated_gdf = (
    full_table
    .groupby(['Date', 'Country/Region'])[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)
# Dates become strings because they are used as animation frame labels
formated_gdf['Date'] = pd.to_datetime(formated_gdf['Date']).dt.strftime('%m/%d/%Y')
# Marker size = confirmed ** 0.3, which compresses the range of bubble sizes
formated_gdf['size'] = formated_gdf['Confirmed'].pow(0.3)

fig = px.scatter_geo(formated_gdf, locations="Country/Region", locationmode='country names',
                     color="Confirmed", size='size', hover_name="Country/Region",
                     range_color=[0, formated_gdf['Confirmed'].max() + 2],
                     projection="natural earth", animation_frame="Date",
                     title='Répartition dans le temps')
# Hide the color bar
fig.update(layout_coloraxis_showscale=False)
fig.show()

# Ships

In [37]:
# Ship rows on the most recent date, limited to the displayed columns
temp = (
    ship.loc[ship['Date'] == max(ship['Date']),
             ['Province/State', 'Confirmed', 'Deaths', 'Recovered']]
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Pastel1_r')

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered
0,Grand Princess,21,0,0
1,Grand Princess,2,0,0


In [38]:
# Ship rows on the most recent date
temp = ship[ship['Date'] == max(ship['Date'])]

m = folium.Map(location=[0, 0], tiles='cartodbpositron',
               min_zoom=1, max_zoom=4, zoom_start=1)

# One fixed-size circle per ship row; walk the rows once instead of
# re-reading each row by position for every field
for i, entry in temp.iterrows():
    # NOTE(review): <bold> is not a standard HTML element (<b> is) - confirm
    # whether bold labels were intended in the tooltip.
    tooltip_html = ''.join([
        '<li><bold>Country : ', str(entry['Country/Region']),
        '<li><bold>Province : ', str(entry['Province/State']),
        '<li><bold>Confirmed : ', str(entry['Confirmed']),
        '<li><bold>Deaths : ', str(entry['Deaths']),
        '<li><bold>Recovered : ', str(entry['Recovered']),
    ])
    folium.Circle(
        location=[entry['Lat'], entry['Long']],
        color='crimson',
        tooltip=tooltip_html,
        radius=100000,
    ).add_to(m)
m

# Les cas au fil du temps

In [None]:
# Worldwide recovered / deaths / active totals per date.
# Columns are selected with a list: tuple selection on a group-by raises on
# pandas >= 2.0.
temp = full_table.groupby('Date')[['Recovered', 'Deaths', 'Active']].sum().reset_index()

# Long form: one row per (date, case type)
temp = temp.melt(id_vars="Date", value_vars=['Recovered', 'Deaths', 'Active'],
                 var_name='Case', value_name='Count')

# Stacked area chart; palette order follows value_vars (recovered, deaths, active)
fig = px.area(temp, x="Date", y="Count", color='Case',
             title='Cases over time', color_discrete_sequence = [rec, dth, act])
fig.show()

# Taux de récupération et de mortalité au fil du temps

In [48]:
# Worldwide totals per date. Only the three counters are aggregated: summing
# the whole frame would also add up Lat/Long and the text columns.
temp = full_table.groupby('Date')[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()

# Deaths and recoveries per 100 confirmed cases, to one decimal place.
# Rounding after the multiplication avoids float artefacts such as 2.1999999.
temp['No. of Deaths to 100 Confirmed Cases'] = (temp['Deaths'] / temp['Confirmed'] * 100).round(1)
temp['No. of Recovered to 100 Confirmed Cases'] = (temp['Recovered'] / temp['Confirmed'] * 100).round(1)

# Long form: one row per (date, ratio)
temp = temp.melt(id_vars='Date', value_vars=['No. of Deaths to 100 Confirmed Cases', 'No. of Recovered to 100 Confirmed Cases'],
                 var_name='Ratio', value_name='Value')

# Both ratios on a log-scaled y axis; palette order follows value_vars
fig = px.line(temp, x="Date", y="Value", color='Ratio', log_y=True,
              title='Taux de récupération et de mortalité au fil du temps', color_discrete_sequence=[dth, rec])
fig.show()

# Top 20

In [49]:
# Short alias for the per-country totals of the latest snapshot
flg = full_latest_grouped
flg.head()

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active
0,Afghanistan,7,0,0,7
1,Albania,33,1,0,32
2,Algeria,26,2,8,16
3,Andorra,1,0,0,1
4,Antigua and Barbuda,1,0,0,1


In [50]:
# Top 20 countries by confirmed cases, re-sorted ascending for the
# horizontal bar chart
fig = px.bar(
    flg.sort_values('Confirmed', ascending=False)
       .head(20)
       .sort_values('Confirmed', ascending=True),
    x="Confirmed",
    y="Country/Region",
    title='Cas confirmés',
    text='Confirmed',
    orientation='h',
    width=700,
    height=700,
    # Extend the x axis past the largest bar
    range_x=[0, max(flg['Confirmed']) + 10000],
)
fig.update_traces(marker_color=cnf, opacity=0.6, textposition='outside')
fig.show()

In [41]:
# Top 20 countries by deaths, re-sorted ascending for the horizontal bar chart
fig = px.bar(
    flg.sort_values('Deaths', ascending=False)
       .head(20)
       .sort_values('Deaths', ascending=True),
    x="Deaths",
    y="Country/Region",
    title='Deaths',
    text='Deaths',
    orientation='h',
    width=700,
    height=700,
    # Extend the x axis past the largest bar
    range_x=[0, max(flg['Deaths']) + 500],
)
fig.update_traces(marker_color=dth, opacity=0.6, textposition='outside')
fig.show()