# UTILS

## DASK SESSION

In [14]:
from dask.distributed import Client, progress
client = Client(memory_limit='12GB')
client


Port 8787 is already in use.
Perhaps you already have a cluster running?
Hosting the HTTP server on port 38923 instead



0,1
Client  Scheduler: tcp://127.0.0.1:46119  Dashboard: http://127.0.0.1:38923/status,Cluster  Workers: 4  Cores: 8  Memory: 48.00 GB


## PACKAGES

In [1]:
import xarray
import rioxarray
import geopandas as gpd
import dask.dataframe as dd

import numpy as np
import pandas as pd
import plotly.express as px
from keplergl import KeplerGl
from datetime import datetime
import plotly.graph_objects as go
from shapely.geometry.polygon import Polygon
import plotly.express as px

## DATA

In [181]:
# !curl https://static.data.gouv.fr/resources/donnees-hospitalieres-relatives-a-lepidemie-de-covid-19/20210104-190649/donnees-hospitalieres-covid19-2021-01-04-19h03.csv --output covid19.csv

In [182]:
#!wget https://download-0008.copernicus-climate.eu/cache-compute-0008/cache/data5/adaptor.mars.internal-1609790914.7741523-10074-8-3e8c7a98-3832-46d5-961b-00ea5c662488.nc

# OPENING THE DATA

### France admin

In [9]:
france = gpd.read_file('departements-version-simplifiee.geojson')
france.head()

Unnamed: 0,code,nom,geometry
0,1,Ain,"POLYGON ((4.78021 46.17668, 4.79458 46.21832, ..."
1,2,Aisne,"POLYGON ((4.04797 49.40564, 4.03991 49.39740, ..."
2,3,Allier,"POLYGON ((3.03207 46.79491, 3.04907 46.75808, ..."
3,4,Alpes-de-Haute-Provence,"POLYGON ((5.67604 44.19143, 5.69209 44.18648, ..."
4,5,Hautes-Alpes,"POLYGON ((6.26057 45.12685, 6.29922 45.10855, ..."


### Covid19 data

In [185]:
covid = dd.read_csv('covid19.csv',
                    sep   = ';',
                    dtype = {
                        'dep'  : str,
                        'sexe' : int,
                        'hosp' : int,
                        'rea'  : int,
                        'rad'  : int,
                        'dc'   : int
                    },
                    parse_dates= ['jour'])

covid['dc'] = covid['dc'].diff(periods=3)
covid['rad'] = covid['rad'].diff(periods=3)

covid.head()

Unnamed: 0,dep,sexe,jour,hosp,rea,rad,dc
0,1,0,2020-03-18,2,0,,
1,1,1,2020-03-18,1,0,,
2,1,2,2020-03-18,1,0,,
3,2,0,2020-03-18,41,10,17.0,11.0
4,2,1,2020-03-18,19,4,10.0,6.0


### ERA5 data

In [20]:
dst = xarray.open_mfdataset('../adaptor.mars.internal-1609790914.7741523-10074-8-3e8c7a98-3832-46d5-961b-00ea5c662488.nc').rio.write_crs(4326)

dst_dayly  = dst[['u10','v10','t2m']].resample(time = 'D').mean()
rain_dayly = dst['tp'].resample(time = 'D').sum()

In [13]:
d_dst_dayly      = dict()
d_rain_dayly     = dict()


for index in france.index:
    
    geometry = [france.iloc[index]['geometry'].__geo_interface__]
    name = france.iloc[index]['code']
    
    try :
        d_dst_dayly[name]         = dst_dayly.rio.clip(geometries=geometry, crs=4326).mean('latitude').mean('longitude').to_dataframe().drop(['spatial_ref'], axis = 1).reset_index()
        d_dst_dayly[name]['dep']  = name
    
        d_rain_dayly[name]        = rain_dayly.rio.clip(geometries=geometry, crs=4326).mean('latitude').mean('longitude').to_dataframe().drop(['spatial_ref'], axis = 1).reset_index()
        d_rain_dayly[name]['dep'] = name
        
    except :
        print(name)

75


In [75]:
df_dst  = pd.concat([d_dst_dayly[key] for key in d_dst_dayly.keys()]).reset_index().drop(['index'], axis =1)
df_rain = pd.concat([d_rain_dayly[key] for key in d_rain_dayly.keys()]).reset_index().drop(['index'], axis =1).drop(['time', 'dep'], axis = 1)
df_dst  = df_dst.join(df_rain)

df_dst.to_csv('df_dst.csv')

df_dst.head()

Unnamed: 0,time,u10,v10,t2m,dep,tp
0,2020-03-01,0.974748,3.110744,280.077057,1,0.063716
1,2020-03-02,0.56813,0.757587,277.452393,1,0.291293
2,2020-03-03,0.947154,-0.337001,276.453033,1,0.056209
3,2020-03-04,0.3226,0.740365,276.798645,1,0.002581
4,2020-03-05,0.730984,3.445092,279.849823,1,0.364009


In [25]:
df_dst = pd.read_csv('df_dst.csv', parse_dates=['time']).drop(['Unnamed: 0'], axis = 1)

In [26]:
fig = px.line(df_dst, x="time", y="t2m", color='dep', title = 'rainfall per departement in france')
fig.show()

In [27]:
df_dst

Unnamed: 0,time,u10,v10,t2m,dep,tp
0,2020-03-01,0.974748,3.110744,280.07706,01,0.063716
1,2020-03-02,0.568130,0.757587,277.45240,01,0.291293
2,2020-03-03,0.947154,-0.337001,276.45303,01,0.056209
3,2020-03-04,0.322600,0.740365,276.79865,01,0.002581
4,2020-03-05,0.730984,3.445092,279.84982,01,0.364009
...,...,...,...,...,...,...
23270,2020-10-27,2.278925,4.051184,283.57090,95,0.043889
23271,2020-10-28,4.221609,3.723945,284.67166,95,0.028072
23272,2020-10-29,3.671320,2.744816,285.45245,95,0.011061
23273,2020-10-30,2.768193,2.770717,287.11942,95,0.004212


In [28]:
df_Pays_de_Loire = df_dst.loc[df_dst['dep']=='44'].copy()
df_Pays_de_Loire['t2m'] = df_Pays_de_Loire['t2m'].apply(lambda x : x-273.15)

fig = go.Figure()
fig.add_trace(go.Scatter(x=df_Pays_de_Loire.time, y=df_Pays_de_Loire.t2m, name = 'temparture in Celsius'))
fig.add_trace(go.Scatter(x=df_Pays_de_Loire.time, y=df_Pays_de_Loire.u10, name = 'norm of eastward wind'))
fig.add_trace(go.Scatter(x=df_Pays_de_Loire.time, y=df_Pays_de_Loire.v10, name = 'norm of northward wind'))
fig.add_trace(go.Scatter(x=df_Pays_de_Loire.time, y=df_Pays_de_Loire.tp, name = 'average of the total precipitation'))



fig.update_layout(title='Our 4 climatic variables for the departement Pays de La Loire',
                   xaxis_title='Month',
                   yaxis_title='Celsius | mm of precipitation | m/s ')
fig.show()

In [29]:
fig = px.line(df_dst, x="time", y="t2m", color='dep', title = 'temperature per departement in france')
fig.show()

In [30]:
fig = px.line(df_dst, x="time", y="u10", color='dep', title = 'u10 per departement in france')
fig.show()

In [31]:
fig = px.line(df_dst, x="time", y="v10", color='dep', title = 'v10 per departement in france')
fig.show()

### Action du gouvernement

In [191]:
# COMMERCES OUVERTS   = 0
# COMMERCES FERMES    = 1
# COUVRE FEU          = 2
# CONFINEMENT 100km   = 3
# CONFINEMENT TOTAL   = 4


etape_0 =  pd.DataFrame({'date' : pd.date_range("2020-01-01", periods=77, freq="D")})
etape_0['etat'] = 0 #'PAS DE CONFINEMENT & COMMERCES OUVERTS' 

etape_1 = pd.DataFrame({'date' : pd.date_range("2020-03-17", periods=56, freq="D")})
etape_1['etat'] = 4 #'CONFINEMENT TOTAL'

etape_2 = pd.DataFrame({'date' : pd.date_range("2020-05-11", periods=18, freq="D")})
etape_2['etat'] = 3 #'CONFINEMENT 100km'

etape_3 = pd.DataFrame({'date' : pd.date_range("2020-05-28", periods=26, freq="D")})
etape_3['etat'] = 1 #'PAS DE CONFINEMENT & COMMERCES FERMES' 

etape_4 = pd.DataFrame({'date' : pd.date_range("2020-06-22", periods=131, freq="D")})
etape_4['etat'] = 0 #'PAS DE CONFINEMENT & COMMERCES OUVERTS' 

etape_5 = pd.DataFrame({'date' : pd.date_range("2020-10-30", periods=47, freq="D")})
etape_5['etat'] = 4 #'CONFINEMENT TOTAL' 

etape_6 = pd.DataFrame({'date' : pd.date_range("2020-12-15", periods=50, freq="D")})
etape_6['etat'] = 2 #'COUVRE FEU' 

action_gouv = pd.concat([etape_0, etape_1,etape_2,etape_3,etape_4,etape_5,etape_6]).set_index('date')

fig = px.line(action_gouv, y="etat", title = 'Action of the gouvernement')
fig.show()

#  JOINING THE DATA

In [196]:
date = '2019-01-01'
departement = dict()
for i in france.code: 
    if i != '75':
        df_dst_dep        = df_dst.loc[df_dst['dep']==i].set_index('time').drop(['dep'], axis =1)
        covid_dep         = covid.loc[covid.dep == i].set_index('jour')
        covid_dep         = covid_dep.loc[covid_dep.sexe != 0]
        covid_dep['dc']   = covid_dep['dc'].apply(lambda x : 0 if x<0 else x,  meta=('int'))
        covid_dep['rea']  = covid_dep['rea'].rolling(window=3).mean()
        covid_dep['hosp'] = covid_dep['hosp'].rolling(window=3).mean()
        departement[i] = covid_dep.join(df_dst_dep)
        departement[i] = departement[i].join(action_gouv)
        departement[i] = departement[i].dropna().compute()
    else :
        pass

In [198]:
df_full  = pd.concat([departement[key] for key in departement.keys()]).reset_index()
df_full['time'] = df_full['index']
df_full = df_full.drop(['index'], axis = 1)
df_full.to_csv('df_full.csv')
df_full

Unnamed: 0,dep,sexe,hosp,rea,rad,dc,u10,v10,t2m,tp,etat,time
0,01,1,1.000000,0.000000,1.0,0.0,-0.276857,0.986420,285.38248,0.002949,4,2020-03-19
1,01,2,1.000000,0.000000,0.0,0.0,-0.276857,0.986420,285.38248,0.002949,4,2020-03-19
2,01,1,1.000000,0.000000,1.0,0.0,-0.211989,0.715435,284.94818,0.006653,4,2020-03-20
3,01,2,1.000000,0.000000,0.0,0.0,-0.211989,0.715435,284.94818,0.006653,4,2020-03-20
4,01,1,1.000000,0.000000,1.0,0.0,-0.352148,-0.681619,283.60290,0.003800,4,2020-03-21
...,...,...,...,...,...,...,...,...,...,...,...,...
43885,95,1,218.333333,28.333333,-934.0,0.0,2.768193,2.770717,287.11942,0.004212,4,2020-10-30
43886,95,2,222.666667,19.000000,-1183.0,0.0,2.768193,2.770717,287.11942,0.004212,0,2020-10-30
43887,95,2,222.666667,19.000000,-1183.0,0.0,2.768193,2.770717,287.11942,0.004212,4,2020-10-30
43888,95,1,226.333333,29.000000,-955.0,0.0,0.902014,3.072099,286.10504,0.017505,4,2020-10-31


In [3]:
df_full = pd.read_csv('df_full.csv', parse_dates=['time']).drop(['Unnamed: 0'], axis = 1)
df_full = df_full.rename(columns = {
    'hosp' : 'nb_hospitalisation',
    'rea'  : 'nb_reanimation',
    'rad'  : 'nb_outside_hospital',
    'dc'   : 'nb_death',
    't2m' : 'temperature',
    'tp'   : 'precipitation',
    'u10'  : 'eastward wind',
    'v10'  : 'northward wind',
    'etat' : 'decision_gouvernement'
})
df_full.to_csv('df_full.csv')
df_full

Unnamed: 0,dep,sexe,nb_hospitalisation,nb_reanimation,nb_outside_hospital,nb_death,eastward wind,northward wind,temperature,precipitation,decision_gouvernement,time
0,01,1,1.000000,0.000000,1.0,0.0,-0.276857,0.986420,285.38248,0.002949,4,2020-03-19
1,01,2,1.000000,0.000000,0.0,0.0,-0.276857,0.986420,285.38248,0.002949,4,2020-03-19
2,01,1,1.000000,0.000000,1.0,0.0,-0.211989,0.715435,284.94818,0.006653,4,2020-03-20
3,01,2,1.000000,0.000000,0.0,0.0,-0.211989,0.715435,284.94818,0.006653,4,2020-03-20
4,01,1,1.000000,0.000000,1.0,0.0,-0.352148,-0.681619,283.60290,0.003800,4,2020-03-21
...,...,...,...,...,...,...,...,...,...,...,...,...
43885,95,1,218.333333,28.333333,-934.0,0.0,2.768193,2.770717,287.11942,0.004212,4,2020-10-30
43886,95,2,222.666667,19.000000,-1183.0,0.0,2.768193,2.770717,287.11942,0.004212,0,2020-10-30
43887,95,2,222.666667,19.000000,-1183.0,0.0,2.768193,2.770717,287.11942,0.004212,4,2020-10-30
43888,95,1,226.333333,29.000000,-955.0,0.0,0.902014,3.072099,286.10504,0.017505,4,2020-10-31


### CHECKING THE CORRELATION

In [7]:
fig = px.imshow(df_full.corr(method='spearman'))
fig.show()

## MAPPING

In [11]:
dict_geo = dict()
for dep in france.code:
    dict_geo[dep] = df_full.loc[df_full['dep'] == dep].corr(method='spearman')['nb_hospitalisation']['temperature']
df_corr = pd.DataFrame({'correlation_temperature_hospitalisation' : dict_geo.values(), 'dep' : dict_geo.keys()}).set_index('dep')

In [14]:
geo = france.set_index('code').join(df_corr)
france_correlation = KeplerGl(height=500)
france_correlation.add_data(geo, name= 'correlation_temperature_hospitalisation')
# france_correlation.save_to_html(file_name='index.html', read_only=True)
france_correlation

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to index.html!


KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [], 'layers': [{'id': '3yjgq0t', 'type': …