<b> Source COVID France, updated every day at 7:00 PM: </b> <br>
https://www.data.gouv.fr/fr/datasets/donnees-hospitalieres-relatives-a-lepidemie-de-covid-19/ <br>

In [16]:
#--- STEP1 ---: Get data source & cleaning
start_time = time.time()


def step1(url):
    """Load the raw source data located at ``url``.

    Delegates to ``check_url`` (defined elsewhere in this notebook), passing
    ';' as its second argument.
    NOTE(review): presumably ';' is the CSV separator and the result is a
    pandas DataFrame -- confirm against ``check_url``.
    """
    return check_url(url, ';')

# Resource URL of the French hospital dataset (see the link at the top of the notebook).
url = 'https://www.data.gouv.fr/fr/datasets/r/63352e38-d353-4b54-bfd1-f1b3ee1cabd7'
dataFrance = step1(url)

# Report the elapsed time since `start_time` was taken at the top of this cell.
# `now` is defined elsewhere in the notebook.
print(
    "Script execution completed at " + now
    + ". Time: --- %s secnds ---" % (time.time() - start_time)
)
# dataFrance  (uncomment to preview the frame)

Script execution completed at 13/04/2020 08:43:34. Time: --- 0.79891037940979 secnds ---


In [17]:
#--- STEP2 ---: Create DB with data source (output = DB_CONCAT)
start_time = time.time()


def step2(df):
    """Reshape the raw source frame into long format.

    Keeps only the rows where ``sexe`` equals 0, removes the ``sexe`` column,
    upper-cases every column name, then unpivots every column other than
    DEP and JOUR into (STATUS, VALUE) pairs. Missing values are replaced by 0.
    NOTE(review): ``sexe == 0`` is presumably the "all sexes" aggregate of the
    dataset -- confirm against the data source documentation.

    Returns a new DataFrame with columns DEP, JOUR, STATUS, VALUE.
    """
    id_columns = ['DEP', 'JOUR']

    selected = df.loc[df['sexe'] == 0]
    wide = selected.drop(columns='sexe')
    wide.columns = wide.columns.str.upper()

    long = pd.melt(
        wide,
        id_vars=id_columns,
        var_name='STATUS',
        value_name='VALUE',
    )
    return long.fillna(0)

# Build the long-format table from the raw source frame.
db_concat = step2(dataFrance)
print(
    "Script execution completed at " + now
    + ". Time: --- %s secnds ---" % (time.time() - start_time)
)

# Export as ';'-separated CSV, and to MongoDB when `use_mongo` is truthy.
# `output_folder`, `use_mongo`, `bob` and `db_src` are defined elsewhere in the notebook.
db_concat.to_csv(output_folder + 'FRANCE_DB_CONCAT.csv', sep=";")
if use_mongo:
    bob.mongo.save_df(db_concat, 'FRANCE_DB_CONCAT', db_src, True)
# db_concat  (uncomment to preview the frame)

Script execution completed at 13/04/2020 08:43:34. Time: --- 0.010662555694580078 secnds ---
Dataframe FRANCE_DB_CONCAT successfully save in database covid-19-dev in MongoDB. Time: --- 0.35415196418762207 secnds ---


In [18]:
#--- STEP3 ---: Consolidate and enrich data (output = DB_CONSO)
start_time = time.time()


def step3(df, ref):
    """Label each row with its department/region and add aggregated levels.

    Args:
        df: long-format frame providing the DEP, JOUR, STATUS and VALUE columns.
        ref: reference frame providing the DEP_CODE, DEP_NAME and REG_NAME
            columns.

    Returns:
        A new frame with columns DATE, STATUS, LABEL, LABEL_GROUPS, VALUE
        holding the summed VALUE at three levels:
          * department: LABEL = "<code> - <name>", LABEL_GROUPS = region name
          * region:     LABEL = region name,       LABEL_GROUPS = 'France'
          * country:    LABEL = 'France',          LABEL_GROUPS = 'France'

    Neither ``df`` nor ``ref`` is modified: the join keys and the DEP_FULL
    label are built on derived copies instead of being written back into the
    caller's frames.
    """
    #-- Merge France
    # Cast both join keys to str so they compare equal whatever their source dtype.
    departments = df.assign(DEP=df['DEP'].astype(str))
    dep_code = ref['DEP_CODE'].astype(str)
    lookup = ref.assign(
        DEP_CODE=dep_code,
        DEP_FULL=dep_code + ' - ' + ref['DEP_NAME'],
    )
    cols_to_rename = {'REG_NAME': 'LABEL_GROUPS', 'DEP_FULL': 'LABEL', 'JOUR': 'DATE'}
    # Left join: rows whose DEP has no match in `ref` keep missing labels.
    departments = (
        pd.merge(departments, lookup, left_on='DEP', right_on='DEP_CODE', how='left')
        .drop(['DEP', 'DEP_CODE', 'DEP_NAME'], axis=1)
        .rename(columns=cols_to_rename)
    )

    #-- Conso Region
    regions = departments.assign(
        LABEL=departments['LABEL_GROUPS'],
        LABEL_GROUPS='France',
    )

    #-- Conso France
    country = departments.assign(LABEL='France', LABEL_GROUPS='France')

    # Stack the three levels and sum VALUE per (date, status, label, group).
    stacked = pd.concat([departments, regions, country], axis=0)
    cols_to_group = ['DATE', 'STATUS', 'LABEL', 'LABEL_GROUPS']
    totals = stacked.groupby(cols_to_group, as_index=False).agg({'VALUE': 'sum'})
    return totals.reset_index(drop=True)

# Consolidate the long-format table with the reference table.
# `ref_france` is defined elsewhere in the notebook.
db_conso = step3(db_concat, ref_france)
print(
    "Script execution completed at " + now
    + ". Time: --- %s secnds ---" % (time.time() - start_time)
)

# Export as ';'-separated CSV, and to MongoDB when `use_mongo` is truthy.
db_conso.to_csv(output_folder + 'FRANCE_DB_CONSO.csv', sep=';')
if use_mongo:
    bob.mongo.save_df(db_conso, 'FRANCE_DB_CONSO', db_src, True)
# db_conso  (uncomment to preview the frame)

Script execution completed at 13/04/2020 08:43:34. Time: --- 0.05619692802429199 secnds ---
Dataframe FRANCE_DB_CONSO successfully save in database covid-19-dev in MongoDB. Time: --- 0.38308167457580566 secnds ---


In [20]:
#--- STEP4 ---: Calculate KPIs (output = DB_ALL)
start_time = time.time()


def step4(df):
    """Add day-over-day variation and display attributes to each row.

    Every row is matched (inner merge) with the row that has the same STATUS,
    LABEL and LABEL_GROUPS dated one day earlier; rows without such a
    counterpart are therefore absent from the result. The input frame is not
    modified.

    Columns added:
        LAST_DAY      DATE minus one day
        VALUE_D-1     VALUE of the matching previous-day row
        SCENARIO      DATE formatted as dd/mm/YYYY
        DATE_ORDER    DATE formatted as YYYYmmdd
        VARV          VALUE - VALUE_D-1
        VARP          VARV / |VALUE_D-1| * 100
        STATUS_NAME   display name for the HOSP / REA / RAD / DC codes
        STATUS_ORDER  display rank (1..4) for the same codes
    """
    join_keys = ['LAST_DAY', 'STATUS', 'LABEL', 'LABEL_GROUPS']
    # status code -> (display name, display rank)
    status_info = {
        'HOSP': ('Hospitalisations', 1),
        'REA': ('En réanimation', 2),
        'RAD': ('Retours à domicile', 3),
        'DC': ('Décès à l’hôpital', 4),
    }

    # Most recent dates first, on a private copy.
    today = df.copy()
    today = today.sort_values(by='DATE', ascending=False)
    today['DATE'] = pd.to_datetime(today['DATE'], format='%Y-%m-%d')
    today['LAST_DAY'] = today['DATE'] - timedelta(days=1)

    # Same data re-keyed so that its own date lines up with LAST_DAY above.
    yesterday = today.drop(columns=['LAST_DAY']).rename(
        columns={'DATE': 'LAST_DAY', 'VALUE': 'VALUE_D-1'}
    )
    merged = pd.merge(today, yesterday, on=join_keys)

    merged['SCENARIO'] = merged['DATE'].dt.strftime('%d/%m/%Y')
    merged['DATE_ORDER'] = merged['DATE'].dt.strftime('%Y%m%d')

    # Absolute and relative variation versus the previous day.
    delta = merged['VALUE'] - merged['VALUE_D-1']
    merged['VARV'] = delta
    merged['VARP'] = delta.div(merged['VALUE_D-1'].abs()).mul(100)

    # Rows with any other status code keep missing STATUS_NAME / STATUS_ORDER.
    for code, (display_name, rank) in status_info.items():
        is_code = merged['STATUS'] == code
        merged.loc[is_code, 'STATUS_NAME'] = display_name
        merged.loc[is_code, 'STATUS_ORDER'] = rank

    return merged.reset_index(drop=True)

# Compute the KPI table from the consolidated table.
db_all = step4(db_conso)
print(
    "Script execution completed at " + now
    + ". Time: --- %s secnds ---" % (time.time() - start_time)
)

# Export as ';'-separated CSV, and to MongoDB when `use_mongo` is truthy.
db_all.to_csv(output_folder + 'FRANCE_DB_ALL.csv', sep=";")
if use_mongo:
    bob.mongo.save_df(db_all, 'FRANCE_DB_ALL', db_src, True)

# Last expression of the cell: displays the frame in the notebook output.
db_all

Script execution completed at 13/04/2020 08:43:34. Time: --- 0.18233466148376465 secnds ---
Dataframe FRANCE_DB_ALL successfully save in database covid-19-dev in MongoDB. Time: --- 0.7130308151245117 secnds ---


Unnamed: 0,DATE,STATUS,LABEL,LABEL_GROUPS,VALUE,LAST_DAY,VALUE_D-1,SCENARIO,DATE_ORDER,VARV,VARP,STATUS_NAME,STATUS_ORDER
0,2020-04-12,REA,Provence-Alpes-Côte d'Azur,France,438,2020-04-11,436,12/04/2020,20200412,2,0.458716,En réanimation,2.0
1,2020-04-12,HOSP,Provence-Alpes-Côte d'Azur,France,1830,2020-04-11,1766,12/04/2020,20200412,64,3.624009,Hospitalisations,1.0
2,2020-04-12,HOSP,31 - Haute-Garonne,Occitanie,244,2020-04-11,239,12/04/2020,20200412,5,2.092050,Hospitalisations,1.0
3,2020-04-12,HOSP,32 - Gers,Occitanie,28,2020-04-11,23,12/04/2020,20200412,5,21.739130,Hospitalisations,1.0
4,2020-04-12,HOSP,33 - Gironde,Nouvelle-Aquitaine,299,2020-04-11,308,12/04/2020,20200412,-9,-2.922078,Hospitalisations,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11995,2020-03-19,RAD,87 - Haute-Vienne,Nouvelle-Aquitaine,2,2020-03-18,2,19/03/2020,20200319,0,0.000000,Retours à domicile,3.0
11996,2020-03-19,RAD,80 - Somme,Hauts-de-France,12,2020-03-18,9,19/03/2020,20200319,3,33.333333,Retours à domicile,3.0
11997,2020-03-19,RAD,79 - Deux-Sèvres,Nouvelle-Aquitaine,3,2020-03-18,2,19/03/2020,20200319,1,50.000000,Retours à domicile,3.0
11998,2020-03-19,RAD,78 - Yvelines,Ile-de-France,7,2020-03-18,5,19/03/2020,20200319,2,40.000000,Retours à domicile,3.0


In [25]:
#--- STEP5 ---: DB_TREND (output = DB_TREND)
start_time = time.time()


def step5(df):
    """Unpivot the KPI columns into one row per metric, with display hints.

    Args:
        df: frame providing the identifier columns listed in ``cols_to_keep``
            plus VALUE_D-1, LAST_DAY and the metric columns (VALUE, VARV and
            VARP in the frame produced by step4).

    Returns:
        A new frame with the identifier columns followed by:
            METRIC     name of the source column (e.g. VALUE, VARV, VARP)
            VALUE      the metric's value
            UNIT       " %" for VARP, " cas" for every other metric
            PRECISION  format spec per metric (missing for unknown metrics)
    """
    #-- Melt CALC in rows
    cols_to_keep = ['LABEL', 'LABEL_GROUPS', 'SCENARIO', 'DATE', 'DATE_ORDER',
                    'STATUS', 'STATUS_NAME', 'STATUS_ORDER']
    # pandas >= 2.0 raises ValueError when `value_name` matches an existing
    # column, and the frame still carries a VALUE column at this point. Melt
    # into a temporary name and rename afterwards; the result is the same.
    tmp_value = '__METRIC_VALUE__'
    trend = (
        df.drop(['VALUE_D-1', 'LAST_DAY'], axis=1)
        .melt(id_vars=cols_to_keep, value_name=tmp_value, var_name='METRIC')
        .rename(columns={tmp_value: 'VALUE'})
    )

    #-- Get units and precisions
    precision_by_metric = {'VALUE': ",.0f", 'VARV': "+,.0f", 'VARP': "+,.2f"}
    trend['UNIT'] = " cas"
    trend.loc[trend['METRIC'] == 'VARP', 'UNIT'] = " %"
    trend['PRECISION'] = trend['METRIC'].map(precision_by_metric)
    return trend.reset_index(drop=True)

# Build the trend table (one row per metric) from the KPI table.
db_trend = step5(db_all)
print("Script execution completed at " + now + ". Time: --- %s secnds ---" % (time.time() - start_time))

# Export as ';'-separated CSV, and to MongoDB when `use_mongo` is truthy.
db_trend.to_csv(output_folder + 'FRANCE_DB_TREND.csv', sep=";")
if use_mongo:
    # Save the trend table itself: passing db_all here stored the step-4 table
    # under the FRANCE_DB_TREND name, out of sync with the CSV written above.
    bob.mongo.save_df(db_trend, 'FRANCE_DB_TREND', db_src, True)

# Last expression of the cell: displays the frame in the notebook output.
db_trend

Script execution completed at 13/04/2020 08:43:34. Time: --- 0.06862640380859375 secnds ---
Dataframe FRANCE_DB_TREND successfully save in database covid-19-dev in MongoDB. Time: --- 0.8253769874572754 secnds ---


Unnamed: 0,LABEL,LABEL_GROUPS,SCENARIO,DATE,DATE_ORDER,STATUS,STATUS_NAME,STATUS_ORDER,METRIC,VALUE,UNIT,PRECISION
0,Provence-Alpes-Côte d'Azur,France,12/04/2020,2020-04-12,20200412,REA,En réanimation,2.0,VALUE,438.000000,cas,",.0f"
1,Provence-Alpes-Côte d'Azur,France,12/04/2020,2020-04-12,20200412,HOSP,Hospitalisations,1.0,VALUE,1830.000000,cas,",.0f"
2,31 - Haute-Garonne,Occitanie,12/04/2020,2020-04-12,20200412,HOSP,Hospitalisations,1.0,VALUE,244.000000,cas,",.0f"
3,32 - Gers,Occitanie,12/04/2020,2020-04-12,20200412,HOSP,Hospitalisations,1.0,VALUE,28.000000,cas,",.0f"
4,33 - Gironde,Nouvelle-Aquitaine,12/04/2020,2020-04-12,20200412,HOSP,Hospitalisations,1.0,VALUE,299.000000,cas,",.0f"
...,...,...,...,...,...,...,...,...,...,...,...,...
35995,87 - Haute-Vienne,Nouvelle-Aquitaine,19/03/2020,2020-03-19,20200319,RAD,Retours à domicile,3.0,VARP,0.000000,%,"+,.2f"
35996,80 - Somme,Hauts-de-France,19/03/2020,2020-03-19,20200319,RAD,Retours à domicile,3.0,VARP,33.333333,%,"+,.2f"
35997,79 - Deux-Sèvres,Nouvelle-Aquitaine,19/03/2020,2020-03-19,20200319,RAD,Retours à domicile,3.0,VARP,50.000000,%,"+,.2f"
35998,78 - Yvelines,Ile-de-France,19/03/2020,2020-03-19,20200319,RAD,Retours à domicile,3.0,VARP,40.000000,%,"+,.2f"
