<b> Source: COVID France data, updated every day at 7:00 PM: </b> <br>
https://www.data.gouv.fr/fr/datasets/donnees-hospitalieres-relatives-a-lepidemie-de-covid-19/ <br>

In [None]:
%run __init__.ipynb

In [None]:
%%time

#--- STEP1 ---: Get data source & cleaning
def step1(url):
    """Load the source table located at ``url``.

    Delegates to ``check_url``, a helper that is not defined in this cell
    (presumably made available by ``%run __init__.ipynb`` -- confirm).

    Args:
        url: Address of the source file.

    Returns:
        Whatever ``check_url(url, ';')`` returns.
    """
    # NOTE(review): ';' is presumably the field separator of the file -- confirm
    # against the definition of check_url.
    return check_url(url, ';')

# Fetch the source table, hand it to df_save and, when USE_MONGO is truthy,
# to the Mongo driver as well.
url = 'https://www.data.gouv.fr/fr/datasets/r/63352e38-d353-4b54-bfd1-f1b3ee1cabd7'
dataFrance = step1(url)
df_save(dataFrance, 'FRANCE_INIT', 'csv')
if USE_MONGO:
    naas_drivers.mongo.send(dataFrance, 'FRANCE_INIT', DB_SRC, True)
# Last expression of the cell: the notebook renders the table.
dataFrance

In [None]:
%%time

#--- STEP2 ---: Create DB with data source (output = DB_CONCAT)
def step2(df):
    """Reshape the source table into long format.

    Keeps only the rows whose ``sexe`` column equals 0, removes that column,
    upper-cases the remaining column names and unpivots every column other
    than ``DEP`` and ``JOUR`` into ``STATUS`` / ``VALUE`` pairs. Missing
    values anywhere in the result are replaced by 0.

    Args:
        df: Source table with a ``sexe`` column and columns whose upper-cased
            names are ``DEP`` and ``JOUR``.

    Returns:
        A new DataFrame with columns ``DEP``, ``JOUR``, ``STATUS``, ``VALUE``.
    """
    # NOTE(review): sexe == 0 is presumably the "all sexes" total -- confirm
    # against the dataset documentation.
    totals = df.loc[df['sexe'] == 0].drop(columns='sexe')
    totals.columns = totals.columns.str.upper()

    long_format = totals.melt(
        id_vars=['DEP', 'JOUR'],
        var_name='STATUS',
        value_name='VALUE',
    )
    return long_format.fillna(0)

# Build the long-format table, hand it to df_save and, when USE_MONGO is
# truthy, to the Mongo driver as well.
db_concat = step2(dataFrance)
df_save(db_concat, 'FRANCE_DB_CONCAT', 'csv')
if USE_MONGO:
    naas_drivers.mongo.send(db_concat, 'FRANCE_DB_CONCAT', DB_SRC, True)
# db_concat

In [None]:
%%time

#--- STEP3 ---: Consolidate and enrich data (output = DB_CONSO)
def step3(df, ref):
    """Label each row with its department/region and add aggregated levels.

    The result stacks three levels of detail and sums ``VALUE`` per
    (``DATE``, ``STATUS``, ``LABEL``, ``LABEL_GROUPS``):

    * department: ``LABEL`` is ``"<DEP_CODE> - <DEP_NAME>"`` and
      ``LABEL_GROUPS`` is the region name (``REG_NAME``);
    * region: ``LABEL`` is the region name and ``LABEL_GROUPS`` is ``'France'``;
    * country: both ``LABEL`` and ``LABEL_GROUPS`` are ``'France'``.

    Neither ``df`` nor ``ref`` is modified; the function works on copies.

    Args:
        df: Long-format table with ``DEP``, ``JOUR``, ``STATUS`` and ``VALUE``
            columns. ``JOUR`` must hold ``YYYY-MM-DD`` dates.
        ref: Reference table with ``DEP_CODE``, ``DEP_NAME`` and ``REG_NAME``
            columns.

    Returns:
        A new DataFrame with columns ``DATE``, ``STATUS``, ``LABEL``,
        ``LABEL_GROUPS`` and ``VALUE``.
    """
    # -- Merge France
    # assign() returns new frames, so the caller's tables (in particular the
    # shared reference table) are not altered by the casts below.
    df = df.assign(DEP=df['DEP'].astype(str))
    ref = ref.assign(DEP_CODE=ref['DEP_CODE'].astype(str))
    ref['DEP_FULL'] = ref['DEP_CODE'] + ' - ' + ref['DEP_NAME']

    cols_to_rename = {'REG_NAME': 'LABEL_GROUPS', 'DEP_FULL': 'LABEL', 'JOUR': 'DATE'}
    df = (
        pd.merge(df, ref, left_on='DEP', right_on='DEP_CODE', how='left')
        .drop(columns=['DEP', 'DEP_CODE', 'DEP_NAME'])
        .rename(columns=cols_to_rename)
    )

    # -- Conso Region: each row is re-labelled with its region, grouped under France
    df_re = df.assign(LABEL=df['LABEL_GROUPS'], LABEL_GROUPS='France')

    # -- Conso France: every row counts towards the single 'France' label
    df_fr = df.assign(LABEL='France', LABEL_GROUPS='France')

    df = pd.concat([df, df_re, df_fr], axis=0)
    # An explicit format makes infer_datetime_format redundant (and that
    # argument is deprecated in pandas 2.x), so it is not passed.
    df['DATE'] = pd.to_datetime(df['DATE'], format='%Y-%m-%d')

    # NOTE(review): with groupby's default handling of missing keys, rows whose
    # DEP had no match in ref (NaN LABEL / LABEL_GROUPS) only survive in the
    # country-level total -- confirm this is intended.
    cols_to_group = ['DATE', 'STATUS', 'LABEL', 'LABEL_GROUPS']
    df = df.groupby(cols_to_group, as_index=False).agg({'VALUE': 'sum'})
    return df.reset_index(drop=True)

# Consolidate against the REF_FRANCE reference table, hand the result to
# df_save and, when USE_MONGO is truthy, to the Mongo driver as well.
db_conso = step3(db_concat, REF_FRANCE)
df_save(db_conso, 'FRANCE_DB_CONSO', 'csv')
if USE_MONGO:
    naas_drivers.mongo.send(db_conso, 'FRANCE_DB_CONSO', DB_SRC, True)
# Last expression of the cell: the notebook renders the table.
db_conso

In [None]:
%%time

#--- STEP4 ---: Calculate KPIs (output = DB_ALL)
def step4(df):
    """Add day-over-day variations and display attributes to each row.

    Every row is matched with the row of the previous calendar day that has
    the same ``STATUS``, ``LABEL`` and ``LABEL_GROUPS``. Rows without such a
    match are dropped (inner join).

    Args:
        df: Table with a datetime ``DATE`` column plus ``STATUS``, ``LABEL``,
            ``LABEL_GROUPS`` and ``VALUE`` columns. It is not modified.

    Returns:
        A new DataFrame with the input columns plus ``LAST_DAY``,
        ``VALUE_D-1``, ``SCENARIO`` (``dd/mm/YYYY``), ``DATE_ORDER``
        (``YYYYmmdd``), ``VARV`` (absolute variation), ``VARP`` (variation in
        percent), ``STATUS_NAME`` and ``STATUS_ORDER``.
    """
    # Display name and sort rank of each known status code; rows with any
    # other code keep missing values in both columns.
    status_display = {
        'HOSP': ('Hospitalisations', 1),
        'REA': ('En réanimation', 2),
        'RAD': ('Retours à domicile', 3),
        'DC': ('Décès à l’hôpital', 4),
    }
    join_keys = ['LAST_DAY', 'STATUS', 'LABEL', 'LABEL_GROUPS']

    # sort_values returns a new frame, so the caller's table stays untouched.
    current = df.sort_values(by='DATE', ascending=False)
    current['LAST_DAY'] = current['DATE'] - timedelta(days=1)

    # Same rows seen as "the day before": DATE becomes the join key LAST_DAY
    # and VALUE becomes the reference value VALUE_D-1.
    previous = current.drop(columns=['LAST_DAY'])
    previous = previous.rename(columns={'DATE': 'LAST_DAY', 'VALUE': 'VALUE_D-1'})

    result = current.merge(previous, on=join_keys)
    result['SCENARIO'] = result['DATE'].dt.strftime('%d/%m/%Y')
    result['DATE_ORDER'] = result['DATE'].dt.strftime('%Y%m%d')

    # NOTE(review): a previous-day value of 0 makes VARP infinite (or NaN when
    # the variation is 0 too) -- confirm downstream consumers accept that.
    result['VARV'] = result['VALUE'] - result['VALUE_D-1']
    result['VARP'] = result['VARV'] / result['VALUE_D-1'].abs() * 100

    for code, (display_name, rank) in status_display.items():
        matches = result['STATUS'] == code
        result.loc[matches, 'STATUS_NAME'] = display_name
        result.loc[matches, 'STATUS_ORDER'] = rank

    return result.reset_index(drop=True)

# Compute the KPIs, hand the result to df_save and, when USE_MONGO is truthy,
# to the Mongo driver as well.
db_all = step4(db_conso)
df_save(db_all, 'FRANCE_DB_ALL', 'csv')
if USE_MONGO:
    naas_drivers.mongo.send(db_all, 'FRANCE_DB_ALL', DB_SRC, True)
# db_all

In [None]:
%%time

#--- STEP5 ---: DB_TREND (output = DB_TREND)
def step5(df):
    """Unpivot the KPI columns into one row per metric and add display hints.

    ``VALUE_D-1`` and ``LAST_DAY`` are dropped; every remaining column that is
    not an identifier (``VALUE``, ``VARV``, ``VARP`` in the table built by
    step 4) becomes a row whose ``METRIC`` holds the former column name and
    whose ``VALUE`` holds the number.

    Args:
        df: Table with the identifier columns listed in ``cols_to_keep`` plus
            ``VALUE``, ``VALUE_D-1``, ``LAST_DAY`` and the variation columns.
            It is not modified.

    Returns:
        A new DataFrame with the identifier columns followed by ``METRIC``,
        ``VALUE``, ``UNIT`` (``" %"`` for ``VARP``, ``" cas"`` otherwise) and
        ``PRECISION`` (a number format spec; missing for any metric other
        than ``VALUE``, ``VARV`` and ``VARP``).
    """
    #-- Melt CALC in rows
    cols_to_keep = ['LABEL', 'LABEL_GROUPS', 'SCENARIO', 'DATE', 'DATE_ORDER',
                    'STATUS', 'STATUS_NAME', 'STATUS_ORDER']
    # The frame still has a 'VALUE' column at this point, and melt() rejects a
    # value_name that matches an existing column (FutureWarning in pandas 1.x,
    # ValueError in pandas 2.x). Melt under a temporary name, then rename.
    melted_name = '__MELTED_VALUE__'
    df = (
        df.drop(['VALUE_D-1', 'LAST_DAY'], axis=1)
        .melt(id_vars=cols_to_keep, var_name='METRIC', value_name=melted_name)
        .rename(columns={melted_name: 'VALUE'})
    )

    #-- Get units and precisions
    df['UNIT'] = " cas"
    df.loc[df['METRIC'] == 'VARP', 'UNIT'] = " %"
    precisions = {'VALUE': ",.0f", 'VARV': "+,.0f", 'VARP': "+,.2f"}
    for metric, number_format in precisions.items():
        df.loc[df['METRIC'] == metric, 'PRECISION'] = number_format
    return df.reset_index(drop=True)

# Build the trend table, hand it to df_save and, when USE_MONGO is truthy,
# to the Mongo driver as well.
db_trend = step5(db_all)
df_save(db_trend, 'FRANCE_DB_TREND', 'csv')
if USE_MONGO:
    naas_drivers.mongo.send(db_trend, 'FRANCE_DB_TREND', DB_SRC, True)
# db_trend