<b> Source: COVID-19 data for France, updated every day at 7:00 PM: </b> <br>
https://www.data.gouv.fr/fr/datasets/donnees-hospitalieres-relatives-a-lepidemie-de-covid-19/ <br>

In [None]:
# --- STEP1 ---: Get data source & cleaning
# Record when this step starts; the elapsed time is printed once it has run.
START_TIME = time.time()


def step1(url):
    """
    Fetch the raw source table.

    Delegates to ``check_url``, passing ``url`` and ``';'`` (presumably the
    field separator of the remote CSV -- confirm against ``check_url``),
    and hands its result back unchanged.
    """
    return check_url(url, ';')


# Address handed to step1 (presumably the direct-download link of the
# dataset's CSV resource -- TODO confirm).
URL = ('https://www.data.gouv.fr/fr/datasets'
       '/r/63352e38-d353-4b54-bfd1-f1b3ee1cabd7')
DATA_FRANCE = step1(URL)
# Report the time elapsed since START_TIME was set for this step.
print(f"Script execution completed at {NOW}. "
      f"Time: --- {time.time() - START_TIME} seconds ---")
# Persist the step output as a ';'-separated CSV and, when enabled, in MongoDB.
DATA_FRANCE.to_csv(os.path.join(OUTPUT_FOLDER, 'FRANCE_INIT.csv'), sep=";")
if USE_MONGO:
    bob.mongo.save_df(DATA_FRANCE, 'FRANCE_INIT', DB_SRC, True)
# Uncomment the next line to display the frame in the notebook.
# DATA_FRANCE

In [None]:
# --- STEP2 ---: Create DB with data source (output = DB_CONCAT)
# Record when this step starts; the elapsed time is printed once it has run.
START_TIME = time.time()


def step2(dataf):
    """
    Reshape the source table to long format (output = DB_CONCAT).

    Only the rows whose ``sexe`` equals 0 are kept (presumably the
    "all sexes" total -- confirm against the dataset documentation) and the
    ``sexe`` column is removed. Column names are upper-cased, then every
    column other than ``DEP`` and ``JOUR`` is unpivoted into a
    ``STATUS`` / ``VALUE`` pair. Missing values are replaced by 0.
    """
    subset = dataf.loc[dataf['sexe'] == 0].drop(columns='sexe')
    subset.columns = subset.columns.str.upper()
    long_format = pd.melt(subset,
                          id_vars=['DEP', 'JOUR'],
                          var_name='STATUS',
                          value_name='VALUE')
    return long_format.fillna(0)


# Run step 2 on the raw extract.
DB_CONCAT = step2(DATA_FRANCE)
# Report the time elapsed since START_TIME was set for this step.
print(f"Script execution completed at {NOW}. "
      f"Time: --- {time.time() - START_TIME} seconds ---")
# Persist the step output as a ';'-separated CSV and, when enabled, in MongoDB.
DB_CONCAT.to_csv(os.path.join(OUTPUT_FOLDER,
                              'FRANCE_DB_CONCAT.csv'), sep=";")
if USE_MONGO:
    bob.mongo.save_df(DB_CONCAT, 'FRANCE_DB_CONCAT', DB_SRC, True)
# Uncomment the next line to display the frame in the notebook.
# DB_CONCAT

In [None]:
# --- STEP3 ---: Consolidate and enrich data (output = DB_CONSO)
# Record when this step starts; the elapsed time is printed once it has run.
START_TIME = time.time()


def step3(dataf, ref):
    """
    Consolidate and enrich data (output = DB_CONSO)

    Joins the long-format data with the department reference table, then
    stacks three aggregation levels and sums ``VALUE`` per
    ``DATE`` / ``STATUS`` / ``LABEL`` / ``LABEL_GROUPS``:

    * department: ``LABEL`` = "<DEP_CODE> - <DEP_NAME>",
      ``LABEL_GROUPS`` = region name (``REG_NAME``);
    * region: ``LABEL`` = region name, ``LABEL_GROUPS`` = 'France';
    * country: ``LABEL`` = 'France', ``LABEL_GROUPS`` = 'France'.

    Neither input frame is modified.

    :param dataf: frame with columns ``DEP``, ``JOUR``, ``STATUS``, ``VALUE``.
    :param ref: reference frame with columns ``DEP_CODE``, ``DEP_NAME`` and
        ``REG_NAME``; any other column is discarded by the aggregation.
    :return: aggregated frame with columns ``DATE``, ``STATUS``, ``LABEL``,
        ``LABEL_GROUPS``, ``VALUE`` and a fresh 0..n-1 index.
    """
    # -- Merge France
    # ``assign`` returns new frames, so the caller's objects stay untouched
    # (join keys are compared as strings on both sides).
    dataf = dataf.assign(DEP=dataf['DEP'].astype(str))
    ref = ref.assign(DEP_CODE=ref['DEP_CODE'].astype(str))
    ref['DEP_FULL'] = ref['DEP_CODE'] + ' - ' + ref['DEP_NAME']
    cols_to_rename = {'REG_NAME': 'LABEL_GROUPS',
                      'DEP_FULL': 'LABEL',
                      'JOUR': 'DATE'}
    # NOTE(review): with how='left', a DEP missing from ``ref`` gets NaN
    # labels; groupby's default NaN handling then drops it from the
    # department and region levels while it still counts in the country
    # total -- confirm this is intended.
    by_dep = pd.merge(dataf, ref,
                      left_on='DEP',
                      right_on='DEP_CODE',
                      how='left')
    by_dep = by_dep.drop(['DEP', 'DEP_CODE', 'DEP_NAME'],
                         axis=1).rename(columns=cols_to_rename)

    # -- Conso Region
    by_region = by_dep.assign(LABEL=by_dep['LABEL_GROUPS'],
                              LABEL_GROUPS='France')

    # -- Conso France
    by_country = by_region.assign(LABEL='France')

    stacked = pd.concat([by_dep, by_region, by_country], axis=0)
    cols_to_group = ['DATE', 'STATUS', 'LABEL', 'LABEL_GROUPS']
    stacked = stacked.groupby(cols_to_group,
                              as_index=False).agg({'VALUE': 'sum'})
    return stacked.reset_index(drop=True)


# Run step 3 on the long-format data and the department reference table.
DB_CONSO = step3(DB_CONCAT, REF_FRANCE)
# Report the time elapsed since START_TIME was set for this step.
# Both literals need the f prefix, otherwise the second one is printed
# verbatim with its braces.
print(f"Script execution completed at {NOW}. "
      f"Time: --- {time.time() - START_TIME} seconds ---")
# Persist the step output as a ';'-separated CSV and, when enabled, in MongoDB.
DB_CONSO.to_csv(os.path.join(OUTPUT_FOLDER, 'FRANCE_DB_CONSO.csv'), sep=';')
if USE_MONGO:
    bob.mongo.save_df(DB_CONSO, 'FRANCE_DB_CONSO', DB_SRC, True)
# Uncomment the next line to display the frame in the notebook.
# DB_CONSO

In [None]:
# --- STEP4 ---: Calculate KPIs (output = DB_ALL)
# Record when this step starts; the elapsed time is printed once it has run.
START_TIME = time.time()


def step4(dataf):
    """
    Compute day-over-day KPIs (output = DB_ALL).

    Each row is paired with the row of the previous calendar day that has
    the same ``STATUS`` / ``LABEL`` / ``LABEL_GROUPS``. The pairing is an
    inner merge, so rows without a previous-day counterpart are left out.

    Columns added: ``LAST_DAY``, ``VALUE_D-1``, ``SCENARIO`` (dd/mm/YYYY),
    ``DATE_ORDER`` (YYYYmmdd), ``VARV`` (absolute change), ``VARP``
    (change in percent of ``|VALUE_D-1|``; the division is not guarded
    against a zero previous value), ``STATUS_NAME`` and ``STATUS_ORDER``.
    The input frame is not modified.
    """
    date_format = '%Y-%m-%d'
    join_keys = ['LAST_DAY', 'STATUS', 'LABEL', 'LABEL_GROUPS']
    status_names = (('HOSP', 'Hospitalisations'),
                    ('REA', 'En réanimation'),
                    ('RAD', 'Retours à domicile'),
                    ('DC', 'Décès à l’hôpital'))
    status_orders = (('HOSP', 1), ('REA', 2), ('RAD', 3), ('DC', 4))

    # Most recent dates first; DATE is parsed only after the sort.
    current = dataf.copy().sort_values(by='DATE', ascending=False)
    current['DATE'] = pd.to_datetime(current['DATE'], format=date_format)
    current['LAST_DAY'] = pd.to_datetime(current['DATE'] - timedelta(days=1))

    # Same data re-keyed on its own date, to be looked up as "yesterday".
    previous = current.drop(columns=['LAST_DAY']).rename(
        index=str,
        columns={'DATE': 'LAST_DAY', 'VALUE': 'VALUE_D-1'})

    merged = current.merge(previous, on=join_keys)

    # Display and sort keys derived from the date.
    parsed_dates = pd.to_datetime(merged['DATE'], format=date_format)
    merged['SCENARIO'] = parsed_dates.dt.strftime('%d/%m/%Y')
    merged['DATE_ORDER'] = parsed_dates.dt.strftime('%Y%m%d')

    # Variation versus the previous day.
    merged['VARV'] = merged['VALUE'] - merged['VALUE_D-1']
    merged['VARP'] = merged['VARV'] / merged['VALUE_D-1'].abs() * 100

    # Human-readable status label, then its display rank.
    for code, name in status_names:
        merged.loc[merged['STATUS'] == code, 'STATUS_NAME'] = name
    for code, rank in status_orders:
        merged.loc[merged['STATUS'] == code, 'STATUS_ORDER'] = rank
    return merged.reset_index(drop=True)


# Run step 4 on the consolidated data.
DB_ALL = step4(DB_CONSO)
# Report the time elapsed since START_TIME was set for this step.
print(f"Script execution completed at {NOW}. "
      f"Time: --- {time.time() - START_TIME} seconds ---")
# Persist the step output as a ';'-separated CSV and, when enabled, in MongoDB.
DB_ALL.to_csv(os.path.join(OUTPUT_FOLDER, 'FRANCE_DB_ALL.csv'), sep=";")
if USE_MONGO:
    bob.mongo.save_df(DB_ALL, 'FRANCE_DB_ALL', DB_SRC, True)
# Bare expression: displays the frame as the notebook cell output.
DB_ALL

In [None]:
# --- STEP5 ---: DB_TREND (output = DB_TREND)
# Record when this step starts; the elapsed time is printed once it has run.
START_TIME = time.time()


def step5(dataf):
    """
    DB_TREND (output = DB_TREND)

    Drops ``VALUE_D-1`` and ``LAST_DAY``, then unpivots every column that
    is not an identifier (``VALUE``, ``VARV`` and ``VARP`` for the output
    of step4) into ``METRIC`` / ``VALUE`` rows, and attaches display
    metadata to each row:

    * ``UNIT``: " %" for ``VARP``, " cas" for every other metric;
    * ``PRECISION``: format spec ",.0f" (``VALUE``), "+,.0f" (``VARV``)
      or "+,.2f" (``VARP``).

    :param dataf: frame holding the identifier columns listed in
        ``cols_to_keep`` plus ``VALUE_D-1``, ``LAST_DAY`` and the metrics.
    :return: long-format frame with a fresh 0..n-1 index.
    """
    # -- Melt CALC in rows
    cols_to_keep = ['LABEL', 'LABEL_GROUPS', 'SCENARIO', 'DATE', 'DATE_ORDER',
                    'STATUS', 'STATUS_NAME', 'STATUS_ORDER']
    # The frame still carries a 'VALUE' column at melt time, and recent
    # pandas rejects a ``value_name`` that matches an existing column.
    # Melt into a temporary name and rename afterwards; the result is the
    # same on every pandas version.
    tmp_value = '__MELT_VALUE__'
    melted = dataf.drop(['VALUE_D-1', 'LAST_DAY'], axis=1).melt(
        id_vars=cols_to_keep,
        value_name=tmp_value,
        var_name='METRIC').rename(columns={tmp_value: 'VALUE'})

    # -- Get units and precisions
    melted['UNIT'] = " cas"
    melted.loc[melted['METRIC'] == 'VARP', 'UNIT'] = " %"
    melted.loc[melted['METRIC'] == 'VALUE', 'PRECISION'] = ",.0f"
    melted.loc[melted['METRIC'] == 'VARV', 'PRECISION'] = "+,.0f"
    melted.loc[melted['METRIC'] == 'VARP', 'PRECISION'] = "+,.2f"
    return melted.reset_index(drop=True)


# Run step 5 on the KPI table.
DB_TREND = step5(DB_ALL)
# Report the time elapsed since START_TIME was set for this step.
print(f"Script execution completed at {NOW}. "
      f"Time: --- {time.time() - START_TIME} seconds ---")
# Persist the step output as a ';'-separated CSV and, when enabled, in MongoDB.
DB_TREND.to_csv(os.path.join(OUTPUT_FOLDER, 'FRANCE_DB_TREND.csv'), sep=";")
if USE_MONGO:
    # Store the trend table itself under 'FRANCE_DB_TREND' (not DB_ALL, which
    # already has its own 'FRANCE_DB_ALL' collection).
    bob.mongo.save_df(DB_TREND, 'FRANCE_DB_TREND', DB_SRC, True)
# Bare expression: displays the frame as the notebook cell output.
DB_TREND