In [7]:
#  bob.refresh {"cron": "0,25 19-22 * * *", "timeout": 40000}

In [8]:
import bob
# Health-check client for this run. `hc` and `hc_key` are reused by the last cell
# of the notebook to report completion.
hc = bob.HealthCheck()
# Identifier of this job's check; the recorded output shows it appended to the ping URL.
# NOTE(review): consider loading it from configuration instead of hard-coding it here.
hc_key = "a0475c76-9c0c-41b3-a001-354b535171df"
# Signal the start of the run and echo whatever the client returns.
start = hc.start(hc_key)
print(start)

HealthCheck init in prod mode
Start ==> send to https://health.cashstory.com/ping/a0475c76-9c0c-41b3-a001-354b535171df, 2020-04-07


In [9]:
# Execute the shared setup notebook inside this kernel. The cells below use names that
# are never defined in this notebook (pd, time, timedelta, now, input_folder,
# output_folder, db_src) -- presumably they are provided here; confirm in __init__.ipynb.
%run __init__.ipynb

Successfully connected to MongoDB


In [10]:
#--- STEP1 ---: Get data source & cleaning
# Start of this cell's wall-clock timer; read back by the print at the end of the cell.
start_time = time.time()
def get_datasource(url):
    """Read a semicolon-separated CSV into a DataFrame, best effort.

    Parameters
    ----------
    url : anything accepted by ``pandas.read_csv`` (URL, path or file-like object).

    Returns
    -------
    pandas.DataFrame
        The parsed data, or an empty DataFrame when reading raised. In that case the
        error is printed (not re-raised), so callers must check for an empty result.
    """
    try:
        return pd.read_csv(url, sep=';')
    except Exception as err:
        # Report the failure but keep going: the fallback below is an empty frame.
        print(f'Error connecting to {url}.')
        print(err.__doc__)
        print(str(err))
    return pd.DataFrame()

# Source CSV, fetched over HTTP on every run.
url = 'https://www.data.gouv.fr/fr/datasets/r/63352e38-d353-4b54-bfd1-f1b3ee1cabd7'
# get_datasource prints the error and returns an empty DataFrame when the read fails,
# so `dataFrance` may be empty here.
dataFrance = get_datasource(url)
# Department reference sheet; step 3 reads its DEP_CODE, DEP_NAME and REG_NAME columns.
ref_france = pd.read_excel(input_folder + 'REF_WSR.xlsx', sheet_name = 'REF_FRANCE')
# `%` binds tighter than `+`, so only the last literal is formatted with the elapsed time.
# NOTE(review): `now` is not assigned in this notebook; if it is computed once during
# setup, the printed timestamp is the setup time rather than the completion time -- confirm.
print("Script execution completed at " + now + ". Time: --- %s secnds ---" % (time.time() - start_time))

Script execution completed at 07/04/2020 19:05:28. Time: --- 1.1002347469329834 secnds ---


In [11]:
#--- STEP2 ---: Create DB with data source (output = DB_CONCAT)
# Start of this cell's wall-clock timer; read back by the print further down.
start_time = time.time()
def step2(df):
    """Reshape the raw source table into long format.

    Keeps only the rows where ``sexe`` equals 0 (presumably the all-sexes total --
    confirm against the dataset documentation), drops that column, upper-cases the
    column names and unpivots every column other than DEP and JOUR.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table with at least the columns ``sexe``, ``dep`` and ``jour``.

    Returns
    -------
    pandas.DataFrame
        Columns DEP, JOUR, STATUS (former column name) and VALUE, with missing
        values replaced by 0. The input frame is left untouched.
    """
    id_columns = ['DEP', 'JOUR']

    totals = df.loc[df['sexe'] == 0].drop(columns=['sexe'])
    totals.columns = totals.columns.str.upper()

    long_format = totals.melt(id_vars=id_columns, var_name='STATUS', value_name='VALUE')
    return long_format.fillna(0)

# Long-format table built from the raw download.
db_concat = step2(dataFrance)
# Printed before the exports below, so the reported time covers step2 only.
print("Script execution completed at " + now + ". Time: --- %s secnds ---" % (time.time() - start_time))
# Semicolon-separated export; pandas also writes the row index as the first column by default.
db_concat.to_csv(output_folder + 'FRANCE_DB_CONCAT.csv',sep=";")
# NOTE(review): the meaning of the trailing `True` is not visible from this notebook -- check bob.mongo.save_df.
bob.mongo.save_df(db_concat,'FRANCE_DB_CONCAT',db_src,True)
# db_concat

Script execution completed at 07/04/2020 19:05:28. Time: --- 0.010419130325317383 secnds ---
Dataframe FRANCE_DB_CONCAT successfully save in database covid-19-dev in MongoDB. Time: --- 0.22269725799560547 secnds ---


In [12]:
#--- STEP3 ---: Consolidate and enrich data (output = DB_CONSO)
# Start of this cell's wall-clock timer; read back by the print further down.
start_time = time.time()
def step3(df,ref):
    """Label each row with its department and region, then add region and France totals.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format rows with the columns DEP, JOUR, STATUS and VALUE.
    ref : pandas.DataFrame
        Department reference with the columns DEP_CODE, DEP_NAME and REG_NAME.

    Returns
    -------
    pandas.DataFrame
        Columns DATE, STATUS, LABEL, LABEL_GROUPS, VALUE, where VALUE is summed at
        three levels:
          * department: LABEL = "<DEP_CODE> - <DEP_NAME>", LABEL_GROUPS = REG_NAME
          * region:     LABEL = REG_NAME,                  LABEL_GROUPS = 'France'
          * country:    LABEL = 'France',                  LABEL_GROUPS = 'France'

    Neither `df` nor `ref` is modified: the dtype casts and the DEP_FULL helper
    column are applied to copies only.

    NOTE(review): a DEP value with no match in `ref` gets missing labels after the
    left merge. groupby drops missing keys, so such rows vanish from the department
    and region levels while still being counted in the France total.
    """
    #-- Merge France
    # Join on string codes so numeric and text codes compare equal on both sides.
    ref_labelled = ref.assign(DEP_CODE=ref['DEP_CODE'].astype(str))
    ref_labelled['DEP_FULL'] = ref_labelled['DEP_CODE'] + ' - ' + ref_labelled['DEP_NAME']

    cols_to_rename = {"REG_NAME": "LABEL_GROUPS",'DEP_FULL':'LABEL','JOUR':'DATE'}
    df_dep = (
        df.assign(DEP=df['DEP'].astype(str))
        .merge(ref_labelled, left_on='DEP', right_on='DEP_CODE', how='left')
        .drop(['DEP','DEP_CODE','DEP_NAME'], axis=1)
        .rename(columns=cols_to_rename)
    )

    #-- Conso Region: each region becomes a label inside the 'France' group
    df_re = df_dep.assign(LABEL=df_dep['LABEL_GROUPS'], LABEL_GROUPS='France')

    #-- Conso France: a single country-wide label
    df_fr = df_dep.assign(LABEL='France', LABEL_GROUPS='France')

    # Stack the three levels and sum VALUE per (date, status, label, group).
    cols_to_group = ['DATE','STATUS','LABEL','LABEL_GROUPS']
    stacked = pd.concat([df_dep, df_re, df_fr], axis=0)
    totals = stacked.groupby(cols_to_group, as_index=False).agg({'VALUE':'sum'})
    return totals.reset_index(drop=True)

# Department, region and France totals per date and status.
db_conso = step3(db_concat,ref_france)
# Printed before the exports below, so the reported time covers step3 only.
print("Script execution completed at " + now + ". Time: --- %s secnds ---" % (time.time() - start_time))
# Semicolon-separated export; pandas also writes the row index as the first column by default.
db_conso.to_csv(output_folder + 'FRANCE_DB_CONSO.csv', sep=';')
# NOTE(review): the meaning of the trailing `True` is not visible from this notebook -- check bob.mongo.save_df.
bob.mongo.save_df(db_conso,'FRANCE_DB_CONSO',db_src,True)
# db_conso

Script execution completed at 07/04/2020 19:05:28. Time: --- 0.04443168640136719 secnds ---
Dataframe FRANCE_DB_CONSO successfully save in database covid-19-dev in MongoDB. Time: --- 0.33692455291748047 secnds ---


In [13]:
#--- STEP4 ---: Calculate KPIs (output = DB_ALL)
# Start of this cell's wall-clock timer; read back by the print further down.
start_time = time.time()
def step4(df):
    """Add day-over-day variations and display fields to the consolidated table.

    Parameters
    ----------
    df : pandas.DataFrame
        Columns DATE ('%Y-%m-%d' text), STATUS, LABEL, LABEL_GROUPS and VALUE.
        The caller's frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per input row that also has a row for the previous calendar day
        with the same STATUS / LABEL / LABEL_GROUPS (inner join, so the earliest
        day of each series is dropped). Added columns:
          * LAST_DAY     previous calendar day
          * VALUE_D-1    VALUE on LAST_DAY
          * SCENARIO     DATE as 'dd/mm/YYYY' text
          * DATE_ORDER   DATE as 'YYYYmmdd' text
          * VARV         VALUE - VALUE_D-1
          * VARP         VARV as a percentage of |VALUE_D-1|; NaN when VALUE_D-1
                         is 0, because the ratio is undefined (never +/-inf)
          * STATUS_NAME  display label for HOSP / REA / RAD / DC, NaN otherwise
          * STATUS_ORDER display rank 1-4 for the same codes (float), NaN otherwise
    """
    # Sort on the raw DATE values, newest first; the inner merge below keeps this order.
    df = df.copy().sort_values(by='DATE', ascending=False)

    #-- Calc var vs yesterday
    # Parse once; every later use reads the datetime column directly.
    df['DATE'] = pd.to_datetime(df['DATE'], format='%Y-%m-%d')
    df['LAST_DAY'] = df['DATE'] + timedelta(days=-1)

    # Same rows re-keyed by their own date, so joining on LAST_DAY fetches yesterday's value.
    cols_to_rename = {"DATE": "LAST_DAY", 'VALUE': 'VALUE_D-1'}
    df_last = df.drop(['LAST_DAY'], axis=1).rename(columns=cols_to_rename)

    cols_to_merge = ['LAST_DAY', 'STATUS', 'LABEL', 'LABEL_GROUPS']
    df = df.merge(df_last, on=cols_to_merge)
    df['SCENARIO'] = df['DATE'].dt.strftime('%d/%m/%Y')
    df['DATE_ORDER'] = df['DATE'].dt.strftime('%Y%m%d')

    #-- Variations
    df['VARV'] = df['VALUE'] - df['VALUE_D-1']
    # Mask zero denominators so a rise from 0 yields NaN instead of +/-inf.
    previous_abs = df['VALUE_D-1'].abs()
    df['VARP'] = df['VARV'] / previous_abs.where(previous_abs != 0) * 100

    #-- Rename status
    status_names = {
        'HOSP': 'Hospitalisations',
        'REA': 'En réanimation',
        'RAD': 'Retours à domicile',
        'DC': 'Décès à l’hôpital',
    }
    status_orders = {'HOSP': 1, 'REA': 2, 'RAD': 3, 'DC': 4}
    df['STATUS_NAME'] = df['STATUS'].map(status_names)
    # Kept as float so the exported values do not depend on whether unknown codes are present.
    df['STATUS_ORDER'] = df['STATUS'].map(status_orders).astype('float64')
    return df.reset_index(drop=True)

# Consolidated table enriched with day-over-day variations and display fields.
db_all=step4(db_conso)    
# Printed before the exports below, so the reported time covers step4 only.
print("Script execution completed at " + now + ". Time: --- %s secnds ---" % (time.time() - start_time))
# Semicolon-separated export; pandas also writes the row index as the first column by default.
db_all.to_csv(output_folder + 'FRANCE_DB_ALL.csv',sep=";")
# NOTE(review): the meaning of the trailing `True` is not visible from this notebook -- check bob.mongo.save_df.
bob.mongo.save_df(db_all,'FRANCE_DB_ALL',db_src,True)
# db_all=db_all[(db_all['STATUS'] == 'REA') & (db_all['LABEL'] == 'France')]
# db_all.head(5)

Script execution completed at 07/04/2020 19:05:28. Time: --- 0.18207144737243652 secnds ---
Dataframe FRANCE_DB_ALL successfully save in database covid-19-dev in MongoDB. Time: --- 0.6737484931945801 secnds ---


In [14]:
# Report completion to the health check, using the `hc` client and `hc_key`
# created in the first code cell, and echo whatever the client returns.
done = hc.done(hc_key)
print(done)

End ==> send to https://health.cashstory.com/ping/a0475c76-9c0c-41b3-a001-354b535171df, 2020-04-07
