In [1]:
%autosave 0
import requests
import io
import pandas as pd
from datetime import date, timedelta

## Download the COVID-19 dataset and check that it already contains rows
## dated today. The primary file is sometimes published late, so a second
## file is tried before giving up.
def _download_csv(url, timeout=30):
    """Download ``url`` and parse the UTF-8 body as CSV into a DataFrame.

    ``raise_for_status`` turns an HTTP error response into an exception
    instead of letting an error page reach ``pd.read_csv``; ``timeout``
    (seconds) keeps the cell from hanging forever on a stalled connection.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return pd.read_csv(io.StringIO(response.content.decode('utf-8')))


# Evaluate the date once so both checks compare against the same day,
# even if the cell happens to run across midnight.
today = f"{date.today()}"

url="https://covid.ourworldindata.org/data/owid-covid-data.csv"
c = _download_csv(url)
verif_data = c[c['date'].isin([today])]

if len(verif_data) == 0:
    ## The primary file has no row for today yet: try the second link.
    print("Not today, check other link")
    url="https://covid.ourworldindata.org/data/ecdc/full_data.csv"
    c = _download_csv(url)
    verif_data = c[c['date'].isin([today])]

    ## Neither file has been updated yet: stop here so the following
    ## cells do not run on stale data.
    if len(verif_data) == 0:
        raise ValueError('Data have not receive Update at this moment, Try later !')

verif_data.head(3)

Autosave disabled


Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,...,aged_70_older,gdp_per_capita,extreme_poverty,cvd_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy
191,AFG,Asia,Afghanistan,2020-07-19,35301.0,12.0,1164.0,17.0,906.82,0.308,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
324,ALB,Europe,Albania,2020-07-19,4008.0,102.0,111.0,4.0,1392.731,35.444,...,8.643,11803.431,1.1,304.195,10.08,7.1,51.2,,2.89,78.57
521,DZA,Africa,Algeria,2020-07-19,22549.0,601.0,1068.0,11.0,514.218,13.705,...,3.857,13913.839,0.5,278.364,6.73,0.7,30.4,83.741,1.9,76.88


In [2]:
## Save today's raw download so that the other notebooks can reuse it.
## NOTE(review): the path uses Windows separators; confirm before running on another OS.
c.to_csv(
    path_or_buf=f"AzureSC\\Base_Files\\full_data{date.today()}.csv",
    index=False,
)

In [3]:
## Load the predictions file named after yesterday's date; it is used to
## evaluate the performance of the models against the fresh figures.
data_load = pd.read_csv(
    filepath_or_buffer=f"AzureSC\\Pred\\predDf{date.today() - timedelta(days=1)}.csv",
    index_col=0,
)
data_load

Unnamed: 0,date,country,total_cases_predict,total_cases_real,total_deaths_predict,total_deaths_real,error_abs_cases,error_abs_deaths
0,2020-07-18,France,175300.0,0.0,30171.0,0.0,0.0,0.0
0,2020-07-18,China,85491.0,0.0,4654.0,0.0,0.0,0.0
0,2020-07-18,Italy,244317.0,0.0,35070.0,0.0,0.0,0.0
0,2020-07-18,Spain,261528.0,0.0,28434.0,0.0,0.0,0.0
0,2020-07-18,United States,3710543.0,0.0,140279.0,0.0,0.0,0.0
0,2020-07-18,World,14274787.0,0.0,603127.0,0.0,0.0,0.0
0,2020-07-18,United Kingdom,294214.0,0.0,45349.0,0.0,0.0,0.0
0,2020-07-18,Germany,201991.0,0.0,9096.0,0.0,0.0,0.0
0,2020-07-18,Iran,272875.0,0.0,13969.0,0.0,0.0,0.0
0,2020-07-18,Turkey,218699.0,0.0,5478.0,0.0,0.0,0.0


In [4]:
def Eval(c,data_load,country):
    """Compare the stored prediction for one country with its latest reported figures.

    Parameters
    ----------
    c : pandas.DataFrame
        Freshly downloaded dataset. The columns ``location``, ``date``,
        ``total_cases`` and ``total_deaths`` are read.
    data_load : pandas.DataFrame
        Predictions loaded from the previous day's file. The columns
        ``country``, ``total_cases_predict`` and ``total_deaths_predict``
        are read.
    country : str
        Name matched against ``c['location']`` and ``data_load['country']``.

    Returns
    -------
    pandas.DataFrame
        A single row (index label 0) with the columns ``date``, ``country``,
        ``total_cases_predict``, ``total_cases_real``, ``total_deaths_predict``,
        ``total_deaths_real``, ``error_abs_cases`` and ``error_abs_deaths``.
        Both error columns are *predicted minus real*, so they keep their
        sign (negative when the model under-estimated).

    Raises
    ------
    IndexError
        If ``c`` has no row for ``country``.
    KeyError
        If ``data_load`` has no row for ``country``.
    """
    wanted = f"{country}"

    # The last row of the country's slice is taken as its latest report;
    # this relies on ``c`` keeping each country's rows in chronological order.
    actual_rows = c[c["location"].isin([wanted])]
    if actual_rows.empty:
        raise IndexError(f"no row for {wanted!r} in the downloaded data")
    latest = actual_rows.iloc[-1]
    real_cases = latest["total_cases"]
    real_deaths = latest["total_deaths"]

    # Positional lookup: the matching prediction row is used whatever its
    # index label is (a label-based ``.loc[0]`` only works when that row
    # happens to be labelled 0). The first match wins if there are several.
    predicted_rows = data_load[data_load["country"].isin([wanted])]
    if predicted_rows.empty:
        raise KeyError(f"no prediction stored for {wanted!r}")
    prediction = predicted_rows.iloc[0]
    predicted_cases = prediction["total_cases_predict"]
    predicted_deaths = prediction["total_deaths_predict"]

    # Build the one-row report in a single call; the dict order fixes the
    # column order of the result.
    return pd.DataFrame(
        {
            "date": latest["date"],
            "country": country,
            "total_cases_predict": predicted_cases,
            "total_cases_real": real_cases,
            "total_deaths_predict": predicted_deaths,
            "total_deaths_real": real_deaths,
            "error_abs_cases": predicted_cases - real_cases,
            "error_abs_deaths": predicted_deaths - real_deaths,
        },
        index=[0],
    )

In [5]:
## Run Eval once per tracked country and keep each one-row report in its
## own variable (the next cell concatenates them).
(
    follow_df1, follow_df2, follow_df3, follow_df4,
    follow_df5, follow_df6, follow_df7, follow_df8,
    follow_df9, follow_df10, follow_df11,
) = (
    Eval(c, data_load, country)
    for country in (
        "France", "China", "Italy", "Spain",
        "United States", "World", "United Kingdom", "Germany",
        "Iran", "Turkey", "Brazil",
    )
)

In [6]:
## Stack the per-country reports into a single DataFrame.
## Each report keeps its own index label, so every row of the result is labelled 0.
frames = [
    follow_df1,
    follow_df2,
    follow_df3,
    follow_df4,
    follow_df5,
    follow_df6,
    follow_df7,
    follow_df8,
    follow_df9,
    follow_df10,
    follow_df11,
]
rapport = pd.concat(objs=frames)
rapport

Unnamed: 0,date,country,total_cases_predict,total_cases_real,total_deaths_predict,total_deaths_real,error_abs_cases,error_abs_deaths
0,2020-07-19,France,175300.0,174674.0,30171.0,30152.0,626.0,19.0
0,2020-07-19,China,85491.0,85483.0,4654.0,4646.0,8.0,8.0
0,2020-07-19,Italy,244317.0,244216.0,35070.0,35042.0,101.0,28.0
0,2020-07-18,Spain,261528.0,260255.0,28434.0,28420.0,1273.0,14.0
0,2020-07-19,United States,3710543.0,3711464.0,140279.0,140119.0,-921.0,160.0
0,2020-07-19,World,14274787.0,14267093.0,603127.0,601934.0,7694.0,1193.0
0,2020-07-19,United Kingdom,294214.0,294066.0,45349.0,45273.0,148.0,76.0
0,2020-07-19,Germany,201991.0,201574.0,9096.0,9084.0,417.0,12.0
0,2020-07-19,Iran,272875.0,271606.0,13969.0,13979.0,1269.0,-10.0
0,2020-07-19,Turkey,218699.0,218717.0,5478.0,5475.0,-18.0,3.0


In [7]:
## Save today's report for notebook 03, where it is added to full_rapport
## and then deleted.
## NOTE(review): the path uses Windows separators; confirm before running on another OS.
rapport.to_csv(
    path_or_buf=f"AzureSC\\Rapport\\rap{date.today()}.csv",
)