In [1]:
%autosave 0
import requests
import io
import pandas as pd
from datetime import date, timedelta

## Scrape the data from the source
def _download(csv_url, timeout=60):
    """Return the raw bytes served at csv_url.

    Raises requests.HTTPError on a 4xx/5xx answer so that an error page is
    never handed to the CSV parser, and passes `timeout` (seconds) to
    requests so an unresponsive server cannot hang the notebook forever.
    """
    response = requests.get(csv_url, timeout=timeout)
    response.raise_for_status()
    return response.content


def _rows_for(frame, day):
    """Return the rows of frame whose 'date' column equals the string day."""
    return frame[frame['date'].isin([day])]


# Evaluate "today" once so both freshness checks compare against the same day
today = f"{date.today()}"

url="https://covid.ourworldindata.org/data/owid-covid-data.csv"
s=_download(url)
c=pd.read_csv(io.StringIO(s.decode('utf-8')))

## This dataset is sometimes not updated yet, so check that it has rows for today
verif_data = _rows_for(c, today)

if len(verif_data) == 0:
    print("Not today, check other link")
    url="https://covid.ourworldindata.org/data/ecdc/full_data.csv"
    s=_download(url)
    c=pd.read_csv(io.StringIO(s.decode('utf-8')))

    verif_data = _rows_for(c, today)

    ## If the second link has not been updated either, raise an error; try again later
    if len(verif_data) == 0:
        raise ValueError('Data have not receive Update at this moment, Try later !')

verif_data.head(3)

Autosave disabled


Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,...,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cvd_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand
155,AFG,Asia,Afghanistan,2020-06-13,23546.0,656.0,446.0,20.0,604.855,16.851,...,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5
252,ALB,Europe,Albania,2020-06-13,1416.0,31.0,36.0,1.0,492.043,10.772,...,13.188,8.643,11803.431,1.1,304.195,10.08,7.1,51.2,,2.89
413,DZA,Africa,Algeria,2020-06-13,10698.0,109.0,751.0,10.0,243.962,2.486,...,6.211,3.857,13913.839,0.5,278.364,6.73,0.7,30.4,83.741,1.9


In [2]:
## Save today's raw download so the other notebooks can reuse it
base_file_path = f"AzureSC\\Base_Files\\full_data{date.today()}.csv"
c.to_csv(base_file_path, index=False)

In [3]:
## Load the predictions saved on day -1 to evaluate the performance of the models
yesterday = date.today() - timedelta(days=1)
data_load = pd.read_csv(
    f"AzureSC\\Pred\\predDf{yesterday}.csv",
    index_col=0,
)
data_load

Unnamed: 0,date,country,total_cases_predict,total_cases_real,total_deaths_predict,total_deaths_real,error_abs_cases,error_abs_deaths
0,2020-06-12,France,156090.0,0.0,29375.0,0.0,0.0,0.0
0,2020-06-12,China,84275.0,0.0,4651.0,0.0,0.0,0.0
0,2020-06-12,Italy,236480.0,0.0,34258.0,0.0,0.0,0.0
0,2020-06-12,Spain,243075.0,0.0,27147.0,0.0,0.0,0.0
0,2020-06-12,United States,2046527.0,0.0,114863.0,0.0,0.0,0.0
0,2020-06-12,World,7631031.0,0.0,426764.0,0.0,0.0,0.0
0,2020-06-12,United Kingdom,292833.0,0.0,41563.0,0.0,0.0,0.0
0,2020-06-12,Germany,186282.0,0.0,8798.0,0.0,0.0,0.0
0,2020-06-12,Iran,182518.0,0.0,8667.0,0.0,0.0,0.0
0,2020-06-12,Turkey,174909.0,0.0,4779.0,0.0,0.0,0.0


In [4]:
def Eval(c,data_load,country):
    """Compare the day -1 prediction for one country with the latest real figures.

    params :
        c = data scraped today; must provide the columns "location", "date",
            "total_cases" and "total_deaths"
        data_load = dataset from day -1; must provide the columns "country",
            "total_cases_predict" and "total_deaths_predict"
        country = country concerned by the evaluation

    returns :
        One-row DataFrame (index label 0) with the columns date, country,
        total_cases_predict, total_cases_real, total_deaths_predict,
        total_deaths_real, error_abs_cases and error_abs_deaths.
        Despite their names, the two error columns are signed
        (prediction minus real value).

    raises :
        IndexError if `c` has no row for `country`.
        KeyError if `data_load` has no prediction for `country`.
    """
    country_rows = c[c['location'].isin([f"{country}"])]
    if country_rows.empty:
        raise IndexError(f"No row for location '{country}' in the scraped data")
    # Last row for the country; presumably the most recent date — this relies
    # on the row order of `c` (TODO confirm the source is sorted by date).
    latest = country_rows.iloc[-1]
    real_date = latest["date"]
    real_cases = latest["total_cases"]
    real_deaths = latest["total_deaths"]

    predictions = data_load[data_load['country'].isin([f"{country}"])]
    if predictions.empty:
        raise KeyError(f"No prediction for country '{country}' in data_load")
    # Positional access: works whatever index labels `data_load` carries
    # (a label lookup of 0 only works while every row is labelled 0).
    prediction = predictions.iloc[0]
    predicted_cases = prediction["total_cases_predict"]
    predicted_deaths = prediction["total_deaths_predict"]

    # Build the single report row in one go instead of growing an empty frame
    # cell by cell.
    report_row = {
        "date": real_date,
        "country": country,
        "total_cases_predict": predicted_cases,
        "total_cases_real": real_cases,
        "total_deaths_predict": predicted_deaths,
        "total_deaths_real": real_deaths,
        "error_abs_cases": predicted_cases - real_cases,
        "error_abs_deaths": predicted_deaths - real_deaths,
    }
    return pd.DataFrame([report_row], columns=list(report_row))

In [5]:
## Call Eval once per tracked country and keep each one-row result in its own variable
# NOTE(review): the day -1 predictions displayed above contain no "Brazil" row,
# and Eval raises when it cannot find a prediction for a country — confirm that
# a Brazil prediction exists in the file before running this cell.
tracked_countries = [
    "France", "China", "Italy", "Spain",
    "United States", "World", "United Kingdom", "Germany",
    "Iran", "Turkey", "Brazil",
]
(follow_df1, follow_df2, follow_df3, follow_df4,
 follow_df5, follow_df6, follow_df7, follow_df8,
 follow_df9, follow_df10, follow_df11) = [
    Eval(c, data_load, tracked_country) for tracked_country in tracked_countries
]

In [6]:
## Stack every per-country result into a single report frame
# Index labels are kept as they are, so every row of the report is labelled 0.
frames = [
    follow_df1,
    follow_df2,
    follow_df3,
    follow_df4,
    follow_df5,
    follow_df6,
    follow_df7,
    follow_df8,
    follow_df9,
    follow_df10,
    follow_df11,
]
rapport = pd.concat(objs=frames)
rapport

Unnamed: 0,date,country,total_cases_predict,total_cases_real,total_deaths_predict,total_deaths_real,error_abs_cases,error_abs_deaths
0,2020-06-13,France,156090.0,156287.0,29375.0,29374.0,-197.0,1.0
0,2020-06-13,China,84275.0,84228.0,4651.0,4638.0,47.0,13.0
0,2020-06-13,Italy,236480.0,236305.0,34258.0,34223.0,175.0,35.0
0,2020-06-12,Spain,243075.0,243209.0,27147.0,27136.0,-134.0,11.0
0,2020-06-13,United States,2046527.0,2048986.0,114863.0,114669.0,-2459.0,194.0
0,2020-06-13,World,7631031.0,7625883.0,426764.0,425931.0,5148.0,833.0
0,2020-06-13,United Kingdom,292833.0,292950.0,41563.0,41481.0,-117.0,82.0
0,2020-06-13,Germany,186282.0,186022.0,8798.0,8781.0,260.0,17.0
0,2020-06-13,Iran,182518.0,182545.0,8667.0,8659.0,-27.0,8.0
0,2020-06-13,Turkey,174909.0,175218.0,4779.0,4778.0,-309.0,1.0


In [7]:
## Save today's report for notebook 03, where it will be appended to full_rapport and then deleted
rapport_path = f"AzureSC\\Rapport\\rap{date.today()}.csv"
rapport.to_csv(rapport_path)