In [1]:
%autosave 0
import requests
import io
import pandas as pd
from datetime import date, timedelta

## Download the COVID-19 dataset, falling back to a second link when the main
## file has no rows dated today.
REQUEST_TIMEOUT_S = 30  # never let a stalled connection hang the notebook


def _download_csv(csv_url):
    """Download the CSV found at ``csv_url`` and return it as a DataFrame.

    Raises requests.HTTPError on a 4xx/5xx answer, so that an error page is
    never parsed as if it were data.
    """
    response = requests.get(csv_url, timeout=REQUEST_TIMEOUT_S)
    response.raise_for_status()
    return pd.read_csv(io.StringIO(response.content.decode('utf-8')))


today = f"{date.today()}"  # text form of today's date, compared with the 'date' column

url="https://covid.ourworldindata.org/data/owid-covid-data.csv"
c = _download_csv(url)

## the main dataset is sometimes not updated yet, so look for today's rows
verif_data = c[c['date'].isin([today])]

if len(verif_data) == 0:
    print("Not today, check other link")
    url="https://covid.ourworldindata.org/data/ecdc/full_data.csv"
    c = _download_csv(url)

    verif_data = c[c['date'].isin([today])]

    ## If the second link has not been updated either, stop here and retry later
    if len(verif_data) == 0:
        raise ValueError('Data have not receive Update at this moment, Try later !')

verif_data.head(3)

Autosave disabled


Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,...,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cvd_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand
156,AFG,Asia,Afghanistan,2020-06-14,24102.0,556.0,451.0,5.0,619.138,14.283,...,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5
254,ALB,Europe,Albania,2020-06-14,1464.0,48.0,36.0,0.0,508.722,16.679,...,13.188,8.643,11803.431,1.1,304.195,10.08,7.1,51.2,,2.89
416,DZA,Africa,Algeria,2020-06-14,10810.0,112.0,760.0,9.0,246.516,2.554,...,6.211,3.857,13913.839,0.5,278.364,6.73,0.7,30.4,83.741,1.9


In [2]:
## Save today's scraped dataset so the other notebooks can reuse it
base_file_path = f"AzureSC\\Base_Files\\full_data{date.today()}.csv"
c.to_csv(base_file_path, index=False)

In [3]:
## Load the predictions written the day before (day -1); they are compared
## with today's real figures to evaluate the model
pred_file_path = f"AzureSC\\Pred\\predDf{date.today() - timedelta(days=1)}.csv"
data_load = pd.read_csv(pred_file_path, index_col=0)
data_load

Unnamed: 0,date,country,total_cases_predict,total_cases_real,total_deaths_predict,total_deaths_real,error_abs_cases,error_abs_deaths
0,2020-06-13,France,156766.0,0.0,29406.0,0.0,0.0,0.0
0,2020-06-13,China,84287.0,0.0,4651.0,0.0,0.0,0.0
0,2020-06-13,Italy,236807.0,0.0,34297.0,0.0,0.0,0.0
0,2020-06-13,Spain,243680.0,0.0,27147.0,0.0,0.0,0.0
0,2020-06-13,United States,2074014.0,0.0,115693.0,0.0,0.0,0.0
0,2020-06-13,World,7777704.0,0.0,431137.0,0.0,0.0,0.0
0,2020-06-13,United Kingdom,294599.0,0.0,41686.0,0.0,0.0,0.0
0,2020-06-13,Germany,186369.0,0.0,8801.0,0.0,0.0,0.0
0,2020-06-13,Iran,185054.0,0.0,8740.0,0.0,0.0,0.0
0,2020-06-13,Turkey,176159.0,0.0,4794.0,0.0,0.0,0.0


In [4]:
def Eval(c,data_load,country):
    """Build a one-row report comparing a prediction with the latest real figures.

    params :
        c = DataFrame scraped today; needs the columns 'location', 'date',
            'total_cases' and 'total_deaths'
        data_load = DataFrame of predictions loaded from day -1; needs the
            columns 'country', 'total_cases_predict' and 'total_deaths_predict'
        country = name of the country to evaluate

    returns :
        A DataFrame with a single row (index 0) and the columns date, country,
        total_cases_predict, total_cases_real, total_deaths_predict,
        total_deaths_real, error_abs_cases, error_abs_deaths.
        Despite their names, the two error columns are signed values
        (predicted - real), not absolute values.

    raises :
        IndexError if `c` has no row for `country`.
        KeyError if `data_load` has no row for `country`.
    """
    name = f"{country}"

    actual_rows = c[c['location'].isin([name])]
    if actual_rows.empty:
        raise IndexError(f"no row for location {name!r} in the scraped data")
    # The last matching row is used as the reference figures; this relies on
    # `c` being ordered by date within each location.
    latest = actual_rows.iloc[-1]
    real_cases = latest["total_cases"]
    real_deaths = latest["total_deaths"]

    predicted_rows = data_load[data_load['country'].isin([name])]
    if predicted_rows.empty:
        raise KeyError(f"no prediction for country {name!r} in data_load")
    # Positional access instead of the index label 0: this works whatever
    # index data_load carries (all zeros, a RangeIndex, ...) and always
    # yields scalars, even if several rows match.
    prediction = predicted_rows.iloc[0]
    predicted_cases = prediction["total_cases_predict"]
    predicted_deaths = prediction["total_deaths_predict"]

    # Build the row in one go rather than growing an empty frame cell by cell.
    return pd.DataFrame(
        {
            "date": latest["date"],
            "country": country,
            "total_cases_predict": predicted_cases,
            "total_cases_real": real_cases,
            "total_deaths_predict": predicted_deaths,
            "total_deaths_real": real_deaths,
            "error_abs_cases": predicted_cases - real_cases,
            "error_abs_deaths": predicted_deaths - real_deaths,
        },
        index=[0],
    )

In [5]:
## Evaluate every tracked country with Eval and keep one result frame per
## country. The country list is the single place to edit; its order matches
## the numbered follow_df names, which are kept because the concatenation
## cell reads them.
countries = [
    "France", "China", "Italy", "Spain", "United States", "World",
    "United Kingdom", "Germany", "Iran", "Turkey", "Brazil",
]

(follow_df1, follow_df2, follow_df3, follow_df4,
 follow_df5, follow_df6, follow_df7, follow_df8,
 follow_df9, follow_df10, follow_df11) = [
    Eval(c, data_load, country) for country in countries
]

In [6]:
## Stack the per-country frames into a single report.
## Every row keeps its original index label 0.
frames = [
    follow_df1,
    follow_df2,
    follow_df3,
    follow_df4,
    follow_df5,
    follow_df6,
    follow_df7,
    follow_df8,
    follow_df9,
    follow_df10,
    follow_df11,
]
rapport = pd.concat(objs=frames)
rapport

Unnamed: 0,date,country,total_cases_predict,total_cases_real,total_deaths_predict,total_deaths_real,error_abs_cases,error_abs_deaths
0,2020-06-14,France,156766.0,156813.0,29406.0,29398.0,-47.0,8.0
0,2020-06-14,China,84287.0,84288.0,4651.0,4638.0,-1.0,13.0
0,2020-06-14,Italy,236807.0,236651.0,34297.0,34301.0,156.0,-4.0
0,2020-06-13,Spain,243680.0,243605.0,27147.0,27136.0,75.0,11.0
0,2020-06-14,United States,2074014.0,2074526.0,115693.0,115436.0,-512.0,257.0
0,2020-06-14,World,7777704.0,7759691.0,431137.0,430127.0,18013.0,1010.0
0,2020-06-14,United Kingdom,294599.0,294375.0,41686.0,41662.0,224.0,24.0
0,2020-06-14,Germany,186369.0,186269.0,8801.0,8787.0,100.0,14.0
0,2020-06-14,Iran,185054.0,184955.0,8740.0,8730.0,99.0,10.0
0,2020-06-14,Turkey,176159.0,176677.0,4794.0,4792.0,-518.0,2.0


In [7]:
## Save the report for notebook 03, where it is added to full_rapport and then deleted
report_file_path = f"AzureSC\\Rapport\\rap{date.today()}.csv"
rapport.to_csv(report_file_path)