In [1]:
%autosave 0
import requests
import io
import pandas as pd
from datetime import date, timedelta

## Download the COVID-19 dataset, falling back to a second link when today's rows are missing
def _download_csv(url, timeout=60):
    """Fetch `url` and parse the UTF-8 decoded response body as CSV.

    Params:
        url = address of the CSV file
        timeout = seconds passed to requests.get so a stalled server cannot hang the cell

    Raises requests.HTTPError on a 4xx/5xx answer, so an error page is never parsed as data.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return pd.read_csv(io.StringIO(response.content.decode('utf-8')))


def _rows_for_day(frame, day):
    """Return the rows of `frame` whose 'date' column equals the string form of `day`."""
    return frame[frame['date'].isin([f"{day}"])]


# Evaluate the date once so both checks compare against the same day
today = date.today()

url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
c = _download_csv(url)

## This dataset is sometimes not updated yet, so check that it holds rows dated today
verif_data = _rows_for_day(c, today)

if len(verif_data) == 0:
    print("Not today, check other link")
    url = "https://covid.ourworldindata.org/data/ecdc/full_data.csv"
    c = _download_csv(url)

    verif_data = _rows_for_day(c, today)

    ## If the second link has not been updated either, raise an error and try again later
    if len(verif_data) == 0:
        raise ValueError('Data have not receive Update at this moment, Try later !')

verif_data.head(3)

Autosave disabled


Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
198,ABW,North America,Aruba,2020-10-02,3998.0,35.0,34.571,27.0,0.0,0.286,...,35973.781,,,11.62,,,,,76.29,
475,AFG,Asia,Afghanistan,2020-10-02,39285.0,17.0,14.143,1458.0,0.0,1.0,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498
670,AGO,Africa,Angola,2020-10-02,4972.0,0.0,87.0,183.0,1.0,3.429,...,5819.495,,276.045,3.94,,,26.664,,61.15,0.581


In [2]:
## Save the downloaded dataset to disk so that other notebooks can reuse it
c.to_csv(
    path_or_buf=f"AzureSC\\Base_Files\\full_data{date.today()}.csv",
    index=False,
)

In [3]:
## Read the predictions file of day -1; it is used to evaluate the performance of the models
data_load = pd.read_csv(
    filepath_or_buffer=f"AzureSC\\Pred\\predDf{date.today() - timedelta(days=1)}.csv",
    index_col=0,
)
data_load

Unnamed: 0,date,country,total_cases_predict,total_cases_real,total_deaths_predict,total_deaths_real,error_abs_cases,error_abs_deaths
0,2020-10-01,France,572274.0,0.0,32037.0,0.0,0.0,0.0
0,2020-10-01,China,90543.0,0.0,4744.0,0.0,0.0,0.0
0,2020-10-01,Italy,316291.0,0.0,35932.0,0.0,0.0,0.0
0,2020-10-01,Spain,779576.0,0.0,31964.0,0.0,0.0,0.0
0,2020-10-01,United States,7308470.0,0.0,208037.0,0.0,0.0,0.0
0,2020-10-01,World,34548771.0,0.0,1023121.0,0.0,0.0,0.0
0,2020-10-01,United Kingdom,458634.0,0.0,42225.0,0.0,0.0,0.0
0,2020-10-01,Germany,293271.0,0.0,9518.0,0.0,0.0,0.0
0,2020-10-01,Iran,460215.0,0.0,26386.0,0.0,0.0,0.0
0,2020-10-01,Turkey,319837.0,0.0,8251.0,0.0,0.0,0.0


In [4]:
def Eval(c,data_load,country):
    """Compare the stored prediction for one country with its latest observed figures.

    Params:
        c = data scraped today; needs the columns 'location', 'date',
            'total_cases' and 'total_deaths'
        data_load = predictions dataset from day -1; needs the columns 'country',
            'total_cases_predict' and 'total_deaths_predict'
        country = country concerned by the evaluation

    Returns:
        A one-row DataFrame (index 0) with the columns date, country,
        total_cases_predict, total_cases_real, total_deaths_predict,
        total_deaths_real, error_abs_cases and error_abs_deaths.
        Both errors are computed as predicted minus real.

    Raises:
        IndexError: `c` holds no row for `country`.
        KeyError: `data_load` holds no row for `country`.
    """
    observed = c[c['location'].isin([f"{country}"])]
    if observed.empty:
        raise IndexError(f"No observed data found for country {country!r}")

    # The last row of the country is used as the "real" figures.
    # NOTE(review): this assumes the rows of `c` are in chronological order and
    # the date of that row is not checked against today -- confirm.
    latest = observed.iloc[-1]
    real_date = latest["date"]
    real_cases = latest["total_cases"]
    real_deaths = latest["total_deaths"]

    predicted = data_load[data_load['country'].isin([f"{country}"])]
    if predicted.empty:
        raise KeyError(f"No prediction found for country {country!r}")

    # Positional access: the label of the matching row does not have to be 0,
    # and repeated index labels can no longer turn the value into a Series.
    prediction = predicted.iloc[0]
    cases_predict = prediction["total_cases_predict"]
    deaths_predict = prediction["total_deaths_predict"]

    # Build the single report row in one go instead of growing an empty frame
    # one cell at a time.
    record = {
        "date": real_date,
        "country": country,
        "total_cases_predict": cases_predict,
        "total_cases_real": real_cases,
        "total_deaths_predict": deaths_predict,
        "total_deaths_real": real_deaths,
        "error_abs_cases": cases_predict - real_cases,
        "error_abs_deaths": deaths_predict - real_deaths,
    }
    return pd.DataFrame([record], index=[0])

In [5]:
## Run the evaluation for every tracked country and keep each one-row result in its own variable
follow_df1 = Eval(c=c, data_load=data_load, country="France")
follow_df2 = Eval(c=c, data_load=data_load, country="China")
follow_df3 = Eval(c=c, data_load=data_load, country="Italy")
follow_df4 = Eval(c=c, data_load=data_load, country="Spain")
follow_df5 = Eval(c=c, data_load=data_load, country="United States")
follow_df6 = Eval(c=c, data_load=data_load, country="World")
follow_df7 = Eval(c=c, data_load=data_load, country="United Kingdom")
follow_df8 = Eval(c=c, data_load=data_load, country="Germany")
follow_df9 = Eval(c=c, data_load=data_load, country="Iran")
follow_df10 = Eval(c=c, data_load=data_load, country="Turkey")
follow_df11 = Eval(c=c, data_load=data_load, country="Brazil")

In [6]:
## Stack the per-country evaluation frames into one report (row labels are kept as they are)
frames = [
    follow_df1,
    follow_df2,
    follow_df3,
    follow_df4,
    follow_df5,
    follow_df6,
    follow_df7,
    follow_df8,
    follow_df9,
    follow_df10,
    follow_df11,
]
rapport = pd.concat(objs=frames)
rapport

Unnamed: 0,date,country,total_cases_predict,total_cases_real,total_deaths_predict,total_deaths_real,error_abs_cases,error_abs_deaths
0,2020-10-02,France,572274.0,577505.0,32037.0,32019.0,-5231.0,18.0
0,2020-10-02,China,90543.0,90567.0,4744.0,4739.0,-24.0,5.0
0,2020-10-02,Italy,316291.0,317409.0,35932.0,35918.0,-1118.0,14.0
0,2020-10-01,Spain,779576.0,778607.0,31964.0,31973.0,969.0,-9.0
0,2020-10-02,United States,7308470.0,7277814.0,208037.0,207808.0,30656.0,229.0
0,2020-10-02,World,34548771.0,34350717.0,1023121.0,1023876.0,198054.0,-755.0
0,2020-10-02,United Kingdom,458634.0,460178.0,42225.0,42202.0,-1544.0,23.0
0,2020-10-02,Germany,293271.0,294395.0,9518.0,9508.0,-1124.0,10.0
0,2020-10-02,Iran,460215.0,461044.0,26386.0,26380.0,-829.0,6.0
0,2020-10-02,Turkey,319837.0,320070.0,8251.0,8262.0,-233.0,-11.0


In [7]:
## Save today's report for notebook 03, where it will be added to full_rapport and then deleted
rapport.to_csv(path_or_buf=f"AzureSC\\Rapport\\rap{date.today()}.csv")