In [1]:
%autosave 0
import requests
import io
import pandas as pd
from datetime import date, timedelta

## Scrape the data from the source.
## The main dataset is sometimes not updated yet, so the simpler ECDC link is
## tried as a fallback when the first one has no row for today.
urls = [
    "https://covid.ourworldindata.org/data/owid-covid-data.csv",
    "https://covid.ourworldindata.org/data/ecdc/full_data.csv",
]
today = f"{date.today()}"

for attempt, url in enumerate(urls):
    if attempt > 0:
        print("Not today, check other link")

    ## A timeout keeps the cell from hanging forever on a stalled connection,
    ## and raise_for_status() stops an HTTP error page from being parsed as CSV.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    s = response.content
    c = pd.read_csv(io.StringIO(s.decode('utf-8')))

    ## Keep only the rows dated today; an empty result means "not updated yet".
    verif_data = c[c['date'].isin([today])]
    if len(verif_data) > 0:
        break
else:
    ## for/else: reached only when none of the sources had rows for today.
    raise ValueError('Data have not receive Update at this moment, Try later !')

verif_data.head(3)

Autosave disabled


Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,...,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy
142,ABW,North America,Aruba,2020-08-12,717.0,87.0,3.0,0.0,6715.621,814.866,...,7.452,35973.781,,,11.62,,,,,76.29
358,AFG,Asia,Afghanistan,2020-08-12,37269.0,215.0,1344.0,32.0,957.374,5.523,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
502,AGO,Africa,Angola,2020-08-12,1679.0,7.0,78.0,3.0,51.086,0.213,...,1.362,5819.495,,276.045,3.94,,,26.664,,61.15


In [2]:
## Save today's scraped dataset so that the other notebooks can reuse it
base_file_path = f"AzureSC\\Base_Files\\full_data{date.today()}.csv"
c.to_csv(path_or_buf=base_file_path, index=False)

In [3]:
## Read the predictions saved under yesterday's date; they are compared
## with the observed figures to evaluate the model
pred_file_path = f"AzureSC\\Pred\\predDf{date.today() - timedelta(days=1)}.csv"
data_load = pd.read_csv(filepath_or_buffer=pred_file_path, index_col=0)
data_load

Unnamed: 0,date,country,total_cases_predict,total_cases_real,total_deaths_predict,total_deaths_real,error_abs_cases,error_abs_deaths
0,2020-08-11,France,202876.0,0.0,30364.0,0.0,0.0,0.0
0,2020-08-11,China,89003.0,0.0,4697.0,0.0,0.0,0.0
0,2020-08-11,Italy,251361.0,0.0,35233.0,0.0,0.0,0.0
0,2020-08-11,Spain,323100.0,0.0,28594.0,0.0,0.0,0.0
0,2020-08-11,United States,5182676.0,0.0,164259.0,0.0,0.0,0.0
0,2020-08-11,World,20570787.0,0.0,743828.0,0.0,0.0,0.0
0,2020-08-11,United Kingdom,312851.0,0.0,46595.0,0.0,0.0,0.0
0,2020-08-11,Germany,217772.0,0.0,9209.0,0.0,0.0,0.0
0,2020-08-11,Iran,332850.0,0.0,18839.0,0.0,0.0,0.0
0,2020-08-11,Turkey,243048.0,0.0,5872.0,0.0,0.0,0.0


In [4]:
def Eval(c,data_load,country):
    """Compare the saved prediction for one country with its latest observed figures.

    Parameters
    ----------
    c : pandas.DataFrame
        Observed data scraped today. Must provide the columns ``location``,
        ``date``, ``total_cases`` and ``total_deaths``.
    data_load : pandas.DataFrame
        Predictions loaded from the previous day's file. Must provide the
        columns ``country``, ``total_cases_predict`` and
        ``total_deaths_predict``.
    country : str
        Name of the country to evaluate, as spelled in both frames.

    Returns
    -------
    pandas.DataFrame
        A single row (index label 0) with the columns ``date``, ``country``,
        ``total_cases_predict``, ``total_cases_real``,
        ``total_deaths_predict``, ``total_deaths_real``, ``error_abs_cases``
        and ``error_abs_deaths``. Despite the ``abs`` in their names, the two
        error columns are signed: prediction minus observed value.

    Raises
    ------
    IndexError
        If ``c`` contains no row for ``country``.
    KeyError
        If ``data_load`` contains no row for ``country``.
    """
    name = f"{country}"

    # The last row in `c` for this country is used as the observed value; this
    # relies on the rows of `c` being in chronological order per country.
    observed = c[c['location'].isin([name])]
    if observed.empty:
        raise IndexError(f"No observed data for country {name!r}")
    latest = observed.iloc[-1]
    observed_date = latest["date"]
    real_cases = latest["total_cases"]
    real_deaths = latest["total_deaths"]

    # Positional access: do not rely on the prediction row carrying the index
    # label 0, which only holds for some ways of saving/loading the file.
    predicted = data_load[data_load['country'].isin([name])]
    if predicted.empty:
        raise KeyError(f"No prediction available for country {name!r}")
    forecast = predicted.iloc[0]
    predicted_cases = forecast["total_cases_predict"]
    predicted_deaths = forecast["total_deaths_predict"]

    # Build the one-row report in a single call instead of enlarging an empty
    # frame cell by cell.
    follow_df = pd.DataFrame(
        {
            "date": [observed_date],
            "country": [country],
            "total_cases_predict": [predicted_cases],
            "total_cases_real": [real_cases],
            "total_deaths_predict": [predicted_deaths],
            "total_deaths_real": [real_deaths],
            "error_abs_cases": [predicted_cases - real_cases],
            "error_abs_deaths": [predicted_deaths - real_deaths],
        },
        index=[0],
    )

    return follow_df

In [5]:
## Run the evaluation once per tracked country, keeping each one-row result
## in its own variable for the concatenation step
## NOTE(review): "Brazil" does not appear in the prediction output displayed
## for the loaded file - confirm the prediction file covers it
follow_df1 = Eval(c=c, data_load=data_load, country="France")
follow_df2 = Eval(c=c, data_load=data_load, country="China")
follow_df3 = Eval(c=c, data_load=data_load, country="Italy")
follow_df4 = Eval(c=c, data_load=data_load, country="Spain")
follow_df5 = Eval(c=c, data_load=data_load, country="United States")
follow_df6 = Eval(c=c, data_load=data_load, country="World")
follow_df7 = Eval(c=c, data_load=data_load, country="United Kingdom")
follow_df8 = Eval(c=c, data_load=data_load, country="Germany")
follow_df9 = Eval(c=c, data_load=data_load, country="Iran")
follow_df10 = Eval(c=c, data_load=data_load, country="Turkey")
follow_df11 = Eval(c=c, data_load=data_load, country="Brazil")

In [6]:
## Stack the per-country evaluation frames into one report
frames = [
    follow_df1,
    follow_df2,
    follow_df3,
    follow_df4,
    follow_df5,
    follow_df6,
    follow_df7,
    follow_df8,
    follow_df9,
    follow_df10,
    follow_df11,
]
rapport = pd.concat(objs=frames)
rapport

Unnamed: 0,date,country,total_cases_predict,total_cases_real,total_deaths_predict,total_deaths_real,error_abs_cases,error_abs_deaths
0,2020-08-12,France,202876.0,204172.0,30364.0,30354.0,-1296.0,10.0
0,2020-08-12,China,89003.0,88964.0,4697.0,4693.0,39.0,4.0
0,2020-08-12,Italy,251361.0,251237.0,35233.0,35215.0,124.0,18.0
0,2020-08-11,Spain,323100.0,326612.0,28594.0,28581.0,-3512.0,13.0
0,2020-08-12,United States,5182676.0,5141207.0,164259.0,164537.0,41469.0,-278.0
0,2020-08-12,World,20570787.0,20330351.0,743828.0,742413.0,240436.0,1415.0
0,2020-08-12,United Kingdom,312851.0,312789.0,46595.0,46526.0,62.0,69.0
0,2020-08-12,Germany,217772.0,218519.0,9209.0,9207.0,-747.0,2.0
0,2020-08-12,Iran,332850.0,331189.0,18839.0,18800.0,1661.0,39.0
0,2020-08-12,Turkey,243048.0,243180.0,5872.0,5873.0,-132.0,-1.0


In [7]:
## Save today's report for notebook 03; per the original note, notebook 03
## appends it to full_rapport and then deletes this file
rapport_path = f"AzureSC\\Rapport\\rap{date.today()}.csv"
rapport.to_csv(path_or_buf=rapport_path)