In [1]:
from matplotlib.dates import date2num, num2date
from matplotlib.colors import ListedColormap
from matplotlib import dates as mdates
from matplotlib.patches import Patch
from matplotlib import pyplot as plt
from matplotlib import ticker

import pandas as pd
import numpy as np
import sys
import os

sys.path.insert(0, "..")
from global_config import config

# Result and data locations, both looked up in the project config.
results_dir, data_dir = (
    config.get_property(key) for key in ("results_dir", "data_dir")
)

In [2]:
# Observed series. The dates from row 11 onward are the ones for which a
# per-date forecast file ("<date>.csv") is read further down.
usa_df = pd.read_csv(os.path.join(data_dir, "processed_data_us.csv"))
dates_forecasts = usa_df["date"].iloc[11:].values

# One forecast folder per model under <results_dir>/forecast.
(
    path_to_frcst_1,
    path_to_frcst_2,
    path_to_frcst_3,
    path_to_frcst_4,
    path_to_frcst_5,
) = (
    os.path.join(results_dir, "forecast", model_dir)
    for model_dir in ("arima", "eakf_model1", "eakf_model2", "eakf_model3", "sarima")
)



In [3]:
from utils.utils_eval import compute_evals
from tqdm import tqdm

# Number of forecast rows kept (and scored) per forecast date. The same number
# of dates is dropped from the end of `dates_forecasts` -- presumably because
# the observations needed to score those forecasts do not exist yet (TODO confirm).
n_ahead = 6

# (label written to the scores "method" column,
#  label written to the forecasts "model" column,
#  folder holding one "<forecast date>.csv" per forecast date)
# The two label sets differ on purpose: they reproduce what is already stored
# in forecasts.csv / scores.csv.
models = [
    ("arima",       "arima",  path_to_frcst_1),
    ("eakf_model1", "model1", path_to_frcst_2),
    ("eakf_model2", "model2", path_to_frcst_3),
    ("eakf_model3", "model3", path_to_frcst_4),
    ("sarima",      "sarima", path_to_frcst_5),
]
# Model whose forecast dates select the observations used for scoring.
reference_method = "eakf_model1"

usa_df["date"] = pd.to_datetime(usa_df["date"])

# Make sure the output folder exists so the to_csv calls below cannot fail on
# a fresh results tree.
eval_dir = os.path.join(results_dir, "forecast", "evaluation")
os.makedirs(eval_dir, exist_ok=True)

evals_df     = []
forecasts_df = []
# Wrap the array itself (not enumerate(...)) so tqdm knows the total and can
# show a real progress bar with an ETA.
for date in tqdm(dates_forecasts[:-n_ahead]):
    d = pd.to_datetime(str(date)).strftime('%Y-%m-%d')

    # Read and tag every model's forecast issued on this date.
    frcsts = {}
    for method, model, path in models:
        frcst_df = pd.read_csv(os.path.join(path, f"{d}.csv"), parse_dates=["date"]).iloc[:n_ahead]
        frcst_df["forecast_date"] = d
        frcst_df["model"]         = model
        frcsts[method]            = frcst_df

    forecasts_df.append(pd.concat(list(frcsts.values())))

    # Observations on the dates covered by the reference model's forecast.
    dates_use = frcsts[reference_method].date
    fit_data  = usa_df[usa_df.date.isin(list(dates_use))].set_index("date")[["total"]]

    # Score every model against the same observations.
    for method, frcst_df in frcsts.items():
        eval_df               = compute_evals(frcst_df, fit_data)
        eval_df["frsct_date"] = date    # column name kept as-is: it is part of scores.csv
        eval_df["method"]     = method
        evals_df.append(eval_df.reset_index())

forecasts_df = pd.concat(forecasts_df)
evals_df     = pd.concat(evals_df)

forecasts_df.to_csv(os.path.join(eval_dir, "forecasts.csv"), index=False)
evals_df.to_csv(os.path.join(eval_dir, "scores.csv"), index=False)


  iss = (upper-lowerr) + 2/alpha * (lowerr-obs) * (obs<lowerr) + 2/alpha * (obs-upper) * (obs>upper)
185it [00:37,  4.89it/s]


In [4]:
# Show the concatenated forecasts of all models (the rendered table is truncated).
forecasts_df

Unnamed: 0.1,date,mean,median,low_5,up_5,low_10,up_10,low_15,up_15,low_20,...,low_95,up_95,low_97.5,up_97.5,forecast_date,add,model,Unnamed: 0,std,type
0,2007-01-31,52.000000,52.000000,51.224068,52.787686,50.456716,53.590487,49.694873,54.412052,48.935478,...,32.503049,83.192194,30.382226,88.999404,2006-12-31,0,arima,,,
1,2007-02-28,52.000000,52.000000,50.906067,53.117440,49.830961,54.263452,48.770251,55.443636,47.719632,...,26.754185,101.068301,24.319177,111.187974,2006-12-31,0,arima,,,
2,2007-03-31,52.000000,52.000000,50.663396,53.371866,49.356070,54.785562,48.072445,56.248439,46.807204,...,23.042401,117.348880,20.500801,131.897288,2006-12-31,0,arima,,,
3,2007-04-30,52.000000,52.000000,50.459714,53.587303,48.959235,55.229621,47.491930,56.935989,46.051558,...,20.316312,133.095023,17.751532,152.324883,2006-12-31,0,arima,,,
4,2007-05-31,52.000000,52.000000,50.280945,53.777828,48.612262,55.623826,46.986298,57.548693,45.395936,...,18.183116,148.709388,15.636567,172.927987,2006-12-31,0,arima,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,2022-06-30,4.592287,4.592287,4.442385,4.747248,4.296813,4.908081,4.154905,5.075712,4.016044,...,1.627576,12.957373,1.402342,15.038490,2022-04-30,0,sarima,,,
2,2022-07-31,4.452510,4.452510,4.306050,4.603951,4.163857,4.761173,4.025281,4.925084,3.889715,...,1.565254,12.665575,1.347071,14.717004,2022-04-30,0,sarima,,,
3,2022-08-31,3.682629,3.682629,3.560789,3.808639,3.442522,3.939484,3.327285,4.075923,3.214575,...,1.286623,10.540582,1.106296,12.258712,2022-04-30,0,sarima,,,
4,2022-09-30,4.017135,4.017135,3.883494,4.155374,3.753798,4.298946,3.627449,4.448684,3.503894,...,1.395238,11.566031,1.198672,13.462708,2022-04-30,0,sarima,,,


In [5]:
### Evaluate the models with births and deaths (BD_* forecast folders)

In [6]:
# Reload the observed series. The dates from row 11 onward are the ones for
# which a per-date forecast file ("<date>.csv") is read further down.
usa_df = pd.read_csv(os.path.join(data_dir, "processed_data_us.csv"))
dates_forecasts = usa_df["date"].iloc[11:].values

# One forecast folder per model under <results_dir>/forecast; the EAKF models
# are read from their BD_* folders here.
(
    path_to_frcst_1,
    path_to_frcst_2,
    path_to_frcst_3,
    path_to_frcst_4,
    path_to_frcst_5,
) = (
    os.path.join(results_dir, "forecast", model_dir)
    for model_dir in ("arima", "BD_eakf_model1", "BD_eakf_model2", "BD_eakf_model3", "sarima")
)



In [9]:
from utils.utils_eval import compute_evals
from tqdm import tqdm

# Number of forecast rows kept (and scored) per forecast date. The same number
# of dates is dropped from the end of `dates_forecasts` -- presumably because
# the observations needed to score those forecasts do not exist yet (TODO confirm).
n_ahead = 6

# (label written to the scores "method" column,
#  label written to the forecasts "model" column,
#  folder holding one "<forecast date>.csv" per forecast date)
# The two label sets differ on purpose: they reproduce what is already stored
# in BD_forecasts.csv / BD_scores.csv.
models = [
    ("arima",       "arima",  path_to_frcst_1),
    ("eakf_model1", "model1", path_to_frcst_2),
    ("eakf_model2", "model2", path_to_frcst_3),
    ("eakf_model3", "model3", path_to_frcst_4),
    ("sarima",      "sarima", path_to_frcst_5),
]
# Model whose forecast dates select the observations used for scoring.
reference_method = "eakf_model1"

usa_df["date"] = pd.to_datetime(usa_df["date"])

# Make sure the output folder exists so the to_csv calls below cannot fail on
# a fresh results tree.
eval_dir = os.path.join(results_dir, "forecast", "evaluation")
os.makedirs(eval_dir, exist_ok=True)

evals_df     = []
forecasts_df = []
# Wrap the array itself (not enumerate(...)) so tqdm knows the total and can
# show a real progress bar with an ETA.
for date in tqdm(dates_forecasts[:-n_ahead]):
    d = pd.to_datetime(str(date)).strftime('%Y-%m-%d')

    # Read and tag every model's forecast issued on this date.
    frcsts = {}
    for method, model, path in models:
        frcst_df = pd.read_csv(os.path.join(path, f"{d}.csv"), parse_dates=["date"]).iloc[:n_ahead]
        frcst_df["forecast_date"] = d
        frcst_df["model"]         = model
        frcsts[method]            = frcst_df

    forecasts_df.append(pd.concat(list(frcsts.values())))

    # Observations on the dates covered by the reference model's forecast.
    dates_use = frcsts[reference_method].date
    fit_data  = usa_df[usa_df.date.isin(list(dates_use))].set_index("date")[["total"]]

    # Score every model against the same observations.
    for method, frcst_df in frcsts.items():
        eval_df               = compute_evals(frcst_df, fit_data)
        eval_df["frsct_date"] = date    # column name kept as-is: it is part of BD_scores.csv
        eval_df["method"]     = method
        evals_df.append(eval_df.reset_index())

forecasts_df = pd.concat(forecasts_df)
evals_df     = pd.concat(evals_df)

forecasts_df.to_csv(os.path.join(eval_dir, "BD_forecasts.csv"), index=False)
evals_df.to_csv(os.path.join(eval_dir, "BD_scores.csv"), index=False)


  iss = (upper-lowerr) + 2/alpha * (lowerr-obs) * (obs<lowerr) + 2/alpha * (obs-upper) * (obs>upper)
185it [00:47,  3.89it/s]
