In [1]:
import sys
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from functools import reduce

from darts import TimeSeries
from darts.models import (
    NaiveSeasonal,
    NaiveDrift,
    Prophet,
    ExponentialSmoothing,
    ARIMA,
    AutoARIMA,
    RegressionEnsembleModel,
    RegressionModel,
    Theta,
    FFT,
    RNNModel
)

from darts.metrics import mape, mase, mae, rmse
from darts.utils.statistics import check_seasonality, plot_acf, plot_residuals_analysis
from darts.datasets import AirPassengersDataset

import warnings
warnings.filterwarnings("ignore")
import logging
logging.disable(logging.CRITICAL)
import pickle

# Daily cases

## Delta 1

In [48]:
name = 'Daily cases'

In [49]:
delta = 1

In [50]:
df = pd.read_csv("Covid19-italy.csv", parse_dates=["Date"], index_col="Date")

In [51]:
df

Unnamed: 0_level_0,Daily cases,Total positives,Total positives variation,Total intensive care,Total hospitalized,Total deaths,Tests,Daily intensive care,Daily hospitalized,Daily deaths,Daily tests
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-02-25,93,311,90,35,150,10,8623,9.0,23.0,3.0,4299.0
2020-02-26,78,385,74,36,164,12,9587,1.0,14.0,2.0,964.0
2020-02-27,250,588,203,56,304,17,12014,20.0,140.0,5.0,2427.0
2020-02-28,238,821,233,64,409,21,15695,8.0,105.0,4.0,3681.0
2020-02-29,240,1049,228,105,506,29,18661,41.0,97.0,8.0,2966.0
...,...,...,...,...,...,...,...,...,...,...,...
2021-05-21,5218,291788,-7698,1469,11394,125028,64092492,-75.0,-533.0,218.0,269744.0
2021-05-22,4717,283744,-8044,1430,10918,125153,64379095,-39.0,-476.0,125.0,286603.0
2021-05-23,3995,281092,-2652,1410,10571,125225,64558486,-20.0,-347.0,72.0,179391.0
2021-05-24,2490,276439,-4653,1382,10332,125335,64665967,-28.0,-239.0,110.0,107481.0


In [52]:
df = df.loc[:pd.Timestamp("20210225"), :]

In [53]:
df

Unnamed: 0_level_0,Daily cases,Total positives,Total positives variation,Total intensive care,Total hospitalized,Total deaths,Tests,Daily intensive care,Daily hospitalized,Daily deaths,Daily tests
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-02-25,93,311,90,35,150,10,8623,9.0,23.0,3.0,4299.0
2020-02-26,78,385,74,36,164,12,9587,1.0,14.0,2.0,964.0
2020-02-27,250,588,203,56,304,17,12014,20.0,140.0,5.0,2427.0
2020-02-28,238,821,233,64,409,21,15695,8.0,105.0,4.0,3681.0
2020-02-29,240,1049,228,105,506,29,18661,41.0,97.0,8.0,2966.0
...,...,...,...,...,...,...,...,...,...,...,...
2021-02-21,13452,388895,4272,2094,19898,95718,38058939,31.0,110.0,232.0,250986.0
2021-02-22,9630,387903,-992,2118,20273,95992,38229611,24.0,375.0,274.0,170672.0
2021-02-23,13314,387948,45,2146,20441,96348,38533461,28.0,168.0,356.0,303850.0
2021-02-24,16424,389433,1485,2157,20374,96666,38873708,11.0,-67.0,318.0,340247.0


In [54]:
series = TimeSeries.from_series(df[name])

## Darts

In [10]:
models = [ExponentialSmoothing(), AutoARIMA(), Prophet()]

In [None]:
import functools

backtests = []

for model in models:
    print(f"{model}: running...")
    initial_time = time.time()
    hist_pred = model.historical_forecasts(series,
                            start=pd.Timestamp('2020-08-19'),
                            forecast_horizon=delta, stride=delta, verbose=True, last_points_only=False)
    hist_pred = functools.reduce(lambda a, b: a.append(b), hist_pred)
    backtests.append(hist_pred)
    
    final_time = time.time() - initial_time
    print(f"{model}: final time spent: {round(final_time, 3)}")

In [None]:
from darts.dataprocessing.transformers import Scaler

print(f"LSTM: running...")
initial_time = time.time()
transformer = Scaler()
transformed_series = transformer.fit_transform(series)
lstm = RNNModel(model='LSTM', input_chunk_length=round(len(series)/4), output_chunk_length=1)
models.append(lstm)

hist_pred =  lstm.historical_forecasts(transformed_series,
                            start=pd.Timestamp('2020-08-19'),
                            forecast_horizon=delta, stride=delta, verbose=True, last_points_only=False)
hist_pred = functools.reduce(lambda a, b: a.append(b), hist_pred)
backtests.append(transformer.inverse_transform(hist_pred))
final_time = time.time() - initial_time
print(f"LSTM: final time spent: {round(final_time, 3)}")

In [None]:
darts_maes = {}
darts_rmses = {}

for i, m in enumerate(models):
    prediction = backtests[i]
    #     print(prediction)
    err_mae = mae(backtests[i], series)
    err_rmse = rmse(backtests[i], series)
    darts_maes[m] = err_mae
    darts_rmses[m] = err_rmse


## Timex

In [None]:
with open(f"univariate/delta_{delta}/historical_predictions.pkl", 'rb') as file:
    p = pickle.load(file)

In [None]:
timex_maes = {}
timex_rmses = {}

for i, m in enumerate(p):
    pred_timex = p[m]
    pred_timex = pred_timex[name].astype('float')
    pred_timex = TimeSeries.from_series(pred_timex)
    pred_timex = pred_timex.slice_intersect(backtests[0])
    #     pred_timex = pred_timex.drop_after(backtests[0].time_index()[-1] + pd.Timedelta(days=1))
    assert len(pred_timex) == len(backtests[i])

    err_mae = mae(pred_timex, series)
    err_rmse = rmse(pred_timex, series)

    timex_maes[m] = err_mae
    timex_rmses[m] = err_rmse

In [None]:
print("########## FINAL RESULTS ##########")
print(f"Case: Covid-19, case: {name}, delta: {delta}")
print("MAES")
print("Darts results:")
for i, m in enumerate(darts_maes):
    print(f"{m}, MAE={round(darts_maes[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_maes):
    print(f"{m}, MAE={round(timex_maes[m], 3)}")

print("------------------------")
print("RMSES")
print("Darts results:")

for i, m in enumerate(darts_rmses):
    print(f"{m}, RMSE={round(darts_rmses[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_rmses):
    print(f"{m}, RMSE={round(timex_rmses[m], 3)}")

In [None]:
with open(f"multivariate/delta_{delta}/historical_predictions.pkl", 'rb') as file:
    p = pickle.load(file)

In [None]:
timex_maes = {}
timex_rmses = {}

for i, m in enumerate(p):
    pred_timex = p[m]
    pred_timex = pred_timex[name].astype('float')
    pred_timex = TimeSeries.from_series(pred_timex)
    pred_timex = pred_timex.slice_intersect(backtests[0])
    #     pred_timex = pred_timex.drop_after(backtests[0].time_index()[-1] + pd.Timedelta(days=1))
    assert len(pred_timex) == len(backtests[i])

    err_mae = mae(pred_timex, series)
    err_rmse = rmse(pred_timex, series)

    timex_maes[m] = err_mae
    timex_rmses[m] = err_rmse

In [None]:
print("########## FINAL RESULTS ##########")
print(f"Case: Covid-19 - Multivariate, case: {name}, delta: {delta}")
print("MAES")
print("Darts results:")
for i, m in enumerate(darts_maes):
    print(f"{m}, MAE={round(darts_maes[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_maes):
    print(f"{m}, MAE={round(timex_maes[m], 3)}")

print("------------------------")
print("RMSES")
print("Darts results:")

for i, m in enumerate(darts_rmses):
    print(f"{m}, RMSE={round(darts_rmses[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_rmses):
    print(f"{m}, RMSE={round(timex_rmses[m], 3)}")

## Delta 7

In [2]:
name = 'Daily cases'

In [3]:
delta = 7

In [4]:
df = pd.read_csv("Covid19-italy.csv", parse_dates=["Date"], index_col="Date")

In [5]:
df

Unnamed: 0_level_0,Daily cases,Total positives,Total positives variation,Total intensive care,Total hospitalized,Total deaths,Tests,Daily intensive care,Daily hospitalized,Daily deaths,Daily tests
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-02-25,93,311,90,35,150,10,8623,9.0,23.0,3.0,4299.0
2020-02-26,78,385,74,36,164,12,9587,1.0,14.0,2.0,964.0
2020-02-27,250,588,203,56,304,17,12014,20.0,140.0,5.0,2427.0
2020-02-28,238,821,233,64,409,21,15695,8.0,105.0,4.0,3681.0
2020-02-29,240,1049,228,105,506,29,18661,41.0,97.0,8.0,2966.0
...,...,...,...,...,...,...,...,...,...,...,...
2021-05-21,5218,291788,-7698,1469,11394,125028,64092492,-75.0,-533.0,218.0,269744.0
2021-05-22,4717,283744,-8044,1430,10918,125153,64379095,-39.0,-476.0,125.0,286603.0
2021-05-23,3995,281092,-2652,1410,10571,125225,64558486,-20.0,-347.0,72.0,179391.0
2021-05-24,2490,276439,-4653,1382,10332,125335,64665967,-28.0,-239.0,110.0,107481.0


In [6]:
df = df.loc[:pd.Timestamp("20210225"), :]

In [7]:
df

Unnamed: 0_level_0,Daily cases,Total positives,Total positives variation,Total intensive care,Total hospitalized,Total deaths,Tests,Daily intensive care,Daily hospitalized,Daily deaths,Daily tests
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-02-25,93,311,90,35,150,10,8623,9.0,23.0,3.0,4299.0
2020-02-26,78,385,74,36,164,12,9587,1.0,14.0,2.0,964.0
2020-02-27,250,588,203,56,304,17,12014,20.0,140.0,5.0,2427.0
2020-02-28,238,821,233,64,409,21,15695,8.0,105.0,4.0,3681.0
2020-02-29,240,1049,228,105,506,29,18661,41.0,97.0,8.0,2966.0
...,...,...,...,...,...,...,...,...,...,...,...
2021-02-21,13452,388895,4272,2094,19898,95718,38058939,31.0,110.0,232.0,250986.0
2021-02-22,9630,387903,-992,2118,20273,95992,38229611,24.0,375.0,274.0,170672.0
2021-02-23,13314,387948,45,2146,20441,96348,38533461,28.0,168.0,356.0,303850.0
2021-02-24,16424,389433,1485,2157,20374,96666,38873708,11.0,-67.0,318.0,340247.0


In [8]:
series = TimeSeries.from_series(df[name])

## Darts

In [11]:
import functools

backtests = []

for model in models:
    print(f"{model}: running...")
    initial_time = time.time()
    hist_pred = model.historical_forecasts(series,
                            start=pd.Timestamp('2020-08-19'),
                            forecast_horizon=delta, stride=delta, verbose=True, last_points_only=False)
    hist_pred = functools.reduce(lambda a, b: a.append(b), hist_pred)
    backtests.append(hist_pred)
    
    final_time = time.time() - initial_time
    print(f"{model}: final time spent: {round(final_time, 3)}")

Exponential smoothing: running...


  0%|          | 0/27 [00:00<?, ?it/s]

Exponential smoothing: final time spent: 3.611
Auto-ARIMA: running...


  0%|          | 0/27 [00:00<?, ?it/s]

Auto-ARIMA: final time spent: 136.55
Prophet: running...


  0%|          | 0/27 [00:00<?, ?it/s]

Prophet: final time spent: 41.826


In [12]:
from darts.dataprocessing.transformers import Scaler

print(f"LSTM: running...")
initial_time = time.time()
transformer = Scaler()
transformed_series = transformer.fit_transform(series)
lstm = RNNModel(model='LSTM', input_chunk_length=round(len(series)/4), output_chunk_length=1)
models.append(lstm)

hist_pred =  lstm.historical_forecasts(transformed_series,
                            start=pd.Timestamp('2020-08-19'),
                            forecast_horizon=delta, stride=delta, verbose=True, last_points_only=False)
hist_pred = functools.reduce(lambda a, b: a.append(b), hist_pred)
backtests.append(transformer.inverse_transform(hist_pred))
final_time = time.time() - initial_time
print(f"LSTM: final time spent: {round(final_time, 3)}")

LSTM: running...


  0%|          | 0/27 [00:00<?, ?it/s]

LSTM: final time spent: 86.452


In [13]:
darts_maes = {}
darts_rmses = {}

for i, m in enumerate(models):
    prediction = backtests[i]
    #     print(prediction)
    err_mae = mae(backtests[i], series)
    err_rmse = rmse(backtests[i], series)
    darts_maes[m] = err_mae
    darts_rmses[m] = err_rmse

#     print(f"{m}: MAE = {round(err_mae, 3)}, RMSE = {round(err_rmse, 3)}")


## Timex

In [14]:
with open(f"univariate/delta_{delta}/historical_predictions.pkl", 'rb') as file:
    p = pickle.load(file)

In [15]:
timex_maes = {}
timex_rmses = {}

for i, m in enumerate(p):
    pred_timex = p[m]
    pred_timex = pred_timex[name].astype('float')
    pred_timex = TimeSeries.from_series(pred_timex)
    pred_timex = pred_timex.slice_intersect(backtests[0])
    #     pred_timex = pred_timex.drop_after(backtests[0].time_index()[-1] + pd.Timedelta(days=1))
    assert len(pred_timex) == len(backtests[i])

    err_mae = mae(pred_timex, series)
    err_rmse = rmse(pred_timex, series)

    timex_maes[m] = err_mae
    timex_rmses[m] = err_rmse

In [16]:
print("########## FINAL RESULTS ##########")
print(f"Case: Covid-19, case: {name}, delta: {delta}")
print("MAES")
print("Darts results:")
for i, m in enumerate(darts_maes):
    print(f"{m}, MAE={round(darts_maes[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_maes):
    print(f"{m}, MAE={round(timex_maes[m], 3)}")

print("------------------------")
print("RMSES")
print("Darts results:")

for i, m in enumerate(darts_rmses):
    print(f"{m}, RMSE={round(darts_rmses[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_rmses):
    print(f"{m}, RMSE={round(timex_rmses[m], 3)}")

########## FINAL RESULTS ##########
Case: Covid-19, case: Daily cases, delta: 7
MAES
Darts results:
Exponential smoothing, MAE=2823.24
Auto-ARIMA, MAE=2224.84
Prophet, MAE=7144.699
<darts.models.rnn_model.RNNModel object at 0x7f40dd0e31f0>, MAE=2886.039
Timex results:
exponentialsmoothing, MAE=2321.648
fbprophet, MAE=2501.13
lstm, MAE=2898.893
arima, MAE=2179.737
------------------------
RMSES
Darts results:
Exponential smoothing, RMSE=3757.231
Auto-ARIMA, RMSE=3224.379
Prophet, RMSE=9647.744
<darts.models.rnn_model.RNNModel object at 0x7f40dd0e31f0>, RMSE=4258.9
Timex results:
exponentialsmoothing, RMSE=3528.258
fbprophet, RMSE=4530.969
lstm, RMSE=4020.379
arima, RMSE=3255.058


In [17]:
with open(f"multivariate/delta_{delta}/historical_predictions.pkl", 'rb') as file:
    p = pickle.load(file)

In [18]:
timex_maes = {}
timex_rmses = {}

for i, m in enumerate(p):
    pred_timex = p[m]
    pred_timex = pred_timex[name].astype('float')
    pred_timex = TimeSeries.from_series(pred_timex)
    pred_timex = pred_timex.slice_intersect(backtests[0])
    #     pred_timex = pred_timex.drop_after(backtests[0].time_index()[-1] + pd.Timedelta(days=1))
    assert len(pred_timex) == len(backtests[i])

    err_mae = mae(pred_timex, series)
    err_rmse = rmse(pred_timex, series)

    timex_maes[m] = err_mae
    timex_rmses[m] = err_rmse

In [19]:
print("########## FINAL RESULTS ##########")
print(f"Case: Covid-19 - Multivariate, case: {name}, delta: {delta}")
print("MAES")
print("Darts results:")
for i, m in enumerate(darts_maes):
    print(f"{m}, MAE={round(darts_maes[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_maes):
    print(f"{m}, MAE={round(timex_maes[m], 3)}")

print("------------------------")
print("RMSES")
print("Darts results:")

for i, m in enumerate(darts_rmses):
    print(f"{m}, RMSE={round(darts_rmses[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_rmses):
    print(f"{m}, RMSE={round(timex_rmses[m], 3)}")

########## FINAL RESULTS ##########
Case: Covid-19 - Multivariate, case: Daily cases, delta: 7
MAES
Darts results:
Exponential smoothing, MAE=2823.24
Auto-ARIMA, MAE=2224.84
Prophet, MAE=7144.699
<darts.models.rnn_model.RNNModel object at 0x7f40dd0e31f0>, MAE=2886.039
Timex results:
fbprophet, MAE=2752.839
------------------------
RMSES
Darts results:
Exponential smoothing, RMSE=3757.231
Auto-ARIMA, RMSE=3224.379
Prophet, RMSE=9647.744
<darts.models.rnn_model.RNNModel object at 0x7f40dd0e31f0>, RMSE=4258.9
Timex results:
fbprophet, RMSE=4957.999


# Daily deaths

## Delta 1

In [None]:
name = 'Daily deaths'

In [None]:
delta = 1

In [None]:
df = pd.read_csv("Covid19-italy.csv", parse_dates=["Date"], index_col="Date")

In [None]:
df

In [None]:
df = df.loc[:pd.Timestamp("20210225"), :]

In [None]:
df

In [None]:
series = TimeSeries.from_series(df[name])

## Darts

In [None]:
models = [ExponentialSmoothing(), AutoARIMA(), Prophet()]

In [None]:
import functools

backtests = []

for model in models:
    print(f"{model}: running...")
    initial_time = time.time()
    hist_pred = model.historical_forecasts(series,
                            start=pd.Timestamp('2020-08-19'),
                            forecast_horizon=delta, stride=delta, verbose=True, last_points_only=False)
    hist_pred = functools.reduce(lambda a, b: a.append(b), hist_pred)
    backtests.append(hist_pred)
    
    final_time = time.time() - initial_time
    print(f"{model}: final time spent: {round(final_time, 3)}")

In [None]:
from darts.dataprocessing.transformers import Scaler

print(f"LSTM: running...")
initial_time = time.time()
transformer = Scaler()
transformed_series = transformer.fit_transform(series)
lstm = RNNModel(model='LSTM', input_chunk_length=round(len(series)/4), output_chunk_length=1)
models.append(lstm)

hist_pred =  lstm.historical_forecasts(transformed_series,
                            start=pd.Timestamp('2020-08-19'),
                            forecast_horizon=delta, stride=delta, verbose=True, last_points_only=False)
hist_pred = functools.reduce(lambda a, b: a.append(b), hist_pred)
backtests.append(transformer.inverse_transform(hist_pred))
final_time = time.time() - initial_time
print(f"LSTM: final time spent: {round(final_time, 3)}")

In [None]:
darts_maes = {}
darts_rmses = {}

for i, m in enumerate(models):
    prediction = backtests[i]
    #     print(prediction)
    err_mae = mae(backtests[i], series)
    err_rmse = rmse(backtests[i], series)
    darts_maes[m] = err_mae
    darts_rmses[m] = err_rmse

    print(f"{m}: MAE = {round(err_mae, 3)}, RMSE = {round(err_rmse, 3)}")


## Timex

In [None]:
with open(f"univariate/delta_{delta}/historical_predictions.pkl", 'rb') as file:
    p = pickle.load(file)

In [None]:
timex_maes = {}
timex_rmses = {}

for i, m in enumerate(p):
    pred_timex = p[m]
    pred_timex = pred_timex[name].astype('float')
    pred_timex = TimeSeries.from_series(pred_timex)
    pred_timex = pred_timex.slice_intersect(backtests[0])
    #     pred_timex = pred_timex.drop_after(backtests[0].time_index()[-1] + pd.Timedelta(days=1))
    assert len(pred_timex) == len(backtests[i])

    err_mae = mae(pred_timex, series)
    err_rmse = rmse(pred_timex, series)

    timex_maes[m] = err_mae
    timex_rmses[m] = err_rmse

In [None]:
print("########## FINAL RESULTS ##########")
print(f"Case: Covid-19, case: {name}, delta: {delta}")
print("MAES")
print("Darts results:")
for i, m in enumerate(darts_maes):
    print(f"{m}, MAE={round(darts_maes[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_maes):
    print(f"{m}, MAE={round(timex_maes[m], 3)}")

print("------------------------")
print("RMSES")
print("Darts results:")

for i, m in enumerate(darts_rmses):
    print(f"{m}, RMSE={round(darts_rmses[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_rmses):
    print(f"{m}, RMSE={round(timex_rmses[m], 3)}")

In [None]:
with open(f"multivariate/delta_{delta}/historical_predictions.pkl", 'rb') as file:
    p = pickle.load(file)

In [None]:
timex_maes = {}
timex_rmses = {}

for i, m in enumerate(p):
    pred_timex = p[m]
    pred_timex = pred_timex[name].astype('float')
    pred_timex = TimeSeries.from_series(pred_timex)
    pred_timex = pred_timex.slice_intersect(backtests[0])
    #     pred_timex = pred_timex.drop_after(backtests[0].time_index()[-1] + pd.Timedelta(days=1))
    assert len(pred_timex) == len(backtests[i])

    err_mae = mae(pred_timex, series)
    err_rmse = rmse(pred_timex, series)

    timex_maes[m] = err_mae
    timex_rmses[m] = err_rmse

In [None]:
print("########## FINAL RESULTS ##########")
print(f"Case: Covid-19 - Multivariate, case: {name}, delta: {delta}")
print("MAES")
print("Darts results:")
for i, m in enumerate(darts_maes):
    print(f"{m}, MAE={round(darts_maes[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_maes):
    print(f"{m}, MAE={round(timex_maes[m], 3)}")

print("------------------------")
print("RMSES")
print("Darts results:")

for i, m in enumerate(darts_rmses):
    print(f"{m}, RMSE={round(darts_rmses[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_rmses):
    print(f"{m}, RMSE={round(timex_rmses[m], 3)}")

## Delta 7

In [20]:
name = 'Daily deaths'

In [21]:
delta = 7

In [22]:
df = pd.read_csv("Covid19-italy.csv", parse_dates=["Date"], index_col="Date")

In [23]:
df

Unnamed: 0_level_0,Daily cases,Total positives,Total positives variation,Total intensive care,Total hospitalized,Total deaths,Tests,Daily intensive care,Daily hospitalized,Daily deaths,Daily tests
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-02-25,93,311,90,35,150,10,8623,9.0,23.0,3.0,4299.0
2020-02-26,78,385,74,36,164,12,9587,1.0,14.0,2.0,964.0
2020-02-27,250,588,203,56,304,17,12014,20.0,140.0,5.0,2427.0
2020-02-28,238,821,233,64,409,21,15695,8.0,105.0,4.0,3681.0
2020-02-29,240,1049,228,105,506,29,18661,41.0,97.0,8.0,2966.0
...,...,...,...,...,...,...,...,...,...,...,...
2021-05-21,5218,291788,-7698,1469,11394,125028,64092492,-75.0,-533.0,218.0,269744.0
2021-05-22,4717,283744,-8044,1430,10918,125153,64379095,-39.0,-476.0,125.0,286603.0
2021-05-23,3995,281092,-2652,1410,10571,125225,64558486,-20.0,-347.0,72.0,179391.0
2021-05-24,2490,276439,-4653,1382,10332,125335,64665967,-28.0,-239.0,110.0,107481.0


In [24]:
df = df.loc[:pd.Timestamp("20210225"), :]

In [25]:
df

Unnamed: 0_level_0,Daily cases,Total positives,Total positives variation,Total intensive care,Total hospitalized,Total deaths,Tests,Daily intensive care,Daily hospitalized,Daily deaths,Daily tests
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-02-25,93,311,90,35,150,10,8623,9.0,23.0,3.0,4299.0
2020-02-26,78,385,74,36,164,12,9587,1.0,14.0,2.0,964.0
2020-02-27,250,588,203,56,304,17,12014,20.0,140.0,5.0,2427.0
2020-02-28,238,821,233,64,409,21,15695,8.0,105.0,4.0,3681.0
2020-02-29,240,1049,228,105,506,29,18661,41.0,97.0,8.0,2966.0
...,...,...,...,...,...,...,...,...,...,...,...
2021-02-21,13452,388895,4272,2094,19898,95718,38058939,31.0,110.0,232.0,250986.0
2021-02-22,9630,387903,-992,2118,20273,95992,38229611,24.0,375.0,274.0,170672.0
2021-02-23,13314,387948,45,2146,20441,96348,38533461,28.0,168.0,356.0,303850.0
2021-02-24,16424,389433,1485,2157,20374,96666,38873708,11.0,-67.0,318.0,340247.0


In [26]:
series = TimeSeries.from_series(df[name])

## Darts

In [27]:
models = [ExponentialSmoothing(), AutoARIMA(), Prophet()]

In [28]:
import functools

backtests = []

for model in models:
    print(f"{model}: running...")
    initial_time = time.time()
    hist_pred = model.historical_forecasts(series,
                            start=pd.Timestamp('2020-08-19'),
                            forecast_horizon=delta, stride=delta, verbose=True, last_points_only=False)
    hist_pred = functools.reduce(lambda a, b: a.append(b), hist_pred)
    backtests.append(hist_pred)
    
    final_time = time.time() - initial_time
    print(f"{model}: final time spent: {round(final_time, 3)}")

Exponential smoothing: running...


  0%|          | 0/27 [00:00<?, ?it/s]

Exponential smoothing: final time spent: 3.682
Auto-ARIMA: running...


  0%|          | 0/27 [00:00<?, ?it/s]

Auto-ARIMA: final time spent: 199.429
Prophet: running...


  0%|          | 0/27 [00:00<?, ?it/s]

Prophet: final time spent: 39.514


In [29]:
from darts.dataprocessing.transformers import Scaler

print(f"LSTM: running...")
initial_time = time.time()
transformer = Scaler()
transformed_series = transformer.fit_transform(series)
lstm = RNNModel(model='LSTM', input_chunk_length=round(len(series)/4), output_chunk_length=1)
models.append(lstm)

hist_pred =  lstm.historical_forecasts(transformed_series,
                            start=pd.Timestamp('2020-08-19'),
                            forecast_horizon=delta, stride=delta, verbose=True, last_points_only=False)
hist_pred = functools.reduce(lambda a, b: a.append(b), hist_pred)
backtests.append(transformer.inverse_transform(hist_pred))
final_time = time.time() - initial_time
print(f"LSTM: final time spent: {round(final_time, 3)}")

LSTM: running...


  0%|          | 0/27 [00:00<?, ?it/s]

LSTM: final time spent: 73.014


In [30]:
darts_maes = {}
darts_rmses = {}

for i, m in enumerate(models):
    prediction = backtests[i]
    #     print(prediction)
    err_mae = mae(backtests[i], series)
    err_rmse = rmse(backtests[i], series)
    darts_maes[m] = err_mae
    darts_rmses[m] = err_rmse

    print(f"{m}: MAE = {round(err_mae, 3)}, RMSE = {round(err_rmse, 3)}")


Exponential smoothing: MAE = 62.484, RMSE = 94.394
Auto-ARIMA: MAE = 51.062, RMSE = 82.213
Prophet: MAE = 208.744, RMSE = 238.023
<darts.models.rnn_model.RNNModel object at 0x7f404bfc8340>: MAE = 63.319, RMSE = 88.318


## Timex

In [31]:
with open(f"univariate/delta_{delta}/historical_predictions.pkl", 'rb') as file:
    p = pickle.load(file)

In [32]:
timex_maes = {}
timex_rmses = {}

for i, m in enumerate(p):
    pred_timex = p[m]
    pred_timex = pred_timex[name].astype('float')
    pred_timex = TimeSeries.from_series(pred_timex)
    pred_timex = pred_timex.slice_intersect(backtests[0])
    #     pred_timex = pred_timex.drop_after(backtests[0].time_index()[-1] + pd.Timedelta(days=1))
    assert len(pred_timex) == len(backtests[i])

    err_mae = mae(pred_timex, series)
    err_rmse = rmse(pred_timex, series)

    timex_maes[m] = err_mae
    timex_rmses[m] = err_rmse

In [33]:
print("########## FINAL RESULTS ##########")
print(f"Case: Covid-19, case: {name}, delta: {delta}")
print("MAES")
print("Darts results:")
for i, m in enumerate(darts_maes):
    print(f"{m}, MAE={round(darts_maes[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_maes):
    print(f"{m}, MAE={round(timex_maes[m], 3)}")

print("------------------------")
print("RMSES")
print("Darts results:")

for i, m in enumerate(darts_rmses):
    print(f"{m}, RMSE={round(darts_rmses[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_rmses):
    print(f"{m}, RMSE={round(timex_rmses[m], 3)}")

########## FINAL RESULTS ##########
Case: Covid-19, case: Daily deaths, delta: 7
MAES
Darts results:
Exponential smoothing, MAE=62.484
Auto-ARIMA, MAE=51.062
Prophet, MAE=208.744
<darts.models.rnn_model.RNNModel object at 0x7f404bfc8340>, MAE=63.319
Timex results:
exponentialsmoothing, MAE=42.487
fbprophet, MAE=59.522
lstm, MAE=68.49
arima, MAE=55.921
------------------------
RMSES
Darts results:
Exponential smoothing, RMSE=94.394
Auto-ARIMA, RMSE=82.213
Prophet, RMSE=238.023
<darts.models.rnn_model.RNNModel object at 0x7f404bfc8340>, RMSE=88.318
Timex results:
exponentialsmoothing, RMSE=64.44
fbprophet, RMSE=100.179
lstm, RMSE=100.057
arima, RMSE=94.462


In [34]:
with open(f"multivariate/delta_{delta}/historical_predictions.pkl", 'rb') as file:
    p = pickle.load(file)

In [35]:
timex_maes = {}
timex_rmses = {}

for i, m in enumerate(p):
    pred_timex = p[m]
    pred_timex = pred_timex[name].astype('float')
    pred_timex = TimeSeries.from_series(pred_timex)
    pred_timex = pred_timex.slice_intersect(backtests[0])
    #     pred_timex = pred_timex.drop_after(backtests[0].time_index()[-1] + pd.Timedelta(days=1))
    assert len(pred_timex) == len(backtests[i])

    err_mae = mae(pred_timex, series)
    err_rmse = rmse(pred_timex, series)

    timex_maes[m] = err_mae
    timex_rmses[m] = err_rmse

In [36]:
print("########## FINAL RESULTS ##########")
print(f"Case: Covid-19 - Multivariate, case: {name}, delta: {delta}")
print("MAES")
print("Darts results:")
for i, m in enumerate(darts_maes):
    print(f"{m}, MAE={round(darts_maes[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_maes):
    print(f"{m}, MAE={round(timex_maes[m], 3)}")

print("------------------------")
print("RMSES")
print("Darts results:")

for i, m in enumerate(darts_rmses):
    print(f"{m}, RMSE={round(darts_rmses[m], 3)}")

print("Timex results:")
for i, m in enumerate(timex_rmses):
    print(f"{m}, RMSE={round(timex_rmses[m], 3)}")

########## FINAL RESULTS ##########
Case: Covid-19 - Multivariate, case: Daily deaths, delta: 7
MAES
Darts results:
Exponential smoothing, MAE=62.484
Auto-ARIMA, MAE=51.062
Prophet, MAE=208.744
<darts.models.rnn_model.RNNModel object at 0x7f404bfc8340>, MAE=63.319
Timex results:
fbprophet, MAE=74.646
------------------------
RMSES
Darts results:
Exponential smoothing, RMSE=94.394
Auto-ARIMA, RMSE=82.213
Prophet, RMSE=238.023
<darts.models.rnn_model.RNNModel object at 0x7f404bfc8340>, RMSE=88.318
Timex results:
fbprophet, RMSE=170.897
