In [1]:
import os
os.chdir('/home/reffert/DeepAR_InfluenzaForecast')
from PythonFiles.model import model, preprocessing, split_forecasts_by_week, plot_coverage, print_forecasts_by_week, forecast_by_week, train_test_split, update_deepAR_parameters
from PythonFiles.Configuration import Configuration
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.rolling_dataset import generate_rolling_dataset,StepStrategy
# Project configuration object; later cells set the train/test time window on it
# and read the estimators from it.
config = Configuration()

# Weekly influenza observations (later cells use the value, location, date and week columns).
influenza_df = pd.read_csv(
    "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/influenza.csv",
    sep=',',
)

# Population table; later cells look up the "2011" column by the "Location" column.
population_df = pd.read_csv(
    "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/PopulationVector.csv",
    sep=',',
)

# Adjacency table whose first CSV column becomes the index; later cells select it
# by location on both the rows and the columns.
neighbourhood_df = pd.read_csv(
    "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/AdjacentMatrix.csv",
    sep=',',
    index_col=0,
)

In [2]:
# Time window: training from 1999-01-01 to 2016-09-30, test period ending 2018-09-30.
# (A later cell switches the training start to 2010-01-01 for a second split.)
config.train_start_time = datetime(1999, 1, 1, 0, 0, 0)
config.train_end_time = datetime(2016, 9, 30, 23, 0, 0)
config.test_end_time = datetime(2018, 9, 30, 23, 0, 0)

# Containers: the two dicts are filled by the split and model-fitting cells below;
# overall_evaluation_df is not used in the visible part of the notebook.
overall_evaluation_df = pd.DataFrame()
data_splits_dict = {}
output_dict = {}

# Every distinct location present in the raw data.
locations = list(influenza_df.location.unique())

# Process the df into a uniformly spaced df
df = influenza_df.loc[
    influenza_df.location.isin(locations),
    ['value', 'location', 'date', 'week'],
]
df = preprocessing(config, df, check_count=False, output_type="corrected_df")

# Attach per-location covariates: a "population" column (2011 figure) and one column
# per location holding that location's row of the adjacency table.
for location in locations:
    location_rows = df.location == location

    population_2011 = population_df.loc[population_df.Location == location, "2011"].values[0]
    df.loc[location_rows, "population"] = int(population_2011)

    neighbour_row = neighbourhood_df.loc[neighbourhood_df.index == location, locations].values[0]
    df.loc[location_rows, locations] = neighbour_row

# Creating the different splits

In [3]:
# Build one train/test split per training-start date.
#
# The third argument of train_test_split is passed as True for both splits.
# It presumably switches the additional features on (an earlier, disabled variant
# of these calls passed False and was labelled "without_features") — TODO confirm.

# Split 1: long training history starting in 1999.
# The start date is pinned here (same value as set earlier in the notebook) so that
# re-running this cell does not silently rebuild this split with the 2010 start
# date that is assigned further down.
config.train_start_time = datetime(1999,1,1,0,0,0)
# NOTE(review): the key says "2001" although the training window starts in 1999 —
# confirm which is intended; the key is kept because it feeds the evaluation IDs.
data_splits_dict["2001"] = list(train_test_split(config, df, True))

# Split 2: shorter training history starting in 2010.
# config.train_start_time stays at this value for the cells that follow.
config.train_start_time = datetime(2010,1,1,0,0,0)
data_splits_dict["2010"] = list(train_test_split(config, df, True))

# Model fitting

In [None]:
# Two DeepAR variants: the estimator exactly as configured, and one obtained from
# update_deepAR_parameters with the real-valued dynamic/static feature flags set to False.
# NOTE(review): deepAR_with_features is captured before update_deepAR_parameters runs;
# confirm that helper returns a new estimator instead of modifying the one held by config.
deepAR_with_features = config.deeparestimator
new_parameters = dict(use_feat_dynamic_real=False, use_feat_static_real=False)
deepAR_without_features = update_deepAR_parameters(config, new_parameters)

# Models to compare, keyed by the label that later becomes part of the evaluation ID.
model_dict = {
    "DeepAR_without_features": deepAR_without_features,
    "FFNN": config.feedforwardestimator,
    "DeepAR_with_features": deepAR_with_features,
}

# Run every model on every data split and keep the forecasts and evaluation tables per split.
for data_split, split_parts in data_splits_dict.items():
    print(data_split)
    # split_parts[0] / split_parts[1] are the first two items returned by
    # train_test_split (presumably the train and test sets — TODO confirm).
    forecasts_dict, evaluator_df_dict = forecast_by_week(
        config, split_parts[0], split_parts[1], locations, model_dict
    )
    output_dict[data_split] = [forecasts_dict, evaluator_df_dict]

2001


100%|██████████| 50/50 [00:04<00:00, 10.85it/s, epoch=1/8, avg_epoch_loss=1.05]
100%|██████████| 50/50 [00:03<00:00, 12.80it/s, epoch=2/8, avg_epoch_loss=1.99]
100%|██████████| 50/50 [00:04<00:00, 11.58it/s, epoch=3/8, avg_epoch_loss=0.982]
100%|██████████| 50/50 [00:03<00:00, 12.88it/s, epoch=4/8, avg_epoch_loss=0.726]
100%|██████████| 50/50 [00:04<00:00, 11.20it/s, epoch=5/8, avg_epoch_loss=2.25]
100%|██████████| 50/50 [00:04<00:00, 11.22it/s, epoch=6/8, avg_epoch_loss=1.04]
100%|██████████| 50/50 [00:04<00:00, 11.49it/s, epoch=7/8, avg_epoch_loss=0.582]
100%|██████████| 50/50 [00:04<00:00, 12.30it/s, epoch=8/8, avg_epoch_loss=1.82]


# Comparative Evaluation

In [None]:
# Gather the aggregate rows of every (data split, model) evaluation table into one frame.
# Frames are collected in a list and concatenated once, instead of growing a DataFrame
# with pd.concat inside the loop (quadratic copying, and concatenating onto an empty
# seed frame is deprecated behaviour in recent pandas).
aggregate_frames = []
for data_split, (forecasts_dict, evaluator_df_dict) in output_dict.items():
    # forecasts_dict is not needed for the comparison table; only the evaluation tables are used.
    for key, model_evaluation_df in evaluator_df_dict.items():
        # Keep the rows whose item_id contains the literal substring "aggregate".
        is_aggregate = model_evaluation_df.item_id.str.contains("aggregate", regex=False, na=False)
        # Filter first, then copy, so the ID column is written to an independent frame.
        evaluation_df = model_evaluation_df.loc[is_aggregate].copy()
        # ID = data split label followed by the model label, e.g. "2010FFNN".
        evaluation_df['ID'] = str(data_split) + str(key)
        aggregate_frames.append(evaluation_df)

# Fall back to an empty frame when there is nothing to concatenate (pd.concat rejects an empty list).
final_evaluation_df = pd.concat(aggregate_frames) if aggregate_frames else pd.DataFrame()

# Print the comparison table for each of the four forecast horizons.
for i in range(1, 5):
    print(f"Week-{i}-Ahead")
    # Aggregate rows are labelled "aggregated {1}" ... "aggregated {4}" (literal braces).
    week_rows = final_evaluation_df.item_id == "aggregated {" + f"{i}" + "}"
    print(final_evaluation_df.loc[week_rows, ["ID", "mean_WIS", "MAE_Coverage"]].reset_index(drop=True))

In [None]:
# 2x2 grid of bar charts with a shared y-axis: one panel per forecast horizon (weeks 1-4),
# showing mean_WIS for every split/model ID.
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(16, 9), sharey=True)
for i in range(1, 5):
    # Row-major panel position: 1 -> (0, 0), 2 -> (0, 1), 3 -> (1, 0), 4 -> (1, 1).
    plotnumber = divmod(i - 1, 2)
    # Rows of the aggregate entry for this horizon, labelled "aggregated {i}" with literal braces.
    week_rows = final_evaluation_df.item_id == "aggregated {" + f"{i}" + "}"
    axs[plotnumber].bar(
        final_evaluation_df.loc[week_rows, "ID"],
        final_evaluation_df.loc[week_rows, "mean_WIS"],
    )
    axs[plotnumber].set_title(f"mean_WIS Scores Week-{i}-ahead")
# Rotate the ID tick labels so they stay readable.
fig.autofmt_xdate(rotation=60, ha='right')
plt.show()