In [1]:
import os
os.chdir('/home/reffert/DeepAR_InfluenzaForecast')
from PythonFiles.model import model, preprocessing, split_forecasts_by_week, plot_coverage, print_forecasts_by_week, forecast_by_week, train_test_split, update_deepAR_parameters
from PythonFiles.Configuration import Configuration
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.mx import Trainer
from gluonts.evaluation import make_evaluation_predictions
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.rolling_dataset import generate_rolling_dataset,StepStrategy
from gluonts.evaluation import make_evaluation_predictions, Evaluator
import ray
from ray.air import session
from ray import tune
from gluonts.mx.distribution import NegativeBinomialOutput
# Project settings object; its training/test window is set explicitly further down.
config = Configuration()

# Raw inputs: influenza case counts, a population table and an adjacency matrix
# whose index and columns are location names.
influenza_df = pd.read_csv(
    "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/influenza.csv",
    sep=",",
)
population_df = pd.read_csv(
    "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/PopulationVector.csv",
    sep=",",
)
neighbourhood_df = pd.read_csv(
    "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/AdjacentMatrix.csv",
    sep=",",
    index_col=0,
)

# Training window starts 1999-01-01 (alternative start: datetime(2010,1,1,0,0,0)),
# training ends 2016-09-30 23:00 and the test window ends 2018-09-30 23:00.
config.train_start_time = datetime(year=1999, month=1, day=1, hour=0, minute=0, second=0)
config.train_end_time = datetime(year=2016, month=9, day=30, hour=23, minute=0, second=0)
config.test_end_time = datetime(year=2018, month=9, day=30, hour=23, minute=0, second=0)

# Containers filled here and (presumably) by later cells.
overall_evaluation_df = pd.DataFrame()
data_splits_dict = dict()
output_dict = dict()

# Every distinct location present in the case data.
locations = list(influenza_df["location"].unique())

# Process the df into a uniformly spaced df
selected_rows = influenza_df["location"].isin(locations)
selected_columns = ["value", "location", "date", "week"]
df = influenza_df.loc[selected_rows, selected_columns]
df = preprocessing(config, df, check_count=False, output_type="corrected_df")

# Attach two kinds of per-location features to every row of that location:
# the 2011 population figure and the location's row of the adjacency matrix
# (one column per location).
for location in locations:
    rows_of_location = df["location"] == location

    population_2011 = population_df.loc[population_df["Location"] == location, "2011"].values[0]
    df.loc[rows_of_location, "population"] = int(population_2011)

    adjacency_row = neighbourhood_df.loc[neighbourhood_df.index == location, locations].values[0]
    df.loc[rows_of_location, locations] = adjacency_row.astype(int)

# Data split WITH additional features (third argument True).
# NOTE(review): the key says "2001", but config.train_start_time above is 1999-01-01;
# the key is kept unchanged because later cells look it up by this name.
#data_splits_dict["without_features_2001"] = list(train_test_split(config, df, False))
data_splits_dict["with_features_2001"] = [*train_test_split(config, df, True)]

# Change the beginning of the training period
#config.train_start_time = datetime(2010,1,1,0,0,0)
#data_splits_dict["without_features_2010"] = list(train_test_split(config, df, False))
#data_splits_dict["with_features_2010"] = list(train_test_split(config, df, True))



In [4]:
def evaluate(config, train, test, configuration, num_series=411):
    """Fit and score one DeepAR hyperparameter setting.

    Args:
        config: Hyperparameter values for the current trial (the mapping Ray
            Tune hands to the trainable); forwarded to
            ``update_deepAR_parameters`` together with ``configuration``.
        train: Training dataset passed on to ``model``.
        test: Test dataset passed on to ``model``.
        configuration: Project ``Configuration`` object. Its ``quantiles`` are
            used for the evaluation and ``parameters["prediction_length"]``
            for the normalisation.
        num_series: Count that, multiplied by the prediction length, divides
            the aggregated quantile loss. Defaults to 411, the value that was
            previously hard-coded here (presumably the number of locations in
            the test set -- TODO confirm against the data split).

    Returns:
        ``agg_metrics["mean_absolute_QuantileLoss"]`` divided by
        ``prediction_length * num_series``; the notebook treats this value as
        the mean weighted interval score (mean WIS).
    """
    deeparestimator = update_deepAR_parameters(configuration, config)
    forecasts, tss = model(train, test, deeparestimator)

    # Evaluation with the quantiles of the configuration; only the aggregate
    # metrics (first element of the evaluator's result) are needed.
    evaluator = Evaluator(quantiles=configuration.quantiles)
    agg_metrics = evaluator(tss, forecasts)[0]

    # Normalise the aggregated quantile loss by the number of forecast points.
    prediction_length = configuration.parameters["prediction_length"]
    mean_WIS = agg_metrics["mean_absolute_QuantileLoss"] / (prediction_length * num_series)
    return mean_WIS

def objective(config, train, test, configuration):
    """Ray Tune trainable: score one trial and report it as ``mean_WIS``.

    Args:
        config: Hyperparameter values of the current trial.
        train: Training dataset forwarded to ``evaluate``.
        test: Test dataset forwarded to ``evaluate``.
        configuration: Project ``Configuration`` object forwarded to ``evaluate``.
    """
    trial_metrics = {"mean_WIS": evaluate(config, train, test, configuration)}
    session.report(trial_metrics)

In [None]:
# Fixed (default) DeepAR settings. Each one is wrapped in a one-element grid,
# so the grid contains exactly one hyperparameter combination.
fixed_hyperparameters = {
    "num_cells": 40,
    "num_layers": 2,
    "context_length": 4,
    "prediction_length": 4,
    "cell_type": "lstm",
    "epochs": 100,
    "use_feat_static_real": False,
    "use_feat_dynamic_real": False,
    "use_feat_static_cat": False,
}
hp_search_space = {
    name: tune.grid_search([value]) for name, value in fixed_hyperparameters.items()
}


# Train/test parts of the split prepared in the first cell.
feature_split = data_splits_dict["with_features_2001"]
train = feature_split[0]
test = feature_split[1]
configuration = Configuration()

# The datasets and the configuration object are bound to the trainable here,
# so only the hyperparameters vary per trial.
trainable = tune.with_parameters(objective, train=train, test=test, configuration=configuration)

# num_samples=100 repeats the one-point grid 100 times, i.e. the same setting
# is trained and scored 100 times, with at most 3 trials running at once.
tune_settings = tune.TuneConfig(
    num_samples=100,
    metric="mean_WIS",
    mode="min",
    max_concurrent_trials=3,
)
tuner = tune.Tuner(trainable, tune_config=tune_settings, param_space=hp_search_space)
results = tuner.fit()

best_result = results.get_best_result()
print("Best hyperparameters found were: ", best_result.config)

# Keep the per-trial results on disk as well as in the notebook output.
results_df = results.get_dataframe()
print(results_df)
results_df.to_csv("default_DeepAR_results_12_05.csv")

2023-05-12 18:55:57,909	INFO worker.py:1553 -- Started a local Ray instance.


0,1
Current time:,2023-05-12 19:33:03
Running for:,00:36:57.29
Memory:,132.7/236.0 GiB

Trial name,status,loc,cell_type,context_length,epochs,num_cells,num_layers,prediction_length,use_feat_dynamic_real,use_feat_static_cat,use_feat_static_real
objective_e2296_00000,RUNNING,172.22.1.197:757608,lstm,4,100,40,2,4,False,False,False
objective_e2296_00001,RUNNING,172.22.1.197:758172,lstm,4,100,40,2,4,False,False,False
objective_e2296_00002,RUNNING,172.22.1.197:758303,lstm,4,100,40,2,4,False,False,False


  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  8%|▊         | 4/50 [00:10<01:56,  2.54s/it, epoch=1/100, avg_epoch_loss=2.06]
  0%|          | 0/50 [00:00<?, ?it/s]
 10%|█         | 5/50 [00:11<01:39,  2.22s/it, epoch=1/100, avg_epoch_loss=0.858]
 18%|█▊        | 9/50 [00:21<01:34,  2.31s/it, epoch=1/100, avg_epoch_loss=1.58]
 12%|█▏        | 6/50 [00:10<01:16,  1.75s/it, epoch=1/100, avg_epoch_loss=1.35]
 20%|██        | 10/50 [00:21<01:26,  2.16s/it, epoch=1/100, avg_epoch_loss=1.07]
 36%|███▌      | 18/50 [00:31<00:51,  1.60s/it, epoch=1/100, avg_epoch_loss=1.44]
 26%|██▌       | 13/50 [00:21<01:01,  1.65s/it, epoch=1/100, avg_epoch_loss=1.23]
 48%|████▊     | 24/50 [00:32<00:30,  1.18s/it, epoch=1/100, avg_epoch_loss=1.12]
 64%|██████▍   | 32/50 [00:42<00:19,  1.11s/it, epoch=1/100, avg_epoch_loss=1.29]
 44%|████▍     | 22/50 [00:31<00:38,  1.36s/it, epoch=1/100, avg_epoch_loss=1.17]
 92%|█████████▏| 46/50 [00:55<00:04,  1.05s/it, epoch=1/100, avg_e