In [1]:
import os
os.chdir('/home/reffert/DeepAR_InfluenzaForecast')
from PythonFiles.model import model, preprocessing, split_forecasts_by_week, plot_coverage, print_forecasts_by_week, forecast_by_week, train_test_split, update_deepAR_parameters
from PythonFiles.Configuration import Configuration
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.mx import Trainer
from gluonts.evaluation import make_evaluation_predictions
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.rolling_dataset import generate_rolling_dataset,StepStrategy
from gluonts.evaluation import make_evaluation_predictions, Evaluator
import ray
from ray.air import session
from ray import tune
from gluonts.mx.distribution import NegativeBinomialOutput
# --- Configuration: training / test window --------------------------------
config = Configuration()
config.train_start_time = datetime(year=1999, month=1, day=1)  # alternative start: datetime(2010,1,1,0,0,0)
config.train_end_time = datetime(year=2016, month=9, day=30, hour=23)
config.test_end_time = datetime(year=2018, month=9, day=30, hour=23)

# --- Raw inputs ------------------------------------------------------------
influenza_df = pd.read_csv(
    "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/influenza.csv",
    sep=',',
)
population_df = pd.read_csv(
    "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/PopulationVector.csv",
    sep=',',
)
neighbourhood_df = pd.read_csv(
    "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/AdjacentMatrix.csv",
    sep=',',
    index_col=0,
)

# --- Containers filled here or by later cells ------------------------------
overall_evaluation_df = pd.DataFrame()
data_splits_dict = {}
output_dict = {}

# --- Build the modelling frame ---------------------------------------------
locations = list(influenza_df.location.unique())

# Keep only the columns needed downstream and let `preprocessing` return the
# "corrected_df" variant of the frame (project helper; the original comment
# describes its result as a uniformly spaced df).
df = preprocessing(
    config,
    influenza_df.loc[influenza_df.location.isin(locations), ['value', 'location', 'date', 'week']],
    check_count=False,
    output_type="corrected_df",
)

# Attach two groups of per-location columns to every row of that location:
#   * "population": the value of the "2011" column of population_df
#   * one column per entry of `locations`: that location's row of neighbourhood_df
#     (presumably an adjacency indicator, judging by the file name - confirm)
for location in locations:
    location_rows = df.location == location  # row mask, shared by both assignments
    population_2011 = int(population_df.loc[population_df.Location == location, "2011"].values[0])
    adjacency_row = neighbourhood_df.loc[neighbourhood_df.index == location, locations].values[0].astype(int)
    df.loc[location_rows, "population"] = population_2011
    df.loc[location_rows, locations] = adjacency_row

# Data split WITH the additional feature columns (third argument True).
# NOTE(review): the key says "2001" although config.train_start_time is 1999-01-01 - confirm the intended label.
# Disabled variant without additional features:
#data_splits_dict["without_features_2001"] = list(train_test_split(config, df, False))
data_splits_dict["with_features_2001"] = list(train_test_split(config, df, True))

# Change the beginning of the training period
#config.train_start_time = datetime(2010,1,1,0,0,0)
#data_splits_dict["without_features_2010"] = list(train_test_split(config, df, False))
#data_splits_dict["with_features_2010"] = list(train_test_split(config, df, True))



In [2]:
def update_FNN_parameters(config, new_parameters):
    '''
    Build a SimpleFeedForwardEstimator from the FNN defaults plus overrides.

    The defaults are read from config.fnnparameters. They are copied first, so
    overriding a top-level key here does not write back into the Configuration
    object (the copy is shallow; assumes fnnparameters is a dict - confirm).

    Parameters
    ----------
    config : object exposing a dict-like attribute `fnnparameters`.
    new_parameters : dict mapping parameter names to replacement values.
        Only keys that already exist in config.fnnparameters are applied;
        any other key is reported on stdout and ignored.

    Returns
    -------
    The SimpleFeedForwardEstimator constructed from the merged parameters.

    Raises
    ------
    KeyError if config.fnnparameters lacks one of the keys read below
    (num_hidden_dimensions, prediction_length, context_length, distr_output,
    batch_size, batch_normalization, epochs, num_batches_per_epoch).
    '''
    parameters = config.fnnparameters.copy()
    for key, value in new_parameters.items():
        if key in parameters:
            parameters[key] = value
        else:
            # Unknown keys are skipped on purpose: the default value stays in effect.
            print(f"This key {key} isn't available in config.fnnparameters! Thus the default config will maintain.")

    fnnestimator = SimpleFeedForwardEstimator(num_hidden_dimensions=parameters["num_hidden_dimensions"],
                                              prediction_length=parameters["prediction_length"],
                                              context_length=parameters["context_length"],
                                              distr_output=parameters["distr_output"],
                                              batch_size=parameters["batch_size"],
                                              batch_normalization=parameters["batch_normalization"],
                                              trainer=Trainer(epochs=parameters["epochs"],
                                                              num_batches_per_epoch=parameters["num_batches_per_epoch"],
                                                             ),
                                              )
    return fnnestimator

def evaluate(config, train, test, configuration, num_series=411):
    '''
    Train an FNN estimator with the given overrides and return its mean WIS.

    Parameters
    ----------
    config : dict of hyperparameter overrides forwarded to
        update_FNN_parameters (in this notebook: the trial config supplied
        by Ray Tune).
    train : training dataset passed to the estimator's train().
    test : dataset passed to make_evaluation_predictions.
    configuration : Configuration-like object; provides the FNN defaults
        (through update_FNN_parameters) and `quantiles` for the Evaluator.
    num_series : divisor used together with the prediction length to
        normalise the aggregated quantile loss. Defaults to 411, the value
        that was previously hard-coded (presumably the number of locations
        in the data set - confirm).

    Returns
    -------
    agg_metrics["mean_absolute_QuantileLoss"] / (prediction_length * num_series)
    '''
    fnnestimator = update_FNN_parameters(configuration, config)

    predictor = fnnestimator.train(train)

    forecast_it, ts_it = make_evaluation_predictions(dataset=test, predictor=predictor, num_samples=100)
    forecasts = list(forecast_it)
    tss = list(ts_it)

    # Evaluation with the quantiles of the configuration and calculation of the mean_WIS
    evaluator = Evaluator(quantiles=configuration.quantiles)
    agg_metrics = evaluator(tss, forecasts)[0]

    # update_FNN_parameters silently keeps the default for keys missing from
    # `config`, so "prediction_length" may legitimately be absent here. In that
    # case use the horizon of the trained predictor instead of raising KeyError.
    if "prediction_length" in config:
        prediction_length = config["prediction_length"]
    else:
        prediction_length = predictor.prediction_length

    mean_WIS = agg_metrics["mean_absolute_QuantileLoss"] / (prediction_length * num_series)
    return mean_WIS

def objectiveFNN(config, train, test, configuration):
    '''
    Trainable for Ray Tune: score one hyperparameter set and report it.

    Calls evaluate() with the arguments unchanged and reports the returned
    value under the metric name "mean_WIS" via session.report. Returns None.
    '''
    session.report({"mean_WIS": evaluate(config, train, test, configuration)})

In [None]:
# Candidate values per hyperparameter. Every entry becomes a grid-search axis,
# so the full grid has 3*3*3*3*3*2*3 = 1458 combinations.
grid_candidates = {
    "num_hidden_dimensions": [[5], [10], [20]],
    "context_length": [4, 52, 104],
    "epochs": [5, 10, 20],
    "prediction_length": [4, 52, 104],
    "num_batches_per_epoch": [25, 50, 100],
    "batch_normalization": [True, False],
    "batch_size": [20, 32, 64],
}
hp_search_space = {
    name: tune.grid_search(values) for name, values in grid_candidates.items()
}

# Train / test parts of the split that includes the additional features.
fnn_split = data_splits_dict["with_features_2001"]
train = fnn_split[0]
test = fnn_split[1]
configuration = Configuration()

# Bind the data and the Configuration to the trainable so that Ray Tune only
# has to supply the per-trial `config`.
trainable = tune.with_parameters(
    objectiveFNN, train=train, test=test, configuration=configuration
)
# NOTE(review): per the Ray Tune docs, num_samples repeats a grid_search space,
# so num_samples=5 would schedule the whole grid five times - confirm intended.
search_settings = tune.TuneConfig(
    num_samples=5,
    metric="mean_WIS",
    mode="min",
    max_concurrent_trials=8,
)
tuner = tune.Tuner(
    trainable,
    tune_config=search_settings,
    param_space=hp_search_space,
)
results = tuner.fit()

best_config = results.get_best_result().config
print("Best hyperparameters found were: ", best_config)

# Persist the per-trial results table next to the notebook.
results_df = results.get_dataframe()
print(results_df)
results_df.to_csv("FNN_Hyperparameter_results_15_05.csv")

2023-05-15 08:37:52,845	INFO worker.py:1553 -- Started a local Ray instance.


0,1
Current time:,2023-05-15 10:04:29
Running for:,01:26:28.76
Memory:,62.2/236.0 GiB

Trial name,status,loc,batch_normalization,batch_size,context_length,epochs,num_batches_per_epoc h,num_hidden_dimension s,prediction_length,iter,total time (s),mean_WIS
objectiveFNN_07888_00100,RUNNING,172.22.1.197:3099262,True,64,52,20,50,[5],4,,,
objectiveFNN_07888_00101,RUNNING,172.22.1.197:3099061,False,64,52,20,50,[5],4,,,
objectiveFNN_07888_00104,RUNNING,172.22.1.197:3099268,True,32,104,20,50,[5],4,,,
objectiveFNN_07888_00105,RUNNING,172.22.1.197:3099264,False,32,104,20,50,[5],4,,,
objectiveFNN_07888_00106,RUNNING,172.22.1.197:3099272,True,64,104,20,50,[5],4,,,
objectiveFNN_07888_00107,RUNNING,172.22.1.197:3099266,False,64,104,20,50,[5],4,,,
objectiveFNN_07888_00108,RUNNING,172.22.1.197:3099260,True,20,4,5,100,[5],4,,,
objectiveFNN_07888_00109,RUNNING,172.22.1.197:3099270,False,20,4,5,100,[5],4,,,
objectiveFNN_07888_00000,TERMINATED,172.22.1.197:3099061,True,20,4,5,25,[5],4,1.0,129.29,723.72
objectiveFNN_07888_00001,TERMINATED,172.22.1.197:3099260,False,20,4,5,25,[5],4,1.0,117.361,734.446


  0%|          | 0/25 [00:00<?, ?it/s]m 
  0%|          | 0/25 [00:00<?, ?it/s]m 
  0%|          | 0/25 [00:00<?, ?it/s]m 
  0%|          | 0/25 [00:00<?, ?it/s]m 
  0%|          | 0/25 [00:00<?, ?it/s]m 
  0%|          | 0/25 [00:00<?, ?it/s]m 
  0%|          | 0/25 [00:00<?, ?it/s]m 
  0%|          | 0/25 [00:00<?, ?it/s]m 
 44%|████▍     | 11/25 [00:10<00:13,  1.04it/s, epoch=1/5, avg_epoch_loss=2.06]
 32%|███▏      | 8/25 [00:10<00:21,  1.25s/it, epoch=1/5, avg_epoch_loss=1.41]
 52%|█████▏    | 13/25 [00:10<00:09,  1.22it/s, epoch=1/5, avg_epoch_loss=1.29]
 52%|█████▏    | 13/25 [00:10<00:09,  1.28it/s, epoch=1/5, avg_epoch_loss=3.38]
 56%|█████▌    | 14/25 [00:10<00:08,  1.31it/s, epoch=1/5, avg_epoch_loss=2.86]
 36%|███▌      | 9/25 [00:11<00:19,  1.22s/it, epoch=1/5, avg_epoch_loss=1.63]
 20%|██        | 5/25 [00:11<00:45,  2.27s/it, epoch=1/5, avg_epoch_loss=1.86]
 16%|█▌        | 4/25 [00:11<01:00,  2.87s/it, epoch=1/5, avg_epoch_loss=1.21]
100%|██████████| 25/25 [00:18<00:00,

Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,mean_WIS,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
objectiveFNN_07888_00000,2023-05-15_08-40-13,True,,dcb02b9c51ad4282ae036e453da1cfc4,"0_batch_normalization=True,batch_size=20,context_length=4,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,723.72,172.22.1.197,3099061,129.29,129.29,129.29,1684132813,0,,1,07888_00000,0.00727654
objectiveFNN_07888_00001,2023-05-15_08-40-06,True,,b49d4aac74924e7c9a6a5aa187ef3f88,"1_batch_normalization=False,batch_size=20,context_length=4,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,734.446,172.22.1.197,3099260,117.361,117.361,117.361,1684132806,0,,1,07888_00001,0.00570297
objectiveFNN_07888_00002,2023-05-15_08-40-54,True,,a41809fbf41d4bc6bd6cbdfaf756c97e,"2_batch_normalization=True,batch_size=32,context_length=4,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,768.128,172.22.1.197,3099262,164.928,164.928,164.928,1684132854,0,,1,07888_00002,0.00533438
objectiveFNN_07888_00003,2023-05-15_08-41-00,True,,f8c897c29f534d21bc292b01cdea9aa5,"3_batch_normalization=False,batch_size=32,context_length=4,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,754.526,172.22.1.197,3099264,171.071,171.071,171.071,1684132860,0,,1,07888_00003,0.00530243
objectiveFNN_07888_00004,2023-05-15_08-42-28,True,,7b5a5cc7eabc4a12ae63603d196a6fcd,"4_batch_normalization=True,batch_size=64,context_length=4,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,773.727,172.22.1.197,3099266,259.058,259.058,259.058,1684132948,0,,1,07888_00004,0.0058465
objectiveFNN_07888_00005,2023-05-15_08-42-17,True,,3102a4e228264305b8bd8dffd9903040,"5_batch_normalization=False,batch_size=64,context_length=4,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,764.653,172.22.1.197,3099268,248.149,248.149,248.149,1684132937,0,,1,07888_00005,0.00515771
objectiveFNN_07888_00006,2023-05-15_08-40-13,True,,a880be4acce1434fbd53b7aa8b9d74a6,"6_batch_normalization=True,batch_size=20,context_length=52,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,705.844,172.22.1.197,3099270,124.431,124.431,124.431,1684132813,0,,1,07888_00006,0.00559187
objectiveFNN_07888_00007,2023-05-15_08-40-21,True,,be10275c4cb44cdeaadbe1682a31e892,"7_batch_normalization=False,batch_size=20,context_length=52,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,744.312,172.22.1.197,3099272,132.09,132.09,132.09,1684132821,0,,1,07888_00007,0.00540161
objectiveFNN_07888_00008,2023-05-15_08-42-52,True,,b49d4aac74924e7c9a6a5aa187ef3f88,"8_batch_normalization=True,batch_size=32,context_length=52,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,784.187,172.22.1.197,3099260,165.669,165.669,165.669,1684132972,0,,1,07888_00008,0.00570297
objectiveFNN_07888_00009,2023-05-15_08-43-04,True,,a880be4acce1434fbd53b7aa8b9d74a6,"9_batch_normalization=False,batch_size=32,context_length=52,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,709.727,172.22.1.197,3099270,170.487,170.487,170.487,1684132984,0,,1,07888_00009,0.00559187


  0%|          | 0/25 [00:00<?, ?it/s]m 
 76%|███████▌  | 19/25 [00:54<00:20,  3.40s/it, epoch=3/5, avg_epoch_loss=1.44]
[2m[36m(objectiveFNN pid=3099270)[0m   return arr.astype(dtype, copy=True)
[2m[36m(objectiveFNN pid=3099061)[0m   return arr.astype(dtype, copy=True)
100%|██████████| 25/25 [01:02<00:00,  2.49s/it, epoch=3/5, avg_epoch_loss=1.47]
  0%|          | 0/25 [00:00<?, ?it/s]m 
  0%|          | 0/25 [00:00<?, ?it/s]m 
  0%|          | 0/25 [00:00<?, ?it/s]m 
 20%|██        | 5/25 [00:10<00:42,  2.11s/it, epoch=1/5, avg_epoch_loss=4.58]
[2m[36m(objectiveFNN pid=3099272)[0m   return arr.astype(dtype, copy=True)
  0%|          | 0/25 [00:00<?, ?it/s]m 
100%|██████████| 25/25 [01:05<00:00,  2.64s/it, epoch=3/5, avg_epoch_loss=1.37]
  0%|          | 0/25 [00:00<?, ?it/s]m 
 24%|██▍       | 6/25 [00:10<00:31,  1.68s/it, epoch=1/5, avg_epoch_loss=3.82]
 28%|██▊       | 7/25 [00:11<00:28,  1.58s/it, epoch=4/5, avg_epoch_loss=1.15]
Running evaluation: 0it [00:00, ?it/s]m 
Ru