In [1]:
import os
os.chdir('/home/reffert/DeepAR_InfluenzaForecast')
from PythonFiles.model import model, preprocessing, split_forecasts_by_week, plot_coverage, print_forecasts_by_week, forecast_by_week, train_test_split, update_deepAR_parameters
from PythonFiles.Configuration import Configuration
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.mx import Trainer
from gluonts.evaluation import make_evaluation_predictions
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.rolling_dataset import generate_rolling_dataset,StepStrategy
from gluonts.evaluation import make_evaluation_predictions, Evaluator
import ray
from ray.air import session
from ray import tune
from gluonts.mx.distribution import NegativeBinomialOutput
# --- Cell purpose: load the raw inputs, fix the train/test window on `config`,
# --- attach static per-location columns to the case data and build the data split.
config = Configuration()
# Raw inputs: influenza case data, a population table and an adjacency matrix.
# The adjacency matrix is read with its first column as index so that rows can be
# looked up by location name further below.
influenza_df = pd.read_csv("/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/influenza.csv", sep=',')
population_df = pd.read_csv("/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/PopulationVector.csv", sep=',')
neighbourhood_df = pd.read_csv("/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/AdjacentMatrix.csv", sep=',', index_col=0)

# Override the time window stored on the configuration object. These attributes are
# set before `preprocessing` / `train_test_split` receive `config`
# (presumably those helpers read them -- confirm in PythonFiles.model).
config.train_start_time = datetime(1999,1,1,0,0,0)  # alternative start used in other experiments: datetime(2010,1,1,0,0,0)
config.train_end_time = datetime(2016,9,30,23,0,0)
config.test_end_time = datetime(2018,9,30,23,0,0)
# Empty containers; not filled by any cell visible in this part of the notebook.
overall_evaluation_df = pd.DataFrame()

data_splits_dict = {}
output_dict = {}

# All distinct location names found in the case data.
locations = list(influenza_df.location.unique())
# Keep only the columns handed to `preprocessing`; according to the original note the
# helper turns the frame into a uniformly spaced one (its internals are not visible here).
df = influenza_df.loc[influenza_df.location.isin(locations), ['value', 'location', 'date','week']]
df = preprocessing(config, df, check_count=False, output_type="corrected_df")
# Attach static columns to every row of a location:
#   * "population": the location's value from the "2011" column of population_df, cast to int
#   * one column per location name: the location's row of the adjacency matrix, cast to int
# NOTE(review): `.values[0]` raises IndexError if a location is missing from
# population_df or neighbourhood_df.
for location in locations:
    df.loc[df.location == location, "population"] = int(population_df.loc[population_df.Location == location, "2011"].values[0])
    df.loc[df.location == location, locations] = neighbourhood_df.loc[neighbourhood_df.index==location,locations].values[0].astype(int)

# Data split for the training window configured above. Only the variant with the third
# argument set to True is active (stored under the "with_features" key); the variant
# with False is kept below as a disabled alternative.
# NOTE(review): the key says "2001" although train_start_time above is 1999 -- confirm which is intended.
#data_splits_dict["without_features_2001"] = list(train_test_split(config, df, False))
data_splits_dict["with_features_2001"] = list(train_test_split(config, df, True))

# Disabled alternative: move the beginning of the training period to 2010 and
# create the corresponding splits.
#config.train_start_time = datetime(2010,1,1,0,0,0)
#data_splits_dict["without_features_2010"] = list(train_test_split(config, df, False))
#data_splits_dict["with_features_2010"] = list(train_test_split(config, df, True))



In [2]:
def update_FNN_parameters(config, new_parameters):
    '''
    Build a SimpleFeedForwardEstimator from the FNN defaults plus overrides.

    The defaults are read from ``config.fnnparameters``. Overrides are applied to a
    copy of that dict, so the configuration object itself is not modified.

    Parameters
    ----------
    config : object
        Must expose an ``fnnparameters`` dict containing the keys
        "num_hidden_dimensions", "prediction_length", "context_length",
        "distr_output", "batch_size", "batch_normalization", "epochs" and
        "num_batches_per_epoch".
    new_parameters : dict
        Values that replace the defaults. Only keys that already exist in
        ``config.fnnparameters`` are applied; any other key is reported on stdout
        and ignored.

    Returns
    -------
    SimpleFeedForwardEstimator
        An untrained estimator configured with the merged parameters.
    '''
    parameters = config.fnnparameters.copy()
    for key, value in new_parameters.items():
        if key in parameters:
            parameters[key] = value
        else:
            # Unknown keys are deliberately non-fatal: the default stays in place.
            print(f"This key {key} isn't available in config.fnnparameters! Thus the default config will maintain.")

    trainer = Trainer(epochs=parameters["epochs"],
                      num_batches_per_epoch=parameters["num_batches_per_epoch"],
                      )
    return SimpleFeedForwardEstimator(num_hidden_dimensions=parameters["num_hidden_dimensions"],
                                      prediction_length=parameters["prediction_length"],
                                      context_length=parameters["context_length"],
                                      distr_output=parameters["distr_output"],
                                      batch_size=parameters["batch_size"],
                                      batch_normalization=parameters["batch_normalization"],
                                      trainer=trainer,
                                      )

def evaluate(config, train, test, configuration, num_series=411):
    '''
    Train an FNN with the given hyperparameters and score it on the test data.

    Parameters
    ----------
    config : dict
        Hyperparameter overrides for this run (keys of ``configuration.fnnparameters``).
        Note the naming: this is the trial's parameter dict, not the Configuration object.
    train :
        Dataset passed to the estimator's ``train`` method.
    test :
        Dataset passed to ``make_evaluation_predictions``.
    configuration : object
        Supplies the default ``fnnparameters`` and the ``quantiles`` used by the Evaluator.
    num_series : int, optional
        Second factor of the normalisation divisor. Defaults to 411, the value that
        was previously hard-coded (presumably the number of locations -- confirm).

    Returns
    -------
    float
        ``mean_absolute_QuantileLoss / (prediction_length * num_series)``.
    '''
    fnnestimator = update_FNN_parameters(configuration, config)
    predictor = fnnestimator.train(train)

    forecast_it, ts_it = make_evaluation_predictions(dataset=test, predictor=predictor, num_samples=100)
    forecasts = list(forecast_it)
    tss = list(ts_it)

    # Evaluation with the quantiles of the configuration and calculation of the mean_WIS
    evaluator = Evaluator(quantiles=configuration.quantiles)
    agg_metrics = evaluator(tss, forecasts)[0]

    # Normalise with the prediction length the estimator was actually built with:
    # the trial override when present, otherwise the configured default. Indexing
    # `config` directly would raise KeyError for trials that do not tune this key.
    if "prediction_length" in config:
        prediction_length = config["prediction_length"]
    else:
        prediction_length = configuration.fnnparameters["prediction_length"]

    mean_WIS = agg_metrics["mean_absolute_QuantileLoss"] / (prediction_length * num_series)
    return mean_WIS

def objectiveFNN(config, train, test, configuration):
    '''
    Trainable for the tuner: score one hyperparameter set and report the result.

    The score returned by ``evaluate`` is sent to the tuning session under the
    metric name "mean_WIS". The function itself returns nothing.
    '''
    session.report({"mean_WIS": evaluate(config, train, test, configuration)})

In [None]:
# Search space: every hyperparameter is swept exhaustively via grid_search.
hp_search_space = dict(
    num_hidden_dimensions=tune.grid_search([[5], [10], [20]]),
    context_length=tune.grid_search([4, 52, 104]),
    epochs=tune.grid_search([5, 10, 20]),
    prediction_length=tune.grid_search([4, 52, 104]),
    num_batches_per_epoch=tune.grid_search([25, 50, 100]),
    batch_normalization=tune.grid_search([True, False]),
    batch_size=tune.grid_search([20, 32, 64]),
)

# First two entries of the prepared split are used as training and test data.
train, test = data_splits_dict["with_features_2001"][:2]
configuration = Configuration()

# The datasets and the configuration are bound to the trainable with
# tune.with_parameters; only the hyperparameters vary between trials.
# Per the Ray Tune docs, num_samples repeats a pure grid search that many times.
tuner = tune.Tuner(
    tune.with_parameters(
        objectiveFNN,
        train=train,
        test=test,
        configuration=configuration,
    ),
    param_space=hp_search_space,
    tune_config=tune.TuneConfig(
        metric="mean_WIS",
        mode="min",
        num_samples=5,
        max_concurrent_trials=10,
    ),
)
results = tuner.fit()

print("Best hyperparameters found were: ", results.get_best_result().config)

# Persist the per-trial results next to the notebook.
results_df = results.get_dataframe()
print(results_df)
results_df.to_csv("FNN_Hyperparameter_results_12_05.csv")

2023-05-12 18:36:17,852	INFO worker.py:1553 -- Started a local Ray instance.


0,1
Current time:,2023-05-12 19:22:58
Running for:,00:46:34.65
Memory:,136.4/236.0 GiB

Trial name,status,loc,batch_normalization,batch_size,context_length,epochs,num_batches_per_epoch,num_hidden_dimensions,prediction_length,iter,total time (s),mean_WIS
objectiveFNN_21ea7_00052,RUNNING,172.22.1.197:715699,True,64,104,20,25,[5],4,,,
objectiveFNN_21ea7_00053,RUNNING,172.22.1.197:715707,False,64,104,20,25,[5],4,,,
objectiveFNN_21ea7_00064,RUNNING,172.22.1.197:715693,True,64,52,5,50,[5],4,,,
objectiveFNN_21ea7_00065,RUNNING,172.22.1.197:715598,False,64,52,5,50,[5],4,,,
objectiveFNN_21ea7_00068,RUNNING,172.22.1.197:715697,True,32,104,5,50,[5],4,,,
objectiveFNN_21ea7_00069,RUNNING,172.22.1.197:715701,False,32,104,5,50,[5],4,,,
objectiveFNN_21ea7_00070,RUNNING,172.22.1.197:715703,True,64,104,5,50,[5],4,,,
objectiveFNN_21ea7_00071,RUNNING,172.22.1.197:715705,False,64,104,5,50,[5],4,,,
objectiveFNN_21ea7_00072,RUNNING,172.22.1.197:715691,True,20,4,10,50,[5],4,,,
objectiveFNN_21ea7_00073,RUNNING,172.22.1.197:715695,False,20,4,10,50,[5],4,,,


  0%|          | 0/25 [00:00<?, ?it/s] 
  0%|          | 0/25 [00:00<?, ?it/s] 
  0%|          | 0/25 [00:00<?, ?it/s] 
  0%|          | 0/25 [00:00<?, ?it/s] 
  0%|          | 0/25 [00:00<?, ?it/s] 
  0%|          | 0/25 [00:00<?, ?it/s] 
  0%|          | 0/25 [00:00<?, ?it/s] 
  0%|          | 0/25 [00:00<?, ?it/s] 
  0%|          | 0/25 [00:00<?, ?it/s] 
  0%|          | 0/25 [00:00<?, ?it/s] 
 44%|████▍     | 11/25 [00:10<00:12,  1.09it/s, epoch=1/5, avg_epoch_loss=1.29]
 52%|█████▏    | 13/25 [00:10<00:09,  1.28it/s, epoch=1/5, avg_epoch_loss=1.37]
 52%|█████▏    | 13/25 [00:10<00:09,  1.28it/s, epoch=1/5, avg_epoch_loss=3.59]
 48%|████▊     | 12/25 [00:10<00:11,  1.18it/s, epoch=1/5, avg_epoch_loss=4.17]
 32%|███▏      | 8/25 [00:10<00:21,  1.27s/it, epoch=1/5, avg_epoch_loss=1.63]
 32%|███▏      | 8/25 [00:10<00:21,  1.29s/it, epoch=1/5, avg_epoch_loss=3.69]
 16%|█▌        | 4/25 [00:10<00:55,  2.63s/it, epoch=1/5, avg_epoch_loss=1.24]
 36%|███▌      | 9/25 [00:11<00:20,  1.29s/

Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,mean_WIS,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
objectiveFNN_21ea7_00000,2023-05-12_18-38-52,True,,b7c681a606d54d8586222bf5b5fa79cf,"0_batch_normalization=True,batch_size=20,context_length=4,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,717.115,172.22.1.197,715598,144.652,144.652,144.652,1683909532,0,,1,21ea7_00000,0.00603008
objectiveFNN_21ea7_00001,2023-05-12_18-38-59,True,,882278c1249e49b6b46f579a4e0b1947,"1_batch_normalization=False,batch_size=20,context_length=4,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,738.678,172.22.1.197,715691,147.416,147.416,147.416,1683909539,0,,1,21ea7_00001,0.00529432
objectiveFNN_21ea7_00002,2023-05-12_18-39-47,True,,b3df4ced9a844ba8a972a507f064e3f8,"2_batch_normalization=True,batch_size=32,context_length=4,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,782.791,172.22.1.197,715693,195.145,195.145,195.145,1683909587,0,,1,21ea7_00002,0.00548911
objectiveFNN_21ea7_00003,2023-05-12_18-40-03,True,,f3bea6905c434efea152c74b93670117,"3_batch_normalization=False,batch_size=32,context_length=4,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,756.077,172.22.1.197,715695,211.043,211.043,211.043,1683909603,0,,1,21ea7_00003,0.00589633
objectiveFNN_21ea7_00004,2023-05-12_18-41-33,True,,4e3ff4e32ca748288ab5b32e8c01e3c8,"4_batch_normalization=True,batch_size=64,context_length=4,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,730.252,172.22.1.197,715697,301.106,301.106,301.106,1683909693,0,,1,21ea7_00004,0.00532126
objectiveFNN_21ea7_00005,2023-05-12_18-42-01,True,,5187ca9dc20a4901a77de82e75725891,"5_batch_normalization=False,batch_size=64,context_length=4,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,742.673,172.22.1.197,715699,328.852,328.852,328.852,1683909721,0,,1,21ea7_00005,0.00515485
objectiveFNN_21ea7_00006,2023-05-12_18-38-40,True,,206cd563d601411ca82066a9431102fa,"6_batch_normalization=True,batch_size=20,context_length=52,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,779.026,172.22.1.197,715701,127.671,127.671,127.671,1683909520,0,,1,21ea7_00006,0.00483036
objectiveFNN_21ea7_00007,2023-05-12_18-38-59,True,,89b700de0de449c5b80a343324a93315,"7_batch_normalization=False,batch_size=20,context_length=52,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,805.93,172.22.1.197,715703,146.523,146.523,146.523,1683909539,0,,1,21ea7_00007,0.0051353
objectiveFNN_21ea7_00008,2023-05-12_18-39-59,True,,7c9443386fd44d3590252e9fa1d08f42,"8_batch_normalization=True,batch_size=32,context_length=52,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,701.978,172.22.1.197,715705,207.103,207.103,207.103,1683909599,0,,1,21ea7_00008,0.00540805
objectiveFNN_21ea7_00009,2023-05-12_18-39-32,True,,5faa15eb465149e280405e9fd4bedead,"9_batch_normalization=False,batch_size=32,context_length=52,epochs=5,num_batches_per_epoch=25,num_hidden_dimensions=5,prediction_length=4",econ-stat-rr01,1,765.949,172.22.1.197,715707,179.709,179.709,179.709,1683909572,0,,1,21ea7_00009,0.00560713


  0%|          | 0/25 [00:00<?, ?it/s] 
 56%|█████▌    | 14/25 [01:04<00:43,  3.99s/it, epoch=3/5, avg_epoch_loss=1.37]
 64%|██████▍   | 16/25 [01:07<00:50,  5.60s/it, epoch=3/5, avg_epoch_loss=1.36]
 72%|███████▏  | 18/25 [01:07<00:27,  3.97s/it, epoch=3/5, avg_epoch_loss=1.27]
[2m[36m(objectiveFNN pid=715598)[0m   return arr.astype(dtype, copy=True)
  8%|▊         | 2/25 [00:11<02:07,  5.52s/it, epoch=1/5, avg_epoch_loss=4.44]
  0%|          | 0/25 [00:00<?, ?it/s] 
[2m[36m(objectiveFNN pid=715703)[0m   return arr.astype(dtype, copy=True)
 72%|███████▏  | 18/25 [01:18<00:38,  5.53s/it, epoch=3/5, avg_epoch_loss=1.32]
[2m[36m(objectiveFNN pid=715691)[0m   return arr.astype(dtype, copy=True)
 88%|████████▊ | 22/25 [01:18<00:10,  3.51s/it, epoch=3/5, avg_epoch_loss=1.3] 
Running evaluation: 0it [00:00, ?it/s] 
  0%|          | 0/25 [00:00<?, ?it/s] 
  0%|          | 0/25 [00:00<?, ?it/s] 
Running evaluation: 41922it [00:01, 26225.08it/s]
  8%|▊         | 2/25 [00:21<02:07,  5.5