In [1]:
import os
os.chdir('/home/reffert/DeepAR_InfluenzaForecast')
from PythonFiles.model import model, preprocessing, split_forecasts_by_week, \
forecast_by_week, train_test_split,update_deepAR_parameters, \
generate_model_results_by_hp_dict
from PythonFiles.PlottingFunctions import plot_coverage, print_forecasts_by_week, hyperparameter_boxplots, plot_model_results_by_hp, hp_color_plot
from PythonFiles.Configuration import Configuration
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from gluonts.mx import Trainer, DeepAREstimator
from gluonts.evaluation import make_evaluation_predictions, Evaluator
import ray
from ray.air import session
from ray import tune
from gluonts.mx.distribution import NegativeBinomialOutput
# Shared project configuration object; its time boundaries are set in the next cell.
config = Configuration()

# Input tables, all read from the project's DataProcessing folder.
influenza_df = pd.read_csv(
    "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/influenza.csv",
    sep=",",
)
population_df = pd.read_csv(
    "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/PopulationVector.csv",
    sep=",",
)
# The first CSV column becomes the index of this table (it is matched against
# location names further down).
neighbourhood_df = pd.read_csv(
    "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/AdjacentMatrix.csv",
    sep=",",
    index_col=0,
)

In [2]:
# Time boundaries of the experiment, stored on the shared config object.
# (Alternative training start that was tried: datetime(2010,1,1,0,0,0).)
config.train_start_time = datetime(1999, 1, 1, 0, 0, 0)
config.train_end_time = datetime(2016, 9, 30, 23, 0, 0)
config.test_end_time = datetime(2018, 9, 30, 23, 0, 0)

# Empty containers for later use.
overall_evaluation_df = pd.DataFrame()
data_splits_dict = {}
output_dict = {}

# Every distinct location present in the influenza table.
locations = list(influenza_df.location.unique())

# Process the df into a uniformly spaced df
selected_columns = ['value', 'location', 'date', 'week']
df = influenza_df.loc[influenza_df.location.isin(locations), selected_columns]
df = preprocessing(config, df, check_count=False, output_type="corrected_df")

# Attach per-location features: the "2011" population value and that location's
# row of the neighbourhood table (one column per location).
for location in locations:
    location_rows = df.location == location

    population_value = population_df.loc[population_df.Location == location, "2011"].values[0]
    df.loc[location_rows, "population"] = int(population_value)

    neighbour_row = neighbourhood_df.loc[neighbourhood_df.index == location, locations].values[0]
    df.loc[location_rows, locations] = neighbour_row.astype(int)

In [3]:
# Build the train/test split stored under the "with_features_2001" key.
# The third argument presumably switches the additional features on -- confirm
# against train_test_split.
# NOTE(review): the key is suffixed "2001", but config.train_start_time is set
# to datetime(1999,1,1,...) in the previous cell -- confirm the intended start year.
#data_splits_dict["without_features_2001"] = list(train_test_split(config, df, False))
with_features_split = train_test_split(config, df, True)
data_splits_dict["with_features_2001"] = list(with_features_split)

# Disabled variants: the same splits with the training period starting in 2010.
#config.train_start_time = datetime(2010,1,1,0,0,0)
#data_splits_dict["without_features_2010"] = list(train_test_split(config, df, False))
#data_splits_dict["with_features_2010"] = list(train_test_split(config, df, True))

In [4]:
def evaluate(config, train, test, configuration, num_series=411):
    """Fit and score one DeepAR setup, returning its normalised quantile loss.

    Parameters
    ----------
    config
        Hyperparameter values for this run; passed on to
        ``update_deepAR_parameters``. When reached through ``objective`` under
        Ray Tune, this is the configuration of the current trial.
    train, test
        Training and test datasets handed to ``model``.
    configuration
        Project configuration object. Supplies ``quantiles`` for the evaluator
        and ``parameters["prediction_length"]`` for the normalisation.
    num_series : int, optional
        Number of series the aggregated loss is averaged over. Defaults to 411,
        the value that used to be hard-coded here (presumably the number of
        locations -- confirm against the data).

    Returns
    -------
    number
        ``agg_metrics["mean_absolute_QuantileLoss"]`` divided by
        ``prediction_length * num_series`` (reported as ``mean_WIS``).
    """
    deeparestimator = update_deepAR_parameters(configuration, config)
    forecasts, tss = model(train, test, deeparestimator)

    # Evaluate on the quantiles of the configuration; the first element of the
    # evaluator's result holds the aggregated metrics.
    evaluator = Evaluator(quantiles=configuration.quantiles)
    agg_metrics = evaluator(tss, forecasts)[0]

    # Normalise the aggregated quantile loss by horizon length and series count.
    normaliser = configuration.parameters["prediction_length"] * num_series
    mean_WIS = agg_metrics["mean_absolute_QuantileLoss"] / normaliser
    return mean_WIS

def objective(config, train, test, configuration):
    """Trainable for the tuner: score one configuration and report the result.

    Forwards all arguments unchanged to ``evaluate`` and reports the returned
    value through ``session.report`` under the ``"mean_WIS"`` key. Returns
    nothing.
    """
    session.report({"mean_WIS": evaluate(config, train, test, configuration)})

In [None]:
def _static_cat_cardinality(spec):
    """Return a list of 411 twos when static categorical features are enabled, else None."""
    if spec.config.use_feat_static_cat:
        return [2] * 411
    return None


# Grid over the DeepAR hyperparameters; "cardinality" is derived from the
# sampled "use_feat_static_cat" value of the same trial.
hp_search_space = {
    "num_cells": tune.grid_search([10, 60, 140]),
    "num_layers": tune.grid_search([6, 12]),
    "context_length": tune.grid_search([1, 2, 4]),
    "cell_type": tune.grid_search(["lstm"]),
    "epochs": tune.grid_search([90, 140, 200]),
    "use_feat_static_real": tune.grid_search([False]),
    "use_feat_dynamic_real": tune.grid_search([True]),
    "use_feat_static_cat": tune.grid_search([False, True]),
    "cardinality": tune.sample_from(_static_cat_cardinality),
}

# Train/test datasets of the split prepared in an earlier cell.
selected_split = data_splits_dict["with_features_2001"]
train = selected_split[0]
test = selected_split[1]
configuration = Configuration()

# Bind the datasets and the configuration object to the trainable; the tuner
# supplies the per-trial hyperparameters as its first argument.
trainable = tune.with_parameters(
    objective,
    train=train,
    test=test,
    configuration=configuration,
)

# Minimise the reported "mean_WIS".
# NOTE(review): combined with grid_search, num_samples presumably repeats the
# whole grid that many times -- confirm this is intended.
search_settings = tune.TuneConfig(
    num_samples=6,
    metric="mean_WIS",
    mode="min",
    max_concurrent_trials=12,
)

tuner = tune.Tuner(
    trainable,
    tune_config=search_settings,
    param_space=hp_search_space,
)
results = tuner.fit()

best_result = results.get_best_result()
print("Best hyperparameters found were: ", best_result.config)

# Persist the per-trial results table next to the notebook.
results_df = results.get_dataframe()
print(results_df)
results_df.to_csv("Hyperparameter_results_18_05.csv")

2023-05-18 15:58:50,243	INFO worker.py:1553 -- Started a local Ray instance.


0,1
Current time:,2023-05-18 16:21:50
Running for:,00:22:49.11
Memory:,192.2/236.0 GiB

Trial name,status,loc,cardinality,cell_type,context_length,epochs,num_cells,num_layers,use_feat_dynamic_real,use_feat_static_cat,use_feat_static_real,iter,total time (s),mean_WIS
objective_210bc_00006,RUNNING,172.22.1.197:1751273,,lstm,1,200,10,6,True,False,False,,,
objective_210bc_00007,RUNNING,172.22.1.197:1751275,,lstm,2,200,10,6,True,False,False,,,
objective_210bc_00008,RUNNING,172.22.1.197:1751277,,lstm,4,200,10,6,True,False,False,,,
objective_210bc_00009,RUNNING,172.22.1.197:1751279,,lstm,1,90,60,6,True,False,False,,,
objective_210bc_00010,RUNNING,172.22.1.197:1751281,,lstm,2,90,60,6,True,False,False,,,
objective_210bc_00011,RUNNING,172.22.1.197:1751283,,lstm,4,90,60,6,True,False,False,,,
objective_210bc_00012,RUNNING,172.22.1.197:1751100,,lstm,1,140,60,6,True,False,False,,,
objective_210bc_00013,RUNNING,172.22.1.197:1751263,,lstm,2,140,60,6,True,False,False,,,
objective_210bc_00014,RUNNING,172.22.1.197:1751265,,lstm,4,140,60,6,True,False,False,,,
objective_210bc_00015,RUNNING,172.22.1.197:1751267,,lstm,1,200,60,6,True,False,False,,,


  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:06<00:00,  8.09it/s, epoch=1/90, avg_epoch_loss=1.36]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:05<00:00,  9.07it/s, epoch=1/140, avg_epoch_loss=1.23]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:05<00:00,  8.43it/s, epoch=1/200, avg_epoch_loss=1.33]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:05<00:00,  9.59it/s, epoch=2/90, avg_epoch_loss=1.11]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:06<00:00,  7.70it/s, ep

Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,mean_WIS,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
objective_210bc_00000,2023-05-18_16-12-31,True,,1f03a5f9fb7542498fc60c25a6d598df,"0_cardinality=None,cell_type=lstm,context_length=1,epochs=90,num_cells=10,num_layers=6,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=False",econ-stat-rr01,1,419.493,172.22.1.197,1751100,805.744,805.744,805.744,1684419151,0,,1,210bc_00000,0.00747633
objective_210bc_00001,2023-05-18_16-13-20,True,,3e750a982f004249b92ad8d21dc2b17b,"1_cardinality=None,cell_type=lstm,context_length=2,epochs=90,num_cells=10,num_layers=6,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=False",econ-stat-rr01,1,448.353,172.22.1.197,1751263,849.944,849.944,849.944,1684419200,0,,1,210bc_00001,0.00517058
objective_210bc_00002,2023-05-18_16-14-26,True,,806fcca8fb5748e99054055604144e5c,"2_cardinality=None,cell_type=lstm,context_length=4,epochs=90,num_cells=10,num_layers=6,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=False",econ-stat-rr01,1,446.207,172.22.1.197,1751265,916.603,916.603,916.603,1684419266,0,,1,210bc_00002,0.00598931
objective_210bc_00003,2023-05-18_16-17-35,True,,fa020aacbe6c47f99614ce06e8fd2721,"3_cardinality=None,cell_type=lstm,context_length=1,epochs=140,num_cells=10,num_layers=6,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=False",econ-stat-rr01,1,424.466,172.22.1.197,1751267,1104.46,1104.46,1104.46,1684419455,0,,1,210bc_00003,0.00474215
objective_210bc_00004,2023-05-18_16-18-25,True,,cd62ef250fb6452e987df69f417febfb,"4_cardinality=None,cell_type=lstm,context_length=2,epochs=140,num_cells=10,num_layers=6,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=False",econ-stat-rr01,1,426.577,172.22.1.197,1751269,1155.08,1155.08,1155.08,1684419505,0,,1,210bc_00004,0.00607562
objective_210bc_00005,2023-05-18_16-20-50,True,,4708705de7034c57b154eab945a1b072,"5_cardinality=None,cell_type=lstm,context_length=4,epochs=140,num_cells=10,num_layers=6,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=False",econ-stat-rr01,1,490.566,172.22.1.197,1751271,1299.76,1299.76,1299.76,1684419650,0,,1,210bc_00005,0.00516152


  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:06<00:00,  7.40it/s, epoch=138/140, avg_epoch_loss=0.702]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:08<00:00,  5.76it/s, epoch=126/200, avg_epoch_loss=0.621]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:04<00:00, 10.46it/s, epoch=137/200, avg_epoch_loss=0.632]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:06<00:00,  8.00it/s, epoch=123/140, avg_epoch_loss=0.615]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:05<00:00,  9.94it/s, epoch=142/200, avg_epoch_loss=0.641]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:05<00:00,  9.90it/s, epoch=139/140, avg_epoch_loss=0.672]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:05<00:00,  9.00it/s, epoch=127/200, avg_epoch_loss=0.631]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:05<00:00,  9.47it/s, epoch=138/200, avg_epoch_loss=0.616]
  0%|   