In [1]:
import os
os.chdir('/home/reffert/DeepAR_InfluenzaForecast')
from PythonFiles.model import model, preprocessing, split_forecasts_by_week, plot_coverage, print_forecasts_by_week, forecast_by_week, train_test_split, update_deepAR_parameters
from PythonFiles.Configuration import Configuration
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from gluonts.mx import Trainer, DeepAREstimator
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.rolling_dataset import generate_rolling_dataset,StepStrategy
from gluonts.evaluation import make_evaluation_predictions, Evaluator
import ray
from ray.air import session
from ray import tune
from gluonts.mx.distribution import NegativeBinomialOutput
# Project-wide settings object; its train/test time boundaries are overwritten in the next cell.
config = Configuration()

# Directory holding the pre-processed input tables. Defined once so that the
# three reads below cannot drift apart when the data is moved.
DATA_DIR = "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing"

# Case table; the columns 'value', 'location', 'date' and 'week' are used further down.
influenza_df = pd.read_csv(os.path.join(DATA_DIR, "influenza.csv"), sep=',')
# Population table; looked up further down by its 'Location' column and the "2011" column.
population_df = pd.read_csv(os.path.join(DATA_DIR, "PopulationVector.csv"), sep=',')
# Adjacency table; the first CSV column becomes the index, and rows/columns are
# addressed by location name further down.
neighbourhood_df = pd.read_csv(os.path.join(DATA_DIR, "AdjacentMatrix.csv"), sep=',', index_col=0)

In [2]:
# Time boundaries used for the train/test split.
# Alternative training start that was tried: datetime(2010,1,1,0,0,0)
config.train_start_time = datetime(1999, 1, 1, 0, 0, 0)
config.train_end_time = datetime(2016, 9, 30, 23, 0, 0)
config.test_end_time = datetime(2018, 9, 30, 23, 0, 0)

# Empty containers; none of them is populated inside this cell.
overall_evaluation_df = pd.DataFrame()
data_splits_dict = {}
output_dict = {}

# Every distinct location that occurs in the raw data.
locations = list(influenza_df["location"].unique())

# Process the df into a uniformly spaced df
selected_columns = ['value', 'location', 'date', 'week']
df = influenza_df.loc[influenza_df["location"].isin(locations), selected_columns]
df = preprocessing(config, df, check_count=False, output_type="corrected_df")

# Attach two groups of per-location columns to every row of that location:
#   * "population": the location's entry in the "2011" column of population_df
#   * one column per location name: the location's row of neighbourhood_df
for location in locations:
    # Row mask for the current location; computed once and used for both assignments.
    location_rows = df["location"] == location

    population_2011 = population_df.loc[population_df["Location"] == location, "2011"].values[0]
    df.loc[location_rows, "population"] = int(population_2011)

    adjacency_row = neighbourhood_df.loc[neighbourhood_df.index == location, locations].values[0]
    df.loc[location_rows, locations] = adjacency_row.astype(int)

In [3]:
# Build the train/test split of `df` and store it, materialised as a list, under
# the key "with_features_2001". The third argument of train_test_split is True
# here (presumably "include the additional features", judging by the key name —
# confirm in PythonFiles.model).
# NOTE(review): the key says 2001, but config.train_start_time is set to
# datetime(1999,1,1,0,0,0) in the previous cell — confirm which start is intended.
# Disabled variant with the third argument set to False:
#data_splits_dict["without_features_2001"] = list(train_test_split(config, df, False))
data_splits_dict["with_features_2001"] = list(train_test_split(config, df, True))

# Disabled variants: move the beginning of the training period to 2010 and
# build both splits again under "_2010" keys.
#config.train_start_time = datetime(2010,1,1,0,0,0)
#data_splits_dict["without_features_2010"] = list(train_test_split(config, df, False))
#data_splits_dict["with_features_2010"] = list(train_test_split(config, df, True))

In [4]:
def evaluate(config, train, test, configuration, num_series=411):
    """Fit one DeepAR configuration and return its score (called mean_WIS here).

    Parameters
    ----------
    config
        Hyperparameter sample of the current trial. It is handed to
        ``update_deepAR_parameters`` together with ``configuration`` to build
        the estimator. Not to be confused with the module-level ``config``
        object of this notebook.
    train, test
        Datasets forwarded unchanged to ``model``.
    configuration
        Object providing ``quantiles`` (used by the evaluator) and
        ``parameters["prediction_length"]`` (used for normalisation).
    num_series : int, optional
        Divisor component used for normalisation. Defaults to 411, the value
        that was previously hard-coded.

    Returns
    -------
    The aggregated ``mean_absolute_QuantileLoss`` divided by
    ``prediction_length * num_series``.
    """
    deeparestimator = update_deepAR_parameters(configuration, config)
    forecasts, tss = model(train, test, deeparestimator)

    # Evaluation with the quantiles of the configuration; the evaluator returns
    # a pair whose first element holds the aggregated metrics.
    evaluator = Evaluator(quantiles=configuration.quantiles)
    agg_metrics = evaluator(tss, forecasts)[0]

    # NOTE(review): the divisor assumes `num_series` evaluated series of
    # `prediction_length` steps each — confirm this matches len(tss) when the
    # test set contains several forecast windows.
    normaliser = configuration.parameters["prediction_length"] * num_series
    mean_WIS = agg_metrics["mean_absolute_QuantileLoss"] / normaliser
    return mean_WIS

def objective(config, train, test, configuration):
    """Trainable for Ray Tune: score one hyperparameter sample and report it.

    ``config`` is the hyperparameter sample of the trial; ``train``, ``test``
    and ``configuration`` are passed through to ``evaluate`` unchanged. The
    resulting score is reported under the metric name "mean_WIS". Nothing is
    returned.
    """
    session.report({"mean_WIS": evaluate(config, train, test, configuration)})

In [None]:
def _cardinality_for_trial(spec):
    """Return a list of 411 twos if the trial uses static categorical features, else None."""
    if spec.config.use_feat_static_cat:
        return [2] * 411
    return None


# Search space handed to Ray Tune. Every entry except "cardinality" is a
# grid_search, so all combinations of the listed values are enumerated.
hp_search_space = {
    "num_cells": tune.grid_search([60, 140]),
    "num_layers": tune.grid_search([8, 10]),
    "context_length": tune.grid_search([2, 4, 8]),
    "cell_type": tune.grid_search(["lstm"]),
    "epochs": tune.grid_search([40, 60, 80]),
    "use_feat_static_real": tune.grid_search([True, False]),
    "use_feat_dynamic_real": tune.grid_search([True]),
    "use_feat_static_cat": tune.grid_search([False, True]),
    # Derived from use_feat_static_cat rather than searched over.
    "cardinality": tune.sample_from(_cardinality_for_trial),
}

# First and second element of the stored split are used as train and test set.
with_features_split = data_splits_dict["with_features_2001"]
train = with_features_split[0]
test = with_features_split[1]
# A fresh Configuration (not the module-level `config`) is passed to the trials.
configuration = Configuration()

# Bind the fixed arguments of `objective`; Ray Tune supplies the per-trial `config`.
trainable = tune.with_parameters(
    objective,
    train=train,
    test=test,
    configuration=configuration,
)

# Minimise "mean_WIS" with at most 8 trials running at the same time.
# Per the Ray Tune documentation, num_samples repeats a grid_search grid that many times.
search_settings = tune.TuneConfig(
    num_samples=5,
    metric="mean_WIS",
    mode="min",
    max_concurrent_trials=8,
)

tuner = tune.Tuner(
    trainable,
    tune_config=search_settings,
    param_space=hp_search_space,
)
results = tuner.fit()

best_result = results.get_best_result()
print("Best hyperparameters found were: ", best_result.config)

# Persist the per-trial results table next to the notebook.
results_df = results.get_dataframe()
print(results_df)
results_df.to_csv("Hyperparameter_results_15_05.csv")

2023-05-15 08:37:40,057	INFO worker.py:1553 -- Started a local Ray instance.


0,1
Current time:,2023-05-15 10:04:20
Running for:,01:26:19.87
Memory:,63.6/236.0 GiB

Trial name,status,loc,cardinality,cell_type,context_length,epochs,num_cells,num_layers,use_feat_dynamic_real,use_feat_static_cat,use_feat_static_real,iter,total time (s),mean_WIS
objective_fff9b_00008,RUNNING,172.22.1.197:3099287,,lstm,8,80,60,8,True,False,True,,,
objective_fff9b_00009,RUNNING,172.22.1.197:3098938,,lstm,2,40,140,8,True,False,True,,,
objective_fff9b_00010,RUNNING,172.22.1.197:3099289,,lstm,4,40,140,8,True,False,True,,,
objective_fff9b_00011,RUNNING,172.22.1.197:3099291,,lstm,8,40,140,8,True,False,True,,,
objective_fff9b_00012,RUNNING,172.22.1.197:3099293,,lstm,2,60,140,8,True,False,True,,,
objective_fff9b_00013,RUNNING,172.22.1.197:3099295,,lstm,4,60,140,8,True,False,True,,,
objective_fff9b_00014,RUNNING,172.22.1.197:3099297,,lstm,8,60,140,8,True,False,True,,,
objective_fff9b_00015,RUNNING,172.22.1.197:3099299,,lstm,2,80,140,8,True,False,True,,,
objective_fff9b_00000,TERMINATED,172.22.1.197:3098938,,lstm,2,40,60,8,True,False,True,1.0,3408.15,764.027
objective_fff9b_00001,TERMINATED,172.22.1.197:3099287,,lstm,4,40,60,8,True,False,True,1.0,3338.7,837.327


  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 14%|█▍        | 7/50 [00:11<01:07,  1.58s/it, epoch=1/40, avg_epoch_loss=0.964]
 16%|█▌        | 8/50 [00:10<00:55,  1.33s/it, epoch=1/60, avg_epoch_loss=0.921]
 14%|█▍        | 7/50 [00:10<01:06,  1.55s/it, epoch=1/60, avg_epoch_loss=1.15]
 16%|█▌        | 8/50 [00:10<00:55,  1.33s/it, epoch=1/80, avg_epoch_loss=0.934]
 14%|█▍        | 7/50 [00:11<01:09,  1.62s/it, epoch=1/40, avg_epoch_loss=1.31]
 16%|█▌        | 8/50 [00:11<00:57,  1.38s/it, epoch=1/40, avg_epoch_loss=0.96]
 16%|█▌        | 8/50 [00:11<00:58,  1.40s/it, epoch=1/80, avg_epoch_loss=1.2]
 16%|█▌        | 8/50 [00:11<01:00,  1.43s/it, epoch=1/60, avg_epoch_loss=1.13]
 40%|████      | 20/50 [00:21<00:30,  1.00s/it

Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,mean_WIS,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
objective_fff9b_00000,2023-05-15_09-34-53,True,,91d24aaea9c545f991a7cda93ad03c23,"0_cardinality=None,cell_type=lstm,context_length=2,epochs=40,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,764.027,172.22.1.197,3098938,3408.15,3408.15,3408.15,1684136093,0,,1,fff9b_00000,0.00599909
objective_fff9b_00001,2023-05-15_09-33-48,True,,15ce2ad3393848b89f9bceda792d7e9b,"1_cardinality=None,cell_type=lstm,context_length=4,epochs=40,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,837.327,172.22.1.197,3099287,3338.7,3338.7,3338.7,1684136028,0,,1,fff9b_00001,0.00547719
objective_fff9b_00002,2023-05-15_09-38-32,True,,b5255df4cb614f4eb410a458a4130dac,"2_cardinality=None,cell_type=lstm,context_length=8,epochs=40,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,897.037,172.22.1.197,3099289,3622.85,3622.85,3622.85,1684136312,0,,1,fff9b_00002,0.00538468
objective_fff9b_00003,2023-05-15_09-45-53,True,,e204d0f1c46f42ffad0210873b18d5a8,"3_cardinality=None,cell_type=lstm,context_length=2,epochs=60,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,749.835,172.22.1.197,3099291,4064.2,4064.2,4064.2,1684136753,0,,1,fff9b_00003,0.00518775
objective_fff9b_00004,2023-05-15_09-49-40,True,,756e912dd1c940d5a3b7344564b12841,"4_cardinality=None,cell_type=lstm,context_length=4,epochs=60,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,814.162,172.22.1.197,3099293,4291.35,4291.35,4291.35,1684136980,0,,1,fff9b_00004,0.00561118
objective_fff9b_00005,2023-05-15_09-50-38,True,,1c4a6ebac3064bdd9a7878dafde7925c,"5_cardinality=None,cell_type=lstm,context_length=8,epochs=60,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,930.747,172.22.1.197,3099295,4348.37,4348.37,4348.37,1684137038,0,,1,fff9b_00005,0.00516748
objective_fff9b_00006,2023-05-15_09-59-00,True,,8726a2ec44d0403fa5c4259b868e2152,"6_cardinality=None,cell_type=lstm,context_length=2,epochs=80,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,759.274,172.22.1.197,3099297,4850.94,4850.94,4850.94,1684137540,0,,1,fff9b_00006,0.00594711
objective_fff9b_00007,2023-05-15_10-02-29,True,,6a2af16dfb3947f8825497b136194fc0,"7_cardinality=None,cell_type=lstm,context_length=4,epochs=80,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,834.357,172.22.1.197,3099299,5059.97,5059.97,5059.97,1684137749,0,,1,fff9b_00007,0.00570035


  0%|          | 0/50 [00:00<?, ?it/s]
 36%|███▌      | 18/50 [00:20<00:34,  1.09s/it, epoch=75/80, avg_epoch_loss=0.886]
 68%|██████▊   | 34/50 [00:32<00:15,  1.05it/s, epoch=78/80, avg_epoch_loss=0.869]
 12%|█▏        | 6/50 [00:10<01:18,  1.79s/it, epoch=1/80, avg_epoch_loss=0.995]
 94%|█████████▍| 47/50 [00:46<00:03,  1.03s/it, epoch=78/80, avg_epoch_loss=0.852]
 58%|█████▊    | 29/50 [00:36<00:26,  1.28s/it, epoch=75/80, avg_epoch_loss=0.848]
100%|██████████| 50/50 [00:48<00:00,  1.03it/s, epoch=78/80, avg_epoch_loss=0.874]
  0%|          | 0/50 [00:00<?, ?it/s]
 12%|█▏        | 6/50 [00:23<01:18,  1.79s/it, epoch=1/80, avg_epoch_loss=1.04] 
 24%|██▍       | 12/50 [00:23<01:16,  2.02s/it, epoch=1/80, avg_epoch_loss=1.05]
 88%|████████▊ | 44/50 [00:47<00:06,  1.00s/it, epoch=75/80, avg_epoch_loss=0.884]
 32%|███▏      | 16/50 [00:10<00:22,  1.51it/s, epoch=79/80, avg_epoch_loss=0.966]
 50%|█████     | 25/50 [00:33<00:30,  1.22s/it, epoch=1/80, avg_epoch_loss=1.02]
Running evaluatio