In [1]:
import os
os.chdir('/home/reffert/DeepAR_InfluenzaForecast')
from PythonFiles.model import model, preprocessing, split_forecasts_by_week, plot_coverage, print_forecasts_by_week, forecast_by_week, train_test_split, update_deepAR_parameters
from PythonFiles.Configuration import Configuration
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from gluonts.mx import Trainer, DeepAREstimator
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.rolling_dataset import generate_rolling_dataset,StepStrategy
from gluonts.evaluation import make_evaluation_predictions, Evaluator
import ray
from ray.air import session
from ray import tune
from gluonts.mx.distribution import NegativeBinomialOutput
# Configuration object from PythonFiles.Configuration; its train/test time
# window attributes are assigned in the next cell.
config = Configuration()

# All three input tables sit in the same directory, so the directory is
# written once here instead of being repeated in every read_csv call.
DATA_DIR = "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing"

# Case data; the following cells read its 'value', 'location', 'date' and 'week' columns.
influenza_df = pd.read_csv(f"{DATA_DIR}/influenza.csv", sep=',')
# Population table; the following cells read its 'Location' and '2011' columns.
population_df = pd.read_csv(f"{DATA_DIR}/PopulationVector.csv", sep=',')
# Adjacency table; the first CSV column becomes the row index.
neighbourhood_df = pd.read_csv(f"{DATA_DIR}/AdjacentMatrix.csv", sep=',', index_col=0)

In [2]:
# Time window used for the train/test split. Training starts in 1999; the
# alternative start that was tried is datetime(2010,1,1,0,0,0).
config.train_start_time = datetime(1999, 1, 1, 0, 0, 0)
config.train_end_time = datetime(2016, 9, 30, 23, 0, 0)
config.test_end_time = datetime(2018, 9, 30, 23, 0, 0)

# Empty containers that are filled further down in the notebook.
data_splits_dict = {}
output_dict = {}
overall_evaluation_df = pd.DataFrame()

# Every distinct location that occurs in the raw data.
locations = list(influenza_df.location.unique())

# Process the df into a uniformly spaced df
wanted_columns = ['value', 'location', 'date','week']
df = influenza_df.loc[influenza_df.location.isin(locations), wanted_columns]
df = preprocessing(config, df, check_count=False, output_type="corrected_df")

# Attach two kinds of per-location values to the rows of each location:
#   * "population": the entry of the "2011" column of population_df
#   * one column per location: that location's row of neighbourhood_df
for location in locations:
    rows_of_location = df.location == location

    population_2011 = population_df.loc[population_df.Location == location, "2011"].values[0]
    df.loc[rows_of_location, "population"] = int(population_2011)

    adjacency_row = neighbourhood_df.loc[neighbourhood_df.index == location, locations].values[0]
    df.loc[rows_of_location, locations] = adjacency_row.astype(int)

In [3]:
# Active data split: third argument True (presumably "with additional
# features" — matches the key name; verify against train_test_split).
# It uses whatever training start is currently set on config.
# NOTE(review): the key is labelled "2001" while config.train_start_time is
# set to 1999 in the cell above — confirm which one is intended.
# Variant with the third argument False (disabled):
#data_splits_dict["without_features_2001"] = list(train_test_split(config, df, False))
with_features_split = train_test_split(config, df, True)
data_splits_dict["with_features_2001"] = list(with_features_split)

# Disabled variants: move the beginning of the training period to 2010 and
# build both splits again.
#config.train_start_time = datetime(2010,1,1,0,0,0)
#data_splits_dict["without_features_2010"] = list(train_test_split(config, df, False))
#data_splits_dict["with_features_2010"] = list(train_test_split(config, df, True))

In [4]:
def evaluate(config, train, test, configuration, num_series=411):
    """Run one DeepAR setup through ``model`` and return its mean WIS.

    Parameters
    ----------
    config
        Hyperparameter set for this run; it is forwarded, together with
        ``configuration``, to ``update_deepAR_parameters``.
    train, test
        Training and test data, handed unchanged to ``model``.
    configuration
        Object providing ``quantiles`` and
        ``parameters["prediction_length"]``.
    num_series : int, optional
        Number of series used to normalise the quantile loss. Defaults to
        411, the value that used to be hard-coded here (presumably the
        number of locations — TODO confirm).

    Returns
    -------
    The aggregate ``mean_absolute_QuantileLoss`` divided by
    ``prediction_length * num_series``.
    """
    deeparestimator = update_deepAR_parameters(configuration, config)
    forecasts, tss = model(train, test, deeparestimator)
    # Evaluation with the quantiles of the configuration and calculation of the mean_WIS
    evaluator = Evaluator(quantiles=configuration.quantiles)
    # Only element 0 of the evaluator's result (the aggregate metrics) is used.
    agg_metrics = evaluator(tss, forecasts)[0]
    normaliser = configuration.parameters["prediction_length"] * num_series
    mean_WIS = agg_metrics["mean_absolute_QuantileLoss"] / normaliser
    return mean_WIS

def objective(config, train, test, configuration):
    """Trainable handed to Ray Tune: score one hyperparameter set and report it.

    The value returned by ``evaluate`` is reported to the running session
    under the key ``"mean_WIS"``; the function itself returns nothing.
    """
    session.report({"mean_WIS": evaluate(config, train, test, configuration)})

In [None]:
# Search space. Every entry except "cardinality" is a grid; "cardinality" is
# derived per trial from use_feat_static_cat: a list of 411 twos when the
# flag is set, otherwise None.
hp_search_space = dict(
    num_cells=tune.grid_search([60, 140]),
    num_layers=tune.grid_search([8, 10]),
    context_length=tune.grid_search([2, 4, 8]),
    cell_type=tune.grid_search(["lstm"]),
    epochs=tune.grid_search([40, 60, 80]),
    use_feat_static_real=tune.grid_search([True, False]),
    use_feat_dynamic_real=tune.grid_search([True]),
    use_feat_static_cat=tune.grid_search([False, True]),
    cardinality=tune.sample_from(
        lambda spec: [2]*411 if spec.config.use_feat_static_cat else None
    ),
)

# Data for the search: the first two entries of the split built earlier.
selected_split = data_splits_dict["with_features_2001"]
train = selected_split[0]
test = selected_split[1]
# Fresh Configuration instance that is passed to every trial.
configuration = Configuration()

# Bind the fixed arguments to the objective; Ray supplies the per-trial config.
trainable = tune.with_parameters(objective, train=train, test=test, configuration=configuration)
# Minimise the reported "mean_WIS" with at most 10 trials running at once.
search_settings = tune.TuneConfig(
    num_samples=5,
    metric="mean_WIS",
    mode="min",
    max_concurrent_trials=10,
)
tuner = tune.Tuner(trainable, tune_config=search_settings, param_space=hp_search_space)
results = tuner.fit()

best_result = results.get_best_result()
print("Best hyperparameters found were: ", best_result.config)

# Keep the complete trial table on disk as well as in the cell output.
results_df = results.get_dataframe()
print(results_df)
results_df.to_csv("Hyperparameter_results_12_05.csv")

2023-05-12 17:47:56,831	INFO worker.py:1553 -- Started a local Ray instance.


0,1
Current time:,2023-05-12 19:28:58
Running for:,01:40:54.21
Memory:,132.6/236.0 GiB

Trial name,status,loc,cardinality,cell_type,context_length,epochs,num_cells,num_layers,use_feat_dynamic_real,use_feat_static_cat,use_feat_static_real,iter,total time (s),mean_WIS
objective_605d1_00010,RUNNING,172.22.1.197:700997,,lstm,4,40,140,8,True,False,True,,,
objective_605d1_00011,RUNNING,172.22.1.197:701089,,lstm,8,40,140,8,True,False,True,,,
objective_605d1_00012,RUNNING,172.22.1.197:701091,,lstm,2,60,140,8,True,False,True,,,
objective_605d1_00013,RUNNING,172.22.1.197:701093,,lstm,4,60,140,8,True,False,True,,,
objective_605d1_00014,RUNNING,172.22.1.197:701095,,lstm,8,60,140,8,True,False,True,,,
objective_605d1_00015,RUNNING,172.22.1.197:701097,,lstm,2,80,140,8,True,False,True,,,
objective_605d1_00016,RUNNING,172.22.1.197:701099,,lstm,4,80,140,8,True,False,True,,,
objective_605d1_00017,RUNNING,172.22.1.197:701101,,lstm,8,80,140,8,True,False,True,,,
objective_605d1_00018,RUNNING,172.22.1.197:701103,,lstm,2,40,60,10,True,False,True,,,
objective_605d1_00019,RUNNING,172.22.1.197:701105,,lstm,4,40,60,10,True,False,True,,,


  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 14%|█▍        | 7/50 [00:10<01:04,  1.49s/it, epoch=1/40, avg_epoch_loss=1.07]
 14%|█▍        | 7/50 [00:10<01:02,  1.45s/it, epoch=1/40, avg_epoch_loss=1.3]
 16%|█▌        | 8/50 [00:10<00:55,  1.32s/it, epoch=1/80, avg_epoch_loss=1.12]
 16%|█▌        | 8/50 [00:10<00:56,  1.35s/it, epoch=1/80, avg_epoch_loss=0.934]
 14%|█▍        | 7/50 [00:10<01:05,  1.52s/it, epoch=1/60, avg_epoch_loss=0.892]
 14%|█▍        | 7/50 [00:10<01:04,  1.50s/it, epoch=1/60, avg_epoch_loss=1.49]
 16%|█▌        | 8/50 [00:10<00:57,  1.36s/it, epoch=1/40, avg_epoch_loss=1.13]
 16%|█▌        | 8/50 [00:11<00:58,  1.40s/it, ep

Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,mean_WIS,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
objective_605d1_00000,2023-05-12_18-32-39,True,,b3369f2cf5b24ded81a66b3abbf0cfc3,"0_cardinality=None,cell_type=lstm,context_length=2,epochs=40,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,769.604,172.22.1.197,700997,2670.64,2670.64,2670.64,1683909159,0,,1,605d1_00000,0.00774622
objective_605d1_00001,2023-05-12_18-34-33,True,,ec7af87e31d94575aa71b1a4cb0679ed,"1_cardinality=None,cell_type=lstm,context_length=4,epochs=40,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,828.759,172.22.1.197,701089,2780.13,2780.13,2780.13,1683909273,0,,1,605d1_00001,0.00608468
objective_605d1_00002,2023-05-12_18-38-07,True,,ff365174a669481881d4a0c0da298ee0,"2_cardinality=None,cell_type=lstm,context_length=8,epochs=40,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,928.757,172.22.1.197,701091,2994.31,2994.31,2994.31,1683909487,0,,1,605d1_00002,0.00568056
objective_605d1_00003,2023-05-12_18-48-41,True,,25a490c479ec4e3981910f0e644eb289,"3_cardinality=None,cell_type=lstm,context_length=2,epochs=60,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,771.05,172.22.1.197,701093,3627.93,3627.93,3627.93,1683910121,0,,1,605d1_00003,0.00578403
objective_605d1_00004,2023-05-12_18-52-26,True,,1d232d3a70594f9481c0d18d0ed26b08,"4_cardinality=None,cell_type=lstm,context_length=4,epochs=60,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,819.315,172.22.1.197,701095,3852.66,3852.66,3852.66,1683910346,0,,1,605d1_00004,0.0056963
objective_605d1_00005,2023-05-12_18-56-03,True,,0a1c3fdbcad04b76bee8e13592a2be9b,"5_cardinality=None,cell_type=lstm,context_length=8,epochs=60,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,936.426,172.22.1.197,701097,4070.35,4070.35,4070.35,1683910563,0,,1,605d1_00005,0.0060699
objective_605d1_00006,2023-05-12_19-07-23,True,,91792d9d5bd445378c0853cd8198d812,"6_cardinality=None,cell_type=lstm,context_length=2,epochs=80,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,758.936,172.22.1.197,701099,4750.55,4750.55,4750.55,1683911243,0,,1,605d1_00006,0.00533891
objective_605d1_00007,2023-05-12_19-08-19,True,,9383c0d749dc498196c1129fe41ec0fd,"7_cardinality=None,cell_type=lstm,context_length=4,epochs=80,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,827.329,172.22.1.197,701101,4806.48,4806.48,4806.48,1683911299,0,,1,605d1_00007,0.00569701
objective_605d1_00008,2023-05-12_19-15-57,True,,9ed59f63a269469881583b98cc9e0483,"8_cardinality=None,cell_type=lstm,context_length=8,epochs=80,num_cells=60,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,973.406,172.22.1.197,701103,5264.32,5264.32,5264.32,1683911757,0,,1,605d1_00008,0.00684118
objective_605d1_00009,2023-05-12_19-22-06,True,,5c341d65f0b74388b4e63a08f944f21c,"9_cardinality=None,cell_type=lstm,context_length=2,epochs=40,num_cells=140,num_layers=8,use_feat_dynamic_real=True,use_feat_static_cat=False,use_feat_static_real=True",econ-stat-rr01,1,769.661,172.22.1.197,701105,5633.29,5633.29,5633.29,1683912126,0,,1,605d1_00009,0.0055964


 44%|████▍     | 22/50 [00:22<00:28,  1.01s/it, epoch=70/80, avg_epoch_loss=0.837]
  0%|          | 0/50 [00:00<?, ?it/s]
 80%|████████  | 40/50 [00:32<00:08,  1.22it/s, epoch=75/80, avg_epoch_loss=0.891]
 44%|████▍     | 22/50 [00:22<00:26,  1.06it/s, epoch=76/80, avg_epoch_loss=0.856]
 74%|███████▍  | 37/50 [00:32<00:10,  1.19it/s, epoch=70/80, avg_epoch_loss=0.867]
 14%|█▍        | 7/50 [00:10<01:04,  1.50s/it, epoch=1/40, avg_epoch_loss=0.948]
100%|██████████| 50/50 [00:39<00:00,  1.25it/s, epoch=75/80, avg_epoch_loss=0.861]
  0%|          | 0/50 [00:00<?, ?it/s]
 74%|███████▍  | 37/50 [00:32<00:10,  1.25it/s, epoch=76/80, avg_epoch_loss=0.886]
100%|██████████| 50/50 [00:41<00:00,  1.20it/s, epoch=70/80, avg_epoch_loss=0.85] 
  0%|          | 0/50 [00:00<?, ?it/s]
 14%|█▍        | 7/50 [00:20<01:04,  1.50s/it, epoch=1/40, avg_epoch_loss=0.92] 
 32%|███▏      | 16/50 [00:21<00:44,  1.30s/it, epoch=1/40, avg_epoch_loss=0.916]
 28%|██▊       | 14/50 [00:10<00:25,  1.39it/s, epoch=76/8