In [1]:
import os
os.chdir('/home/reffert/DeepAR_InfluenzaForecast')
from PythonFiles.model import model, preprocessing, split_forecasts_by_week, plot_coverage, print_forecasts_by_week, forecast_by_week, train_test_split, update_deepAR_parameters
from PythonFiles.Configuration import Configuration
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.mx import Trainer
from gluonts.evaluation import make_evaluation_predictions
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.rolling_dataset import generate_rolling_dataset,StepStrategy
from gluonts.evaluation import make_evaluation_predictions, Evaluator
import ray
from ray.air import session
from ray import tune
from gluonts.mx.distribution import NegativeBinomialOutput
# Project configuration object; its training/test time window is overridden below.
config = Configuration()
# Input tables read from the project's DataProcessing folder (absolute paths on the author's machine).
# The adjacency matrix uses its first CSV column as the index.
influenza_df = pd.read_csv("/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/influenza.csv", sep=',')
population_df = pd.read_csv("/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/PopulationVector.csv", sep=',')
neighbourhood_df = pd.read_csv("/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing/AdjacentMatrix.csv", sep=',', index_col=0)

# Time window stored on the configuration: training 1999-01-01 .. 2016-09-30, test end 2018-09-30.
# The trailing comment on the next line keeps the alternative 2010 training start.
config.train_start_time = datetime(1999,1,1,0,0,0)#datetime(2010,1,1,0,0,0)
config.train_end_time = datetime(2016,9,30,23,0,0)
config.test_end_time = datetime(2018,9,30,23,0,0)
# NOTE(review): overall_evaluation_df and output_dict are initialised here but not used in the visible cells.
overall_evaluation_df = pd.DataFrame()

data_splits_dict = {}
output_dict = {}

# All distinct location identifiers present in the influenza table.
locations = list(influenza_df.location.unique())
# Keep only the columns needed downstream, then let the project helper `preprocessing`
# turn the frame into a uniformly spaced one (per the original author's note; see PythonFiles.model).
df = influenza_df.loc[influenza_df.location.isin(locations), ['value', 'location', 'date','week']]
df = preprocessing(config, df, check_count=False, output_type="corrected_df")
# Attach static per-location covariates to every row of that location.
for location in locations:
    # Population: the "2011" column of the population table, matched on its `Location` column.
    df.loc[df.location == location, "population"] = int(population_df.loc[population_df.Location == location, "2011"].values[0])
    # Adjacency: one column per location, filled with this location's row of the adjacency matrix cast to int.
    df.loc[df.location == location, locations] = neighbourhood_df.loc[neighbourhood_df.index==location,locations].values[0].astype(int)
    
# Active split, stored under "with_features_2001": train_test_split is called with its third
# argument set to True (presumably "include the additional features" -- confirm in PythonFiles.model).
# NOTE(review): the key says 2001, but config.train_start_time above is 1999-01-01.
#data_splits_dict["without_features_2001"] = list(train_test_split(config, df, False))
data_splits_dict["with_features_2001"] = list(train_test_split(config, df, True))

# Change the beginning of the training period
#config.train_start_time = datetime(2010,1,1,0,0,0)
#data_splits_dict["without_features_2010"] = list(train_test_split(config, df, False))
#data_splits_dict["with_features_2010"] = list(train_test_split(config, df, True))



In [2]:
def update_FNN_parameters(config, new_parameters):
    '''
    Build a SimpleFeedForwardEstimator from the FNN defaults in ``config.fnnparameters``,
    with individual values overridden by ``new_parameters``.

    The defaults are copied first, so assigning overrides does not modify
    ``config.fnnparameters`` itself.

    Parameters
    ----------
    config : object exposing a dict attribute ``fnnparameters`` with the keys
        "num_hidden_dimensions", "prediction_length", "context_length", "distr_output",
        "batch_size", "batch_normalization", "epochs" and "num_batches_per_epoch".
    new_parameters : dict
        Overrides. Only keys that already exist in ``config.fnnparameters`` are applied;
        any other key is reported on stdout and ignored.

    Returns
    -------
    SimpleFeedForwardEstimator
        An untrained estimator configured with the merged parameters.
    '''
    parameters = config.fnnparameters.copy()
    for key, value in new_parameters.items():
        if key in parameters:
            parameters[key] = value
        else:
            # Unknown keys are skipped on purpose so that a typo cannot silently add a new setting.
            print(f"The key {key} isn't available in config.fnnparameters and is ignored; the default configuration is kept.")

    fnnestimator = SimpleFeedForwardEstimator(num_hidden_dimensions=parameters["num_hidden_dimensions"],
                                              prediction_length=parameters["prediction_length"],
                                              context_length=parameters["context_length"],
                                              distr_output=parameters["distr_output"],
                                              batch_size=parameters["batch_size"],
                                              batch_normalization=parameters["batch_normalization"],
                                              trainer=Trainer(epochs=parameters["epochs"],
                                                              num_batches_per_epoch=parameters["num_batches_per_epoch"],
                                                             ),
                                              )
    return fnnestimator

def evaluate(config, train, test, configuration, num_series=411):
    """
    Train a feed-forward estimator for one hyperparameter setting and score it on ``test``.

    Parameters
    ----------
    config : dict
        Hyperparameter overrides for this trial (the dict Ray Tune hands to the trainable).
        Must contain "prediction_length", which is used in the normalisation below.
    train : dataset passed to ``estimator.train``.
    test : dataset passed to ``make_evaluation_predictions``.
    configuration : project ``Configuration`` object supplying the FNN defaults
        (via ``update_FNN_parameters``) and the evaluation ``quantiles``.
    num_series : int, optional
        Divisor used together with the prediction length to normalise the aggregated
        quantile loss. Defaults to 411, the value previously hard-coded here
        (presumably the number of locations in the data set -- confirm).

    Returns
    -------
    float
        ``mean_absolute_QuantileLoss / (prediction_length * num_series)``, reported as mean WIS.
    """
    # Note the argument order: the Configuration object comes first, the trial overrides second.
    fnnestimator = update_FNN_parameters(configuration, config)

    predictor = fnnestimator.train(train)

    forecast_it, ts_it = make_evaluation_predictions(dataset=test, predictor=predictor, num_samples=100)
    forecasts = list(forecast_it)
    tss = list(ts_it)

    # Evaluate at the quantiles of the configuration; element 0 of the result holds the aggregate metrics.
    evaluator = Evaluator(quantiles=configuration.quantiles)
    agg_metrics = evaluator(tss, forecasts)[0]
    mean_WIS = agg_metrics["mean_absolute_QuantileLoss"] / (config["prediction_length"] * num_series)
    return mean_WIS

def objectiveFNN(config, train, test, configuration):
    """
    Ray Tune trainable: score one hyperparameter setting and report it as "mean_WIS".

    ``config`` is the trial's hyperparameter dict; ``train``, ``test`` and ``configuration``
    are forwarded unchanged to ``evaluate``.
    """
    session.report({"mean_WIS": evaluate(config, train, test, configuration)})

In [None]:
# Single fixed hyperparameter setting. Each value is wrapped in a one-element grid below,
# so every trial runs the same configuration and num_samples repeats that grid 100 times.
fixed_hyperparameters = {
    "num_hidden_dimensions": None,
    "context_length": None,
    "epochs": 100,
    "prediction_length": 4,
    "num_batches_per_epoch": 50,
    "batch_normalization": False,
    "batch_size": 32,
}
hp_search_space = {
    name: tune.grid_search([value]) for name, value in fixed_hyperparameters.items()
}

# Train/test data sets of the split prepared in the first cell (positions 0 and 1 of the list).
features_split = data_splits_dict["with_features_2001"]
train = features_split[0]
test = features_split[1]
configuration = Configuration()

# Bind the data sets and the configuration to the trainable, then minimise the reported
# "mean_WIS" with at most five trials running at the same time.
trainable = tune.with_parameters(objectiveFNN, train=train, test=test, configuration=configuration)
search_settings = tune.TuneConfig(
    num_samples=100,
    metric="mean_WIS",
    mode="min",
    max_concurrent_trials=5,
)
tuner = tune.Tuner(trainable, tune_config=search_settings, param_space=hp_search_space)
results = tuner.fit()

best_config = results.get_best_result().config
print("Best hyperparameters found were: ", best_config)

# Persist the per-trial results; the file is written relative to the current working directory.
results_df = results.get_dataframe()
print(results_df)
results_df.to_csv("default_FNN_results_12_05.csv")

0,1
Current time:,2023-05-12 19:28:01
Running for:,00:39:24.54
Memory:,134.5/236.0 GiB

Trial name,status,loc,batch_normalization,batch_size,context_length,epochs,num_batches_per_epoc h,num_hidden_dimension s,prediction_length
objectiveFNN_d711a_00000,RUNNING,172.22.1.197:745464,False,32,,100,50,,4
objectiveFNN_d711a_00001,RUNNING,172.22.1.197:745548,False,32,,100,50,,4
objectiveFNN_d711a_00002,RUNNING,172.22.1.197:745657,False,32,,100,50,,4
objectiveFNN_d711a_00003,RUNNING,172.22.1.197:745659,False,32,,100,50,,4
objectiveFNN_d711a_00004,RUNNING,172.22.1.197:745662,False,32,,100,50,,4


  0%|          | 0/50 [00:00<?, ?it/s] 
  0%|          | 0/50 [00:00<?, ?it/s] 
  0%|          | 0/50 [00:00<?, ?it/s] 
  0%|          | 0/50 [00:00<?, ?it/s] 
  0%|          | 0/50 [00:00<?, ?it/s] 
 16%|█▌        | 8/50 [00:11<00:58,  1.40s/it, epoch=1/100, avg_epoch_loss=1.31]
 16%|█▌        | 8/50 [00:11<01:00,  1.44s/it, epoch=1/100, avg_epoch_loss=1.57]
 14%|█▍        | 7/50 [00:10<01:03,  1.47s/it, epoch=1/100, avg_epoch_loss=1.61]
 16%|█▌        | 8/50 [00:11<00:58,  1.39s/it, epoch=1/100, avg_epoch_loss=1.9]
 16%|█▌        | 8/50 [00:11<00:58,  1.38s/it, epoch=1/100, avg_epoch_loss=1.71]
 36%|███▌      | 18/50 [00:21<00:37,  1.17s/it, epoch=1/100, avg_epoch_loss=1.38]
 38%|███▊      | 19/50 [00:22<00:34,  1.13s/it, epoch=1/100, avg_epoch_loss=1.69]
 28%|██▊       | 14/50 [00:20<00:53,  1.49s/it, epoch=1/100, avg_epoch_loss=1.58]
 34%|███▍      | 17/50 [00:21<00:40,  1.24s/it, epoch=1/100, avg_epoch_loss=1.67]
 38%|███▊      | 19/50 [00:21<00:33,  1.08s/it, epoch=1/100, avg_epo