In [1]:
import os
os.chdir('/home/reffert/DeepAR_InfluenzaForecast')
from PythonFiles.model import model, preprocessing, split_forecasts_by_week, plot_coverage, print_forecasts_by_week, forecast_by_week, train_test_split, update_deepAR_parameters
from PythonFiles.Configuration import Configuration
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from gluonts.mx import Trainer, DeepAREstimator
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.rolling_dataset import generate_rolling_dataset,StepStrategy
from gluonts.evaluation import make_evaluation_predictions, Evaluator
import ray
from ray.air import session
from ray import tune
from gluonts.mx.distribution import NegativeBinomialOutput
# Project configuration object; its train/test time windows are set further below.
config = Configuration()

# All three input tables live in the same directory. Keeping that directory in
# one constant means the location only has to be edited once if the project moves.
DATA_DIR = "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing"

# Case table; the columns 'value', 'location', 'date' and 'week' are used later.
influenza_df = pd.read_csv(os.path.join(DATA_DIR, "influenza.csv"), sep=',')
# Population table; looked up later by its 'Location' column and the "2011" column.
population_df = pd.read_csv(os.path.join(DATA_DIR, "PopulationVector.csv"), sep=',')
# Adjacency table; the first CSV column becomes the index so that rows can be
# selected by location name.
neighbourhood_df = pd.read_csv(os.path.join(DATA_DIR, "AdjacentMatrix.csv"), sep=',', index_col=0)

In [2]:
def evaluate(config, train, test, configuration, num_series=411):
    """Fit one DeepAR candidate and return its mean WIS on the test data.

    Parameters
    ----------
    config
        Hyperparameter values for this candidate; forwarded unchanged to
        ``update_deepAR_parameters`` (presumably the dict sampled by Ray Tune
        for the trial -- confirm against the search space).
    train, test
        Train and test datasets, forwarded unchanged to ``model``.
    configuration
        Project configuration. Supplies ``quantiles`` for the evaluator and
        ``parameters["prediction_length"]`` for the normalisation.
    num_series : int, optional
        Second factor of the normalisation divisor. Defaults to 411, the value
        that used to be hard-coded here -- presumably the number of time
        series (locations) being forecast; TODO confirm. Pass a different
        value when evaluating on a different number of series.

    Returns
    -------
    The aggregate ``mean_absolute_QuantileLoss`` divided by
    ``prediction_length * num_series``.
    """
    deeparestimator = update_deepAR_parameters(configuration, config)
    forecasts, tss = model(train, test, deeparestimator)

    # Evaluation with the quantiles of the configuration and calculation of the mean_WIS
    evaluator = Evaluator(quantiles=configuration.quantiles)
    agg_metrics = evaluator(tss, forecasts)[0]  # element 0 holds the aggregate metrics

    # Normalise the aggregate quantile loss by the number of forecast points.
    n_forecast_points = configuration.parameters["prediction_length"] * num_series
    mean_WIS = agg_metrics["mean_absolute_QuantileLoss"] / n_forecast_points
    return mean_WIS

def objective(config, train, test, configuration):
    """Ray Tune trainable: score one hyperparameter sample and report it.

    Delegates the actual work to ``evaluate`` and reports the returned score
    to the active Ray session under the metric name ``"mean_WIS"``.
    Nothing is returned.
    """
    mean_wis = evaluate(config, train, test, configuration)
    metrics = {"mean_WIS": mean_wis}
    session.report(metrics)

In [4]:
# --- Time windows used for the train/test split ------------------------------
config.train_start_time = datetime(1999, 1, 1, 0, 0, 0)  # alternative start: datetime(2010,1,1,0,0,0)
config.train_end_time = datetime(2016, 9, 30, 23, 0, 0)
config.test_end_time = datetime(2018, 9, 30, 23, 0, 0)

# --- Containers that are filled here or (presumably) by later cells ----------
overall_evaluation_df = pd.DataFrame()
data_splits_dict = {}
output_dict = {}

# --- Build the uniformly spaced base frame -----------------------------------
locations = list(influenza_df.location.unique())
# Process the df into a uniformly spaced df
df = preprocessing(
    config,
    influenza_df.loc[influenza_df.location.isin(locations), ['value', 'location', 'date', 'week']],
    check_count=False,
    output_type="corrected_df",
)

# --- Attach static features per location -------------------------------------
# For every location: write its 2011 population into "population" and its row
# of the adjacency table into one column per location.
for location in locations:
    # The row selection is the same for both assignments, so build it once.
    rows_of_location = df.location == location

    population_2011 = population_df.loc[population_df.Location == location, "2011"].values[0]
    df.loc[rows_of_location, "population"] = int(population_2011)

    adjacency_row = neighbourhood_df.loc[neighbourhood_df.index == location, locations].values[0]
    df.loc[rows_of_location, locations] = adjacency_row.astype(int)

# --- Split and expose the pieces used by the tuning cells --------------------
# NOTE(review): the key says "2001" while train_start_time above is 1999 -- confirm the naming.
split_key = "with_features_2001"
data_splits_dict[split_key] = list(train_test_split(config, df, True))

train = data_splits_dict[split_key][0]
test = data_splits_dict[split_key][1]

# Fresh, unmodified Configuration that is handed to the trainable
# (distinct from `config`, whose time windows were changed above).
configuration = Configuration()

'\ntuner = tune.Tuner(\n    tune.with_parameters(objective, train=train, test=test, configuration=configuration),\n    tune_config=tune.TuneConfig(\n        num_samples = 5,\n        metric="mean_WIS",\n        mode="min",\n        max_concurrent_trials=18,\n    ),\n    param_space=hp_search_space,\n)\nresults = tuner.fit()\n\nprint("Best hyperparameters found were: ", results.get_best_result().config)\n\nresults_df = results.get_dataframe()\nprint(results_df)\nresults_df.to_csv("Hyperparameter_results_26_04.csv")'

In [8]:
from ray import tune, air
from ray.tune import ResultGrid
# Directory of a previous Ray Tune run; its results are reloaded from disk.
experiment_path = "/home/reffert/ray_results/objective_2023-04-26_11-27-27"
print(f"Loading results from {experiment_path}...")

# ignore_reinit_error makes this cell safe to re-run in a live kernel:
# a bare ray.init() raises once Ray has already been started in the process.
ray.init(ignore_reinit_error=True)

# The trainable is passed again on restore, bound to the same train/test data
# and configuration objects as in the `tune.with_parameters` call.
restored_tuner = tune.Tuner.restore(
    experiment_path,
    trainable=tune.with_parameters(objective, train=train, test=test, configuration=configuration),
)
result_grid = restored_tuner.get_results()

Loading results from /home/reffert/ray_results/objective_2023-04-26_11-27-27...


2023-05-09 10:47:00,123	INFO worker.py:1553 -- Started a local Ray instance.
2023-05-09 10:47:13,285	INFO experiment_analysis.py:789 -- No `self.trials`. Drawing logdirs from checkpoint file. This may result in some information that is out of sync, as checkpointing is periodic.


In [14]:
# Flatten the restored result grid into a single DataFrame.
results_df = result_grid.get_dataframe()

# Show the (pandas-truncated) table, then persist it next to the notebook's
# working directory for later analysis.
results_csv_name = "Restored_Hyperparameter_results_26_04.csv"
print(results_df)
results_df.to_csv(results_csv_name)

        mean_WIS  time_this_iter_s   done  timesteps_total  episodes_total  \
0     496.416393       1013.079154  False              NaN             NaN   
1     480.664316        945.227754  False              NaN             NaN   
2     530.478650       1166.700580  False              NaN             NaN   
3     521.395347       1116.161916  False              NaN             NaN   
4     488.141002       1363.722267  False              NaN             NaN   
...          ...               ...    ...              ...             ...   
3285  402.682369       2392.667870  False              NaN             NaN   
3286  611.189208       3448.244290  False              NaN             NaN   
3287  479.598384       3336.280444  False              NaN             NaN   
3288  571.236422       2320.850677  False              NaN             NaN   
3289  444.251225       2063.163950  False              NaN             NaN   

      training_iteration     trial_id                     exper