In [3]:
# Third-party and standard-library imports for this notebook.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import os
# Change the process working directory to the project checkout before the
# `PythonFiles` imports below.
# NOTE(review): presumably this is what makes the `PythonFiles` package
# importable from the notebook; the absolute path is machine-specific — confirm.
os.chdir('/home/reffert/DeepAR_InfluenzaForecast')
# Project-local helpers: data preparation, configuration object, tuning objective.
from PythonFiles.model import preprocessing, train_test_split
from PythonFiles.Configuration import Configuration
from PythonFiles.HpTuning import get_data, objectiveDeepAR
# Ray Tune drives the hyperparameter runs.
from ray.air import session
from ray import tune
# Shared configuration instance, created with the project's defaults.
configuration = Configuration()

In [4]:
# --- Forecast windows --------------------------------------------------------
# Start/end timestamps of the training period and end of the test period,
# stored on the shared configuration that is handed to `preprocessing` and
# `train_test_split` below.
configuration.train_start_time = datetime(1999, 1, 1, 0, 0, 0)
configuration.train_end_time = datetime(2018, 9, 30, 23, 0, 0)
configuration.test_end_time = datetime(2020, 9, 30, 23, 0, 0)

# --- Load the raw inputs -----------------------------------------------------
# Single place to edit when the data moves (the path is machine-specific).
DATA_DIR = "/home/reffert/DeepAR_InfluenzaForecast/Notebooks/DataProcessing"
influenza_df = pd.read_csv(f"{DATA_DIR}/influenza.csv", sep=',')
population_df = pd.read_csv(f"{DATA_DIR}/PopulationVector.csv", sep=',')
# The first CSV column becomes the index; rows are selected by location below.
neighbourhood_df = pd.read_csv(f"{DATA_DIR}/AdjacentMatrix.csv", sep=',', index_col=0)

data_splits_dict = {}
locations = list(influenza_df.location.unique())

# Process the df into a uniformly spaced df
df = influenza_df.loc[influenza_df.location.isin(locations), ['value', 'location', 'date', 'week']]
df = preprocessing(configuration, df, check_count=False, output_type="corrected_df")

# --- Attach per-location features --------------------------------------------
# For every location, write its "2011" population value and its row of the
# neighbourhood matrix (one column per location) onto all rows of that location.
for location in locations:
    # Row selector for this location; the assignments below never modify the
    # `location` column, so it is computed once and reused.
    location_rows = df.location == location
    df.loc[location_rows, "population"] = int(
        population_df.loc[population_df.Location == location, "2011"].values[0]
    )
    df.loc[location_rows, locations] = (
        neighbourhood_df.loc[neighbourhood_df.index == location, locations].values[0].astype(int)
    )

# --- Train / test split ------------------------------------------------------
# NOTE(review): the key says "2001" while train_start_time above is 1999 —
# confirm the label is still the intended one.
splits = list(train_test_split(configuration, df, True))
data_splits_dict["with_features_2001"] = splits
train = splits[0]
test = splits[1]

In [None]:
# Search space handed to `objectiveDeepAR`. Every entry is a single-value grid,
# so the grid contains exactly one configuration; together with `num_samples`
# below, Ray Tune repeats that same configuration instead of exploring others.
hp_search_space = {
    "num_cells": tune.grid_search([140]),
    "num_layers": tune.grid_search([6]),
    "context_length": tune.grid_search([2]),
    "cell_type": tune.grid_search(["lstm"]),
    "epochs": tune.grid_search([200]),
    "use_feat_static_real": tune.grid_search([False]),
    "use_feat_dynamic_real": tune.grid_search([True]),
    "use_feat_static_cat": tune.grid_search([False]),
    # Derived from the resolved config: a list of 411 twos when static
    # categorical features are enabled, otherwise None.
    # NOTE(review): 411 presumably equals the number of location columns — confirm.
    "cardinality": tune.sample_from(
        lambda spec: [2] * 411 if spec.config.use_feat_static_cat else None
    ),
}

tuner = tune.Tuner(
    # Bind the prepared data and configuration to every trial.
    tune.with_parameters(objectiveDeepAR, train=train, test=test, configuration=configuration),
    tune_config=tune.TuneConfig(
        num_samples=100,            # number of repetitions of the grid above
        metric="mean_WIS",          # metric reported by the objective; lower is better
        mode="min",
        max_concurrent_trials=12,
    ),
    param_space=hp_search_space,
)
results = tuner.fit()

# Persist the per-trial table before looking up the best result:
# `get_best_result()` raises when no trial reported the metric, and the
# outcome of a long tuning run should not be lost in that case.
results_df = results.get_dataframe()
results_df.to_csv("Tuned_Deepar_Hyperparameter_results_06_06.csv")

print("Best hyperparameters found were: ", results.get_best_result().config)
print(results_df)

2023-06-06 17:50:37,276	INFO worker.py:1553 -- Started a local Ray instance.


0,1
Current time:,2023-06-06 18:11:50
Running for:,00:21:03.23
Memory:,141.0/236.0 GiB

Trial name,status,loc,cardinality,cell_type,context_length,epochs,num_cells,num_layers,use_feat_dynamic_real,use_feat_static_cat,use_feat_static_real
objectiveDeepAR_e65fc_00000,RUNNING,172.22.1.197:3313546,,lstm,2,200,140,6,True,False,False
objectiveDeepAR_e65fc_00001,RUNNING,172.22.1.197:3313949,,lstm,2,200,140,6,True,False,False
objectiveDeepAR_e65fc_00002,RUNNING,172.22.1.197:3314069,,lstm,2,200,140,6,True,False,False
objectiveDeepAR_e65fc_00003,RUNNING,172.22.1.197:3314186,,lstm,2,200,140,6,True,False,False
objectiveDeepAR_e65fc_00004,RUNNING,172.22.1.197:3314304,,lstm,2,200,140,6,True,False,False
objectiveDeepAR_e65fc_00005,RUNNING,172.22.1.197:3314427,,lstm,2,200,140,6,True,False,False
objectiveDeepAR_e65fc_00006,RUNNING,172.22.1.197:3314429,,lstm,2,200,140,6,True,False,False
objectiveDeepAR_e65fc_00007,RUNNING,172.22.1.197:3314431,,lstm,2,200,140,6,True,False,False
objectiveDeepAR_e65fc_00008,RUNNING,172.22.1.197:3314433,,lstm,2,200,140,6,True,False,False
objectiveDeepAR_e65fc_00009,RUNNING,172.22.1.197:3314435,,lstm,2,200,140,6,True,False,False


  0%|          | 0/50 [00:00<?, ?it/s][0m 
  0%|          | 0/50 [00:00<?, ?it/s][0m 
 56%|█████▌    | 28/50 [00:10<00:07,  2.79it/s, epoch=1/200, avg_epoch_loss=1.12]
  0%|          | 0/50 [00:00<?, ?it/s][0m 
100%|██████████| 50/50 [00:17<00:00,  2.83it/s, epoch=1/200, avg_epoch_loss=1.01]
  0%|          | 0/50 [00:00<?, ?it/s][0m 
 76%|███████▌  | 38/50 [00:10<00:03,  3.76it/s, epoch=1/200, avg_epoch_loss=0.952]
100%|██████████| 50/50 [00:12<00:00,  3.93it/s, epoch=1/200, avg_epoch_loss=0.95] 
  0%|          | 0/50 [00:00<?, ?it/s][0m 
  0%|          | 0/50 [00:00<?, ?it/s][0m 
 82%|████████▏ | 41/50 [00:10<00:02,  4.07it/s, epoch=1/200, avg_epoch_loss=1.03]
  0%|          | 0/50 [00:00<?, ?it/s][0m 
 82%|████████▏ | 41/50 [00:10<00:02,  3.96it/s, epoch=2/200, avg_epoch_loss=0.798]
100%|██████████| 50/50 [00:12<00:00,  4.06it/s, epoch=1/200, avg_epoch_loss=1.01]
  0%|          | 0/50 [00:00<?, ?it/s][0m 
100%|██████████| 50/50 [00:12<00:00,  3.99it/s, epoch=2/200, avg_epoch_