In [None]:
!pip install torch==2.0.1 pytorch-lightning==2.0.2 pytorch_forecasting==1.0.0 torchaudio==2.0.2 torchdata==0.6.1 torchtext==0.15.2 torchvision==0.15.2 optuna==3.4

In [None]:
import numpy as np
from scipy.interpolate import CubicSpline
# imports for training
import lightning.pytorch as pl
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
# import dataset, network to train and metric to optimize
from pytorch_forecasting import Baseline, TimeSeriesDataSet, TemporalFusionTransformer, QuantileLoss
from pytorch_forecasting.metrics import MAE, SMAPE, PoissonLoss, QuantileLoss
from lightning.pytorch.tuner import Tuner
import pandas as pd
from pytorch_forecasting.data import GroupNormalizer
import pickle
import tensorflow as tf
import tensorboard as tb
tf.io.gfile = tb.compat.tensorflow_stub.io.gfile
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

def mean_weighted_quantile_loss(
    y_pred: np.ndarray, y_true: np.ndarray, quantiles: list
) -> np.ndarray:
    """Weighted quantile (pinball) loss, one value per quantile level.

    Args:
        y_pred: Quantile forecasts, array-like of shape
            [num_time_series, num_quantiles, horizon].
        y_true: Observed values, array-like of shape [num_time_series, horizon].
        quantiles: Quantile levels matching axis 1 of ``y_pred``; each entry is
            passed through ``float()``, so strings such as ``'0.1'`` are accepted.

    Returns:
        1-D array of length ``len(quantiles)``: twice the absolute pinball loss
        summed over series and horizon, divided by the normalising sum below.
    """
    # Accept lists / DataFrames as well as ndarrays.
    y_pred = np.asarray(y_pred, dtype=float)
    y_true = np.asarray(y_true, dtype=float)
    levels = np.array([float(q) for q in quantiles])

    # Broadcast the observations against the quantile axis: [series, quantile, horizon].
    y_true_rep = np.repeat(y_true[:, None], len(levels), axis=1)

    error = y_pred - y_true_rep
    # Indicator(y_true <= y_pred) minus the level gives the pinball weight.
    weight = (y_true_rep <= y_pred) - levels[:, None]
    quantile_losses = 2 * np.sum(np.abs(error * weight), axis=-1)  # [series, quantile]

    # NOTE(review): the normaliser sums the *repeated* observations, so every
    # |y_true| is counted once per quantile level. Kept as-is so previously
    # reported numbers stay comparable -- confirm this scaling is intended.
    denom = np.sum(np.abs(y_true_rep))

    return quantile_losses.sum(0) / denom  # [quantile]

def custom_sort_key(s):
    """Sort key: the integer held in the second underscore-separated field of ``s``."""
    # Only the field at position 1 matters, so there is no need to split further.
    second_field = s.split('_', 2)[1]
    return int(second_field)

In [None]:
# create a function

def tft_eval(dataset_name,dataset_type,forecast_horizon, batch_size):
  """Train a Temporal Fusion Transformer on one dataset, forecast it and score the forecast.

  The input ``<dataset_name>.csv`` is read in wide layout: the first column is
  the time/date column and every other column is one series. The frame is
  melted to long format, a TFT is trained, the last ``forecast_horizon`` steps
  of every series are predicted, and SMAPE, MASE and a quantile-loss based
  CRPS are computed on the 0.1 / 0.5 / 0.9 quantile forecasts.

  Args:
    dataset_name: File stem of the dataset; also used to pick hard-coded
      hyperparameters or to locate ``pytorch_lightning_optuna_<dataset_name>.pkl``.
    dataset_type: ``"sim"`` or ``"calls911"``.
    forecast_horizon: Number of steps to predict (``max_prediction_length``).
    batch_size: Batch size of both dataloaders; the same number is also passed
      to the trainer as ``limit_train_batches``.

  Returns:
    None. Results are printed and written to disk.

  Raises:
    ValueError: If ``dataset_type`` is neither ``"sim"`` nor ``"calls911"``.

  Files written:
    ``predicted/<dataset_name>_tft_{0.1,0.5,0.9,mean}.csv``,
    ``predicted/<dataset_name>_tft_cs.pickle``,
    ``errors/all_smape_errors_<dataset_name>_tft.txt``,
    ``errors/all_mase_errors_<dataset_name>_tft.txt``,
    ``errors/mean_median_<dataset_name>_tft.txt`` (one metric per line).
  """
  import os  # function-scope import keeps this cell independent of the import cell

  if dataset_type not in ("sim", "calls911"):
    raise ValueError(
        f"Unsupported dataset_type {dataset_type!r}; expected 'sim' or 'calls911'")

  # Create the output folders before training so a missing directory cannot
  # throw away a finished run.
  predicted_directory = 'predicted/'
  errors_directory = 'errors/'
  os.makedirs(predicted_directory, exist_ok=True)
  os.makedirs(errors_directory, exist_ok=True)

  pl.seed_everything(42)

  data = pd.read_csv(dataset_name+'.csv')
  data["time_idx"] = data.index.to_list()

  if dataset_type == "sim":
    d_t = 'time'
    data = data.rename(columns={data.columns[0]: 'time'})
    data_melted = pd.melt(data, id_vars=[d_t,'time_idx'])
    n_rows = len(data.index)
    n_series_total = len(data.columns) - 2  # all columns except time and time_idx
    n_first_group = int(n_series_total / 2)
    n_second_group = n_series_total - n_first_group
    print(len(data_melted.index))
    print(n_rows)
    print(len(data.columns))
    print(n_first_group)
    print(n_second_group)
    # The first half of the series columns is labelled '0', the rest '1'.
    # This relies on pd.melt emitting the series in column order.
    data_melted['tnc'] = ['0'] * n_rows * n_first_group + ['1'] * n_rows * n_second_group
  else:  # dataset_type == "calls911"
    d_t = 'date'
    data_melted = pd.melt(data, id_vars=[d_t,'time_idx'])
    treated = ["ABINGTON",  "AMBLER",  "CHELTENHAM",  "COLLEGEVILLE",  "CONSHOHOCKEN",
                  "EAST GREENVILLE",  "EAST NORRITON",  "FRANCONIA" , "GREEN LANE", "HATFIELD TOWNSHIP",
                  "HORSHAM" , "JENKINTOWN",  "LANSDALE",  "LIMERICK",  "LOWER GWYNEDD",
                  "LOWER MERION",  "LOWER MORELAND",  "LOWER POTTSGROVE",  "LOWER PROVIDENCE",  "LOWER SALFORD",
                  "MARLBOROUGH",  "MONTGOMERY",  "NARBERTH",  "PENNSBURG",  "PERKIOMEN",
                  "PLYMOUTH",  "POTTSTOWN",  "RED HILL",  "ROCKLEDGE",  "ROYERSFORD",
                  "SCHWENKSVILLE",  "SKIPPACK",  "SOUDERTON",  "TELFORD",  "TOWAMENCIN",
                  "UPPER DUBLIN",  "UPPER FREDERICK",  "UPPER GWYNEDD",  "UPPER HANOVER",  "UPPER MERION",
                  "UPPER MORELAND",  "UPPER POTTSGROVE",  "UPPER PROVIDENCE",  "UPPER SALFORD",  "WEST CONSHOHOCKEN",
                  "WEST NORRITON",  "WEST POTTSGROVE",  "WHITEMARSH",  "WHITPAIN",  "WORCESTER"]
    control = ["BRIDGEPORT", "BRYN ATHYN", "DOUGLASS", "HATBORO", "HATFIELD BORO",
                      "LOWER FREDERICK", "NEW HANOVER", "NORRISTOWN", "NORTH WALES", "SALFORD",
                      "SPRINGFIELD", "TRAPPE"]
    # '1' marks a treated series, '0' everything else.
    data_melted['tnc'] = ['1' if col in treated else '0' for col in data_melted.variable]

  # define the dataset, i.e. add metadata to pandas dataframe for the model to understand it
  max_encoder_length = 15
  max_prediction_length = forecast_horizon
  # Hold back two horizons: one for validation, one for the final forecast.
  training_cutoff = data_melted["time_idx"].max() - max_prediction_length*2

  training = TimeSeriesDataSet(
      data_melted[data_melted.time_idx <= training_cutoff],
      group_ids=["tnc","variable"],  # column name(s) for timeseries IDs
      target= "value",  # column name of target to predict
      time_idx= "time_idx",  # column name of time of observation
      max_encoder_length=max_encoder_length,  # how much history to use
      max_prediction_length=max_prediction_length,  # how far to predict into future
      static_categoricals=["tnc","variable"],
      time_varying_known_reals=["time_idx"],
      time_varying_unknown_reals=["value"],
      add_relative_time_idx=True,
      add_target_scales=True,
      add_encoder_length=True,
      target_normalizer=GroupNormalizer(
          groups=["tnc","variable"], transformation="relu"),
  )

  # create validation dataset using the same normalization techniques as for the training dataset
  validation_cutoff = data_melted["time_idx"].max() - max_prediction_length
  validation = TimeSeriesDataSet.from_dataset(
      training, data_melted[data_melted.time_idx <= validation_cutoff], stop_randomization=True)
  # convert datasets to dataloaders for training
  train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=2)
  val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=2)

  # Early Stopping
  MIN_DELTA  = 1e-4
  PATIENCE = 10

  # PL Trainer
  MAX_EPOCHS = 15
  # NOTE(review): the per-epoch batch cap is set to the batch size itself --
  # confirm that this coupling is intended.
  LIMIT_TRAIN_BATCHES = batch_size

  REDUCE_ON_PLATEAU_PATIENCE=5

  # Hyperparameters fixed by hand for a few datasets; every other dataset
  # falls back to its saved Optuna study.
  hard_coded_params = {
      'sim_101_60_nl_he': {'gradient_clip_val': 0.4225449658914148,
                           'hidden_size': 38,
                           'dropout': 0.14244586345854338,
                           'hidden_continuous_size': 35,
                           'attention_head_size': 3,
                           'learning_rate': 0.032221938240178535},
      'sim_101_222_nl_ho': {'gradient_clip_val': 0.11974316071920273,
                            'hidden_size': 45,
                            'dropout': 0.15032399710839242,
                            'hidden_continuous_size': 32,
                            'attention_head_size': 4,
                            'learning_rate': 0.01918316565499633},
      'sim_500_222_l_he': {'gradient_clip_val': 0.749550431700117,
                           'hidden_size': 55,
                           'dropout': 0.3499898055165465,
                           'hidden_continuous_size': 52,
                           'attention_head_size': 4,
                           'learning_rate': 0.03637234644558992},
  }
  if dataset_name in hard_coded_params:
    best_params = hard_coded_params[dataset_name]
  else:
    # SECURITY: pickle.load executes arbitrary code -- only load study files
    # produced by this project.
    with open("pytorch_lightning_optuna_"+dataset_name+".pkl", "rb") as fin:
      study = pickle.load(fin)
    # show best hyperparameters
    print(study.best_trial.params)
    best_params = study.best_trial.params

  # PL Trainer
  GRADIENT_CLIP_VAL=best_params['gradient_clip_val']
  # Fusion Transformer
  LR = best_params['learning_rate']
  HIDDEN_SIZE = best_params['hidden_size']
  DROPOUT = best_params['dropout']
  ATTENTION_HEAD_SIZE = best_params['attention_head_size']
  HIDDEN_CONTINUOUS_SIZE = best_params['hidden_continuous_size']

  early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=MIN_DELTA, patience=PATIENCE, verbose=False, mode="min")
  lr_logger = LearningRateMonitor()  # log the learning rate
  logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

  trainer = pl.Trainer(
      max_epochs=MAX_EPOCHS,
      accelerator="cpu",
      enable_model_summary=True,
      gradient_clip_val=GRADIENT_CLIP_VAL,
      limit_train_batches=LIMIT_TRAIN_BATCHES,
      callbacks=[lr_logger, early_stop_callback],
      logger=logger,
  )

  tft = TemporalFusionTransformer.from_dataset(
      training,
      learning_rate=LR,
      hidden_size=HIDDEN_SIZE,
      attention_head_size=ATTENTION_HEAD_SIZE,
      dropout=DROPOUT,
      hidden_continuous_size=HIDDEN_CONTINUOUS_SIZE,
      loss=QuantileLoss(),
      optimizer="Ranger",
      reduce_on_plateau_patience=REDUCE_ON_PLATEAU_PATIENCE,
  )
  print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

  # fit network
  trainer.fit(
      tft,
      train_dataloaders=train_dataloader,
      val_dataloaders=val_dataloader,
  )

  # Reload the checkpoint reported by the trainer's checkpoint callback.
  # NOTE(review): no ModelCheckpoint(monitor="val_loss") is passed in
  # `callbacks`, so this is presumably the most recent checkpoint rather than
  # the one with the lowest validation loss -- confirm.
  best_model_path = trainer.checkpoint_callback.best_model_path
  print(best_model_path)
  best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)

  # ---- forecast every series ----
  data = data.drop("time_idx", axis=1)
  # With time_idx dropped, column 0 is the time/date column and ALL remaining
  # columns are series. The same index is used for the predictions and for the
  # ground truth so that the two always line up.
  series_names = data.columns[1:]

  predicted_result_1 = {}
  predicted_result_5 = {}
  predicted_result_9 = {}
  predicted_result = {}

  # Rows after the training cutoff plus one encoder window of history,
  # selected once and then filtered per series with a mask on the same frame.
  prediction_input = data_melted[data_melted.time_idx > training_cutoff - max_encoder_length]
  for i in series_names:
    raw_prediction_treated = best_tft.predict(
        prediction_input[prediction_input.variable == i],
        mode="quantiles"
    )
    # Output columns 1 / 3 / 5 are stored as the 0.1 / 0.5 / 0.9 quantiles --
    # assumes the default quantile order of QuantileLoss(); TODO confirm.
    predicted_result_1[i]=raw_prediction_treated[0][:,1].tolist()
    predicted_result_5[i]=raw_prediction_treated[0][:,3].tolist()
    predicted_result_9[i]=raw_prediction_treated[0][:,5].tolist()
    # "mean" is the average across all predicted quantiles.
    predicted_result[i] = raw_prediction_treated[0].mean(axis=1).tolist()

  y_pred_1 = pd.DataFrame(predicted_result_1).T
  y_pred_1.to_csv(predicted_directory+dataset_name+"_tft_0.1.csv", header=False, index=False)
  y_pred_5 = pd.DataFrame(predicted_result_5).T
  y_pred_5.to_csv(predicted_directory+dataset_name+"_tft_0.5.csv", header=False, index=False)
  y_pred_9 = pd.DataFrame(predicted_result_9).T
  y_pred_9.to_csv(predicted_directory+dataset_name+"_tft_0.9.csv", header=False, index=False)
  y_pred_mean = pd.DataFrame(predicted_result).T
  y_pred_mean.to_csv(predicted_directory+dataset_name+"_tft_mean.csv", header=False, index=False)

  # ---- ground truth for the error metrics ----
  # predicted treated units compared to actual counterfactual; predicted control units compared to actual control units
  length_of_series = len(data.index)
  if dataset_type == "calls911":
    # Only the control series have an observed ground truth to compare with.
    names_index = pd.Index(series_names, name='names')
    y_pred_1_for_errors = y_pred_1.set_axis(names_index, axis=0).loc[control,:]
    y_pred_5_for_errors = y_pred_5.set_axis(names_index, axis=0).loc[control,:]
    y_pred_9_for_errors = y_pred_9.set_axis(names_index, axis=0).loc[control,:]
    data_row = data.loc[:,control]
  else:  # "sim"
    y_pred_1_for_errors = y_pred_1
    y_pred_5_for_errors = y_pred_5
    y_pred_9_for_errors = y_pred_9
    # Copy: the frame is relabelled and partly overwritten below.
    data_row = data.iloc[:,1:].copy()
    # Shorten the column names so they can be matched against the series_id
    # values of the counterfactual file (literal replacements, original order).
    data_row.columns = (
        data_row.columns
        .str.replace('_linear', '_l', regex=False)
        .str.replace('_nonlinear', '_nl', regex=False)
        .str.replace('_heterogeneous', '_he', regex=False)
        .str.replace('_homogeneous', '_ho', regex=False)
    )

    data_true_counterfactual = pd.read_csv(dataset_name+'_true_counterfactual.csv')
    # Shift the counterfactual's time values onto the row index of `data`.
    data_true_counterfactual['time'] = data_true_counterfactual['time']+length_of_series-forecast_horizon-1
    data_true_counterfactual = data_true_counterfactual.pivot(index='time', columns='series_id')['value']
    data_true_counterfactual = data_true_counterfactual.loc[:,sorted(data_true_counterfactual.columns, key=custom_sort_key)]
    # Overwrite the observed values with the true counterfactual where available.
    data_row.loc[data_true_counterfactual.index, data_true_counterfactual.columns] = data_true_counterfactual

  data_row_A = data_row.iloc[length_of_series-forecast_horizon:, :].T  # forecast window, [series, horizon]
  data_row_B = data_row.iloc[:length_of_series-forecast_horizon, :].T  # history, [series, time]
  actual_future = data_row_A.to_numpy(dtype=float)

  errors_file_name_mean_median = 'mean_median_' + dataset_name + '_tft'
  SMAPE_file_name_all_errors = 'all_smape_errors_' + dataset_name + '_tft'
  MASE_file_name_all_errors = 'all_mase_errors_' + dataset_name + '_tft'

  errors_file_full_name_mean_median = errors_directory + errors_file_name_mean_median+'.txt'
  SMAPE_file_full_name_all_errors = errors_directory + SMAPE_file_name_all_errors
  MASE_file_full_name_all_errors = errors_directory + MASE_file_name_all_errors

  # ---- SMAPE on the median forecast ----
  time_series_wise_SMAPE = 2 * np.abs(y_pred_5_for_errors - actual_future) /\
      (np.abs(y_pred_5_for_errors) + np.abs(actual_future))
  SMAPEPerSeries = np.mean(time_series_wise_SMAPE, axis=1)
  mean_SMAPE = np.mean(SMAPEPerSeries)
  mean_SMAPE_str = f"mean_SMAPE:{mean_SMAPE}"
  print(mean_SMAPE_str)
  np.savetxt(SMAPE_file_full_name_all_errors+'.txt', SMAPEPerSeries, delimiter=",", fmt='%f')

  # ---- MASE on the median forecast ----
  # Scale = mean absolute difference of the history at lag `forecast_horizon`.
  history = data_row_B.to_numpy(dtype=float)
  median_forecast = y_pred_5_for_errors.to_numpy(dtype=float)
  naive_scale = np.mean(
      np.abs(history[:, forecast_horizon:] - history[:, :-forecast_horizon]), axis=1)
  mase_vector = np.mean(
      np.abs(actual_future - median_forecast) / naive_scale[:, None], axis=1)

  mean_MASE = np.mean(mase_vector)
  mean_MASE_str = f"mean_MASE:{mean_MASE}"
  print(mean_MASE_str)

  np.savetxt(MASE_file_full_name_all_errors+'.txt', mase_vector, delimiter=",", fmt='%f')

  # Summary file: one metric per line. It is still written incrementally so
  # that SMAPE/MASE survive if the CRPS step fails.
  with open(errors_file_full_name_mean_median, 'w') as f:
    f.write(mean_SMAPE_str)
  with open(errors_file_full_name_mean_median, 'a') as f:
    f.write('\n' + mean_MASE_str)

  # ---- CRPS ----
  quantiles = ['0.1', '0.5', '0.9']
  quantile_levels = [float(q) for q in quantiles]
  # [quantile, series, horizon]
  stacked_quantile_forecasts = np.stack([
      y_pred_1_for_errors.to_numpy(dtype=float),
      y_pred_5_for_errors.to_numpy(dtype=float),
      y_pred_9_for_errors.to_numpy(dtype=float),
  ])
  cs = CubicSpline(quantile_levels, stacked_quantile_forecasts, bc_type='natural')
  # The spline is evaluated at its own knots, then reordered to [series, quantile, horizon].
  crps_y_pred = np.transpose(cs(quantile_levels), (1, 0, 2))

  # Calculating the CRPS
  crps_qs = mean_weighted_quantile_loss(crps_y_pred, actual_future, quantiles)

  mean_CRPS = np.mean(crps_qs)

  mean_CRPS_str = f"mean_CRPS:{mean_CRPS}"
  all_CRPS_qs = f"CRPS for different quantiles:{crps_qs}"

  print(mean_CRPS_str)
  print(all_CRPS_qs)

  CRPS_file_name_cs = dataset_name + '_tft'+ '_cs'
  CRPS_file_cs = predicted_directory + CRPS_file_name_cs

  # Writing the CRPS results to file
  with open(errors_file_full_name_mean_median, 'a') as f:
    f.write('\n' + mean_CRPS_str + '\n')
  with open(CRPS_file_cs+'.pickle', 'wb') as f:
    pickle.dump(crps_y_pred, f)

In [None]:
#### before loop
dataset_name = 'calls911_benchmarks'
dataset_type = 'calls911'
forecast_horizon=7
tft_eval(dataset_name,dataset_type,forecast_horizon, 31)

In [None]:
# 'sim_10_60_l_he', 'sim_10_60_l_ho',\
#                      'sim_10_60_nl_he', 'sim_10_60_nl_ho',\
#                      'sim_10_222_l_he', 'sim_10_222_l_ho',\
#                      'sim_10_222_nl_he', 'sim_10_222_nl_ho',\
#                      'sim_101_60_l_he', 'sim_101_60_l_ho',\
#                      'sim_101_60_nl_he', 'sim_101_60_nl_ho',\
# Simulated datasets evaluated in this run (earlier ones are commented out above).
dataset_type = 'sim'
forecast_horizon = 12
dataset_name_test = [
    'sim_101_222_l_he', 'sim_101_222_l_ho',
    'sim_101_222_nl_he', 'sim_101_222_nl_ho',
    'sim_500_60_l_he', 'sim_500_60_l_ho',
    'sim_500_60_nl_he', 'sim_500_60_nl_ho',
    'sim_500_222_l_he', 'sim_500_222_l_ho',
    'sim_500_222_nl_he', 'sim_500_222_nl_ho',
]
# [31]*12 +
batch_size_test = [124] * 12

# One training / evaluation run per (dataset, batch size) pair.
for sim_name, sim_batch_size in zip(dataset_name_test, batch_size_test):
  print(sim_name, sim_batch_size)
  tft_eval(sim_name, dataset_type, forecast_horizon, sim_batch_size)

In [None]:
# Hyperparameter search with Optuna for the sim_10_60_l_he dataset.
#
# NOTE(review): this cell reads `train_dataloader1`, `val_dataloader1` and
# `batch_size`, none of which is defined by any cell above it in this
# notebook, so it fails with a NameError on Restart & Run All. The next cell
# builds `train_dataloader2` / `val_dataloader2` for the same dataset and
# writes the same pickle file -- confirm whether this cell is a stale
# duplicate.
pl.seed_everything(42)
# Earlier, disabled trainer setup kept for reference:
# early_stop = EarlyStopping(monitor="val_acc", mode="max")
# checkpoint = ModelCheckpoint(monitor="val_loss")
# # early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
# # lr_logger = LearningRateMonitor()  # log the learning rate
# logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

# trainer = pl.Trainer(
#     # max_epochs=1000,
#     accelerator="cpu",
#     enable_model_summary=True,
#     # gradient_clip_val=0.1,
#     limit_train_batches=batch_size,  # comment in for training, running validation every 30 batches
#     # fast_dev_run=True,  # comment in to check that network or dataset has no serious bugs
#     callbacks=[early_stop, checkpoint],
#     logger=logger,
# )

# create study: 15 trials of up to 30 epochs each over the ranges below
study1 = optimize_hyperparameters(
    train_dataloader1,
    val_dataloader1,
    model_path="optuna_test",
    n_trials=15,
    max_epochs=30,
    gradient_clip_val_range=(0.01, 0.5),
    hidden_size_range=(12, 32),
    hidden_continuous_size_range=(12, 32),
    attention_head_size_range=(1, 3),
    learning_rate_range=(0.01, 0.5),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=batch_size),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
)

# save study results - also we can resume tuning at a later point in time
# (this is the file name pattern tft_eval loads: pytorch_lightning_optuna_<dataset_name>.pkl)
with open("pytorch_lightning_optuna_sim_10_60_l_he.pkl", "wb") as fout:
    pickle.dump(study1, fout)

# show best hyperparameters
print(study1.best_trial.params)

# second copy of the same study, written to an absolute path
# NOTE(review): "/drive/MyDrive" is a hard-coded absolute location (looks like
# a mounted Google Drive -- confirm); the open() fails wherever it is missing.
with open("/drive/MyDrive/pytorch_lightning_optuna_sim_10_60_l_he.pkl", "wb") as fout:
    pickle.dump(study1, fout)


In [None]:
# ---- Optuna hyperparameter search: sim_10_60_l_he ----
# Long-format training table, ordered by series and then by time.
data2 = pd.read_csv('sim_10_60_l_he_train.csv').sort_values(
    by=['series_id', 'time'], ignore_index=True)
# NOTE(review): time_idx is the global row number of the sorted table, not a
# per-series time index, so the cutoffs below trim only the tail of the table
# (the last series_id) rather than the tail of every series -- confirm intent.
data2["time_idx"] = data2.index.to_list()
data2 = data2[['time', 'time_idx', 'series_id', 'value', 'c_t']].rename(
    columns={'series_id': 'variable'})
# '1' for rows whose c_t is 'treated', '0' otherwise.
data2['tnc'] = ['1' if group_label == 'treated' else '0' for group_label in data2['c_t']]
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# define the dataset, i.e. add metadata to pandas dataframe for the model to understand it
max_encoder_length = 15
max_prediction_length = 12

training2 = TimeSeriesDataSet(
    data2_processed.loc[
        data2_processed["time_idx"] <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    group_ids=["tnc", "variable"],  # column name(s) for timeseries IDs
    target="value",  # column name of target to predict
    time_idx="time_idx",  # column name of time of observation
    max_encoder_length=max_encoder_length,  # how much history to use
    max_prediction_length=max_prediction_length,  # how far to predict into future
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    target_normalizer=GroupNormalizer(
        groups=["tnc", "variable"], transformation="relu"),
)

# validation set: same encoders/normalisation as training, one horizon more data
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed.loc[
        data2_processed["time_idx"] <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# convert datasets to dataloaders for training
batch_size = 31
train_dataloader2 = training2.to_dataloader(train=True, batch_size=batch_size, num_workers=2)
val_dataloader2 = validation2.to_dataloader(train=False, batch_size=batch_size, num_workers=2)

pl.seed_everything(42)

# create study: 15 trials of up to 30 epochs each
study1 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=15,
    max_epochs=30,
    gradient_clip_val_range=(0.01, 0.5),
    hidden_size_range=(12, 32),
    hidden_continuous_size_range=(12, 32),
    attention_head_size_range=(1, 3),
    learning_rate_range=(0.01, 0.5),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=batch_size),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # learning rate is tuned by Optuna, not the built-in finder
)

# persist the study so tuning can be resumed or reused later
with open("pytorch_lightning_optuna_sim_10_60_l_he.pkl", "wb") as fout:
    pickle.dump(study1, fout)

# show best hyperparameters
print(study1.best_trial.params)

In [None]:
# ---- Optuna hyperparameter search: sim_10_60_l_ho ----
# Long-format training table, ordered by series and then by time.
data2 = pd.read_csv('sim_10_60_l_ho_train.csv').sort_values(
    by=['series_id', 'time'], ignore_index=True)
# NOTE(review): time_idx is the global row number of the sorted table, not a
# per-series time index, so the cutoffs below trim only the tail of the table
# (the last series_id) rather than the tail of every series -- confirm intent.
data2["time_idx"] = data2.index.to_list()
data2 = data2[['time', 'time_idx', 'series_id', 'value', 'c_t']].rename(
    columns={'series_id': 'variable'})
# '1' for rows whose c_t is 'treated', '0' otherwise.
data2['tnc'] = ['1' if group_label == 'treated' else '0' for group_label in data2['c_t']]
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# define the dataset, i.e. add metadata to pandas dataframe for the model to understand it
max_encoder_length = 15
max_prediction_length = 12

training2 = TimeSeriesDataSet(
    data2_processed.loc[
        data2_processed["time_idx"] <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    group_ids=["tnc", "variable"],  # column name(s) for timeseries IDs
    target="value",  # column name of target to predict
    time_idx="time_idx",  # column name of time of observation
    max_encoder_length=max_encoder_length,  # how much history to use
    max_prediction_length=max_prediction_length,  # how far to predict into future
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    target_normalizer=GroupNormalizer(
        groups=["tnc", "variable"], transformation="relu"),
)

# validation set: same encoders/normalisation as training, one horizon more data
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed.loc[
        data2_processed["time_idx"] <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# convert datasets to dataloaders for training
batch_size = 31
train_dataloader2 = training2.to_dataloader(train=True, batch_size=batch_size, num_workers=2)
val_dataloader2 = validation2.to_dataloader(train=False, batch_size=batch_size, num_workers=2)

pl.seed_everything(42)

# create study: 15 trials of up to 30 epochs each
study2 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=15,
    max_epochs=30,
    gradient_clip_val_range=(0.01, 0.5),
    hidden_size_range=(12, 32),
    hidden_continuous_size_range=(12, 32),
    attention_head_size_range=(1, 3),
    learning_rate_range=(0.01, 0.5),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=batch_size),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # learning rate is tuned by Optuna, not the built-in finder
)

# persist the study so tuning can be resumed or reused later
with open("pytorch_lightning_optuna_sim_10_60_l_ho.pkl", "wb") as fout:
    pickle.dump(study2, fout)

# show best hyperparameters
print(study2.best_trial.params)

In [None]:
# ---- Optuna hyperparameter search: sim_10_60_nl_he ----
# Long-format training table, ordered by series and then by time.
data2 = pd.read_csv('sim_10_60_nl_he_train.csv').sort_values(
    by=['series_id', 'time'], ignore_index=True)
# NOTE(review): time_idx is the global row number of the sorted table, not a
# per-series time index, so the cutoffs below trim only the tail of the table
# (the last series_id) rather than the tail of every series -- confirm intent.
data2["time_idx"] = data2.index.to_list()
data2 = data2[['time', 'time_idx', 'series_id', 'value', 'c_t']].rename(
    columns={'series_id': 'variable'})
# '1' for rows whose c_t is 'treated', '0' otherwise.
data2['tnc'] = ['1' if group_label == 'treated' else '0' for group_label in data2['c_t']]
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# define the dataset, i.e. add metadata to pandas dataframe for the model to understand it
max_encoder_length = 15
max_prediction_length = 12

training2 = TimeSeriesDataSet(
    data2_processed.loc[
        data2_processed["time_idx"] <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    group_ids=["tnc", "variable"],  # column name(s) for timeseries IDs
    target="value",  # column name of target to predict
    time_idx="time_idx",  # column name of time of observation
    max_encoder_length=max_encoder_length,  # how much history to use
    max_prediction_length=max_prediction_length,  # how far to predict into future
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    target_normalizer=GroupNormalizer(
        groups=["tnc", "variable"], transformation="relu"),
)

# validation set: same encoders/normalisation as training, one horizon more data
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed.loc[
        data2_processed["time_idx"] <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# convert datasets to dataloaders for training
batch_size = 31
train_dataloader2 = training2.to_dataloader(train=True, batch_size=batch_size, num_workers=2)
val_dataloader2 = validation2.to_dataloader(train=False, batch_size=batch_size, num_workers=2)

pl.seed_everything(42)

# create study: 15 trials of up to 30 epochs each
study3 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=15,
    max_epochs=30,
    gradient_clip_val_range=(0.01, 0.5),
    hidden_size_range=(12, 32),
    hidden_continuous_size_range=(12, 32),
    attention_head_size_range=(1, 3),
    learning_rate_range=(0.01, 0.5),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=batch_size),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # learning rate is tuned by Optuna, not the built-in finder
)

# persist the study so tuning can be resumed or reused later
with open("pytorch_lightning_optuna_sim_10_60_nl_he.pkl", "wb") as fout:
    pickle.dump(study3, fout)

# show best hyperparameters
print(study3.best_trial.params)

# ---- Optuna hyperparameter search: sim_10_60_nl_ho ----
# Same pipeline as above, applied to the next dataset.
data2 = pd.read_csv('sim_10_60_nl_ho_train.csv').sort_values(
    by=['series_id', 'time'], ignore_index=True)
# NOTE(review): same global-row-number time_idx as above -- confirm intent.
data2["time_idx"] = data2.index.to_list()
data2 = data2[['time', 'time_idx', 'series_id', 'value', 'c_t']].rename(
    columns={'series_id': 'variable'})
data2['tnc'] = ['1' if group_label == 'treated' else '0' for group_label in data2['c_t']]
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# define the dataset, i.e. add metadata to pandas dataframe for the model to understand it
max_encoder_length = 15
max_prediction_length = 12

training2 = TimeSeriesDataSet(
    data2_processed.loc[
        data2_processed["time_idx"] <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    group_ids=["tnc", "variable"],  # column name(s) for timeseries IDs
    target="value",  # column name of target to predict
    time_idx="time_idx",  # column name of time of observation
    max_encoder_length=max_encoder_length,  # how much history to use
    max_prediction_length=max_prediction_length,  # how far to predict into future
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    target_normalizer=GroupNormalizer(
        groups=["tnc", "variable"], transformation="relu"),
)

# validation set: same encoders/normalisation as training, one horizon more data
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed.loc[
        data2_processed["time_idx"] <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# convert datasets to dataloaders for training
batch_size = 31
train_dataloader2 = training2.to_dataloader(train=True, batch_size=batch_size, num_workers=2)
val_dataloader2 = validation2.to_dataloader(train=False, batch_size=batch_size, num_workers=2)

pl.seed_everything(42)

# create study: 15 trials of up to 30 epochs each
study4 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=15,
    max_epochs=30,
    gradient_clip_val_range=(0.01, 0.5),
    hidden_size_range=(12, 32),
    hidden_continuous_size_range=(12, 32),
    attention_head_size_range=(1, 3),
    learning_rate_range=(0.01, 0.5),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=batch_size),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # learning rate is tuned by Optuna, not the built-in finder
)

# persist the study so tuning can be resumed or reused later
with open("pytorch_lightning_optuna_sim_10_60_nl_ho.pkl", "wb") as fout:
    pickle.dump(study4, fout)

# show best hyperparameters
print(study4.best_trial.params)

In [None]:
from pytorch_forecasting.data import GroupNormalizer
import pickle
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# Optuna search over TFT hyperparameters for the sim_10_222_l_he training data.
# Read the CSV, order rows by series then time, and build the model columns.
# NOTE(review): time_idx is the row position in the sorted frame, so it keeps
# increasing across series instead of restarting per series; the "<=" cutoffs
# below therefore drop only the last rows of the whole frame. Confirm that is
# the intended train/validation split.
data2 = (
    pd.read_csv('sim_10_222_l_he_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    # tnc: '1' where c_t equals 'treated', '0' for every other value
    .assign(tnc=lambda frame: ['1' if label == 'treated' else '0' for label in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Window sizes in time steps: encoder history and forecast horizon.
max_encoder_length, max_prediction_length = 15, 12

# Training set: rows whose time_idx is at most max(time_idx) minus two horizons.
training2 = TimeSeriesDataSet(
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",
    target="value",
    group_ids=["tnc", "variable"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    # target is normalised per (tnc, variable) group
    target_normalizer=GroupNormalizer(
        transformation="relu", groups=["tnc", "variable"]
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation set: derived from training2 so it shares its settings; it covers
# rows whose time_idx is at most max(time_idx) minus one horizon.
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
batch_size = 31
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=2)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=2)

pl.seed_everything(42)

# Run the study. The learning rate is left to Optuna (finder disabled).
# NOTE(review): limit_train_batches is given the batch_size value; confirm that
# reusing that number for the trainer's batch limit is intentional.
# NOTE(review): "optuna_test" is also the model_path of the other tuning cells
# in this notebook; confirm their outputs are not meant to be kept apart.
study5 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=15,
    max_epochs=30,
    hidden_size_range=(30, 128),
    hidden_continuous_size_range=(30, 128),
    attention_head_size_range=(1, 3),
    dropout_range=(0.1, 0.3),
    learning_rate_range=(0.01, 0.5),
    gradient_clip_val_range=(0.01, 0.5),
    use_learning_rate_finder=False,
    reduce_on_plateau_patience=4,
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Persist the study so it can be inspected or resumed later.
with open("pytorch_lightning_optuna_sim_10_222_l_he.pkl", "wb") as fout:
    pickle.dump(study5, fout)

# Best hyperparameters found by the study.
print(study5.best_params)

In [None]:
# Optuna search over TFT hyperparameters for the sim_10_222_l_ho training data.
# Read the CSV, order rows by series then time, and build the model columns.
# NOTE(review): time_idx is the row position in the sorted frame, so it keeps
# increasing across series instead of restarting per series; the "<=" cutoffs
# below therefore drop only the last rows of the whole frame. Confirm that is
# the intended train/validation split.
data2 = (
    pd.read_csv('sim_10_222_l_ho_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    # tnc: '1' where c_t equals 'treated', '0' for every other value
    .assign(tnc=lambda frame: ['1' if label == 'treated' else '0' for label in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Window sizes in time steps: encoder history and forecast horizon.
max_encoder_length, max_prediction_length = 15, 12

# Training set: rows whose time_idx is at most max(time_idx) minus two horizons.
training2 = TimeSeriesDataSet(
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",
    target="value",
    group_ids=["tnc", "variable"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    # target is normalised per (tnc, variable) group
    target_normalizer=GroupNormalizer(
        transformation="relu", groups=["tnc", "variable"]
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation set: derived from training2 so it shares its settings; it covers
# rows whose time_idx is at most max(time_idx) minus one horizon.
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
batch_size = 31
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=2)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=2)

pl.seed_everything(42)

# Run the study. The learning rate is left to Optuna (finder disabled).
# NOTE(review): limit_train_batches is given the batch_size value; confirm that
# reusing that number for the trainer's batch limit is intentional.
# NOTE(review): "optuna_test" is also the model_path of the other tuning cells
# in this notebook; confirm their outputs are not meant to be kept apart.
study6 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=15,
    max_epochs=30,
    hidden_size_range=(30, 64),
    hidden_continuous_size_range=(30, 64),
    attention_head_size_range=(1, 3),
    dropout_range=(0.1, 0.3),
    learning_rate_range=(0.01, 0.5),
    gradient_clip_val_range=(0.01, 0.5),
    use_learning_rate_finder=False,
    reduce_on_plateau_patience=4,
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Persist the study so it can be inspected or resumed later.
with open("pytorch_lightning_optuna_sim_10_222_l_ho.pkl", "wb") as fout:
    pickle.dump(study6, fout)

# Best hyperparameters found by the study.
print(study6.best_params)

In [None]:
# Optuna search over TFT hyperparameters for the sim_10_222_nl_he training data.
# Read the CSV, order rows by series then time, and build the model columns.
# NOTE(review): time_idx is the row position in the sorted frame, so it keeps
# increasing across series instead of restarting per series; the "<=" cutoffs
# below therefore drop only the last rows of the whole frame. Confirm that is
# the intended train/validation split.
data2 = (
    pd.read_csv('sim_10_222_nl_he_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    # tnc: '1' where c_t equals 'treated', '0' for every other value
    .assign(tnc=lambda frame: ['1' if label == 'treated' else '0' for label in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Window sizes in time steps: encoder history and forecast horizon.
max_encoder_length, max_prediction_length = 15, 12

# Training set: rows whose time_idx is at most max(time_idx) minus two horizons.
training2 = TimeSeriesDataSet(
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",
    target="value",
    group_ids=["tnc", "variable"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    # target is normalised per (tnc, variable) group
    target_normalizer=GroupNormalizer(
        transformation="relu", groups=["tnc", "variable"]
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation set: derived from training2 so it shares its settings; it covers
# rows whose time_idx is at most max(time_idx) minus one horizon.
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
batch_size = 31
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=2)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=2)

pl.seed_everything(42)

# Run the study. The learning rate is left to Optuna (finder disabled).
# NOTE(review): limit_train_batches is given the batch_size value; confirm that
# reusing that number for the trainer's batch limit is intentional.
# NOTE(review): "optuna_test" is also the model_path of the other tuning cells
# in this notebook; confirm their outputs are not meant to be kept apart.
study7 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=15,
    max_epochs=30,
    hidden_size_range=(30, 64),
    hidden_continuous_size_range=(30, 64),
    attention_head_size_range=(1, 3),
    dropout_range=(0.1, 0.3),
    learning_rate_range=(0.01, 0.5),
    gradient_clip_val_range=(0.01, 0.5),
    use_learning_rate_finder=False,
    reduce_on_plateau_patience=4,
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Persist the study so it can be inspected or resumed later.
with open("pytorch_lightning_optuna_sim_10_222_nl_he.pkl", "wb") as fout:
    pickle.dump(study7, fout)

# Best hyperparameters found by the study.
print(study7.best_params)

In [None]:
# Optuna search over TFT hyperparameters for the sim_10_222_nl_ho training data.
# Read the CSV, order rows by series then time, and build the model columns.
# NOTE(review): time_idx is the row position in the sorted frame, so it keeps
# increasing across series instead of restarting per series; the "<=" cutoffs
# below therefore drop only the last rows of the whole frame. Confirm that is
# the intended train/validation split.
data2 = (
    pd.read_csv('sim_10_222_nl_ho_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    # tnc: '1' where c_t equals 'treated', '0' for every other value
    .assign(tnc=lambda frame: ['1' if label == 'treated' else '0' for label in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Window sizes in time steps: encoder history and forecast horizon.
max_encoder_length, max_prediction_length = 15, 12

# Training set: rows whose time_idx is at most max(time_idx) minus two horizons.
training2 = TimeSeriesDataSet(
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",
    target="value",
    group_ids=["tnc", "variable"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    # target is normalised per (tnc, variable) group
    target_normalizer=GroupNormalizer(
        transformation="relu", groups=["tnc", "variable"]
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation set: derived from training2 so it shares its settings; it covers
# rows whose time_idx is at most max(time_idx) minus one horizon.
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
batch_size = 31
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=2)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=2)

pl.seed_everything(42)

# Run the study. The learning rate is left to Optuna (finder disabled).
# NOTE(review): limit_train_batches is given the batch_size value; confirm that
# reusing that number for the trainer's batch limit is intentional.
# NOTE(review): "optuna_test" is also the model_path of the other tuning cells
# in this notebook; confirm their outputs are not meant to be kept apart.
study8 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=15,
    max_epochs=30,
    hidden_size_range=(30, 64),
    hidden_continuous_size_range=(30, 64),
    attention_head_size_range=(1, 3),
    dropout_range=(0.1, 0.3),
    learning_rate_range=(0.01, 0.5),
    gradient_clip_val_range=(0.01, 0.5),
    use_learning_rate_finder=False,
    reduce_on_plateau_patience=4,
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Persist the study so it can be inspected or resumed later.
with open("pytorch_lightning_optuna_sim_10_222_nl_ho.pkl", "wb") as fout:
    pickle.dump(study8, fout)

# Best hyperparameters found by the study.
print(study8.best_params)

In [None]:
# Optuna search over TFT hyperparameters for the sim_101_60_l_he training data.
# Read the CSV, order rows by series then time, and build the model columns.
# NOTE(review): time_idx is the row position in the sorted frame, so it keeps
# increasing across series instead of restarting per series; the "<=" cutoffs
# below therefore drop only the last rows of the whole frame. Confirm that is
# the intended train/validation split.
data2 = (
    pd.read_csv('sim_101_60_l_he_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    # tnc: '1' where c_t equals 'treated', '0' for every other value
    .assign(tnc=lambda frame: ['1' if label == 'treated' else '0' for label in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Window sizes in time steps: encoder history and forecast horizon.
max_encoder_length, max_prediction_length = 15, 12

# Training set: rows whose time_idx is at most max(time_idx) minus two horizons.
training2 = TimeSeriesDataSet(
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",
    target="value",
    group_ids=["tnc", "variable"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    # target is normalised per (tnc, variable) group
    target_normalizer=GroupNormalizer(
        transformation="relu", groups=["tnc", "variable"]
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation set: derived from training2 so it shares its settings; it covers
# rows whose time_idx is at most max(time_idx) minus one horizon.
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
batch_size = 31
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=2)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=2)

pl.seed_everything(42)

# Run the study. The learning rate is left to Optuna (finder disabled).
# NOTE(review): limit_train_batches is given the batch_size value; confirm that
# reusing that number for the trainer's batch limit is intentional.
# NOTE(review): "optuna_test" is also the model_path of the other tuning cells
# in this notebook; confirm their outputs are not meant to be kept apart.
study9 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=15,
    max_epochs=30,
    hidden_size_range=(12, 64),
    hidden_continuous_size_range=(12, 64),
    attention_head_size_range=(1, 3),
    dropout_range=(0.1, 0.3),
    learning_rate_range=(0.01, 0.5),
    gradient_clip_val_range=(0.01, 0.5),
    use_learning_rate_finder=False,
    reduce_on_plateau_patience=4,
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Persist the study so it can be inspected or resumed later.
with open("pytorch_lightning_optuna_sim_101_60_l_he.pkl", "wb") as fout:
    pickle.dump(study9, fout)

# Best hyperparameters found by the study.
print(study9.best_params)

In [None]:
# Optuna search over TFT hyperparameters for the sim_101_60_l_ho training data.
# Read the CSV, order rows by series then time, and build the model columns.
# NOTE(review): time_idx is the row position in the sorted frame, so it keeps
# increasing across series instead of restarting per series; the "<=" cutoffs
# below therefore drop only the last rows of the whole frame. Confirm that is
# the intended train/validation split.
data2 = (
    pd.read_csv('sim_101_60_l_ho_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    # tnc: '1' where c_t equals 'treated', '0' for every other value
    .assign(tnc=lambda frame: ['1' if label == 'treated' else '0' for label in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Window sizes in time steps: encoder history and forecast horizon.
max_encoder_length, max_prediction_length = 15, 12

# Training set: rows whose time_idx is at most max(time_idx) minus two horizons.
training2 = TimeSeriesDataSet(
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",
    target="value",
    group_ids=["tnc", "variable"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    # target is normalised per (tnc, variable) group
    target_normalizer=GroupNormalizer(
        transformation="relu", groups=["tnc", "variable"]
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation set: derived from training2 so it shares its settings; it covers
# rows whose time_idx is at most max(time_idx) minus one horizon.
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
batch_size = 31
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=2)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=2)

pl.seed_everything(42)

# Run the study. The learning rate is left to Optuna (finder disabled).
# NOTE(review): limit_train_batches is given the batch_size value; confirm that
# reusing that number for the trainer's batch limit is intentional.
# NOTE(review): "optuna_test" is also the model_path of the other tuning cells
# in this notebook; confirm their outputs are not meant to be kept apart.
study10 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=15,
    max_epochs=30,
    hidden_size_range=(30, 40),
    hidden_continuous_size_range=(30, 40),
    attention_head_size_range=(1, 3),
    dropout_range=(0.1, 0.3),
    learning_rate_range=(0.01, 0.5),
    gradient_clip_val_range=(0.01, 0.5),
    use_learning_rate_finder=False,
    reduce_on_plateau_patience=4,
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Persist the study so it can be inspected or resumed later.
with open("pytorch_lightning_optuna_sim_101_60_l_ho.pkl", "wb") as fout:
    pickle.dump(study10, fout)

# Best hyperparameters found by the study.
print(study10.best_params)

In [None]:
# Optuna search over TFT hyperparameters for the sim_101_60_nl_he training data.
# Read the CSV, order rows by series then time, and build the model columns.
# NOTE(review): time_idx is the row position in the sorted frame, so it keeps
# increasing across series instead of restarting per series; the "<=" cutoffs
# below therefore drop only the last rows of the whole frame. Confirm that is
# the intended train/validation split.
data2 = (
    pd.read_csv('sim_101_60_nl_he_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    # tnc: '1' where c_t equals 'treated', '0' for every other value
    .assign(tnc=lambda frame: ['1' if label == 'treated' else '0' for label in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Window sizes in time steps: encoder history and forecast horizon.
max_encoder_length, max_prediction_length = 15, 12

# Training set: rows whose time_idx is at most max(time_idx) minus two horizons.
training2 = TimeSeriesDataSet(
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",
    target="value",
    group_ids=["tnc", "variable"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    # target is normalised per (tnc, variable) group
    target_normalizer=GroupNormalizer(
        transformation="relu", groups=["tnc", "variable"]
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation set: derived from training2 so it shares its settings; it covers
# rows whose time_idx is at most max(time_idx) minus one horizon.
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
batch_size = 31
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=2)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=2)

pl.seed_everything(42)

# Run the study. The learning rate is left to Optuna (finder disabled).
# NOTE(review): limit_train_batches is given the batch_size value; confirm that
# reusing that number for the trainer's batch limit is intentional.
# NOTE(review): "optuna_test" is also the model_path of the other tuning cells
# in this notebook; confirm their outputs are not meant to be kept apart.
study11 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=15,
    max_epochs=30,
    hidden_size_range=(30, 40),
    hidden_continuous_size_range=(30, 40),
    attention_head_size_range=(1, 3),
    dropout_range=(0.1, 0.3),
    learning_rate_range=(0.01, 0.5),
    gradient_clip_val_range=(0.01, 0.5),
    use_learning_rate_finder=False,
    reduce_on_plateau_patience=4,
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Persist the study so it can be inspected or resumed later.
with open("pytorch_lightning_optuna_sim_101_60_nl_he.pkl", "wb") as fout:
    pickle.dump(study11, fout)

# Best hyperparameters found by the study.
print(study11.best_params)

In [None]:
# Optuna search over TFT hyperparameters for the sim_101_60_nl_ho training data.
# Read the CSV, order rows by series then time, and build the model columns.
# NOTE(review): time_idx is the row position in the sorted frame, so it keeps
# increasing across series instead of restarting per series; the "<=" cutoffs
# below therefore drop only the last rows of the whole frame. Confirm that is
# the intended train/validation split.
data2 = (
    pd.read_csv('sim_101_60_nl_ho_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    # tnc: '1' where c_t equals 'treated', '0' for every other value
    .assign(tnc=lambda frame: ['1' if label == 'treated' else '0' for label in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Window sizes in time steps: encoder history and forecast horizon.
max_encoder_length, max_prediction_length = 15, 12

# Training set: rows whose time_idx is at most max(time_idx) minus two horizons.
training2 = TimeSeriesDataSet(
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",
    target="value",
    group_ids=["tnc", "variable"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    # target is normalised per (tnc, variable) group
    target_normalizer=GroupNormalizer(
        transformation="relu", groups=["tnc", "variable"]
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation set: derived from training2 so it shares its settings; it covers
# rows whose time_idx is at most max(time_idx) minus one horizon.
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
batch_size = 31
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=2)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=2)

pl.seed_everything(42)

# Run the study. The learning rate is left to Optuna (finder disabled).
# NOTE(review): limit_train_batches is given the batch_size value; confirm that
# reusing that number for the trainer's batch limit is intentional.
# NOTE(review): "optuna_test" is also the model_path of the other tuning cells
# in this notebook; confirm their outputs are not meant to be kept apart.
study12 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=15,
    max_epochs=30,
    hidden_size_range=(30, 40),
    hidden_continuous_size_range=(30, 40),
    attention_head_size_range=(2, 4),
    dropout_range=(0.1, 0.5),
    learning_rate_range=(0.01, 0.5),
    gradient_clip_val_range=(0.1, 0.7),
    use_learning_rate_finder=False,
    reduce_on_plateau_patience=4,
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Persist the study so it can be inspected or resumed later.
with open("pytorch_lightning_optuna_sim_101_60_nl_ho.pkl", "wb") as fout:
    pickle.dump(study12, fout)

# Best hyperparameters found by the study.
print(study12.best_params)

In [None]:
# Optuna search over TFT hyperparameters for the sim_101_222_l_he training data.
# Read the CSV, order rows by series then time, and build the model columns.
# NOTE(review): time_idx is the row position in the sorted frame, so it keeps
# increasing across series instead of restarting per series; the "<=" cutoffs
# below therefore drop only the last rows of the whole frame. Confirm that is
# the intended train/validation split.
data2 = (
    pd.read_csv('sim_101_222_l_he_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    # tnc: '1' where c_t equals 'treated', '0' for every other value
    .assign(tnc=lambda frame: ['1' if label == 'treated' else '0' for label in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Window sizes in time steps: encoder history and forecast horizon.
max_encoder_length, max_prediction_length = 15, 12

# Training set: rows whose time_idx is at most max(time_idx) minus two horizons.
training2 = TimeSeriesDataSet(
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",
    target="value",
    group_ids=["tnc", "variable"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    # target is normalised per (tnc, variable) group
    target_normalizer=GroupNormalizer(
        transformation="relu", groups=["tnc", "variable"]
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation set: derived from training2 so it shares its settings; it covers
# rows whose time_idx is at most max(time_idx) minus one horizon.
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
batch_size = 124
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=4)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=4)

pl.seed_everything(42)

# Run the study. The learning rate is left to Optuna (finder disabled).
# NOTE(review): limit_train_batches is given the batch_size value; confirm that
# reusing that number for the trainer's batch limit is intentional.
# NOTE(review): "optuna_test" is also the model_path of the other tuning cells
# in this notebook; confirm their outputs are not meant to be kept apart.
study13 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=8,
    max_epochs=15,
    hidden_size_range=(30, 64),
    hidden_continuous_size_range=(30, 64),
    attention_head_size_range=(2, 4),
    dropout_range=(0.1, 0.5),
    learning_rate_range=(0.01, 0.5),
    gradient_clip_val_range=(0.1, 0.7),
    use_learning_rate_finder=False,
    reduce_on_plateau_patience=4,
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Persist the study so it can be inspected or resumed later.
with open("pytorch_lightning_optuna_sim_101_222_l_he.pkl", "wb") as fout:
    pickle.dump(study13, fout)

# Best hyperparameters found by the study.
print(study13.best_params)

In [None]:
# Optuna search over TFT hyperparameters for the sim_101_222_l_ho training data.
# Read the CSV, order rows by series then time, and build the model columns.
# NOTE(review): time_idx is the row position in the sorted frame, so it keeps
# increasing across series instead of restarting per series; the "<=" cutoffs
# below therefore drop only the last rows of the whole frame. Confirm that is
# the intended train/validation split.
data2 = (
    pd.read_csv('sim_101_222_l_ho_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    # tnc: '1' where c_t equals 'treated', '0' for every other value
    .assign(tnc=lambda frame: ['1' if label == 'treated' else '0' for label in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Window sizes in time steps: encoder history and forecast horizon.
max_encoder_length, max_prediction_length = 15, 12

# Training set: rows whose time_idx is at most max(time_idx) minus two horizons.
training2 = TimeSeriesDataSet(
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",
    target="value",
    group_ids=["tnc", "variable"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    # target is normalised per (tnc, variable) group
    target_normalizer=GroupNormalizer(
        transformation="relu", groups=["tnc", "variable"]
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation set: derived from training2 so it shares its settings; it covers
# rows whose time_idx is at most max(time_idx) minus one horizon.
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
batch_size = 124
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=4)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=4)

pl.seed_everything(42)

# Run the study. The learning rate is left to Optuna (finder disabled).
# NOTE(review): limit_train_batches is given the batch_size value; confirm that
# reusing that number for the trainer's batch limit is intentional.
# NOTE(review): "optuna_test" is also the model_path of the other tuning cells
# in this notebook; confirm their outputs are not meant to be kept apart.
study14 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=8,
    max_epochs=15,
    hidden_size_range=(30, 64),
    hidden_continuous_size_range=(30, 64),
    attention_head_size_range=(2, 4),
    dropout_range=(0.1, 0.5),
    learning_rate_range=(0.01, 0.5),
    gradient_clip_val_range=(0.1, 0.7),
    use_learning_rate_finder=False,
    reduce_on_plateau_patience=4,
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Persist the study so it can be inspected or resumed later.
with open("pytorch_lightning_optuna_sim_101_222_l_ho.pkl", "wb") as fout:
    pickle.dump(study14, fout)

# Best hyperparameters found by the study.
print(study14.best_params)

In [None]:
# Optuna search over TFT hyperparameters for the sim_101_222_nl_he training data.
# Read the CSV, order rows by series then time, and build the model columns.
# NOTE(review): time_idx is the row position in the sorted frame, so it keeps
# increasing across series instead of restarting per series; the "<=" cutoffs
# below therefore drop only the last rows of the whole frame. Confirm that is
# the intended train/validation split.
data2 = (
    pd.read_csv('sim_101_222_nl_he_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    # tnc: '1' where c_t equals 'treated', '0' for every other value
    .assign(tnc=lambda frame: ['1' if label == 'treated' else '0' for label in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Window sizes in time steps: encoder history and forecast horizon.
max_encoder_length, max_prediction_length = 15, 12

# Training set: rows whose time_idx is at most max(time_idx) minus two horizons.
training2 = TimeSeriesDataSet(
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",
    target="value",
    group_ids=["tnc", "variable"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    # target is normalised per (tnc, variable) group
    target_normalizer=GroupNormalizer(
        transformation="relu", groups=["tnc", "variable"]
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation set: derived from training2 so it shares its settings; it covers
# rows whose time_idx is at most max(time_idx) minus one horizon.
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
batch_size = 124
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=4)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=4)

pl.seed_everything(42)

# Run the study. The learning rate is left to Optuna (finder disabled).
# NOTE(review): limit_train_batches is given the batch_size value; confirm that
# reusing that number for the trainer's batch limit is intentional.
# NOTE(review): "optuna_test" is also the model_path of the other tuning cells
# in this notebook; confirm their outputs are not meant to be kept apart.
study15 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=8,
    max_epochs=15,
    hidden_size_range=(30, 64),
    hidden_continuous_size_range=(30, 64),
    attention_head_size_range=(2, 4),
    dropout_range=(0.1, 0.5),
    learning_rate_range=(0.01, 0.5),
    gradient_clip_val_range=(0.1, 0.7),
    use_learning_rate_finder=False,
    reduce_on_plateau_patience=4,
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Persist the study so it can be inspected or resumed later.
with open("pytorch_lightning_optuna_sim_101_222_nl_he.pkl", "wb") as fout:
    pickle.dump(study15, fout)

# Best hyperparameters found by the study.
print(study15.best_params)

In [None]:
# Optuna search over TFT hyperparameters for the sim_101_222_nl_ho training data.
# Read the CSV, order rows by series then time, and build the model columns.
# NOTE(review): time_idx is the row position in the sorted frame, so it keeps
# increasing across series instead of restarting per series; the "<=" cutoffs
# below therefore drop only the last rows of the whole frame. Confirm that is
# the intended train/validation split.
data2 = (
    pd.read_csv('sim_101_222_nl_ho_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    # tnc: '1' where c_t equals 'treated', '0' for every other value
    .assign(tnc=lambda frame: ['1' if label == 'treated' else '0' for label in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Window sizes in time steps: encoder history and forecast horizon.
max_encoder_length, max_prediction_length = 15, 12

# Training set: rows whose time_idx is at most max(time_idx) minus two horizons.
training2 = TimeSeriesDataSet(
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",
    target="value",
    group_ids=["tnc", "variable"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    # target is normalised per (tnc, variable) group
    target_normalizer=GroupNormalizer(
        transformation="relu", groups=["tnc", "variable"]
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation set: derived from training2 so it shares its settings; it covers
# rows whose time_idx is at most max(time_idx) minus one horizon.
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
batch_size = 124
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=4)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=4)

pl.seed_everything(42)

# Run the study. The learning rate is left to Optuna (finder disabled).
# NOTE(review): limit_train_batches is given the batch_size value; confirm that
# reusing that number for the trainer's batch limit is intentional.
# NOTE(review): "optuna_test" is also the model_path of the other tuning cells
# in this notebook; confirm their outputs are not meant to be kept apart.
study16 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    n_trials=8,
    max_epochs=15,
    hidden_size_range=(30, 64),
    hidden_continuous_size_range=(30, 64),
    attention_head_size_range=(2, 4),
    dropout_range=(0.1, 0.5),
    learning_rate_range=(0.01, 0.3),
    gradient_clip_val_range=(0.1, 0.7),
    use_learning_rate_finder=False,
    reduce_on_plateau_patience=4,
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Persist the study so it can be inspected or resumed later.
with open("pytorch_lightning_optuna_sim_101_222_nl_ho.pkl", "wb") as fout:
    pickle.dump(study16, fout)

# Best hyperparameters found by the study.
print(study16.best_params)

In [None]:
# --- TFT hyperparameter search: sim_500_222_l_he ---
# Window lengths and batch size shared by the dataset and dataloaders below.
max_encoder_length = 15      # history steps fed to the encoder
max_prediction_length = 12   # steps to forecast
batch_size = 124

# Load the series, order rows by (series_id, time) and derive the helper columns:
#   time_idx - row position in the sorted frame (it keeps counting across series,
#              it does not restart for each series)
#   variable - series_id, renamed
#   tnc      - '1' where c_t == 'treated', otherwise '0'
data2 = (
    pd.read_csv('sim_500_222_l_he_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    .assign(tnc=lambda frame: ['1' if flag == 'treated' else '0' for flag in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Training dataset: every row except the last 2 * max_prediction_length values of time_idx.
# NOTE(review): because time_idx is global, this trims only the tail of the sorted frame,
# not the tail of every series - confirm that is the intended hold-out.
training2 = TimeSeriesDataSet(
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",                    # column holding the time index
    target="value",                         # column to predict
    group_ids=["tnc", "variable"],          # columns identifying one series
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    target_normalizer=GroupNormalizer(groups=["tnc", "variable"], transformation="relu"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation dataset: built from training2's configuration, keeping rows up to
# max(time_idx) - max_prediction_length (a superset of the training rows).
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=4)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=4)

pl.seed_everything(42)

# Run the search: 8 trials of at most 15 epochs each.
# NOTE(review): limit_train_batches receives the batch size (124); an integer there is
# presumably a per-epoch batch count - confirm this is intended.
study17 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    max_epochs=15,
    n_trials=8,
    learning_rate_range=(0.01, 0.3),
    dropout_range=(0.1, 0.9),
    gradient_clip_val_range=(0.1, 0.9),
    hidden_size_range=(30, 64),
    hidden_continuous_size_range=(30, 64),
    attention_head_size_range=(3, 5),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # False -> Optuna searches the learning rate instead of the built-in finder
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Pickle the study so it can be inspected, or tuning resumed, later.
with open("pytorch_lightning_optuna_sim_500_222_l_he.pkl", "wb") as fout:
    pickle.dump(study17, fout)

# Report the best trial's hyperparameters.
print(study17.best_trial.params)

In [None]:
# --- TFT hyperparameter search: sim_500_222_l_ho ---
# Window lengths and batch size shared by the dataset and dataloaders below.
max_encoder_length = 15      # history steps fed to the encoder
max_prediction_length = 12   # steps to forecast
batch_size = 124

# Load the series, order rows by (series_id, time) and derive the helper columns:
#   time_idx - row position in the sorted frame (it keeps counting across series,
#              it does not restart for each series)
#   variable - series_id, renamed
#   tnc      - '1' where c_t == 'treated', otherwise '0'
data2 = (
    pd.read_csv('sim_500_222_l_ho_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    .assign(tnc=lambda frame: ['1' if flag == 'treated' else '0' for flag in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Training dataset: every row except the last 2 * max_prediction_length values of time_idx.
# NOTE(review): because time_idx is global, this trims only the tail of the sorted frame,
# not the tail of every series - confirm that is the intended hold-out.
training2 = TimeSeriesDataSet(
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",                    # column holding the time index
    target="value",                         # column to predict
    group_ids=["tnc", "variable"],          # columns identifying one series
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    target_normalizer=GroupNormalizer(groups=["tnc", "variable"], transformation="relu"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation dataset: built from training2's configuration, keeping rows up to
# max(time_idx) - max_prediction_length (a superset of the training rows).
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=4)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=4)

pl.seed_everything(42)

# Run the search: 8 trials of at most 15 epochs each.
# NOTE(review): limit_train_batches receives the batch size (124); an integer there is
# presumably a per-epoch batch count - confirm this is intended.
study18 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    max_epochs=15,
    n_trials=8,
    learning_rate_range=(0.01, 0.3),
    dropout_range=(0.1, 0.9),
    gradient_clip_val_range=(0.1, 0.9),
    hidden_size_range=(30, 64),
    hidden_continuous_size_range=(30, 64),
    attention_head_size_range=(3, 5),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # False -> Optuna searches the learning rate instead of the built-in finder
    trainer_kwargs={"limit_train_batches": batch_size},
)
# Pickle the study so it can be inspected, or tuning resumed, later.
with open("pytorch_lightning_optuna_sim_500_222_l_ho.pkl", "wb") as fout:
    pickle.dump(study18, fout)

# Report the best trial's hyperparameters.
print(study18.best_trial.params)

In [None]:
# --- TFT hyperparameter search: sim_500_222_nl_he ---
# Window lengths and batch size shared by the dataset and dataloaders below.
max_encoder_length = 15      # history steps fed to the encoder
max_prediction_length = 12   # steps to forecast
batch_size = 124

# Load the series, order rows by (series_id, time) and derive the helper columns:
#   time_idx - row position in the sorted frame (it keeps counting across series,
#              it does not restart for each series)
#   variable - series_id, renamed
#   tnc      - '1' where c_t == 'treated', otherwise '0'
data2 = (
    pd.read_csv('sim_500_222_nl_he_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    .assign(tnc=lambda frame: ['1' if flag == 'treated' else '0' for flag in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Training dataset: every row except the last 2 * max_prediction_length values of time_idx.
# NOTE(review): because time_idx is global, this trims only the tail of the sorted frame,
# not the tail of every series - confirm that is the intended hold-out.
training2 = TimeSeriesDataSet(
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",                    # column holding the time index
    target="value",                         # column to predict
    group_ids=["tnc", "variable"],          # columns identifying one series
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    target_normalizer=GroupNormalizer(groups=["tnc", "variable"], transformation="relu"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation dataset: built from training2's configuration, keeping rows up to
# max(time_idx) - max_prediction_length (a superset of the training rows).
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=4)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=4)

pl.seed_everything(42)

# Run the search: 8 trials of at most 15 epochs each.
# NOTE(review): limit_train_batches receives the batch size (124); an integer there is
# presumably a per-epoch batch count - confirm this is intended.
study19 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    max_epochs=15,
    n_trials=8,
    learning_rate_range=(0.01, 0.3),
    dropout_range=(0.1, 0.9),
    gradient_clip_val_range=(0.1, 0.9),
    hidden_size_range=(30, 64),
    hidden_continuous_size_range=(30, 64),
    attention_head_size_range=(3, 5),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # False -> Optuna searches the learning rate instead of the built-in finder
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Pickle the study so it can be inspected, or tuning resumed, later.
with open("pytorch_lightning_optuna_sim_500_222_nl_he.pkl", "wb") as fout:
    pickle.dump(study19, fout)

# Report the best trial's hyperparameters.
print(study19.best_trial.params)

In [None]:
# --- TFT hyperparameter search: sim_500_222_nl_ho ---
# Window lengths and batch size shared by the dataset and dataloaders below.
max_encoder_length = 15      # history steps fed to the encoder
max_prediction_length = 12   # steps to forecast
batch_size = 124

# Load the series, order rows by (series_id, time) and derive the helper columns:
#   time_idx - row position in the sorted frame (it keeps counting across series,
#              it does not restart for each series)
#   variable - series_id, renamed
#   tnc      - '1' where c_t == 'treated', otherwise '0'
data2 = (
    pd.read_csv('sim_500_222_nl_ho_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    .assign(tnc=lambda frame: ['1' if flag == 'treated' else '0' for flag in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Training dataset: every row except the last 2 * max_prediction_length values of time_idx.
# NOTE(review): because time_idx is global, this trims only the tail of the sorted frame,
# not the tail of every series - confirm that is the intended hold-out.
training2 = TimeSeriesDataSet(
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",                    # column holding the time index
    target="value",                         # column to predict
    group_ids=["tnc", "variable"],          # columns identifying one series
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    target_normalizer=GroupNormalizer(groups=["tnc", "variable"], transformation="relu"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation dataset: built from training2's configuration, keeping rows up to
# max(time_idx) - max_prediction_length (a superset of the training rows).
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=4)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=4)

pl.seed_everything(42)

# Run the search: 8 trials of at most 15 epochs each.
# NOTE(review): limit_train_batches receives the batch size (124); an integer there is
# presumably a per-epoch batch count - confirm this is intended.
study20 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    max_epochs=15,
    n_trials=8,
    learning_rate_range=(0.01, 0.3),
    dropout_range=(0.1, 0.9),
    gradient_clip_val_range=(0.1, 0.9),
    hidden_size_range=(30, 64),
    hidden_continuous_size_range=(30, 64),
    attention_head_size_range=(3, 5),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # False -> Optuna searches the learning rate instead of the built-in finder
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Pickle the study so it can be inspected, or tuning resumed, later.
# (The file handle was previously misspelled as `fo ut`, a syntax error that kept
# the whole cell from running.)
with open("pytorch_lightning_optuna_sim_500_222_nl_ho.pkl", "wb") as fout:
    pickle.dump(study20, fout)

# Report the best trial's hyperparameters.
print(study20.best_trial.params)

In [None]:
# --- TFT hyperparameter search: sim_500_60_l_he ---
# Window lengths and batch size shared by the dataset and dataloaders below.
max_encoder_length = 15      # history steps fed to the encoder
max_prediction_length = 12   # steps to forecast
batch_size = 124

# Load the series, order rows by (series_id, time) and derive the helper columns:
#   time_idx - row position in the sorted frame (it keeps counting across series,
#              it does not restart for each series)
#   variable - series_id, renamed
#   tnc      - '1' where c_t == 'treated', otherwise '0'
data2 = (
    pd.read_csv('sim_500_60_l_he_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    .assign(tnc=lambda frame: ['1' if flag == 'treated' else '0' for flag in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Training dataset: every row except the last 2 * max_prediction_length values of time_idx.
# NOTE(review): because time_idx is global, this trims only the tail of the sorted frame,
# not the tail of every series - confirm that is the intended hold-out.
training2 = TimeSeriesDataSet(
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",                    # column holding the time index
    target="value",                         # column to predict
    group_ids=["tnc", "variable"],          # columns identifying one series
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    target_normalizer=GroupNormalizer(groups=["tnc", "variable"], transformation="relu"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation dataset: built from training2's configuration, keeping rows up to
# max(time_idx) - max_prediction_length (a superset of the training rows).
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=4)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=4)

pl.seed_everything(42)

# Run the search: 8 trials of at most 15 epochs each.
# NOTE(review): limit_train_batches receives the batch size (124); an integer there is
# presumably a per-epoch batch count - confirm this is intended.
study21 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    max_epochs=15,
    n_trials=8,
    learning_rate_range=(0.01, 0.3),
    dropout_range=(0.2, 0.6),
    gradient_clip_val_range=(0.2, 0.5),
    hidden_size_range=(30, 40),
    hidden_continuous_size_range=(30, 40),
    attention_head_size_range=(3, 4),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # False -> Optuna searches the learning rate instead of the built-in finder
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Pickle the study so it can be inspected, or tuning resumed, later.
with open("pytorch_lightning_optuna_sim_500_60_l_he.pkl", "wb") as fout:
    pickle.dump(study21, fout)

# Report the best trial's hyperparameters.
print(study21.best_trial.params)

In [None]:
# --- TFT hyperparameter search: sim_500_60_l_ho ---
# Window lengths and batch size shared by the dataset and dataloaders below.
max_encoder_length = 15      # history steps fed to the encoder
max_prediction_length = 12   # steps to forecast
batch_size = 124

# Load the series, order rows by (series_id, time) and derive the helper columns:
#   time_idx - row position in the sorted frame (it keeps counting across series,
#              it does not restart for each series)
#   variable - series_id, renamed
#   tnc      - '1' where c_t == 'treated', otherwise '0'
data2 = (
    pd.read_csv('sim_500_60_l_ho_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    .assign(tnc=lambda frame: ['1' if flag == 'treated' else '0' for flag in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Training dataset: every row except the last 2 * max_prediction_length values of time_idx.
# NOTE(review): because time_idx is global, this trims only the tail of the sorted frame,
# not the tail of every series - confirm that is the intended hold-out.
training2 = TimeSeriesDataSet(
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",                    # column holding the time index
    target="value",                         # column to predict
    group_ids=["tnc", "variable"],          # columns identifying one series
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    target_normalizer=GroupNormalizer(groups=["tnc", "variable"], transformation="relu"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation dataset: built from training2's configuration, keeping rows up to
# max(time_idx) - max_prediction_length (a superset of the training rows).
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=4)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=4)

pl.seed_everything(42)

# Run the search: 8 trials of at most 15 epochs each.
# NOTE(review): limit_train_batches receives the batch size (124); an integer there is
# presumably a per-epoch batch count - confirm this is intended.
study22 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    max_epochs=15,
    n_trials=8,
    learning_rate_range=(0.01, 0.3),
    dropout_range=(0.2, 0.6),
    gradient_clip_val_range=(0.2, 0.5),
    hidden_size_range=(30, 40),
    hidden_continuous_size_range=(30, 40),
    attention_head_size_range=(3, 4),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # False -> Optuna searches the learning rate instead of the built-in finder
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Pickle the study so it can be inspected, or tuning resumed, later.
with open("pytorch_lightning_optuna_sim_500_60_l_ho.pkl", "wb") as fout:
    pickle.dump(study22, fout)

# Report the best trial's hyperparameters.
print(study22.best_trial.params)

In [None]:
# --- TFT hyperparameter search: sim_500_60_nl_he ---
# Window lengths and batch size shared by the dataset and dataloaders below.
max_encoder_length = 15      # history steps fed to the encoder
max_prediction_length = 12   # steps to forecast
batch_size = 124

# Load the series, order rows by (series_id, time) and derive the helper columns:
#   time_idx - row position in the sorted frame (it keeps counting across series,
#              it does not restart for each series)
#   variable - series_id, renamed
#   tnc      - '1' where c_t == 'treated', otherwise '0'
data2 = (
    pd.read_csv('sim_500_60_nl_he_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    .assign(tnc=lambda frame: ['1' if flag == 'treated' else '0' for flag in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Training dataset: every row except the last 2 * max_prediction_length values of time_idx.
# NOTE(review): because time_idx is global, this trims only the tail of the sorted frame,
# not the tail of every series - confirm that is the intended hold-out.
training2 = TimeSeriesDataSet(
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",                    # column holding the time index
    target="value",                         # column to predict
    group_ids=["tnc", "variable"],          # columns identifying one series
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    target_normalizer=GroupNormalizer(groups=["tnc", "variable"], transformation="relu"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation dataset: built from training2's configuration, keeping rows up to
# max(time_idx) - max_prediction_length (a superset of the training rows).
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=4)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=4)

pl.seed_everything(42)

# Run the search: 8 trials of at most 15 epochs each.
# NOTE(review): limit_train_batches receives the batch size (124); an integer there is
# presumably a per-epoch batch count - confirm this is intended.
study23 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    max_epochs=15,
    n_trials=8,
    learning_rate_range=(0.01, 0.3),
    dropout_range=(0.2, 0.6),
    gradient_clip_val_range=(0.2, 0.5),
    hidden_size_range=(30, 40),
    hidden_continuous_size_range=(30, 40),
    attention_head_size_range=(3, 4),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # False -> Optuna searches the learning rate instead of the built-in finder
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Pickle the study so it can be inspected, or tuning resumed, later.
with open("pytorch_lightning_optuna_sim_500_60_nl_he.pkl", "wb") as fout:
    pickle.dump(study23, fout)

# Report the best trial's hyperparameters.
print(study23.best_trial.params)

In [None]:
# --- TFT hyperparameter search: sim_500_60_nl_ho ---
# Window lengths and batch size shared by the dataset and dataloaders below.
max_encoder_length = 15      # history steps fed to the encoder
max_prediction_length = 12   # steps to forecast
batch_size = 124

# Load the series, order rows by (series_id, time) and derive the helper columns:
#   time_idx - row position in the sorted frame (it keeps counting across series,
#              it does not restart for each series)
#   variable - series_id, renamed
#   tnc      - '1' where c_t == 'treated', otherwise '0'
data2 = (
    pd.read_csv('sim_500_60_nl_ho_train.csv')
    .sort_values(by=['series_id', 'time'], ignore_index=True)
    .assign(time_idx=lambda frame: frame.index.to_list())
    .loc[:, ['time', 'time_idx', 'series_id', 'value', 'c_t']]
    .rename(columns={'series_id': 'variable'})
    .assign(tnc=lambda frame: ['1' if flag == 'treated' else '0' for flag in frame['c_t']])
)
data2_processed = data2[['time', 'time_idx', 'variable', 'value', 'tnc']]

# Training dataset: every row except the last 2 * max_prediction_length values of time_idx.
# NOTE(review): because time_idx is global, this trims only the tail of the sorted frame,
# not the tail of every series - confirm that is the intended hold-out.
training2 = TimeSeriesDataSet(
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - 2 * max_prediction_length
    ],
    time_idx="time_idx",                    # column holding the time index
    target="value",                         # column to predict
    group_ids=["tnc", "variable"],          # columns identifying one series
    static_categoricals=["tnc", "variable"],
    time_varying_unknown_reals=["value"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    target_normalizer=GroupNormalizer(groups=["tnc", "variable"], transformation="relu"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation dataset: built from training2's configuration, keeping rows up to
# max(time_idx) - max_prediction_length (a superset of the training rows).
validation2 = TimeSeriesDataSet.from_dataset(
    training2,
    data2_processed.loc[
        data2_processed["time_idx"]
        <= data2_processed["time_idx"].max() - max_prediction_length
    ],
    stop_randomization=True,
)

# Wrap both datasets in dataloaders.
train_dataloader2 = training2.to_dataloader(batch_size=batch_size, train=True, num_workers=4)
val_dataloader2 = validation2.to_dataloader(batch_size=batch_size, train=False, num_workers=4)

pl.seed_everything(42)

# Run the search: 8 trials of at most 15 epochs each.
# NOTE(review): limit_train_batches receives the batch size (124); an integer there is
# presumably a per-epoch batch count - confirm this is intended.
study24 = optimize_hyperparameters(
    train_dataloader2,
    val_dataloader2,
    model_path="optuna_test",
    max_epochs=15,
    n_trials=8,
    learning_rate_range=(0.01, 0.3),
    dropout_range=(0.2, 0.6),
    gradient_clip_val_range=(0.2, 0.5),
    hidden_size_range=(30, 40),
    hidden_continuous_size_range=(30, 40),
    attention_head_size_range=(3, 4),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # False -> Optuna searches the learning rate instead of the built-in finder
    trainer_kwargs={"limit_train_batches": batch_size},
)

# Pickle the study so it can be inspected, or tuning resumed, later.
with open("pytorch_lightning_optuna_sim_500_60_nl_ho.pkl", "wb") as fout:
    pickle.dump(study24, fout)

# Report the best trial's hyperparameters.
print(study24.best_trial.params)