In [1]:
import argparse
import datetime
from datetime import date
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

## Silence Python warnings and TensorFlow's native INFO/WARNING log output.
import warnings
warnings.filterwarnings("ignore")  # ignore every Python warning raised from here on
# Set here, ahead of the tensorflow import further down in this cell.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # INFO and WARNING messages are not printed

## load local files
import data_formatters.base
import expt_settings.configs
import libs.hyperparam_opt
import libs.tft_model
import libs.utils as utils

## Version compatibility: use the TensorFlow 1.x API surface (graphs / sessions).
import tensorflow.compat.v1 as tf
tf.logging.set_verbosity(tf.logging.ERROR) # only ERROR and above are logged, so deprecation warnings are not printed

# Short aliases for the project classes used throughout the notebook.
# ExperimentConfig: defines experiment configs and paths to outputs; the
# per-experiment details live in the data formatter.
ExperimentConfig = expt_settings.configs.ExperimentConfig
# ModelClass: full TFT architecture with training, evaluation and prediction
# using pandas DataFrame inputs.
ModelClass = libs.tft_model.TemporalFusionTransformer
# HyperparamOptManager: hyperparameter optimisation on a single machine/GPU.
HyperparamOptManager = libs.hyperparam_opt.HyperparamOptManager
# Multi-GPU variant is disabled for now because it raised many errors:
# DistributedHyperparamOptManager = libs.hyperparam_opt.DistributedHyperparamOptManager



2023-02-18 01:01:57.216967: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
### sample experiment

# --- settings a reader may want to change ---
name = 'acq_10'                    # experiment name handed to ExperimentConfig
output_folder = 'output_2023_02'   # root folder handed to ExperimentConfig
use_tensorflow_with_gpu = True
gpu_num = '3'                      # GPU id as a string; the setup cell also accepts 'all'

# --- objects derived from the settings ---
config = ExperimentConfig(experiment=name, root_folder=output_folder)
formatter = config.make_data_formatter()

# --- names used by the remaining cells ---
expt_name, use_gpu, gpu_number = name, use_tensorflow_with_gpu, gpu_num
data_csv_path, data_formatter = config.data_csv_path, formatter
num_repeats = 1
use_testing_mode = False

In [3]:
## Check that the imported data formatter is an instance of the expected base class.
if not isinstance(data_formatter, data_formatters.base.GenericDataFormatter):
    # The message names the class that is actually checked above.
    raise ValueError(
        "Data formatters should inherit from "
        "GenericDataFormatter! Type={}".format(type(data_formatter)))


## TensorFlow setup:
## choose whether the graph runs on GPU or CPU, and which GPU ID to use on multi-GPU machines.
## Produces:
##   tf_config             - session config passed to tf.Session in later cells
##   default_keras_session - Keras session that the calibration and test cells
##                           restore once they are done with their own session
print("\n\n*** Tensorflow setup ***")
if use_gpu:
    if gpu_number == 'all':
        default_keras_session = tf.keras.backend.get_session()  # use all available GPUs
        tf_config = utils.get_default_tensorflow_config(tf_device="gpu")
    else:
        tf_config = utils.get_default_tensorflow_config(tf_device="gpu", gpu_id=int(gpu_number))
        tf_config.gpu_options.allow_growth = True  # single GPU
        default_keras_session = tf.keras.backend.get_session()  # use selected GPU
else:
    tf_config = utils.get_default_tensorflow_config(tf_device="cpu")
    # Bind the default session on the CPU path as well: the calibration and test
    # cells call tf.keras.backend.set_session(default_keras_session) and would
    # otherwise fail with a NameError when use_gpu is False.
    default_keras_session = tf.keras.backend.get_session()

tf_config



*** Tensorflow setup ***
Selecting GPU ID=3


gpu_options {
  allow_growth: true
}

In [4]:
# Announce the experiment, then read the CSV whose path comes from the experiment config.
print("\n\n*** Training from defined parameters for experiment: {} ***".format(expt_name))
print("Loading & splitting data...")
raw_data = pd.read_csv(
    data_csv_path,
    index_col=0,  # first CSV column (Unnamed: 0) is the row index
)

# Spot-check a single merchant: its rows run from 2016-10-09 to 2020-02-23.
print(raw_data.loc[raw_data['merchant_index'] == 1001])
raw_data



*** Training from defined parameters for experiment: acq_10 ***
Loading & splitting data...
       acq_week  N_week_cohort  initial_order  initial_order_per_cust  \
155  2016-10-09           63.0           63.0                     1.0   
156  2016-10-16           87.0           87.0                     1.0   
157  2016-10-23           68.0           68.0                     1.0   
158  2016-10-30           94.0           94.0                     1.0   
159  2016-11-06           61.0           61.0                     1.0   
..          ...            ...            ...                     ...   
327  2020-01-26           27.0           27.0                     1.0   
328  2020-02-02           26.0           26.0                     1.0   
329  2020-02-09           21.0           21.0                     1.0   
330  2020-02-16           29.0           29.0                     1.0   
331  2020-02-23           19.0           19.0                     1.0   

     initial_spend  initial_a

Unnamed: 0,acq_week,N_week_cohort,initial_order,initial_order_per_cust,initial_spend,initial_aov,orders,orders_per_cust,spend,aov,year,holidays,weekofyear,linear,linear_1,merchant_index,merchant,category,subcategory
0,2017-03-12,534.0,534.0,1.0,2788.40100,5.221725,931.0,1.743446,4827.36100,5.185135,0.0,0.0,10.0,0.000000,0.000000,1000,jam_city_inc,Home Entertainment,Gaming
1,2017-03-19,419.0,419.0,1.0,1742.82000,4.159475,635.0,1.515513,2590.12000,4.078929,0.0,0.0,11.0,0.006494,0.000042,1000,jam_city_inc,Home Entertainment,Gaming
2,2017-03-26,203.0,203.0,1.0,1044.47100,5.145177,320.0,1.576355,1505.30100,4.704066,0.0,0.0,12.0,0.012987,0.000169,1000,jam_city_inc,Home Entertainment,Gaming
3,2017-04-02,182.0,182.0,1.0,685.54000,3.766703,272.0,1.494505,1125.64000,4.138382,0.0,0.0,13.0,0.019481,0.000379,1000,jam_city_inc,Home Entertainment,Gaming
4,2017-04-09,118.0,118.0,1.0,457.82000,3.879831,156.0,1.322034,652.44000,4.182308,0.0,0.0,14.0,0.025974,0.000675,1000,jam_city_inc,Home Entertainment,Gaming
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1743,2020-01-26,367.0,367.0,1.0,25638.89000,69.860736,564.0,1.536785,48548.33235,86.078603,1.0,0.0,4.0,0.977273,0.955062,1009,gaylord_hotels,Travel & Transportation,Lodging & Accommodation
1744,2020-02-02,463.0,463.0,1.0,41277.07235,89.151344,827.0,1.786177,92661.78940,112.045695,1.0,0.0,5.0,0.982955,0.966200,1009,gaylord_hotels,Travel & Transportation,Lodging & Accommodation
1745,2020-02-09,547.0,547.0,1.0,30720.27000,56.161371,877.0,1.603291,71700.73470,81.756824,1.0,0.0,6.0,0.988636,0.977402,1009,gaylord_hotels,Travel & Transportation,Lodging & Accommodation
1746,2020-02-16,604.0,604.0,1.0,37580.26000,62.218974,948.0,1.569536,66627.84235,70.282534,1.0,0.0,7.0,0.994318,0.988669,1009,gaylord_hotels,Travel & Transportation,Lodging & Accommodation


In [5]:
print(" - Data is located in: ", data_csv_path)

# The split boundaries (validation start, test start, test end) are set in the data formatter file.
train, valid, test = data_formatter.split_data(raw_data)

# Sample counts used when subsampling the data; -1 means use all available samples.
train_samples, valid_samples = data_formatter.get_num_samples_for_calibration()
print(train_samples, valid_samples, sep="\n")

 - Data is located in:  output_2023_02/../data/preprocessed_data/tft_google/company_0_10_acq_initaov.csv
Formatting train-valid-test splits
 - validation starts:  2018-11-18 / test starts:  2019-04-01 / test ends:  2020-02-23
Setting scalers with training data...
-1
-1


In [6]:
# Training rows for merchant 1001 run up to 2018-11-11, the week before validation starts.
print(train.loc[train['merchant_index'] == 1001])
train


       acq_week  N_week_cohort  initial_order  initial_order_per_cust  \
155  2016-10-09      -0.515361           63.0                     1.0   
156  2016-10-16      -0.466432           87.0                     1.0   
157  2016-10-23      -0.505168           68.0                     1.0   
158  2016-10-30      -0.452161           94.0                     1.0   
159  2016-11-06      -0.519439           61.0                     1.0   
..          ...            ...            ...                     ...   
260  2018-10-14      -0.545942           48.0                     1.0   
261  2018-10-21      -0.566329           38.0                     1.0   
262  2018-10-28      -0.576522           33.0                     1.0   
263  2018-11-04      -0.584677           29.0                     1.0   
264  2018-11-11      -0.574484           34.0                     1.0   

     initial_spend  initial_aov  orders  orders_per_cust    spend        aov  \
155        1195.57    18.977302    66.0    

Unnamed: 0,acq_week,N_week_cohort,initial_order,initial_order_per_cust,initial_spend,initial_aov,orders,orders_per_cust,spend,aov,year,holidays,weekofyear,linear,linear_1,merchant_index,merchant,category,subcategory
0,2017-03-12,0.444869,534.0,1.0,2788.40100,5.221725,931.0,1.743446,4827.36100,5.185135,-1.846077,0,1,-1.714125,-1.109625,1000,jam_city_inc,4,1
1,2017-03-19,0.210418,419.0,1.0,1742.82000,4.159475,635.0,1.515513,2590.12000,4.078929,-1.846077,0,2,-1.677920,-1.109256,1000,jam_city_inc,4,1
2,2017-03-26,-0.229942,203.0,1.0,1044.47100,5.145177,320.0,1.576355,1505.30100,4.704066,-1.846077,0,3,-1.641716,-1.108148,1000,jam_city_inc,4,1
3,2017-04-02,-0.272755,182.0,1.0,685.54000,3.766703,272.0,1.494505,1125.64000,4.138382,-1.846077,0,4,-1.605511,-1.106301,1000,jam_city_inc,4,1
4,2017-04-09,-0.403232,118.0,1.0,457.82000,3.879831,156.0,1.322034,652.44000,4.182308,-1.846077,0,5,-1.569307,-1.103716,1000,jam_city_inc,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1676,2018-10-14,0.257308,442.0,1.0,46717.51235,105.695729,640.0,1.447964,82232.02705,128.487542,1.108743,0,35,1.612161,2.007793,1009,gaylord_hotels,7,3
1677,2018-10-21,0.840378,728.0,1.0,110975.37410,152.438701,1315.0,1.806319,183242.28350,139.347744,1.108743,0,36,1.643840,2.067455,1009,gaylord_hotels,7,3
1678,2018-10-28,0.520301,571.0,1.0,73077.58705,127.981764,878.0,1.537653,147686.66880,168.208051,1.108743,0,37,1.675519,2.127683,1009,gaylord_hotels,7,3
1679,2018-11-04,0.457101,540.0,1.0,62131.83470,115.058953,807.0,1.494444,111407.18175,138.051031,1.108743,0,38,1.707198,2.188476,1009,gaylord_hotels,7,3


In [7]:
# Display the validation split returned by data_formatter.split_data.
valid

Unnamed: 0,acq_week,N_week_cohort,initial_order,initial_order_per_cust,initial_spend,initial_aov,orders,orders_per_cust,spend,aov,year,holidays,weekofyear,linear,linear_1,merchant_index,merchant,category,subcategory
88,2018-11-18,-0.344110,147.0,1.0,764.06000,5.197687,229.0,1.557823,1385.20000,6.048908,0.123803,0,40,1.471869,1.750373,1000,jam_city_inc,4,1
89,2018-11-25,-0.399155,120.0,1.0,683.23000,5.693583,199.0,1.658333,1285.06100,6.457593,0.123803,0,41,1.508073,1.815742,1000,jam_city_inc,4,1
90,2018-12-02,-0.444006,98.0,1.0,432.95000,4.417857,178.0,1.816327,915.24000,5.141798,0.123803,0,42,1.544278,1.881850,1000,jam_city_inc,4,1
91,2018-12-09,-0.505168,68.0,1.0,354.51000,5.213382,118.0,1.735294,611.42000,5.181525,0.123803,0,43,1.580482,1.948696,1000,jam_city_inc,4,1
92,2018-12-16,-0.454200,93.0,1.0,401.02000,4.312043,152.0,1.634409,801.69000,5.274276,0.123803,0,45,1.616687,2.016282,1000,jam_city_inc,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1696,2019-03-03,0.424482,524.0,1.0,54141.86470,103.324169,849.0,1.620229,82822.15705,97.552600,2.586152,0,51,2.245740,3.308484,1009,gaylord_hotels,7,3
1697,2019-03-10,0.414288,519.0,1.0,78430.42940,151.118361,820.0,1.579961,124055.87410,151.287651,2.586152,0,1,2.277418,3.379457,1009,gaylord_hotels,7,3
1698,2019-03-17,0.473411,548.0,1.0,81608.28940,148.920236,917.0,1.673358,137135.46645,149.547946,2.586152,0,2,2.309097,3.450995,1009,gaylord_hotels,7,3
1699,2019-03-24,0.815913,716.0,1.0,64827.59235,90.541330,1224.0,1.709497,91828.03235,75.022902,2.586152,0,3,2.340776,3.523098,1009,gaylord_hotels,7,3


In [8]:
# Display the test split returned by data_formatter.split_data.
test

Unnamed: 0,acq_week,N_week_cohort,initial_order,initial_order_per_cust,initial_spend,initial_aov,orders,orders_per_cust,spend,aov,year,holidays,weekofyear,linear,linear_1,merchant_index,merchant,category,subcategory
108,2019-04-07,-0.411387,114.0,1.0,707.72100,6.208079,164.0,1.438596,1095.67100,6.680921,2.093682,0,5,2.195958,3.198099,1000,jam_city_inc,4,1
109,2019-04-14,-0.429736,105.0,1.0,586.17000,5.582571,151.0,1.438095,842.32000,5.578278,2.093682,0,6,2.232163,3.278241,1000,jam_city_inc,4,1
110,2019-04-21,-0.476626,82.0,1.0,396.30000,4.832927,130.0,1.585366,810.75000,6.236538,2.093682,0,7,2.268367,3.359121,1000,jam_city_inc,4,1
111,2019-04-28,-0.413426,113.0,1.0,560.63000,4.961327,143.0,1.265487,751.37000,5.254336,2.093682,0,8,2.304572,3.440741,1000,jam_city_inc,4,1
112,2019-05-05,-0.431774,104.0,1.0,561.25000,5.396635,140.0,1.346154,829.58000,5.925571,2.093682,0,9,2.340776,3.523098,1000,jam_city_inc,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1743,2020-01-26,0.104405,367.0,1.0,25638.89000,69.860736,564.0,1.536785,48548.33235,86.078603,4.063562,0,33,3.734649,7.255517,1009,gaylord_hotels,7,3
1744,2020-02-02,0.300121,463.0,1.0,41277.07235,89.151344,827.0,1.786177,92661.78940,112.045695,4.063562,0,44,3.766328,7.353069,1009,gaylord_hotels,7,3
1745,2020-02-09,0.471372,547.0,1.0,30720.27000,56.161371,877.0,1.603291,71700.73470,81.756824,4.063562,0,48,3.798006,7.451187,1009,gaylord_hotels,7,3
1746,2020-02-16,0.587578,604.0,1.0,37580.26000,62.218974,948.0,1.569536,66627.84235,70.282534,4.063562,0,49,3.829685,7.549869,1009,gaylord_hotels,7,3


In [9]:
## Default parameter sets, both supplied by the data formatter

# Fixed experiment parameters. Each data-specific formatter defines these five:
# total time steps of TFT, number of LSTM encoder steps, max number of epochs,
# early stopping patience, and CPU multiprocessing workers.
fixed_params = data_formatter.get_experiment_params()

# Default (already optimised) model parameters; the formatter is free to choose these.
params = data_formatter.get_default_model_params()

print(fixed_params, params, sep="\n")

{'total_time_steps': 15, 'num_encoder_steps': 12, 'num_epochs': 10, 'early_stopping_patience': 15, 'multiprocessing_workers': 5, 'column_definition': [('merchant_index', <DataTypes.CATEGORICAL: 1>, <InputTypes.ID: 4>), ('acq_week', <DataTypes.DATE: 2>, <InputTypes.TIME: 5>), ('N_week_cohort', <DataTypes.REAL_VALUED: 0>, <InputTypes.TARGET: 0>), ('year', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('linear', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('linear_1', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('holidays', <DataTypes.CATEGORICAL: 1>, <InputTypes.KNOWN_INPUT: 2>), ('weekofyear', <DataTypes.CATEGORICAL: 1>, <InputTypes.KNOWN_INPUT: 2>), ('category', <DataTypes.CATEGORICAL: 1>, <InputTypes.STATIC_INPUT: 3>), ('subcategory', <DataTypes.CATEGORICAL: 1>, <InputTypes.STATIC_INPUT: 3>)], 'input_size': 8, 'output_size': 1, 'category_counts': [2, 52, 8, 9], 'input_obs_loc': [0], 'static_input_loc': [6, 7], 'known_regular_inputs': [1, 2, 3], 

In [10]:
## Models are serialized into a sub-folder named after the current date and minute
model_folder = os.path.join(
    config.model_folder,
    datetime.datetime.now().strftime("%Y-%m-%d_%H_%M"),
)
params["model_folder"] = model_folder
print("Model will be saved in: ", model_folder)

## Testing-only overrides: small sizes so the whole notebook runs quickly
if use_testing_mode:
    fixed_params["num_epochs"] = 1
    params["hidden_layer_size"] = 5
    train_samples = 100
    valid_samples = 10

## Hyperparameter manager: every parameter gets a single-candidate range
print("\n*** Loading hyperparm manager ***")

# The distributed manager (DistributedHyperparamOptManager) takes the same
# arguments but raised errors, so the single-machine manager is used instead.
opt_manager = HyperparamOptManager(
    {key: [value] for key, value in params.items()},
    fixed_params,
    model_folder,
)
opt_manager

Model will be saved in:  output_2023_02/saved_models/acq_10/2023-02-18_01_02

*** Loading hyperparm manager ***


<libs.hyperparam_opt.HyperparamOptManager at 0x7fe2485765b0>

In [11]:
# Show the manager's state before any run: output folder, candidate ranges,
# and the (still empty) results and saved-parameter tables.
print(
    opt_manager.hyperparam_folder,
    opt_manager.param_ranges,
    opt_manager.results,
    opt_manager.saved_params,
    sep="\n",
)


output_2023_02/saved_models/acq_10/2023-02-18_01_02
{'dropout_rate': [0.2], 'hidden_layer_size': [160], 'learning_rate': [0.001], 'minibatch_size': [256], 'max_gradient_norm': [1.0], 'num_heads': [4], 'stack_size': [1], 'model_folder': ['output_2023_02/saved_models/acq_10/2023-02-18_01_02']}
Empty DataFrame
Columns: []
Index: []
Empty DataFrame
Columns: []
Index: []


In [12]:
# Ask the manager for the next parameter set to try.
# NOTE: this rebinds `params`, which previously held only the formatter's default
# model parameters; the displayed dict also contains the fixed experiment parameters.
params=opt_manager.get_next_parameters()
params

{'dropout_rate': 0.2,
 'hidden_layer_size': 160,
 'learning_rate': 0.001,
 'max_gradient_norm': 1.0,
 'minibatch_size': 256,
 'model_folder': 'output_2023_02/saved_models/acq_10/2023-02-18_01_02',
 'num_heads': 4,
 'stack_size': 1,
 'total_time_steps': 15,
 'num_encoder_steps': 12,
 'num_epochs': 10,
 'early_stopping_patience': 15,
 'multiprocessing_workers': 5,
 'column_definition': [('merchant_index',
   <DataTypes.CATEGORICAL: 1>,
   <InputTypes.ID: 4>),
  ('acq_week', <DataTypes.DATE: 2>, <InputTypes.TIME: 5>),
  ('N_week_cohort', <DataTypes.REAL_VALUED: 0>, <InputTypes.TARGET: 0>),
  ('year', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>),
  ('linear', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>),
  ('linear_1', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>),
  ('holidays', <DataTypes.CATEGORICAL: 1>, <InputTypes.KNOWN_INPUT: 2>),
  ('weekofyear', <DataTypes.CATEGORICAL: 1>, <InputTypes.KNOWN_INPUT: 2>),
  ('category', <DataTypes.CATEGORICAL: 1>, <I

In [13]:
# Build a model inside a fresh graph/session so it can be inspected in the following cells.
# `model` stays bound after the `with` block exits and is displayed as the cell output.
# NOTE(review): unlike the calibration and test cells, this cell does not call
# tf.keras.backend.set_session(default_keras_session) afterwards - confirm that is intended.
tf.reset_default_graph()
with tf.Graph().as_default(), tf.Session(config=tf_config) as sess:
    # set session on keras
    tf.keras.backend.set_session(sess)

    # set initial parameter, model, training data
    params = opt_manager.get_next_parameters() # get initialized parameters from random search in new iteration
    model = ModelClass(params, use_cudnn=use_gpu)
model



 Serialization. Resetting temporary folder for Keras training outputs...


*** TemporalFusionTransformer params in initiating process***
# dropout_rate = 0.2
# hidden_layer_size = 160
# learning_rate = 0.001
# max_gradient_norm = 1.0
# minibatch_size = 256
# model_folder = output_2023_02/saved_models/acq_10/2023-02-18_01_02
# num_heads = 4
# stack_size = 1
# total_time_steps = 15
# num_encoder_steps = 12
# num_epochs = 10
# early_stopping_patience = 15
# multiprocessing_workers = 5
# column_definition = [('merchant_index', <DataTypes.CATEGORICAL: 1>, <InputTypes.ID: 4>), ('acq_week', <DataTypes.DATE: 2>, <InputTypes.TIME: 5>), ('N_week_cohort', <DataTypes.REAL_VALUED: 0>, <InputTypes.TARGET: 0>), ('year', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('linear', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('linear_1', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('holidays', <DataTypes.CATEGORICAL: 1>, <InputTypes.KNOWN_INPUT: 2>), ('weekofyear',

<libs.tft_model.TemporalFusionTransformer at 0x7fe24932d0d0>

In [14]:
# Check whether batched training data has already been cached (False at this point).
model.training_data_cached()

False

In [15]:
# Peek at two private batching methods of the model.
# Only the last expression of a cell is displayed, so the first line shows nothing.
model._batch_data
model._batch_sampled_data

<bound method TemporalFusionTransformer._batch_sampled_data of <libs.tft_model.TemporalFusionTransformer object at 0x7fe24932d0d0>>

In [16]:
# Batch and cache the train and validation splits; num_samples=-1 uses all available samples.
model.cache_batched_data(train, "train", num_samples=-1)
model.cache_batched_data(valid, "valid", num_samples=-1)

Cached data "train" updated
Cached data "valid" updated


In [17]:
# Unpack the eight objects returned by model.get_data_to_fit() for inspection.
# NOTE(review): judging by the names, the last six are presumably the arrays fed to Keras fit - confirm in libs.tft_model.
train_data, valid_data, data, labels, active_flags, val_data, val_labels, val_flags = model.get_data_to_fit()


Getting batched_data
Using cached training data
Using cached validation data
Using keras standard fit


In [18]:
# Display the batched training data: a dict of arrays (keys shown include 'identifier' and 'time').
train_data

{'identifier': array([[[1000],
         [1000],
         [1000],
         ...,
         [1000],
         [1000],
         [1000]],
 
        [[1000],
         [1000],
         [1000],
         ...,
         [1000],
         [1000],
         [1000]],
 
        [[1000],
         [1000],
         [1000],
         ...,
         [1000],
         [1000],
         [1000]],
 
        ...,
 
        [[1009],
         [1009],
         [1009],
         ...,
         [1009],
         [1009],
         [1009]],
 
        [[1009],
         [1009],
         [1009],
         ...,
         [1009],
         [1009],
         [1009]],
 
        [[1009],
         [1009],
         [1009],
         ...,
         [1009],
         [1009],
         [1009]]]),
 'time': array([[['2017-03-12'],
         ['2017-03-19'],
         ['2017-03-26'],
         ...,
         ['2017-06-04'],
         ['2017-06-11'],
         ['2017-06-18']],
 
        [['2017-03-19'],
         ['2017-03-26'],
         ['2017-04-02'],
       

In [None]:
#====================================================
print("\n\n*** Running calibration ***")
## Each iteration starts from a different initialization; the parameter set that
## reaches the smallest validation loss is recorded in the hyperparameter manager.
## `num_repeats` (set in the config cell) is the number of iterations.
best_loss = np.inf  # np.Inf was removed in NumPy 2.0; np.inf works on every version

# The loop restores the iteration structure the final print statement relies on.
# Without it `_` is IPython's "last output" variable, so the message printed the
# previous cell's output and num_repeats had no effect.
for iteration in range(num_repeats):

    tf.reset_default_graph()
    with tf.Graph().as_default(), tf.Session(config=tf_config) as sess:
        # set session on keras
        tf.keras.backend.set_session(sess)

        # set initial parameter, model, training data
        params = opt_manager.get_next_parameters()  # next parameter set from the manager
        model = ModelClass(params, use_cudnn=use_gpu)
        if not model.training_data_cached():
            # Batch and cache the data; num_samples=-1 uses all available samples.
            # NOTE(review): train_samples / valid_samples (including the testing-mode
            # overrides) are not used here; -1 is hard-coded as in the original cell.
            model.cache_batched_data(train, "train", num_samples=-1)
            model.cache_batched_data(valid, "valid", num_samples=-1)

        # run session with initialization
        sess.run(tf.global_variables_initializer())

        model.fit()

        val_loss = model.evaluate()

        # Keep only the best-scoring run.
        if val_loss < best_loss:
            opt_manager.update_score(params, val_loss, model)
            best_loss = val_loss

        # Hand Keras back its default session before this one is closed.
        tf.keras.backend.set_session(default_keras_session)

        print('\n* Iteration ' + str(iteration) + ' is done.')


In [None]:



#====================================================
print("\n\n*** Running tests ***")


def extract_numerical_data(data):
  """Strips out forecast time and identifier columns."""
  return data[[
      col for col in data.columns
      if col not in {"forecast_time", "identifier"}
  ]]


# Sub-folder of config.results_folder that receives the result CSVs.
# `version_name` is not defined by any earlier cell of this notebook; if it is
# still undefined, fall back to the name of the run folder holding the saved
# model so the cell does not fail with a NameError after all predictions ran.
try:
  version_name
except NameError:
  version_name = os.path.basename(os.path.normpath(opt_manager.hyperparam_folder))
results_dir = f'{config.results_folder}/{version_name}'

# Experiments whose time column is named 'week' instead of 'acq_week'.
week_column_experiments = {'censored_spend_10', 'censored_spend_100', 'censored_spend_1000'}
time_col = 'week' if expt_name in week_column_experiments else 'acq_week'

tf.reset_default_graph()
with tf.Graph().as_default(), tf.Session(config=tf_config) as sess:
  # set session on keras
  tf.keras.backend.set_session(sess)

  # Rebuild the best model found during calibration and load its saved weights.
  best_params = opt_manager.get_best_params()
  model = ModelClass(best_params, use_cudnn=use_gpu)

  model.load(opt_manager.hyperparam_folder)

  print("\nComputing best validation loss")
  val_loss = model.evaluate(valid)


  print("\nSaving results")
  ## prepare valid_fortest data
  # First forecast time on the test split and last forecast time on train+valid.
  test_start = model.predict(test, return_targets=True)["p50"]['forecast_time'].iloc[0]
  # The train+valid prediction is computed once and reused for the *_cal outputs below.
  output_withtrain_map = model.predict(pd.concat([train, valid]), return_targets=True)
  cal_end = output_withtrain_map["p50"]['forecast_time'].iloc[-1]

  # Whole weeks between the two forecast times, minus one (same value the
  # original obtained by parsing the Timedelta's string form).
  gap = int(((pd.to_datetime(test_start) - pd.to_datetime(cal_end)) / 7).days) - 1

  # Keep only the validation rows later than `gap` weeks before the last
  # validation date; they are prepended to the test split for prediction.
  cutoff = (pd.to_datetime(valid[time_col].iloc[-1]) - datetime.timedelta(weeks=gap)).strftime("%Y-%m-%d")
  valid_fortest = valid[valid[time_col] > cutoff]


  print("\nComputing test loss")
  output_map = model.predict(pd.concat([valid_fortest, test]), return_targets=True)
  targets = data_formatter.format_predictions(output_map["targets"])
  p10_forecast = data_formatter.format_predictions(output_map["p10"])
  p50_forecast = data_formatter.format_predictions(output_map["p50"])
  p90_forecast = data_formatter.format_predictions(output_map["p90"])

  targets_cal = data_formatter.format_predictions(output_withtrain_map["targets"])
  p10_forecast_cal = data_formatter.format_predictions(output_withtrain_map["p10"])
  p50_forecast_cal = data_formatter.format_predictions(output_withtrain_map["p50"])
  p90_forecast_cal = data_formatter.format_predictions(output_withtrain_map["p90"])

  ### save files
  os.makedirs(results_dir, exist_ok=True)
  for file_name, frame in [
      ('target.csv', targets),
      ('target_cal.csv', targets_cal),
      ('pred_q90.csv', p90_forecast),
      ('pred_q90_cal.csv', p90_forecast_cal),
      ('pred_q50.csv', p50_forecast),
      ('pred_q50_cal.csv', p50_forecast_cal),
      ('pred_q10.csv', p10_forecast),
      ('pred_q10_cal.csv', p10_forecast_cal),
  ]:
    frame.to_csv(f'{results_dir}/{file_name}')

  # Normalised quantile losses on the test predictions, one per quantile.
  p10_loss = utils.numpy_normalised_quantile_loss(
      extract_numerical_data(targets), extract_numerical_data(p10_forecast),
      0.1)
  p50_loss = utils.numpy_normalised_quantile_loss(
      extract_numerical_data(targets), extract_numerical_data(p50_forecast),
      0.5)
  p90_loss = utils.numpy_normalised_quantile_loss(
      extract_numerical_data(targets), extract_numerical_data(p90_forecast),
      0.9)

  # Hand Keras back its default session before this one is closed.
  tf.keras.backend.set_session(default_keras_session)



print("Training completed @ {}".format(datetime.datetime.now()))
print("Best validation loss = {x:.5f}".format(x=val_loss))
print("Params:")

for k in best_params:
  print(k, " = ", best_params[k])
print()
print("Normalised Quantile Loss for Test Data: P50={x1:.5f}, P90={x2:.5f}, P10={x3:.5f}".format(
    x1=p50_loss.mean(), x2=p90_loss.mean(), x3=p10_loss.mean()))






