In [15]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability.python.distributions import kullback_leibler
from sklearn.preprocessing import MinMaxScaler
from scipy import interpolate
from scipy.stats import norm
import numpy as np
import pandas as pd
import datetime as dt
import tqdm

from lib.Metrics import Metrics
from lib.IRNN_Bayes import IRNN_Bayes
from lib.IRNN import IRNN
from lib.train_functions import fit
from lib.regional_data_builder import DataConstructor
from lib.utils import *

tfd = tfp.distributions


In [16]:

# Class to evaluate a set of hyper parameters. 
# Using a class allows the __call__ function to be used for different models with different configurations
# class is used wih bayesian optimization to find the best hyper parameters
class Eval_Fn:
    def __init__(self, root='', model=None, country='US', n_folds = 5, season = 2017, gamma=28, plot=True, verbose=True, n_queries=49, min_score=-25, **kwargs):
        self.model = model  
        self.n_folds = n_folds      # number of folda (k fold cross validation)
        self.season = season
        self.gamma = gamma      
        self.plot = plot            # save plots validation set forecasts  
        self.verbose = verbose      # print during training
        self.min_score = min_score  # score to give hyper paremeters if they break and get -infinity
        self.root = root            # save directory
        self.country = country
        # get data for training, text data unused.
        
        self._data = DataConstructor(test_season = season, country=country, full_year=False, gamma = 28, window_size = 54, teacher_forcing=True, n_queries = 99)
        self.x_train, self.y_train, self.x_test, self.y_test = self._data()

#         self.x_train, self.y_train, self.x_test, self.y_test = self._data(self.model.model_type, self.model.forecast_type, self.model.query_forecast)
        self.x_train = tf.cast(self.x_train, tf.float32)
        self.y_train = tf.cast(self.y_train, tf.float32)
        self.x_test = tf.cast(self.x_test, tf.float32)
        self.y_test = tf.cast(self.y_test, tf.float32)

    def __call__(self, batch_size = 32, **kwargs):
        tf.keras.backend.clear_session()
        score = {}
        plt.clf()

        for fold in range(self.n_folds):
            try:
                if 'n_op' in kwargs:
                    kwargs['n_op'] = int(kwargs['n_op'])
                # split data into train and validation folds
                if isinstance(self.x_train, list):
                    x_val = [d[-(365*(fold+1)): -(365*(fold)+1)] for d in self.x_train]
                    x_tr = [d[:-(365*(fold+1))] for d in self.x_train]

                else:
                    x_val = self.x_train[-(365*(fold+1)): -(365*(fold)+1)]
                    x_tr = self.x_train[:-(365*(fold+1))]

                y_val = self.y_train[-(365*(fold+1)): -(365*(fold)+1)]
                y_tr = self.y_train[:-(365*(fold+1))]

                val_dates = self._data.train_dates[-365*(fold+1): -(365*fold)-1]
                train_dates = self._data.train_dates[:-365*(fold+1)]

                x_val = x_val[:,:,-kwargs['n_op']:]
                y_val = y_val[:,:,-kwargs['n_op']:].numpy()

                train_dataset = tf.data.Dataset.from_tensor_slices((x_tr[:,:,-kwargs['n_op']:], y_tr[:,:,-kwargs['n_op']:]))
                train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)


                _model = self.model(**kwargs)

                # define loss, epochs learning rate
                if _model.loss == 'NLL':
                    def loss(y, p_y):
                        return -p_y.log_prob(y)
                if _model.loss == 'MSE':
                    loss = tf.keras.losses.mean_squared_error

                if 'epochs' in kwargs:
                    epochs = int(kwargs['epochs'])
                else:
                    epochs = self.epochs

                if 'lr_power' in kwargs:
                    lr = np.power(10, kwargs['lr_power'])
                else:
                    lr = 1e-3

                def loss_fn(y, p_y):
                    return -p_y.log_prob(y)

                optimizer = tf.optimizers.Adam(learning_rate=1e-3)

                _model(x_val)
                prediction_steps = 3
                _model, history = fit(_model, 
                        train_dataset,
                        optimizer=optimizer, 
                        epochs = epochs, 
                        loss_fn = loss_fn,  
                        prediction_steps = prediction_steps,
                        speedy_training=False
                        )

                predictions = _model.predict(x_val, 25, verbose=True)

                df = convert_to_df(predictions, y_val, val_dates + dt.timedelta(days = self.gamma), self._data, type=_model.forecast_type)

                # get score for fold, 2 options depending on whether the forecast is a list (IRNN) or a single prediction
                try:
                    score[fold] = Metrics.nll(df[self.gamma])
                except:
                    score[fold] = np.sum(np.asarray([Metrics.nll(d) for d in df.values()]))

                # can be useful to plot the validation curves to check things are working 
                if self.plot:
                    for idx, d in enumerate(df.values()):
                        plt.subplot(len(df.keys()), 1, idx+1)
                        plt.plot(d.index, d['True'], color='black')
                        plt.plot(d.index, d['Pred'], color='red')
                        plt.fill_between(d.index, d['Pred']+d['Std'], d['Pred']-d['Std'], color='red', alpha=0.3)
            except Exception as e:
                score[fold] = -self.min_score
                print(e)

        if self.plot:
            if not os.path.exists(self.root):
                os.mkdir(self.root)
            figs = os.listdir(self.root)
            nums=[-1]
            for f in figs:
                if 'fig' in f:
                    nums.append(int(f.split('_')[1].split('.')[0]))

            plt.savefig(self.root+'fig_'+str(max(nums)+1)+'.pdf')

        try:
            # NLL can be give nan values, try to prevent this breaking things
            if np.isfinite(-sum(score.values())):
                return -sum(score.values())
            else:
                return self.min_score
        except:
            return self.min_score

In [19]:
eval = Eval_Fn
eval(gamma = 28,
    epochs= 30,
    kl_power= -2.857091693154802,
    lr_power= -3.7364141761644545,
    n_op= 93,
    op_scale_pwr= -0.27139484469983133,
    p_scale_pwr= -1.827071638197097,
    q_scale_pwr= -1.2461978663286395,
    rnn_units= 108
    )
eval()

<__main__.Eval_Fn at 0x2d076275f10>

In [48]:
pbounds = {'rnn_units':(25,125),        # units in rnn layer
               'n_queries':(20,100),        # number of queries
               'kl_power':(-3,0),           # KL annealing term = 10^kl_power
               'op_scale':(0.01, 0.1),      # scaling factor for output
               'prior_scale':(1e-4, 1e-2),  # prior stddev
               'epochs':(10,100),           # epochs to train for
               'lr_power':(-4, -2),         # learning rate = 10^lr_power
               'q_scale':(0.001, 0.1)       # posterior scaling factor
               }

num = 3
df = pd.DataFrame(columns = ['rnn_units', 'n_op', 'kl_power', 'op_scale', 'prior_scale', 'epochs', 'lr_power', 'q_scale'])
for i in np.linspace(25, 125, num).astype(int):
    for j in np.linspace(70, 70, 1).astype(int):
        for k in np.linspace(-3, 0, num):
            for l in np.linspace(0.01, 0.1, num):
                for m in np.linspace(1e-4, 1e-2, num):
                    for n in np.linspace(10, 100, num):
                        for o in np.linspace(-3, -3, num):
                            for p in np.linspace(0.001, 0.1, num):
                                df = df.append(pd.DataFrame(columns = df.columns, data = np.asarray([[i,j,k,l,m,n,o,p]])))
df['started'] = np.zeros(df.shape[0])
df['validation_score'] = np.zeros(df.shape[0])
df.to_csv('validation_scores.csv')



In [29]:
df = df.append(pd.DataFrame(columns = df.columns, data = np.asarray([[i,j,k,l,m,n,o,p]])))

In [None]:
max_iter =250
model = IRNN_Bayes
gamma = 28
root = 'Results/IRNN_Bayes/'
n_folds = 2
eval = Eval_Fn(model=IRNN_Bayes, root = root, gamma=gamma, plot=False, n_folds=n_folds, verbose=False)
eval(gamma = 28,
        epochs= 30,
        kl_power= -2.857091693154802,
        lr_power= -3.7364141761644545,
        n_op= 93,
        op_scale_pwr= -0.27139484469983133,
        p_scale_pwr= -1.827071638197097,
        q_scale_pwr= -1.2461978663286395,
        rnn_units= 108
        )

In [None]:
IRNN_Bayes.pbounds

In [None]:
from bayes_opt import BayesianOptimization
from Test_Fn import Test_fn

max_iter =250
model = IRNN_Bayes
gamma = 28
root = 'Results/IRNN_Bayes/'

n_folds = 4 # increase this to improve rubustness, will get slower
eval = Eval_Fn(model=IRNN_Bayes, 
               root = root, gamma=gamma, plot=False, n_folds=n_folds, verbose=False)

optimizer = BayesianOptimization(
    f=eval,
    pbounds=model.pbounds,
    random_state=1,
    verbose=2
)

optimizer = load_steps(root, optimizer)

for _ in range(100):
    optimizer.maximize(
        init_points=10,
        n_iter=10)

    save_steps(root, optimizer)

Test_fn(root = root, model = model, gammas=[gamma], test_seasons = [2015])



[Errno 2] No such file or directory: 'Results/IRNN_Bayes/optimiser_results.json'
failed to register previous steps
|   iter    |  target   |  epochs   | kl_power  | lr_power  |   n_op    | op_sca... | p_scal... | q_scal... | rnn_units |
-------------------------------------------------------------------------------------------------------------------------
Instructions for updating:
Please pass an integer value for `reinterpreted_batch_ndims`. The current behavior corresponds to `reinterpreted_batch_ndims=tf.size(distribution.batch_shape_tensor()) - 1`.


Epoch 1:   0%|          | 0/142 [00:00<?, ?batch/s]

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Epoch 1: 100%|██████████| 142/142 [01:51<00:00,  1.28batch/s, kl=6.02e+3, nll=354]    
Epoch 2: 100%|██████████| 142/142 [00:08<00:00, 16.12batch/s, kl=601, nll=291]   
Epoch 3: 100%|██████████| 142/142 [00:08<00:00, 16.01batch/s, kl=207, nll=287]
Epoch 4: 100%|██████████| 142/142 [00:08<00:00, 16.00batch/s, kl=154, nll=270]
Epoch 5: 100%|██████████| 142/142 [00:08<00:00, 16.04batch/s, kl=132, nll=267]
Epoch 6: 100%|██████████| 142/142 [00:08<00:00, 15.88batch/s, kl=123, nll=258]
Epoch 7: 100%|██████████| 142/142 [00:08<00:00, 16.13batch/s, kl=117, nll=251]
Epoch 8: 100%|██████████| 142/142 [00:08<00:00, 16.02batch/s, kl=108, nll=243]
Epoch 9: 100%|██████████| 142/142 [00:08<00:00, 16.20batch/s, kl=106, nll=229]
Epoch 10: 100%|██████████| 142/142 [00:08<00:00, 16.11batch/s, kl=103, nll=213]
Epoch 11: 100%|██████████| 142/142 [00:08<00:00, 16.09batch/s, kl=120, nll=120]
Epoch 12: 100%|██████████| 142/142 [00:08<00:00, 16.01batch/s, kl=103, nll=47]  
Epoch 13: 100%|██████████| 142/142 [0

| [0m1        [0m | [0m-18.23   [0m | [0m100.9    [0m | [0m-0.839   [0m | [0m-4.0     [0m | [0m44.19    [0m | [0m-2.56    [0m | [0m-2.723   [0m | [0m-2.441   [0m | [0m59.56    [0m |


Epoch 1: 100%|██████████| 142/142 [01:47<00:00,  1.32batch/s, kl=55.9, nll=70.4]
Epoch 2: 100%|██████████| 142/142 [00:08<00:00, 16.73batch/s, kl=54.5, nll=1.07]
Epoch 3: 100%|██████████| 142/142 [00:08<00:00, 16.78batch/s, kl=53.1, nll=-.244]  
Epoch 4: 100%|██████████| 142/142 [00:08<00:00, 16.71batch/s, kl=51.6, nll=-.704] 
Epoch 5: 100%|██████████| 142/142 [00:08<00:00, 16.34batch/s, kl=50.1, nll=-.943]
Epoch 6: 100%|██████████| 142/142 [00:08<00:00, 16.53batch/s, kl=48.5, nll=-1.07]
Epoch 7: 100%|██████████| 142/142 [00:08<00:00, 16.53batch/s, kl=47, nll=-1.16]  
Epoch 8: 100%|██████████| 142/142 [00:08<00:00, 16.57batch/s, kl=45.4, nll=-1.24]
Epoch 9: 100%|██████████| 142/142 [00:08<00:00, 16.51batch/s, kl=43.8, nll=-1.28]
Epoch 10: 100%|██████████| 142/142 [00:08<00:00, 16.72batch/s, kl=42.3, nll=-1.32]
Epoch 11: 100%|██████████| 142/142 [00:08<00:00, 16.65batch/s, kl=40.7, nll=-1.33]
Epoch 12: 100%|██████████| 142/142 [00:08<00:00, 16.49batch/s, kl=39.1, nll=-1.36]
Epoch 13: 10

In [None]:
_model(x_test)

In [None]:
max_iter =250
model = IRNN_Bayes
gamma = 28
root = 'Results/IRNN_Bayes/'

n_folds = 5 # increase this to improve rubustness, will get slower

eval = Eval_Fn(model=IRNN_Bayes, root = root, gamma=gamma, plot=False, n_folds=n_folds, verbose=False)

In [None]:
eval(rnn_units = 25, 
     n_queries = 20,
     kl_power = -2.,
     p_scale_pwr = -3.,
     q_scale_pwr = -3.,
     op_scale_pwr = -3.,
     epochs = 10,
     lr_power = -3.,
     q_scale = 0.001)

In [None]:
model.pbounds

In [None]:
if __name__ == '__main__':


    from bayes_opt import BayesianOptimization
    from Test_Fn import Test_fn

    max_iter =250
    model = FF
    gamma = 14
    root = 'Results/FF/'

    n_folds = 5 # increase this to improve rubustness, will get slower

    eval = Eval_Fn(model=model, root = root, gamma=gamma, plot=False, n_folds=n_folds, verbose=False)
    optimizer = BayesianOptimization(
        f=eval,
        pbounds=model.pbounds,
        random_state=1,
        verbose=2
    )

    optimizer = load_steps(root, optimizer)

    optimizer.maximize(
        init_points=10,
        n_iter=50)

    save_steps(root, optimizer)

    Test_fn(root = root, model = model, gammas=[gamma], test_seasons = [2015])




In [None]:
prior_std = 0.1
c = np.log(np.expm1(1.))
q_scale = prior_std

x = np.random.normal(0, 0.1, 100)

In [None]:
a = q_scale*tf.nn.softplus(c + x)

In [None]:
a

In [None]:
a.numpy().mean()