# Preliminary experiments

In [1]:
%config Completer.use_jedi = False


# imports

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function




%matplotlib inline

# Various
import numpy as np
import pandas as pd
from six.moves import xrange
import numpy as np
import matplotlib.pyplot as plt
import os
import time


# Tensorflow
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, Activation, Masking, BatchNormalization, GRU, Lambda,\
TimeDistributed
from tensorflow.keras.optimizers import RMSprop,Adam
from tensorflow.keras.callbacks import History

from tensorflow.keras import backend as k
from tensorflow.keras import callbacks
from tensorflow import keras

# sklearn
from sklearn.preprocessing import normalize
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import KFold
from sklearn import pipeline
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Project specific modules
from losses import weibull_loglik_discrete, weibull_loglik_continuous
from preprocessing import build_data
from activations import activate

# math package
import math

In [2]:
tf.__version__

'2.4.1'

In [3]:
from tensorflow import keras

In [4]:
keras.__version__

'2.4.0'

## Loss Function

In [145]:
def loglik_discrete(y, u, a, b, epsilon=k.epsilon()):
    hazard0 = k.pow((y + epsilon) / a, b)
    hazard1 = k.pow((y + 1.0) / a, b)

    loglikelihoods = u * \
        k.log(k.exp(hazard1 - hazard0) - (1.0 - epsilon)) - hazard1
    return loglikelihoods


def loglik_continuous(y, u, a, b, epsilon=k.epsilon()):
    ya = (y + epsilon) / a
    loglikelihoods = u * (k.log(b) + b * k.log(ya)) - k.pow(ya, b)
    return loglikelihoods


class CustomLoss(keras.losses.Loss):
    """ Creates a keras WTTE-loss function.
        - Usage
            :Example:
            .. code-block:: python
               loss = wtte.Loss(kind='discrete').loss_function
               model.compile(loss=loss, optimizer=RMSprop(lr=0.01))
               # And with masking:
               loss = wtte.Loss(kind='discrete',reduce_loss=False).loss_function
               model.compile(loss=loss, optimizer=RMSprop(lr=0.01),
                              sample_weight_mode='temporal')
        .. note::
            With masking keras needs to access each loss-contribution individually.
            Therefore we do not sum/reduce down to scalar (dim 1), instead return a 
            tensor (with reduce_loss=False).
        :param kind:  One of 'discrete' or 'continuous'
        :param reduce_loss: 
        :param clip_prob: Clip likelihood to [log(clip_prob),log(1-clip_prob)]
        :param regularize: Deprecated.
        :param location: Deprecated.
        :param growth: Deprecated.
        :type reduce_loss: Boolean
    """

    def __init__(self,
                 kind,
                 reduce_loss=False):
        super(CustomLoss, self).__init__
        self.kind = kind
        self.reduce_loss = reduce_loss

    def loss_function(self, y_true, y_pred):
        
        y, u = tf.unstack(y_true, axis=-1)
        y = tf.cast(y, tf.float32)
        u = tf.cast(u, tf.float32)
        
        a, b = tf.unstack(y_pred, axis=-1)
        
        if self.kind == 'discrete':
            loglikelihoods = loglik_discrete(y, u, a, b)
        elif self.kind == 'continuous':
            loglikelihoods = loglik_continuous(y, u, a, b)
            
        if self.reduce_loss:
            loss = -1.0 * k.mean(loglikelihoods, axis=-1)
        else:
            loss = -loglikelihoods

        return loss

## Activation Layer

In [146]:
class Activate(keras.layers.Layer):
    """ Elementwise computation of alpha and regularized beta.
        Wrapper to `output_lambda` using keras.layers.Activation. 
        See this for details.
        - Usage
            .. code-block:: python
               wtte_activation = wtte.OuputActivation(init_alpha=1.,
                                                 max_beta_value=4.0).activation
               model.add(Dense(2))
               model.add(Activation(wtte_activation))
    """

    def __init__(self, init_alpha=1.0, max_beta_value=1.0):
        super(Activate, self).__init__()
        self.init_alpha = init_alpha
        self.max_beta_value = max_beta_value
        

    def __call__(self, ab):
        """ (Internal function) Activation wrapper
        :param ab: original tensor with alpha and beta.
        :return ab: return of `output_lambda` with `init_alpha` and `max_beta_value`.
        """
        
        a, b = tf.unstack(ab, axis=-1)
#         print(a)
#         print(b)
        
#         a = k.exp(a)
#         b = k.softplus(b)
        
#         print(a)
#         print(b)
        
        # Implicitly initialize alpha:
        a = self.init_alpha * k.exp(a)

        if self.max_beta_value > 1.05:  # some value >>1.0
            # shift to start around 1.0
            # assuming input is around 0.0
            _shift = np.log(self.max_beta_value - 1.0)

            b = b - _shift

        b = self.max_beta_value * k.sigmoid(b)

        x = k.stack([a, b], axis=-1)

        return x

## A simple network

In [134]:
def network(train_X, train_y, test_X, test_y, mask_value, epochs, batch_size, init_alpha):

    k.set_epsilon(1e-10)
    history = History()
    nan_terminator = callbacks.TerminateOnNaN()
    reduce_lr = callbacks.ReduceLROnPlateau(monitor='loss')

    n_features = train_X.shape[-1]
    print(train_X.shape)

    # Start building our model
    model = Sequential()
    # Mask parts of the lookback period that are all _mask_value_ (i.e., unobserved) so they don't skew the model
    model.add(Masking(mask_value=mask_value, input_shape=(None, n_features)))
    model.add(GRU(20, activation='tanh', recurrent_dropout=0.25, return_sequences=False))
    model.add(Dense(2))
    custom_activation = Activate(init_alpha)
    model.add(Activation(custom_activation))
#     model.add(Activation(activate))
    
    loss = CustomLoss(kind='discrete').loss_function
    # Use the discrete log-likelihood for Weibull survival data as our loss function
    model.compile(loss=loss, optimizer=Adam(lr=.01, clipvalue=0.5))

    model.fit(train_X, train_y,
              epochs=epochs,
              batch_size=batch_size,
              verbose=1,
              callbacks=[nan_terminator, history, reduce_lr],
              workers=32)

    return model

In [97]:
ab = tf.constant([[1,2],[3,4]])

In [93]:
ab

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[1, 2],
       [3, 4]], dtype=int32)>

In [9]:
x, y = tf.unstack(ab, axis=-1)

In [10]:
xy = tf.stack([x,y], axis=-1)

In [11]:
xy

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[1, 2],
       [3, 4]], dtype=int32)>

### Weibull specific functions

In [12]:
def weibull_pdf(alpha, beta, t):
    return (beta/alpha) * (t/alpha)**(beta-1)*np.exp(- (t/alpha)**beta)

In [13]:
def weibull_median(alpha, beta):
    return alpha*(-np.log(.5))**(1/beta)

In [14]:
def weibull_mean(alpha, beta):
    return alpha * math.gamma(1 + 1/beta)

In [15]:
def weibull_mode(alpha, beta):
    assert np.all(beta > 1)
    return alpha * ((beta-1)/beta)**(1/beta)

## Dataset 1 - CMAPSS

### 1st dataset

In [16]:
id_col = 'unit_number'
time_col = 'time'
feature_cols = [ 'op_setting_1', 'op_setting_2', 'op_setting_3'] + ['sensor_measurement_{}'.format(x) for x in range(1,22)]
column_names = [id_col, time_col] + feature_cols

In [17]:
train_x_orig = pd.read_csv('https://raw.githubusercontent.com/daynebatten/keras-wtte-rnn/master/train.csv', header=None, names=column_names)
test_x_orig = pd.read_csv('https://raw.githubusercontent.com/daynebatten/keras-wtte-rnn/master/test_x.csv', header=None, names=column_names)
test_y_orig = pd.read_csv('https://raw.githubusercontent.com/daynebatten/keras-wtte-rnn/master/test_y.csv', header=None, names=['T'])

In [18]:
test_x_orig.set_index(['unit_number', 'time'], verify_integrity=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,op_setting_1,op_setting_2,op_setting_3,sensor_measurement_1,sensor_measurement_2,sensor_measurement_3,sensor_measurement_4,sensor_measurement_5,sensor_measurement_6,sensor_measurement_7,...,sensor_measurement_12,sensor_measurement_13,sensor_measurement_14,sensor_measurement_15,sensor_measurement_16,sensor_measurement_17,sensor_measurement_18,sensor_measurement_19,sensor_measurement_20,sensor_measurement_21
unit_number,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,21.61,553.90,...,521.72,2388.03,8125.55,8.4052,0.03,392,2388,100.0,38.86,23.3735
1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,21.61,554.85,...,522.16,2388.06,8139.62,8.3803,0.03,393,2388,100.0,39.02,23.3916
1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,1401.34,14.62,21.61,554.11,...,521.97,2388.03,8130.10,8.4441,0.03,393,2388,100.0,39.08,23.4166
1,4,0.0042,0.0000,100.0,518.67,642.44,1584.12,1406.42,14.62,21.61,554.07,...,521.38,2388.05,8132.90,8.3917,0.03,391,2388,100.0,39.00,23.3737
1,5,0.0014,0.0000,100.0,518.67,642.51,1587.19,1401.92,14.62,21.61,554.16,...,522.15,2388.03,8129.54,8.4031,0.03,390,2388,100.0,38.99,23.4130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,194,0.0049,0.0000,100.0,518.67,643.24,1599.45,1415.79,14.62,21.61,553.41,...,520.69,2388.00,8213.28,8.4715,0.03,394,2388,100.0,38.65,23.1974
100,195,-0.0011,-0.0001,100.0,518.67,643.22,1595.69,1422.05,14.62,21.61,553.22,...,521.05,2388.09,8210.85,8.4512,0.03,395,2388,100.0,38.57,23.2771
100,196,-0.0006,-0.0003,100.0,518.67,643.44,1593.15,1406.82,14.62,21.61,553.04,...,521.18,2388.04,8217.24,8.4569,0.03,395,2388,100.0,38.62,23.2051
100,197,-0.0038,0.0001,100.0,518.67,643.26,1594.99,1419.36,14.62,21.61,553.37,...,521.33,2388.08,8220.48,8.4711,0.03,395,2388,100.0,38.66,23.2699


In [19]:
train_x_orig.set_index(['unit_number', 'time'], verify_integrity=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,op_setting_1,op_setting_2,op_setting_3,sensor_measurement_1,sensor_measurement_2,sensor_measurement_3,sensor_measurement_4,sensor_measurement_5,sensor_measurement_6,sensor_measurement_7,...,sensor_measurement_12,sensor_measurement_13,sensor_measurement_14,sensor_measurement_15,sensor_measurement_16,sensor_measurement_17,sensor_measurement_18,sensor_measurement_19,sensor_measurement_20,sensor_measurement_21
unit_number,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.70,1400.60,14.62,21.61,554.36,...,521.66,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.4190
1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,21.61,553.75,...,522.28,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.00,23.4236
1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.20,14.62,21.61,554.26,...,522.42,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442
1,4,0.0007,0.0000,100.0,518.67,642.35,1582.79,1401.87,14.62,21.61,554.45,...,522.86,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739
1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,21.61,554.00,...,522.19,2388.04,8133.80,8.4294,0.03,393,2388,100.0,38.90,23.4044
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,196,-0.0004,-0.0003,100.0,518.67,643.49,1597.98,1428.63,14.62,21.61,551.43,...,519.49,2388.26,8137.60,8.4956,0.03,397,2388,100.0,38.49,22.9735
100,197,-0.0016,-0.0005,100.0,518.67,643.54,1604.50,1433.58,14.62,21.61,550.86,...,519.68,2388.22,8136.50,8.5139,0.03,395,2388,100.0,38.30,23.1594
100,198,0.0004,0.0000,100.0,518.67,643.42,1602.46,1428.18,14.62,21.61,550.94,...,520.01,2388.24,8141.05,8.5646,0.03,398,2388,100.0,38.44,22.9333
100,199,-0.0011,0.0003,100.0,518.67,643.23,1605.26,1426.53,14.62,21.61,550.68,...,519.67,2388.23,8139.29,8.5389,0.03,395,2388,100.0,38.29,23.0640


### Data pre-processing

In [20]:
# Make engine numbers and days zero-indexed, for everybody's sanity
train_x_orig.iloc[:, 0:2] -= 1
test_x_orig.iloc[:, 0:2] -= 1

In [21]:
# always observed in our case
test_y_orig
test_y_orig['E'] = 1
# test_y = test_y.values

In [22]:
test_y_orig.head()

Unnamed: 0,T,E
0,112,1
1,98,1
2,69,1
3,82,1
4,91,1


In [23]:
train_x_orig.head()

Unnamed: 0,unit_number,time,op_setting_1,op_setting_2,op_setting_3,sensor_measurement_1,sensor_measurement_2,sensor_measurement_3,sensor_measurement_4,sensor_measurement_5,...,sensor_measurement_12,sensor_measurement_13,sensor_measurement_14,sensor_measurement_15,sensor_measurement_16,sensor_measurement_17,sensor_measurement_18,sensor_measurement_19,sensor_measurement_20,sensor_measurement_21
0,0,0,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,521.66,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419
1,0,1,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,522.28,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236
2,0,2,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,522.42,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442
3,0,3,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,522.86,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739
4,0,4,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,522.19,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044


In [24]:
test_x_orig.head()

Unnamed: 0,unit_number,time,op_setting_1,op_setting_2,op_setting_3,sensor_measurement_1,sensor_measurement_2,sensor_measurement_3,sensor_measurement_4,sensor_measurement_5,...,sensor_measurement_12,sensor_measurement_13,sensor_measurement_14,sensor_measurement_15,sensor_measurement_16,sensor_measurement_17,sensor_measurement_18,sensor_measurement_19,sensor_measurement_20,sensor_measurement_21
0,0,0,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,...,521.72,2388.03,8125.55,8.4052,0.03,392,2388,100.0,38.86,23.3735
1,0,1,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,...,522.16,2388.06,8139.62,8.3803,0.03,393,2388,100.0,39.02,23.3916
2,0,2,0.0003,0.0001,100.0,518.67,642.46,1586.94,1401.34,14.62,...,521.97,2388.03,8130.1,8.4441,0.03,393,2388,100.0,39.08,23.4166
3,0,3,0.0042,0.0,100.0,518.67,642.44,1584.12,1406.42,14.62,...,521.38,2388.05,8132.9,8.3917,0.03,391,2388,100.0,39.0,23.3737
4,0,4,0.0014,0.0,100.0,518.67,642.51,1587.19,1401.92,14.62,...,522.15,2388.03,8129.54,8.4031,0.03,390,2388,100.0,38.99,23.413


In [25]:
train_x_orig.shape

(20631, 26)

In [26]:
test_x_orig.shape

(13096, 26)

In [27]:
# tte_mean_train = np.nanmean(train_y[:, 0])
# mean_u = np.nanmean(train_y[:, 1])

# # Initialization value for alpha-bias
# init_alpha = -1.0 / np.log(1.0 - 1.0 / (tte_mean_train + 1.0))
# init_alpha = init_alpha / mean_u
# print('tte_mean_train', tte_mean_train, 'init_alpha: ', init_alpha, 'mean uncensored train: ', mean_u)

### K-fold CV

In [147]:
k.clear_session()

In [148]:
# parameters

max_time = 100
mask_value = -99
cv = 2
shuffle = True
random_state = 21

epochs = 50
batch_size = 100

In [149]:
kf = KFold(n_splits=cv, shuffle=shuffle, random_state=random_state)

In [150]:
print(f' Starting CV with: CV = {cv}, shuffle = {shuffle}, random_state = {random_state} \n and model parameters: epochs = {epochs}, batch_size =\
 {batch_size}, max_time = {max_time}, mask_value = {mask_value}')

rmse_train = []
r2_train = []
mae_train = []

rmse_test = []
r2_test = []
mae_test = []

file = 'RUL_SA_prelim_results_cmapss_1st.csv'
columns = ['fold', 'rmse_train', 'mae_train', 'r2_train', 'rmse_test', 'mae_test', 'r2_test']
results = pd.DataFrame(columns=columns)

fold_count = 0
      
start = time.time()

for train_units, test_units in kf.split(train_x_orig.unit_number.unique()):
    
    fold_count+= 1
    
    # Selecting data
    train_index = train_x_orig[train_x_orig.unit_number.isin(train_units)].index
    test_index = test_x_orig[test_x_orig.unit_number.isin(test_units)].index

    X_train = train_x_orig.iloc[train_index]
    X_test = test_x_orig.iloc[test_index]
    
    X_train.reset_index(drop=True, inplace=True)
    X_test.reset_index(drop=True, inplace=True)
    
    Y_test = test_y_orig.copy()
    Y_test = Y_test.iloc[test_units]
    Y_test.reset_index(drop=True, inplace=True)
    
    # Pre-processing data
    scaler=pipeline.Pipeline(steps=[
     ('minmax', MinMaxScaler(feature_range=(-1, 1))),
     ('remove_constant', VarianceThreshold())])

    train = X_train.copy()
    train = np.concatenate([train[['unit_number', 'time']], scaler.fit_transform(train[feature_cols])], axis=1)
    
    test = X_test.copy()
    test = np.concatenate([test[['unit_number', 'time']], scaler.transform(test[feature_cols])], axis=1)
    
    
    # Preparing data for the RNN (numpy arrays)
    train_x, train_y = build_data(units=train[:, 0], time=train[:, 1], x=train[:, 2:], max_time=max_time,
                                  is_test=False, mask_value=mask_value)

    test_x, _ = build_data(units=test[:, 0], time=test[:, 1], x=test[:, 2:], max_time=max_time,
                       is_test=True, mask_value=mask_value)
    
    test_y = Y_test.to_numpy()
    
    # initialization
    tte_mean_train = np.nanmean(train_y[:, 0])
    mean_u = np.nanmean(train_y[:, 1])

    # Initialization value for alpha-bias
    init_alpha = -1.0 / np.log(1.0 - 1.0 / (tte_mean_train + 1.0))
    init_alpha = init_alpha / mean_u
#     print('tte_mean_train', tte_mean_train, 'init_alpha: ', init_alpha, 'mean uncensored train: ', mean_u)
    
    # training
    model = network(train_x, train_y, test_x, test_y, mask_value, epochs, batch_size, init_alpha)
    
    # predicting the rul on the train fold
    train_predict = model.predict(train_x)
    train_predict = np.resize(train_predict, (train_x.shape[0], 2))
    train_result = np.concatenate((train_y, train_predict), axis=1)
    train_results_df = pd.DataFrame(train_result, columns=['T', 'E', 'alpha', 'beta'])
    train_results_df['unit_number'] = np.arange(1, train_results_df.shape[0]+1)
    
    train_results_df['predicted_mu'] = train_results_df[['alpha', 'beta']].apply(lambda row: weibull_mean(row[0], row[1]), axis=1)
#     train_results_df['predicted_median'] = train_results_df[['alpha', 'beta']].apply(lambda row: weibull_median(row[0], row[1]), axis=1)
#     train_results_df['predicted_mode'] = train_results_df[['alpha', 'beta']].apply(lambda row: weibull_mode(row[0], row[1]), axis=1)

    
    # predicting the rul on the test fold
    test_predict = model.predict(test_x)
    test_predict = np.resize(test_predict, (test_x.shape[0], 2))
    test_result = np.concatenate((test_y, test_predict), axis=1)
    test_results_df = pd.DataFrame(test_result, columns=['T', 'E', 'alpha', 'beta'])
    test_results_df['unit_number'] = np.arange(1, test_results_df.shape[0]+1)
    
    test_results_df['predicted_mu'] = test_results_df[['alpha', 'beta']].apply(lambda row: weibull_mean(row[0], row[1]), axis=1)
#     test_results_df['predicted_median'] = test_results_df[['alpha', 'beta']].apply(lambda row: weibull_median(row[0], row[1]), axis=1)
#     test_results_df['predicted_mode'] = test_results_df[['alpha', 'beta']].apply(lambda row: weibull_mode(row[0], row[1]), axis=1)
    
    # performance evaluation
    # train:
    rmse_train.append(np.sqrt(mean_squared_error(train_results_df['predicted_mu'], train_results_df['T'])))
    mae_train.append((mean_absolute_error(train_results_df['predicted_mu'], train_results_df['T'])))
    r2_train.append(r2_score(train_results_df['predicted_mu'], train_results_df['T']))
    
    # test:
    rmse_test.append(np.sqrt(mean_squared_error(test_results_df['predicted_mu'], test_results_df['T'])))
    mae_test.append((mean_absolute_error(test_results_df['predicted_mu'], test_results_df['T'])))
    r2_test.append(r2_score(test_results_df['predicted_mu'], test_results_df['T']))
    
    # registering results
    results['fold'] = [fold_count]
    results['rmse_train'] = [rmse_train[-1]]
    results['mae_train'] = [mae_train[-1]]
    results['r2_train'] = [r2_train[-1]]
    results['rmse_test'] = [rmse_test[-1]]
    results['mae_test'] = [mae_test[-1]]
    results['r2_test'] = [r2_test[-1]]   
    
    print('Before clearing session')
    print(rmse_test)
    print(mae_test)
    print(r2_test)
    
    if os.path.isfile(file):
        results.to_csv('./' + file, mode='a', index=False, header=False)
    else:
        results.to_csv('./' + file, mode='w', index=False, header=True)
    
    k.clear_session()
    del model
        

end = time.time()
print(f'Elapsed time: {(end - start)/60} minutes')

 20%|██        | 10/50 [00:00<00:00, 95.31it/s]

 Starting CV with: CV = 2, shuffle = True, random_state = 21 
 and model parameters: epochs = 50, batch_size = 100, max_time = 100, mask_value = -99


100%|██████████| 50/50 [00:00<00:00, 90.47it/s] 
100%|██████████| 50/50 [00:00<00:00, 12347.81it/s]


(10259, 100, 17)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


 20%|██        | 10/50 [00:00<00:00, 88.82it/s]

Before clearing session
[23.452695639657662]
[15.059538708321341]
[0.7422463302812077]


100%|██████████| 50/50 [00:00<00:00, 85.86it/s]
100%|██████████| 50/50 [00:00<00:00, 12675.44it/s]


(10372, 100, 17)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Before clearing session
[23.452695639657662, 28.147741059067542]
[15.059538708321341, 18.844230782951747]
[0.7422463302812077, 0.6792955529444062]
Elapsed time: 18.65465202331543 minutes
