In [None]:
#IMPORT LIBRARIES
import tensorflow as tf
import numpy as np
import os
import random
import pandas as pd
import keras_tuner as kt
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
plt.rc('font', size=16)
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')
tf.get_logger().setLevel('ERROR')

# Short aliases for the Keras API used throughout the notebook
tfk = tf.keras
tfkl = tfk.layers
print(tf.__version__)

# One seed shared by every random number generator the notebook relies on
seed = 42

os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

In [None]:
# Read the training CSV into a DataFrame
dataset = pd.read_csv(filepath_or_buffer='../input/tstraining/Training.csv')

In [None]:
def inspect_dataframe(df, columns):
    """Plot every requested column of ``df`` in its own stacked subplot.

    Parameters
    ----------
    df : object indexable by column label (``df[col]``), e.g. a DataFrame
        Data to visualize.
    columns : sized iterable of column labels
        One subplot is drawn per label, titled with that label. All subplots
        share the x axis.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    # squeeze=False makes `plt.subplots` always return a 2-D array of Axes;
    # without it a single column yields a bare Axes object and indexing fails.
    figs, axs = plt.subplots(len(columns), 1, sharex=True, figsize=(17,17), squeeze=False)
    for ax, col in zip(axs[:, 0], columns):
        ax.plot(df[col])
        ax.set_title(col)
    plt.show()
inspect_dataframe(dataset, dataset.columns)

In [None]:
# Hold out the last 5% of the rows as the test split
test_size = int(dataset.shape[0]*0.05)
# Slice on an explicit boundary: `iloc[:-test_size]` would return an empty
# training set (and the whole dataset as test) whenever test_size is 0.
split_idx = dataset.shape[0] - test_size
X_train_raw = dataset.iloc[:split_idx]
X_test_raw = dataset.iloc[split_idx:]
print(X_train_raw.shape, X_test_raw.shape)

# Min-max normalize every column using statistics of the training split only
X_min = X_train_raw.min()
X_max = X_train_raw.max()
# A constant column has zero range; divide by 1 so it maps to 0 instead of NaN
X_range = (X_max-X_min).replace(0, 1)

X_train_raw = (X_train_raw-X_min)/X_range
X_test_raw = (X_test_raw-X_min)/X_range

# Visual check of where the split falls, on the 'Sponginess' column
plt.figure(figsize=(17,5))
plt.plot(X_train_raw.Sponginess, label='Train (sponginess)')
plt.plot(X_test_raw.Sponginess, label='Test (sponginess)')
plt.title('Train-Test Split')
plt.legend()
plt.show()

In [None]:
#MULTIVARIATE FORECASTING
target_labels = dataset.columns
window = 800
stride = 10
telescope = 864

In [None]:
def build_sequences(df, target_labels, window, stride, telescope):
    """Slice a DataFrame into (input window, following target rows) pairs.

    The frame is left-padded with zero rows so that its length becomes a
    multiple of ``window``. A window of ``window`` rows is then taken every
    ``stride`` rows, paired with the ``telescope`` rows that follow it.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data, one row per time step; all columns are used as inputs.
    target_labels : list-like of column labels
        Columns of ``df`` returned as targets.
    window : int
        Number of rows in each input sequence. Must be a multiple of ``stride``.
    stride : int
        Offset between the starts of consecutive windows.
    telescope : int
        Number of rows after each window returned as its target.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Inputs of shape ``(n, window, df.shape[1])`` and targets of shape
        ``(n, telescope, len(target_labels))``. When the data is too short to
        produce any pair, ``n`` is 0 and the trailing dimensions are kept.

    Raises
    ------
    AssertionError
        If ``window`` is not a multiple of ``stride``.
    """
    # Sanity check to avoid runtime errors
    assert window % stride == 0

    temp_df = df.copy().values
    temp_label = df[target_labels].copy().values

    remainder = len(df) % window
    if remainder != 0:
        # Left-pad with zeros up to the next multiple of `window`
        padding_len = window - remainder
        padding = np.zeros((padding_len, temp_df.shape[1]), dtype='float64')
        temp_df = np.concatenate((padding, temp_df))
        padding = np.zeros((padding_len, temp_label.shape[1]), dtype='float64')
        temp_label = np.concatenate((padding, temp_label))
        assert len(temp_df) % window == 0

    # Largest start index whose window AND full telescope still fit in the data.
    # It is a valid start, hence the `+ 1` on the exclusive arange bound.
    last_start = len(temp_df) - window - telescope

    dataset = []
    labels = []
    for idx in np.arange(0, last_start + 1, stride):
        dataset.append(temp_df[idx:idx+window])
        labels.append(temp_label[idx+window:idx+window+telescope])

    if not dataset:
        # Keep the trailing dimensions so callers can still read `.shape[1:]`
        return (np.empty((0, window, temp_df.shape[1]), dtype='float64'),
                np.empty((0, telescope, temp_label.shape[1]), dtype='float64'))

    dataset = np.array(dataset)
    labels = np.array(labels)
    return dataset, labels

In [None]:
def hypermodel(hp):
    """Build and compile one candidate LSTM forecaster for the tuner.

    Reads the module-level ``input_shape``, ``output_shape`` and ``seed``,
    which must be defined before the tuner calls this function.

    Parameters
    ----------
    hp : hyperparameter container supplied by the tuner
        Must provide ``Choice``; used to sample the LSTM width, the LSTM
        input dropout rate and the Adam learning rate.

    Returns
    -------
    A compiled Keras ``Sequential`` model whose output has the shape
    ``(output_shape[-2], output_shape[-1])`` per sample.
    """
    # Hyperparameters explored by the search
    neurons = hp.Choice('neurons', values = [256,384,512])
    dropout = hp.Choice('dropout', values = [.0,.1,.2])
    lr = hp.Choice('lr', values = [.1,.01,.001,.0001])

    # Build the neural network layer by layer
    model = tfk.Sequential()
    model.add(tfkl.Input(shape=input_shape, name='Input'))
    model.add(tfkl.LSTM(neurons, kernel_initializer = tfk.initializers.GlorotUniform(seed = seed),dropout = dropout))
    # One flat output unit per (time step, feature) pair, reshaped right after
    model.add(tfkl.Dense(output_shape[-1]*output_shape[-2], activation='selu'))
    model.add(tfkl.Reshape((output_shape[-2],output_shape[-1])))
    # `metrics` is passed as a list, as documented by Keras, rather than a bare string
    model.compile(loss=tfk.losses.MeanSquaredError(), optimizer=tfk.optimizers.Adam(lr), metrics=['mse'])
    return model

In [None]:
# Turn the normalized splits into (window -> telescope) samples
X_train, y_train = build_sequences(X_train_raw, target_labels, window, stride, telescope)
X_test, y_test = build_sequences(X_test_raw, target_labels, window, stride, telescope)
# Only the last expression of a cell is echoed, so print the shapes explicitly
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# Per-sample shapes, read as globals by `hypermodel`
input_shape = X_train.shape[1:]
output_shape = y_train.shape[1:]

# Bayesian search minimizing the validation loss; seeded like the rest of the
# notebook so the sequence of sampled trials is repeatable
tuner = kt.BayesianOptimization(hypermodel,
                objective='val_loss',
                directory='tuning',
                max_trials = 10,
                seed = seed,
                project_name='units-dropout-lr')
tuner.search_space_summary()




In [None]:
# Launch the hyperparameter search, holding out 10% of the training samples
# for validation in every trial
tuner.search(
    x=X_train,
    y=y_train,
    validation_split=.1,
    epochs=20,
    callbacks=[
        # Stop a trial once val_loss has not improved for 15 epochs and roll
        # back to the best weights seen
        tfk.callbacks.EarlyStopping(
            monitor='val_loss',
            mode='min',
            patience=15,
            restore_best_weights=True,
        ),
        # Halve the learning rate after 5 epochs without improvement,
        # never going below 1e-6
        tfk.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            mode='min',
            patience=5,
            factor=0.5,
            min_lr=1e-6,
        ),
    ],
)

In [None]:
# Retrieve the two best models found by the search, then print the trial ranking
models = tuner.get_best_models(2)
tuner.results_summary()