# Initial Model Testing with new Sequence Generator

In [1]:
import numpy as np
import pandas as pd 

#Keras
from tensorflow import keras
from tensorflow import stack, Tensor
from keras import Sequential, layers
from keras.callbacks import EarlyStopping
from keras.optimizers import RMSprop
from keras.callbacks import LearningRateScheduler
from keras.utils import timeseries_dataset_from_array
from tensorflow import data



#Project imports 
from bdi_predict.model.data import clean_data
from bdi_predict.model.preprocessor import train_val_test_split, min_max_scaler
from bdi_predict.model.sequencer import SequenceGenerator
from bdi_predict.model.params import BASE_PROJECT_PATH


In [2]:
%load_ext autoreload
%autoreload 2

## Preprocessing - (splitting data, scaling, and creating sequences)

In [3]:
# Load the cleaned dataset, discard the log_BDRY column and index the rows by
# the "time" column, all in one chain so the cell is idempotent on re-run.
df = (
    pd.read_csv("../data/cleaned_data.csv")
    .drop(columns="log_BDRY")
    .set_index("time")
)
df.head(3)

Unnamed: 0_level_0,BDRY,BDI,target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-03-23,24.1005,1122.0,-0.024735
2018-03-26,24.84,1126.0,0.013126
2018-03-27,24.08,1117.0,-0.013495


In [4]:
# Split the full frame into train, validation and test sets using a 7:2:1
# ratio. The result is handed to min_max_scaler in the next cell.
dfs = train_val_test_split(df=df, train_val_test_ratio=(7,2,1))

Data split into train, validation, and test datasets.


In [5]:
# Min-max scale the split datasets and unpack them into the scaled train,
# validation and test frames.
# NOTE(review): confirm the scaler is fitted on the training split only, so
# that no validation/test statistics leak into training.
df_train, df_val, df_test = min_max_scaler(dfs=dfs)

Datasets min-max scaled.


In [6]:
df_train.head(3)

Unnamed: 0,BDRY,BDI,target
0,0.769748,0.240992,0.33752
1,0.797941,0.242314,0.531249
2,0.768967,0.239339,0.395035


In [7]:
df_val.head(3)

Unnamed: 0,BDRY,BDI,target
0,0.86504,0.984463,0.453253
1,0.858178,0.984463,0.448999
2,0.880671,0.985785,0.513167


In [8]:
df_test.head(3)

Unnamed: 0,BDRY,BDI,target
0,0.462066,0.598678,0.417447
1,0.488753,0.592727,0.559068
2,0.505528,0.610579,0.521776


In [9]:
# Create a SequenceGenerator instance: windows of 20 input steps, a single
# target step, offset=1, with "target" as the label column.
Sequencer = SequenceGenerator(
    input_width=20,
    target_width=1,
    offset=1,
    df_train=df_train,
    df_val=df_val,
    df_test=df_test,
    target_columns=["target"],
)

Sequencer

Total sequence size: 21
Input indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
Target indice(s): [20]
Target column name(s): ['target']

In [10]:
len(df_train)

782

In [11]:
Sequencer.total_sequence_size

21

In [32]:
Sequencer.target_width

1

## Testing out the Sequence

In [33]:
# Hand-build a batch of three full-length windows, starting at rows 0, 100 and
# 200 of the scaled training frame, and stack them along a new batch axis.
example_sequence = stack([
    np.array(df_train[start:start + Sequencer.total_sequence_size])
    for start in (0, 100, 200)
])
example_sequence.shape

TensorShape([3, 21, 3])

In [34]:
example_sequence[0, 2, 2]

<tf.Tensor: shape=(), dtype=float64, numpy=0.39503531854439916>

In [37]:
# Feed the hand-built batch (3 stacked windows, i.e. batch_size=3, each the
# length of the total sequence) through split_sequence to obtain the input
# and target tensors.
#
# TODO: the shapes printed below do not yet match the desired ones listed at
# the bottom of this cell (the inputs still carry every feature column).

example_inputs, example_targets = Sequencer.split_sequence(example_sequence)

print("All shapes are: (batch, time, features)")
print(
    f"Sequence shape: {example_sequence.shape}",
    f"Inputs shape: {example_inputs.shape}",
    f"targets shape: {example_targets.shape}",
    sep="\n",
)

# Desired shapes:
#   Sequence shape: (3, 21, 2)
#   Inputs shape:   (3, 20, 2)
#   Target shape:   (3, 1, 1)

All shapes are: (batch, time, features)
Sequence shape: (3, 21, 3)
Inputs shape: (3, 20, 3)
targets shape: (3, 1, 1)


In [15]:
def init_model(input_width=20, n_features=2):

    """
    Initialize and compile the LSTM Recurrent Neural Network.

    Architecture: LSTM(60, tanh) -> Dense(25, relu) -> Dense(1, linear).
    The model is compiled with MSE loss, an MAE metric and an RMSprop
    optimizer whose learning rate decays exponentially from 1e-3.

    Parameters
    ----------
    input_width : int, default 20
        Number of time steps in each input window.
    n_features : int, default 2
        Number of feature columns per time step. The default keeps the
        original hard-coded input shape of (20, 2).
        NOTE(review): the example batch split earlier in this notebook has
        3 features per step; pass n_features=3 if the model is fed such
        inputs.

    Returns
    -------
    The compiled Sequential model.
    """

    print("\nInitialising model...")

    model = Sequential()

    #LSTM LAYERS:

    # return_sequences=False: only the last LSTM output feeds the dense head.
    model.add(layers.LSTM(60,
                          activation="tanh",
                          input_shape=(input_width, n_features),
                          return_sequences=False))

    #DENSE LAYERS:

    model.add(layers.Dense(25, activation="relu"))
    model.add(layers.Dense(1, activation="linear"))

    print("\nmodel initialized.")

    #SETTING UP OPTIMIZERS:

    # ExponentialDecay is not imported by name anywhere in this notebook, so
    # reach it through the already-imported keras namespace.
    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=1e-3,
        decay_steps=10000,
        decay_rate=0.9)

    rmsprop = RMSprop(learning_rate=lr_schedule)

    #COMPILING MODEL:

    # metrics is passed as a list, as documented by Keras; the "mae" metric
    # is what train_model's early stopping monitors ("val_mae").
    model.compile(loss="mse",
                  optimizer=rmsprop,
                  metrics=["mae"])
    print("\nmodel compiled.")


    return model

In [23]:
def train_model(model:keras.Sequential,
                XandY:data.Dataset,
                patience=10,
                validation_data:data.Dataset=None):

    """
    Fit the model and return the tuple (fitted_model, history).

    Parameters
    ----------
    model : keras.Sequential
        A compiled model. Early stopping monitors an MAE metric, so the
        model is expected to have been compiled with "mae" in its metrics.
    XandY : data.Dataset
        Training dataset yielding (inputs, targets).
    patience : int, default 10
        Number of epochs without improvement before early stopping.
    validation_data : data.Dataset, optional
        Validation dataset. When omitted, early stopping monitors the
        training "mae" instead of "val_mae".

    Returns
    -------
    (model, history) : the fitted model and the History object from fit().
    """

    print("\nTraining model...")

    #EarlyStopping DEFINITION:

    # "val_mae" is only logged when validation data is supplied; fall back to
    # the training metric so the callback always has something to monitor.
    monitor = "val_mae" if validation_data is not None else "mae"

    es = EarlyStopping(monitor=monitor,
                       patience=patience,
                       restore_best_weights=True)

    #FITTING MODEL:

    # shuffle is left at its default: Keras ignores it for dataset inputs.
    history = model.fit(XandY,
                        epochs=100,
                        validation_data=validation_data,
                        callbacks=[es])

    # len() on a dataset counts its elements (batches, if batched), not rows,
    # and raises TypeError when the cardinality is unknown or infinite.
    try:
        n_elements = len(XandY)
    except TypeError:
        n_elements = None

    if n_elements is None:
        print("\nmodel trained.")
    else:
        print(f"\nmodel trained ({n_elements} dataset elements).")

    return model, history

In [24]:
df = pd.read_csv("../data/cleaned_data.csv")