# Initial Setup 

In [8]:
import pandas as pd 
import numpy as np
from tensorflow.keras import Sequential
from tensorflow.keras import layers
from sklearn.model_selection import TimeSeriesSplit
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.callbacks import EarlyStopping

In [3]:
# Read log_diff_BDI.csv, parse the "Date" column into datetimes and use it
# as the index -- built as one chain so re-running the cell is idempotent.
df = (
    pd.read_csv("../raw_data/data/BDI/log_diff_BDI.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .set_index("Date")
)

# Preview the first two rows.
df.head(2)

Unnamed: 0_level_0,Unnamed: 0,Price,log_price,log_diff,log_diff_weekly
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1995-01-03,0,1964.0,3.293141,0.000664,3.304275
1995-01-04,1,1961.0,3.292478,-0.001327,


# Using our bespoke window/sequence generator to generate sequences:

1. Generate sequences that predict the log difference one day ahead from an input window of 10 business days (two weeks).
2. Generate sequences that predict the log difference one day ahead from an input window of 20 business days (one month).

# The Model

In [5]:
# Learning-rate schedule: start at 1e-3 and decay exponentially with a
# factor of 0.9 per 10,000 optimizer steps.
lr_schedule_config = {
    "initial_learning_rate": 1e-3,
    "decay_steps": 10000,
    "decay_rate": 0.9,
}
lr_schedule = ExponentialDecay(**lr_schedule_config)

In [19]:
# Optimizers: both are driven by the shared `lr_schedule`
# (a constant such as 0.001 can be used instead for an initial test).
rmsprop, adam = (
    optimizer_cls(learning_rate=lr_schedule) for optimizer_cls in (RMSprop, Adam)
)

In [14]:
# Early-stopping criterion: watch the validation MAE, allow 30 epochs
# without improvement, then roll the model back to its best weights.
es = EarlyStopping(
    monitor="val_mae",
    patience=30,
    restore_best_weights=True,
)

In [20]:
#Initializing and compiling model:

def init_model(input_length=7, n_features=1, optimizer=None):
    """Build and compile the LSTM regression model.

    Architecture: one LSTM layer (400 units, tanh) that returns only its
    final state, a 150-unit ReLU dense layer, and a single linear output
    unit. The model is compiled with MSE loss and MAE as the tracked metric.

    Parameters
    ----------
    input_length : int, default 7
        Number of time steps in each input window.
    n_features : int, default 1
        Number of features per time step.
    optimizer : optional
        Optimizer (instance or Keras identifier) passed to ``compile``.
        When omitted, a new ``RMSprop`` driven by the notebook-level
        ``lr_schedule`` is created for this model.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    if optimizer is None:
        # Create a fresh optimizer per model: sharing one optimizer instance
        # between several models carries its internal state (step counter,
        # slot variables) from one model over to the next.
        optimizer = RMSprop(learning_rate=lr_schedule)

    model = Sequential()

    # LSTM layer: consumes (input_length, n_features) windows and emits
    # only the last hidden state.
    model.add(
        layers.LSTM(
            400,
            activation="tanh",
            input_shape=(input_length, n_features),
            return_sequences=False,
        )
    )

    # Dense head: hidden ReLU layer followed by a linear single-value output.
    model.add(layers.Dense(150, activation="relu"))
    model.add(layers.Dense(1, activation="linear"))

    # `metrics` is documented as a list, so pass one rather than a bare string.
    model.compile(loss="mse", optimizer=optimizer, metrics=["mae"])

    return model

# Training and Testing

In [21]:
# Instantiate a fresh, compiled (still untrained) model via init_model().

model = init_model()

In [15]:
# TRAINING THE MODEL:
# NOTE(review): `generator` and `generator_val` are not created in any cell
# shown here; the training/validation sequence generators must be built
# before this cell runs, otherwise it fails with a NameError.

history = model.fit(
    generator,
    epochs=100,
    validation_data=generator_val,
    shuffle=True,
    # Keras documents `callbacks` as a list of callback instances.
    callbacks=[es],
)

NameError: name 'generator' is not defined

In [22]:
# Model layers & params overview: prints every layer with its output shape
# and parameter count, followed by the trainable / non-trainable totals.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 400)               643200    
                                                                 
 dense_2 (Dense)             (None, 150)               60150     
                                                                 
 dense_3 (Dense)             (None, 1)                 151       
                                                                 
Total params: 703,501
Trainable params: 703,501
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Learning curves: plot the loss and MAE series recorded in `history`.
# NOTE: plot_history is defined in a later cell of this notebook, so on a
# fresh kernel that definition cell has to be executed before this one.
plot_history(history)

# Learning-curve visualization code:

In [17]:
def plot_history(history, metric="mae"):
    """Plot training (and, when available, validation) learning curves.

    Draws two side-by-side panels on a 20x7 figure: the loss on the left
    (titled "MSE") and ``metric`` on the right. A validation curve is added
    to a panel only when the matching ``val_<name>`` series exists, so the
    function also works for runs fitted without validation data.

    Relies on ``plt`` (matplotlib.pyplot) being available in the notebook
    namespace.

    Parameters
    ----------
    history : object
        Anything exposing a ``history`` dict that maps series names
        (``"loss"``, ``"val_loss"``, ...) to per-epoch values, such as the
        object returned by ``model.fit``.
    metric : str, default "mae"
        Name of the metric series shown in the right-hand panel.

    Returns
    -------
    The pair of axes created by ``plt.subplots``.

    Raises
    ------
    KeyError
        If the training series for the loss or for ``metric`` is missing.
    """
    curves = history.history
    _, axes = plt.subplots(1, 2, figsize=(20, 7))

    # (series key, panel title, y-axis label) for each panel, left to right.
    panels = (
        ("loss", "MSE", "Loss"),
        (metric, metric.upper(), metric.upper()),
    )

    for panel, (key, title, ylabel) in zip(axes, panels):
        panel.plot(curves[key])
        legend_labels = ['Train']

        # Validation series are optional: plot them only when recorded.
        val_key = "val_" + key
        if val_key in curves:
            panel.plot(curves[val_key])
            legend_labels.append('Validation')

        panel.set_title(title)
        panel.set_ylabel(ylabel)
        panel.set_xlabel('Epoch')
        panel.legend(legend_labels, loc='best')
        panel.grid(axis="both", linewidth=0.5)

    return axes