# Multi Layer Perceptron Model

- BOOK: [Predict the Future with MLPs, CNNs and LSTMs in Python](https://machinelearningmastery.com/deep-learning-for-time-series-forecasting/) by Jason Brownlee
- NOTES: [TS -> ML split function](https://detraviousjbrinkley.notion.site/TS-ML-split-function-9ab51cbb49d244aa8b4ab434d009f8a7?pvs=4) by Detravious J.B. 
    - See for Forecast vs Prediction

In [1]:
# !pip3 uninstall -y torch torchvision
# !pip3 install torch torchvision

In [2]:
import os
import sys
import torch

import numpy as np
import pandas as pd

import torch.nn as nn
import torch.optim as optim

# Get the current working directory of the notebook
notebook_dir = os.getcwd()

# Add the parent directory to the system path
sys.path.append(os.path.join(notebook_dir, '../framework_for_time_series_data/tslearn/'))
from ml_models import MLP
from ts_models import EvaluationMetric

In [3]:
observations = [10, 20, 30, 40, 50, 60, 70, 80, 90]

# Book's implementation

In [4]:
def split_sequence(sequence, n_steps_in, n_steps_out): 
    X, y = list(), list() 
    for i in range(len(sequence)): 
        # find the end of this pattern 
        end_ix = i + n_steps_in 
        out_end_ix = end_ix + n_steps_out 
        # check if we are beyond the sequence 
        if out_end_ix > len(sequence): 
            break
        # gather input and output parts of the pattern 
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix:out_end_ix] 
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

In [5]:
X, y = split_sequence(observations, 3, 3)

In [6]:
X

array([[10, 20, 30],
       [20, 30, 40],
       [30, 40, 50],
       [40, 50, 60]])

In [7]:
y

array([[40, 50, 60],
       [50, 60, 70],
       [60, 70, 80],
       [70, 80, 90]])

# My implementation

In [8]:
def convert_uts_sequence_to_sml(uts_observations, prior_observations, forecasting_step):
    """Splits a given UTS into multiple input rows where each input row has a specified number of timestamps and the output is a single timestamp.
    
    Parameters:
    uts_observations -- 1D np array (of UTS data to transform to SML data with size  b rows/length x 1 dimension)
    prior_observations -- py int (of all observations before we get to where we want to start making the predictions)
    forecasting_step -- py int (of how far out to forecast, 1 only the next timestamp, 2 the next two timestamps, ... n the next n timestamps)
    
    Return:
    agg.values -- np array (of new sml data)
    """
    
    df = pd.DataFrame(uts_observations)
    cols = list()
    
    lag_col_names = []
    count_lag = 0
    # input sequence (t-n, ... t-1)
    for prior_observation in range(prior_observations, 0, -1):
        # print("prior_observation: ", prior_observation)
        cols.append(df.shift(prior_observation))
        new_col_name = "t - " + str(prior_observation)
        # print(new_col_name)
        lag_col_names.append(new_col_name)
        
    
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, forecasting_step):
        cols.append(df.shift(-i))
        # print(f"t + {i}")
        if i == 0:
            new_col_name = f"t"
        else:
            new_col_name = f"t + {i}"
        # print(new_col_name)
        lag_col_names.append(new_col_name)
        
        # put it all together
        uts_sml_df = pd.concat(cols, axis=1) 
        uts_sml_df.columns=[lag_col_names]
        # drop rows with NaN values
        uts_sml_df.dropna(inplace=True)
    
    # print(uts_sml_df)
    
    # colums to use to make prediction for last col
    X_train = uts_sml_df.iloc[:, 0: -1]
    
    # last column
    y_train = uts_sml_df.iloc[:, [-1]]
    return uts_sml_df

In [9]:
n_steps = 3
output_size = 2
converted_seq_df = convert_uts_sequence_to_sml(observations, n_steps, output_size)

In [10]:
converted_seq_df

Unnamed: 0,t - 3,t - 2,t - 1,t,t + 1
3,10.0,20.0,30.0,40,50.0
4,20.0,30.0,40.0,50,60.0
5,30.0,40.0,50.0,60,70.0
6,40.0,50.0,60.0,70,80.0
7,50.0,60.0,70.0,80,90.0


In [11]:
forecast_X_train_df = converted_seq_df.iloc[:, :n_steps]
forecast_X_train_df

Unnamed: 0,t - 3,t - 2,t - 1
3,10.0,20.0,30.0
4,20.0,30.0,40.0
5,30.0,40.0,50.0
6,40.0,50.0,60.0
7,50.0,60.0,70.0


In [12]:
forecast_y_train_df = converted_seq_df.iloc[:, -output_size:]
forecast_y_train_df

Unnamed: 0,t,t + 1
3,40,50.0
4,50,60.0
5,60,70.0
6,70,80.0
7,80,90.0


In [13]:
forecast_X_test_df = converted_seq_df.iloc[[-1], -n_steps:]
forecast_X_test_df

Unnamed: 0,t - 1,t,t + 1
7,70.0,80,90.0


In [14]:
predict_X_train_df = converted_seq_df.iloc[:-1, :n_steps]
predict_X_train_df

Unnamed: 0,t - 3,t - 2,t - 1
3,10.0,20.0,30.0
4,20.0,30.0,40.0
5,30.0,40.0,50.0
6,40.0,50.0,60.0


In [15]:
predict_y_train_df = converted_seq_df.iloc[:-1, -output_size:]
predict_y_train_df

Unnamed: 0,t,t + 1
3,40,50.0
4,50,60.0
5,60,70.0
6,70,80.0


In [16]:
predict_X_test_df = converted_seq_df.iloc[[-n_steps], -n_steps:]
predict_X_test_df

Unnamed: 0,t - 1,t,t + 1
5,50.0,60,70.0


In [17]:
predict_y_test_df = converted_seq_df.iloc[[-1], -output_size:]
predict_y_test_df

Unnamed: 0,t,t + 1
7,80,90.0


# Book's implementation
- Keras

## Forecast model

In [18]:
from keras.models import Sequential 
from keras.layers import Dense

forecast_model = Sequential() 
forecast_model.add(Dense(100, activation='relu' , input_dim=n_steps)) 
forecast_model.add(Dense(output_size)) 
forecast_model.compile(optimizer='adam' , loss='mse') 

predict_model = Sequential() 
predict_model.add(Dense(100, activation='relu' , input_dim=n_steps)) 
predict_model.add(Dense(output_size)) 
predict_model.compile(optimizer='adam' , loss='mse') 

2024-06-30 03:00:25.651230: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-06-30 03:00:30.343867: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [19]:
# fit model 
forecast_model.fit(forecast_X_train_df, forecast_y_train_df, epochs=2000, verbose=0)

<keras.callbacks.History at 0x1988bd160>

In [20]:
forecast_X_test = np.array(forecast_X_test_df)
forecast_X_test, forecast_X_test.shape[0]

(array([[70., 80., 90.]]), 1)

In [21]:
X_test = forecast_X_test.reshape((forecast_X_test.shape[0]), n_steps)
X_test

array([[70., 80., 90.]])

In [22]:
forecasts = forecast_model.predict(X_test, verbose=0)
forecasts

array([[103.35738, 113.1224 ]], dtype=float32)

## Predict model

In [23]:
predict_model.fit(predict_X_train_df, predict_y_train_df, epochs=2000, verbose=0)

<keras.callbacks.History at 0x1989c7340>

In [24]:
predict_X_test = np.array(predict_X_test_df)
predict_X_test, predict_X_test.shape[0]

(array([[50., 60., 70.]]), 1)

In [25]:
predict_X_test = forecast_X_test.reshape((predict_X_test.shape[0]), n_steps)
predict_X_test

array([[70., 80., 90.]])

In [26]:
book_model_predictions = predict_model.predict(predict_X_test, verbose=0)
book_model_predictions

array([[101.16806, 111.03617]], dtype=float32)

In [27]:
predict_y_test_df

Unnamed: 0,t,t + 1
7,80,90.0


In [28]:
EvaluationMetric.eval_mse(predict_y_test_df, book_model_predictions, False)

Test MSE: 445.304


# My implementation
- PyTorch using my library

## Forecast model

- Extrapolation of future values `
- Use `70, 80, 90`, so X_test
- Expected `100, 110`. We say expected because we don't know the actual values, thus no y_test. Expected as in we increment by 10.

In [29]:
hidden_size = 100

mlp_forecast_model = MLP(n_steps, hidden_size, output_size)
mlp_forecast_model

MLP(
  (fc1): Linear(in_features=3, out_features=100, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=100, out_features=2, bias=True)
)

In [30]:
forecast_X_train_df

Unnamed: 0,t - 3,t - 2,t - 1
3,10.0,20.0,30.0
4,20.0,30.0,40.0
5,30.0,40.0,50.0
6,40.0,50.0,60.0
7,50.0,60.0,70.0


In [31]:
forecast_y_train_df

Unnamed: 0,t,t + 1
3,40,50.0
4,50,60.0
5,60,70.0
6,70,80.0
7,80,90.0


In [32]:
criterion = nn.MSELoss()
optimizer = optim.Adam(mlp_forecast_model.parameters())
N_epochs = 2000
configs = [criterion, optimizer, N_epochs]

train_forecast_mlp_model = mlp_forecast_model.train(forecast_X_train_df, forecast_y_train_df, configs)

Epoch [50/2000], Loss: 55.46837615966797
Epoch [100/2000], Loss: 47.05717849731445
Epoch [150/2000], Loss: 40.73920822143555
Epoch [200/2000], Loss: 34.15233612060547
Epoch [250/2000], Loss: 27.485387802124023
Epoch [300/2000], Loss: 21.043737411499023
Epoch [350/2000], Loss: 15.135043144226074
Epoch [400/2000], Loss: 10.141475677490234
Epoch [450/2000], Loss: 6.243488311767578
Epoch [500/2000], Loss: 3.375147581100464
Epoch [550/2000], Loss: 1.7234878540039062
Epoch [600/2000], Loss: 0.8195081949234009
Epoch [650/2000], Loss: 0.3646969795227051
Epoch [700/2000], Loss: 0.15380480885505676
Epoch [750/2000], Loss: 0.06195255368947983
Epoch [800/2000], Loss: 0.02407493256032467
Epoch [850/2000], Loss: 0.00906088761985302
Epoch [900/2000], Loss: 0.003329445840790868
Epoch [950/2000], Loss: 0.0012305721174925566
Epoch [1000/2000], Loss: 0.00043836949043907225
Epoch [1050/2000], Loss: 0.00014950436889193952
Epoch [1100/2000], Loss: 4.8855552449822426e-05
Epoch [1150/2000], Loss: 1.5236348190

In [33]:
X_test_df = forecast_X_test_df.iloc[[-1], -n_steps:].copy()
X_test_df

Unnamed: 0,t - 1,t,t + 1
7,70.0,80,90.0


In [34]:
mlp_forecasts = mlp_forecast_model.predict(X_test_df)
mlp_forecasts

Predicted Outputs: [[103.61579895019531, 114.97189331054688]]


[[103.61579895019531, 114.97189331054688]]

# Prediction model

- Interpolation of in sample values 
- Use `50, 60, 70`, so X_test
- True predictions `80, 90`, so y_test

In [35]:
hidden_size = 100

mlp_predict_model = MLP(n_steps, hidden_size, output_size)
mlp_predict_model

MLP(
  (fc1): Linear(in_features=3, out_features=100, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=100, out_features=2, bias=True)
)

In [36]:
criterion = nn.MSELoss()
optimizer = optim.Adam(mlp_predict_model.parameters())
N_epochs = 2000
configs = [criterion, optimizer, N_epochs]

train_predict_mlp_model = mlp_predict_model.train(predict_X_train_df, predict_y_train_df, configs)

Epoch [50/2000], Loss: 38.59309768676758
Epoch [100/2000], Loss: 33.279563903808594
Epoch [150/2000], Loss: 27.690452575683594
Epoch [200/2000], Loss: 22.207914352416992
Epoch [250/2000], Loss: 17.012147903442383
Epoch [300/2000], Loss: 12.346281051635742
Epoch [350/2000], Loss: 8.44957160949707
Epoch [400/2000], Loss: 5.435917854309082
Epoch [450/2000], Loss: 3.282123327255249
Epoch [500/2000], Loss: 1.886763334274292
Epoch [550/2000], Loss: 1.039473533630371
Epoch [600/2000], Loss: 0.5480918884277344
Epoch [650/2000], Loss: 0.2766892611980438
Epoch [700/2000], Loss: 0.13389042019844055
Epoch [750/2000], Loss: 0.062112726271152496
Epoch [800/2000], Loss: 0.027594950050115585
Epoch [850/2000], Loss: 0.01172571536153555
Epoch [900/2000], Loss: 0.004769054241478443
Epoch [950/2000], Loss: 0.0018550738459452987
Epoch [1000/2000], Loss: 0.0006900576408952475
Epoch [1050/2000], Loss: 0.00024520314764231443
Epoch [1100/2000], Loss: 8.298102329717949e-05
Epoch [1150/2000], Loss: 2.68484072876

In [37]:
X_test_df = predict_X_test_df.iloc[[-1], -n_steps:].copy()
X_test_df

Unnamed: 0,t - 1,t,t + 1
5,50.0,60,70.0


In [38]:
mlp_predictions = mlp_predict_model.predict(X_test_df)
mlp_predictions

Predicted Outputs: [[82.41029357910156, 94.2548599243164]]


[[82.41029357910156, 94.2548599243164]]

In [39]:
predict_y_test_df

Unnamed: 0,t,t + 1
7,80,90.0


In [40]:
EvaluationMetric.eval_mse(predict_y_test_df, mlp_predictions, False)

Test MSE: 11.957
