# Multi Layer Perceptron Model

- BOOK: [Predict the Future with MLPs, CNNs and LSTMs in Python](https://machinelearningmastery.com/deep-learning-for-time-series-forecasting/) by Jason Brownlee

In [1]:
# !pip3 uninstall -y torch torchvision
# !pip3 install torch torchvision

In [2]:
import os
import sys
import torch

import numpy as np
import pandas as pd

import torch.nn as nn
import torch.optim as optim

# Get the current working directory of the notebook
notebook_dir = os.getcwd()

# Add the parent directory to the system path
sys.path.append(os.path.join(notebook_dir, '../framework_for_time_series_data/tslearn/'))
from ml_models import MLP
from ts_models import EvaluationMetric

In [3]:
observations = [10, 20, 30, 40, 50, 60, 70, 80, 90]

# Book's implementation

In [4]:
def split_sequence(sequence, n_steps_in, n_steps_out): 
    X, y = list(), list() 
    for i in range(len(sequence)): 
        # find the end of this pattern 
        end_ix = i + n_steps_in 
        out_end_ix = end_ix + n_steps_out 
        # check if we are beyond the sequence 
        if out_end_ix > len(sequence): 
            break
        # gather input and output parts of the pattern 
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix:out_end_ix] 
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

In [5]:
X, y = split_sequence(observations, 3, 2)

In [6]:
X

array([[10, 20, 30],
       [20, 30, 40],
       [30, 40, 50],
       [40, 50, 60],
       [50, 60, 70]])

In [7]:
y

array([[40, 50],
       [50, 60],
       [60, 70],
       [70, 80],
       [80, 90]])

# My implementation

In [8]:
def convert_uts_sequence_to_sml(uts_observations, prior_observations, forecasting_step):
    """Splits a given UTS into multiple input rows where each input row has a specified number of timestamps and the output is a single timestamp.
    
    Parameters:
    uts_observations -- 1D np array (of UTS data to transform to SML data with size  b rows/length x 1 dimension)
    prior_observations -- py int (of all observations before we get to where we want to start making the predictions)
    forecasting_step -- py int (of how far out to forecast, 1 only the next timestamp, 2 the next two timestamps, ... n the next n timestamps)
    
    Return:
    agg.values -- np array (of new sml data)
    """
    
    df = pd.DataFrame(uts_observations)
    cols = list()
    
    lag_col_names = []
    count_lag = 0
    # input sequence (t-n, ... t-1)
    for prior_observation in range(prior_observations, 0, -1):
        # print("prior_observation: ", prior_observation)
        cols.append(df.shift(prior_observation))
        new_col_name = "t - " + str(prior_observation)
        # print(new_col_name)
        lag_col_names.append(new_col_name)
        
    
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, forecasting_step):
        cols.append(df.shift(-i))
        # print(f"t + {i}")
        new_col_name = f"t + {i}"
        # print(new_col_name)
        lag_col_names.append(new_col_name)
        
        # put it all together
        uts_sml_df = pd.concat(cols, axis=1) 
        uts_sml_df.columns=[lag_col_names]
        # drop rows with NaN values
        uts_sml_df.dropna(inplace=True)
    
    # print(uts_sml_df)
    
    # colums to use to make prediction for last col
    X_train = uts_sml_df.iloc[:, 0: -1]
    
    # last column
    y_train = uts_sml_df.iloc[:, [-1]]
    return uts_sml_df

In [9]:
n_steps = 3
output_size = 2
converted_seq_df = convert_uts_sequence_to_sml(observations, n_steps, output_size)

In [10]:
converted_seq_df

Unnamed: 0,t - 3,t - 2,t - 1,t + 0,t + 1
3,10.0,20.0,30.0,40,50.0
4,20.0,30.0,40.0,50,60.0
5,30.0,40.0,50.0,60,70.0
6,40.0,50.0,60.0,70,80.0
7,50.0,60.0,70.0,80,90.0


In [11]:
forecast_X_train_df = converted_seq_df.iloc[:, :n_steps]
forecast_X_train_df

Unnamed: 0,t - 3,t - 2,t - 1
3,10.0,20.0,30.0
4,20.0,30.0,40.0
5,30.0,40.0,50.0
6,40.0,50.0,60.0
7,50.0,60.0,70.0


In [12]:
forecast_y_train_df = converted_seq_df.iloc[:, -output_size:]
forecast_y_train_df

Unnamed: 0,t + 0,t + 1
3,40,50.0
4,50,60.0
5,60,70.0
6,70,80.0
7,80,90.0


In [13]:
forecast_X_test_df = converted_seq_df.iloc[[-1], -n_steps:]
forecast_X_test_df

Unnamed: 0,t - 1,t + 0,t + 1
7,70.0,80,90.0


In [14]:
predict_X_train_df = converted_seq_df.iloc[:-1, :n_steps]
predict_X_train_df

Unnamed: 0,t - 3,t - 2,t - 1
3,10.0,20.0,30.0
4,20.0,30.0,40.0
5,30.0,40.0,50.0
6,40.0,50.0,60.0


In [15]:
predict_y_train_df = converted_seq_df.iloc[:-1, -output_size:]
predict_y_train_df

Unnamed: 0,t + 0,t + 1
3,40,50.0
4,50,60.0
5,60,70.0
6,70,80.0


In [16]:
predict_X_test_df = converted_seq_df.iloc[[-n_steps], -n_steps:]
predict_X_test_df

Unnamed: 0,t - 1,t + 0,t + 1
5,50.0,60,70.0


In [17]:
predict_y_test_df = converted_seq_df.iloc[[-1], -output_size:]
predict_y_test_df

Unnamed: 0,t + 0,t + 1
7,80,90.0


# Book's implementation
- Keras

## Forecast model

In [18]:
from keras.models import Sequential 
from keras.layers import Dense

forecast_model = Sequential() 
forecast_model.add(Dense(100, activation='relu' , input_dim=n_steps)) 
forecast_model.add(Dense(output_size)) 
forecast_model.compile(optimizer='adam' , loss='mse') 

predict_model = Sequential() 
predict_model.add(Dense(100, activation='relu' , input_dim=n_steps)) 
predict_model.add(Dense(output_size)) 
predict_model.compile(optimizer='adam' , loss='mse') 

2024-04-30 15:29:07.829240: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-04-30 15:29:10.921264: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [19]:
# fit model 
forecast_model.fit(forecast_X_train_df, forecast_y_train_df, epochs=2000, verbose=0)

<keras.callbacks.History at 0x196b017f0>

In [20]:
forecast_X_test = np.array(forecast_X_test_df)
forecast_X_test, forecast_X_test.shape[0]

(array([[70., 80., 90.]]), 1)

In [21]:
X_test = forecast_X_test.reshape((forecast_X_test.shape[0]), n_steps)
X_test

array([[70., 80., 90.]])

In [22]:
forecasts = forecast_model.predict(X_test, verbose=0)
forecasts

array([[103.66067 , 116.744064]], dtype=float32)

## Predict model

In [23]:
predict_model.fit(predict_X_train_df, predict_y_train_df, epochs=2000, verbose=0)

<keras.callbacks.History at 0x196b8cb80>

In [24]:
predict_X_test = np.array(predict_X_test_df)
predict_X_test, predict_X_test.shape[0]

(array([[50., 60., 70.]]), 1)

In [25]:
predict_X_test = forecast_X_test.reshape((predict_X_test.shape[0]), n_steps)
predict_X_test

array([[70., 80., 90.]])

In [26]:
book_model_predictions = predict_model.predict(predict_X_test, verbose=0)
book_model_predictions

array([[100.14402 , 111.925644]], dtype=float32)

In [27]:
predict_y_test_df

Unnamed: 0,t + 0,t + 1
7,80,90.0


In [28]:
EvaluationMetric.eval_mse(predict_y_test_df, book_model_predictions, False)

Test MSE: 443.258


# My implementation
- PyTorch using my library

## Forecast model

In [29]:
hidden_size = 100

mlp_forecast_model = MLP(n_steps, hidden_size, output_size)
mlp_forecast_model

MLP(
  (fc1): Linear(in_features=3, out_features=100, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=100, out_features=2, bias=True)
)

In [30]:
criterion = nn.MSELoss()
optimizer = optim.Adam(mlp_forecast_model.parameters())
N_epochs = 2000
configs = [criterion, optimizer, N_epochs]

train_forecast_mlp_model = mlp_forecast_model.train(forecast_X_train_df, forecast_y_train_df, configs)

Epoch [100/2000], Loss: 36.22644805908203
Epoch [200/2000], Loss: 26.56416893005371
Epoch [300/2000], Loss: 17.33930778503418
Epoch [400/2000], Loss: 9.631731986999512
Epoch [500/2000], Loss: 4.4308762550354
Epoch [600/2000], Loss: 1.7382373809814453
Epoch [700/2000], Loss: 0.588018000125885
Epoch [800/2000], Loss: 0.17493736743927002
Epoch [900/2000], Loss: 0.04646710678935051
Epoch [1000/2000], Loss: 0.011045346036553383
Epoch [1100/2000], Loss: 0.0023364536464214325
Epoch [1200/2000], Loss: 0.00044467748375609517
Epoch [1300/2000], Loss: 8.580392750445753e-05
Epoch [1400/2000], Loss: 2.5706574888317846e-05
Epoch [1500/2000], Loss: 1.6277368558803573e-05
Epoch [1600/2000], Loss: 1.4381340406544041e-05
Epoch [1700/2000], Loss: 1.3902184036851395e-05
Epoch [1800/2000], Loss: 1.3680057236342691e-05
Epoch [1900/2000], Loss: 1.3506700270227157e-05
Epoch [2000/2000], Loss: 1.3364598089538049e-05


In [31]:
X_test_df = forecast_X_test_df.iloc[[-1], -n_steps:].copy()
X_test_df

Unnamed: 0,t - 1,t + 0,t + 1
7,70.0,80,90.0


In [32]:
X_test = torch.tensor(X_test_df.values, dtype=torch.float32)
X_test

tensor([[70., 80., 90.]])

In [33]:
mlp_forecasts = mlp_forecast_model.predict(X_test, n_steps)
mlp_forecasts

Predicted Outputs: [[102.75798034667969, 113.32259368896484]]


[[102.75798034667969, 113.32259368896484]]

# Prediction model

In [34]:
hidden_size = 100

mlp_predict_model = MLP(n_steps, hidden_size, output_size)
mlp_predict_model

MLP(
  (fc1): Linear(in_features=3, out_features=100, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=100, out_features=2, bias=True)
)

In [35]:
criterion = nn.MSELoss()
optimizer = optim.Adam(mlp_predict_model.parameters())
N_epochs = 2000
configs = [criterion, optimizer, N_epochs]

train_predict_mlp_model = mlp_predict_model.train(predict_X_train_df, predict_y_train_df, configs)

Epoch [100/2000], Loss: 48.921600341796875
Epoch [200/2000], Loss: 39.95693588256836
Epoch [300/2000], Loss: 31.367080688476562
Epoch [400/2000], Loss: 23.14542007446289
Epoch [500/2000], Loss: 15.820924758911133
Epoch [600/2000], Loss: 9.960196495056152
Epoch [700/2000], Loss: 5.5983805656433105
Epoch [800/2000], Loss: 2.803992748260498
Epoch [900/2000], Loss: 1.245558500289917
Epoch [1000/2000], Loss: 0.4889838695526123
Epoch [1100/2000], Loss: 0.17084556818008423
Epoch [1200/2000], Loss: 0.054223913699388504
Epoch [1300/2000], Loss: 0.016543976962566376
Epoch [1400/2000], Loss: 0.005681116133928299
Epoch [1500/2000], Loss: 0.0028130884747952223
Epoch [1600/2000], Loss: 0.0018349839374423027
Epoch [1700/2000], Loss: 0.0014006347628310323
Epoch [1800/2000], Loss: 0.0012063574977219105
Epoch [1900/2000], Loss: 0.0011178140994161367
Epoch [2000/2000], Loss: 0.0010746007319539785


In [36]:
X_test_df = predict_X_test_df.iloc[[-1], -n_steps:].copy()
X_test_df

Unnamed: 0,t - 1,t + 0,t + 1
5,50.0,60,70.0


In [37]:
X_test = torch.tensor(X_test_df.values, dtype=torch.float32)
X_test

tensor([[50., 60., 70.]])

In [38]:
mlp_predictions = mlp_predict_model.predict(X_test, n_steps)
mlp_predictions

Predicted Outputs: [[80.83460998535156, 90.81647491455078]]


[[80.83460998535156, 90.81647491455078]]

In [39]:
predict_y_test_df

Unnamed: 0,t + 0,t + 1
7,80,90.0


In [40]:
EvaluationMetric.eval_mse(predict_y_test_df, mlp_predictions, False)

Test MSE: 0.682
