In [1]:
import scipy
# `import scipy` by itself does not guarantee that the `io` submodule is loaded
# (it depends on the SciPy version), so import the submodule explicitly.
import scipy.io

# Load the MATLAB file holding the training series
matlab = scipy.io.loadmat('Xtrain.mat')

In [2]:
# Pull the array stored under the 'Xtrain' key out of the loaded .mat contents
X_train = matlab['Xtrain']

In [3]:
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Convert the data to a flat Python list
X_train_list = X_train.flatten().tolist()
# Show only the length and a short preview; printing every value floods the
# cell output and bloats the saved notebook.
print(len(X_train_list), X_train_list[:10])

# Plot a line of the values in the training set
fig = px.line(y=X_train_list, x=range(len(X_train_list)), title='Line Plot of X_train Values')
fig.update_layout(xaxis_title='Index', yaxis_title='Value')
fig.show()

In [5]:
import pandas as pd

In [None]:
def create_features(timeseries_array, n_lags):
    """Build a lagged feature matrix and target vector from a time series.

    Args:
        timeseries_array: Values of the series, in time order; passed to
            ``pd.DataFrame`` as a single column named 'label'.
        n_lags: Number of previous values to use as features.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``y`` holds the target values and
        ``X`` has one column per lag, ordered 'lag 1', 'lag 2', ..., so column
        0 is the value immediately before the target and the last column is
        the oldest value. Rows containing NaN (at least the first ``n_lags``
        rows, which lack a full history) are dropped.
    """
    base = pd.DataFrame(timeseries_array, columns=['label'])
    # One shifted copy of the target column per lag, keyed by its column name
    lag_columns = {f'lag {k}': base['label'].shift(k) for k in range(1, n_lags + 1)}
    table = base.assign(**lag_columns).dropna()

    y = table['label'].to_numpy()
    X = table.drop(columns=['label']).to_numpy()
    return X, y

# Quick check of the helper with 10 lags: report the feature and target shapes
X, y = create_features(X_train_list, 10)

print(X.shape, y.shape, sep="\n")

In [None]:
def expanding_window_cv_sets(X, y, folds: int, validation_split_ratio: float = 0.2):
    """Yield expanding-window train/validation splits for time-ordered data.

    The data is cut into ``folds`` consecutive chunks of ``int(len(X) / folds)``
    rows (any leftover rows at the end are never used). For the k-th chunk the
    training set is everything from row 0 up to a cut point near the end of
    that chunk, and the validation set is the remainder of the chunk.

    Args:
        X: Sliceable feature container, in time order.
        y: Sliceable target container aligned with ``X``.
        folds: Number of splits to generate.
        validation_split_ratio: Fraction of one chunk held out for validation
            at the end of each split.

    Yields:
        ``((train_X, train_y), (val_X, val_y))`` for each fold.
    """
    block = int(len(X) / folds)
    held_out = block * validation_split_ratio

    for k in range(1, folds + 1):
        stop = k * block
        # Training always starts at row 0 and grows with every fold
        cut = int(stop - held_out)
        yield (X[:cut], y[:cut]), (X[cut:stop], y[cut:stop])

# Example usage: list the shapes of every train/validation split
folds = 5
for train_set, val_set in expanding_window_cv_sets(X, y, folds):
    (train_X, train_y), (val_X, val_y) = train_set, val_set

    print("Training Set:")
    print(*(part.shape for part in (train_X, train_y)))
    print("Validation Set:")
    print(*(part.shape for part in (val_X, val_y)))
    print("\n")


In [8]:
# Create two tables to store the results of CV grid search
epoch_grid_search_results = pd.DataFrame(columns=['epoch', 'fold', 'MSE', 'MAE'])
# The lag column is named 'lag' (not 'lags'): the rows appended by the
# lag/hidden-unit search and the later groupby both use the key 'lag', so a
# 'lags' column would only ever hold NaN.
grid_search_results = pd.DataFrame(columns=['hidden_units', 'lag', 'fold', 'MSE', 'MAE'])

In [9]:
# Number of expanding-window cross-validation splits
FOLDS = 5
# Candidate training-epoch counts iterated over by the epoch grid search
EPOCHS = [10, 20, 50, 100, 500, 1000]
# Candidate numbers of lag features iterated over by the lag/hidden-unit grid search
LAGS = [5, 15, 25, 35, 50]
# Candidate LSTM layer sizes iterated over by the lag/hidden-unit grid search
HIDDEN_UNITS = [5, 10, 20, 50, 100]

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_squared_error

# Epoch grid search: the number of lags (25) and the LSTM width (10 units) are
# held fixed while the number of training epochs is varied and scored with
# expanding-window cross-validation.
X, y = create_features(X_train_list, 25)

for epoch in EPOCHS:
    print("Epoch: ", epoch)
    # Use the FOLDS constant instead of the `folds` variable left behind by the example cell
    for fold, (train_set, val_set) in enumerate(expanding_window_cv_sets(X, y, FOLDS)):

        train_X, train_y = train_set
        val_X, val_y = val_set

        # Reshape the data to be 3D for LSTM input: (samples, 1 time step, n_lags features)
        train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
        val_X = val_X.reshape((val_X.shape[0], 1, val_X.shape[1]))

        # Define the LSTM model with 10 hidden units (fixed during the epoch search)
        model = Sequential([
            LSTM(10, activation='relu', input_shape=(train_X.shape[1], train_X.shape[2]), return_sequences=False),
            Dense(1)
        ])

        model.compile(optimizer='adam', loss='mse')
        model.fit(train_X, train_y, epochs=epoch, batch_size=32, verbose=0)

        # Recursively predict the validation set: start from the first validation
        # example, predict the next value, feed that prediction back in as the
        # newest lag, and repeat.
        current_input = val_X[0].reshape((1, 1, val_X.shape[2]))
        y_pred = []

        for step in range(len(val_y)):
            # verbose=0 keeps Keras from printing a progress bar for every single step
            new_y = model.predict(current_input, verbose=0)
            y_pred.append(new_y[0][0])
            # create_features orders the columns 'lag 1', 'lag 2', ... (newest
            # first), so the prediction becomes the new 'lag 1' at the front and
            # the oldest lag at the end is dropped.
            current_input = np.append(new_y.reshape((1, 1, 1)), current_input[:, :, :-1], axis=2)

        # Keep the predictions 1-D like val_y: subtracting an (m, 1) array from
        # an (m,) array would broadcast to an (m, m) matrix and corrupt the MAE.
        y_pred = np.array(y_pred)
        mse = mean_squared_error(val_y, y_pred)
        mae = np.mean(np.abs(val_y - y_pred))
        print("Epochs: ", epoch)
        print("Standardized MSE: ", mse)
        print("Standardized MAE: ", mae)
        print("\n")

        # Append the results to the epoch grid search results table
        epoch_grid_search_results = pd.concat([epoch_grid_search_results, pd.DataFrame({'fold': [fold], 'epoch': [epoch], 'MSE': [mse], 'MAE': [mae]})], ignore_index=True)

In [None]:
# Show the per-fold epoch results and save them to CSV without the row index
display(epoch_grid_search_results)
epoch_grid_search_results.to_csv('epoch_grid_search_results.csv', index=False)


In [12]:
import pandas as pd
# Read the per-fold epoch results back from 'epoch_grid_search_results.csv'
# (the file written by the preceding cell)
epoch_grid_search_results = pd.read_csv('epoch_grid_search_results.csv')

In [13]:
# Average the metrics over the folds for every epoch setting.
# as_index=False keeps 'epoch' as an ordinary column; with the default it would
# become the index and be thrown away by to_csv(index=False), leaving the saved
# averages without any indication of which epoch they belong to.
epoch_average = epoch_grid_search_results.groupby('epoch', as_index=False).agg({'MSE': 'mean', 'MAE': 'mean'})
# Write the average results to a CSV file
epoch_average.to_csv('epoch_average.csv', index=False)
epoch_average = pd.read_csv('epoch_average.csv')


In [None]:
# Pick the epoch setting with the lowest MSE averaged over all CV folds.
# Taking the minimum over the raw per-fold rows would select whichever setting
# happened to have one lucky fold, which defeats the purpose of cross-validation.
mean_mse_per_epoch = epoch_grid_search_results.astype({'MSE': float}).groupby('epoch')['MSE'].mean()
BEST_EPOCH = int(mean_mse_per_epoch.idxmin())
print("Best epoch: ", BEST_EPOCH)

In [None]:

# Grid search over the number of lag features and the LSTM width, trained for
# BEST_EPOCH epochs and scored with expanding-window cross-validation.
for lags in LAGS:
    print(f"Lag: {lags}")
    X, y = create_features(X_train_list, lags)

    # Expanding window cross-validation
    for fold, (train_set, validation_set) in enumerate(expanding_window_cv_sets(X, y, FOLDS)):
        train_X, train_y = train_set
        val_X, val_y = validation_set

        # Reshape the data to be 3D for LSTM input: (samples, 1 time step, n_lags features)
        train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
        val_X = val_X.reshape((val_X.shape[0], 1, val_X.shape[1]))

        for hidden_units in HIDDEN_UNITS:
            # Define the LSTM model with the current number of hidden units
            model = Sequential([
                LSTM(hidden_units, activation='relu', input_shape=(train_X.shape[1], train_X.shape[2]), return_sequences=False),
                Dense(1)
            ])

            model.compile(optimizer='adam', loss='mse')
            model.fit(train_X, train_y, epochs=BEST_EPOCH, batch_size=32, verbose=0)

            # Recursively predict the validation set, feeding each prediction
            # back in as the newest lag of the next input
            current_input = val_X[0].reshape((1, 1, val_X.shape[2]))
            y_pred = []

            for step in range(len(val_y)):
                # verbose=0 keeps Keras from printing a progress bar for every single step
                new_y = model.predict(current_input, verbose=0)
                y_pred.append(new_y[0][0])
                # create_features orders the columns 'lag 1', 'lag 2', ...
                # (newest first), so the prediction goes in front as the new
                # 'lag 1' and the oldest lag at the end is dropped.
                current_input = np.append(new_y.reshape((1, 1, 1)), current_input[:, :, :-1], axis=2)

            # Keep the predictions 1-D like val_y: subtracting an (m, 1) array
            # from an (m,) array would broadcast to (m, m) and corrupt the MAE.
            y_pred = np.array(y_pred)
            mse = mean_squared_error(val_y, y_pred)
            mae = np.mean(np.abs(val_y - y_pred))
            print(f"Lag: {lags}, Hidden Units: {hidden_units}")
            print("Standardized MSE: ", mse)
            print("Standardized MAE: ", mae)
            print("\n")

            # Append the results to the lag grid search results table.
            # 'fold' stores the index of the current split, not the total number of folds.
            grid_search_results = pd.concat([grid_search_results, pd.DataFrame({'fold': [fold], 'lag': [lags], 'hidden_units': [hidden_units], 'MSE': [mse], 'MAE': [mae]})], ignore_index=True)

In [None]:
# Show and save the raw per-fold results of the lag/hidden-unit grid search
display(grid_search_results)
grid_search_results.to_csv('grid_search_results.csv', index=False)

# Average the metrics over the folds for every (lag, hidden_units) pair.
# as_index=False keeps the two group keys as ordinary columns; with the default
# they would form the index and be thrown away by to_csv(index=False), leaving
# the saved averages without their lag / hidden-unit labels.
average_lag_units = grid_search_results.groupby(['lag', 'hidden_units'], as_index=False).agg({'MSE': 'mean', 'MAE': 'mean'})
display(average_lag_units)
# Write the average results to a CSV file
average_lag_units.to_csv('average_lag_units.csv', index=False)
average = pd.read_csv('average_lag_units.csv')


In [17]:
# Read the raw and fold-averaged lag/hidden-unit results back from the CSV
# files written by the preceding cell
grid_search_results = pd.read_csv('grid_search_results.csv')
average_lag_units = pd.read_csv('average_lag_units.csv')

In [None]:
# Load the test series from its MATLAB file and flatten it to a plain list
matlab2 = scipy.io.loadmat('Xtest.mat')
X_matlab_test = matlab2['Xtest']
X_test_list = X_matlab_test.flatten().tolist()

# Training values followed by test values, drawn as one continuous line
concat_train_test = X_train_list + X_test_list
fig = px.line(
    x=range(len(concat_train_test)),
    y=concat_train_test,
    title='Line Plot of X_train and X_test Values',
).update_layout(xaxis_title='Index', yaxis_title='Value')
fig.show()

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input
from sklearn.metrics import mean_squared_error

# Number of lagged values fed to the network as features
number_of_lags = 35

X, y = create_features(X_train_list, number_of_lags)
# LSTM input must be 3-D: (samples, 1 time step, number_of_lags features)
X = X.reshape((X.shape[0], 1, X.shape[1]))
y = y.reshape((y.shape[0], 1))

# NOTE(review): the original comment said "16 units from the grid search", but
# the layer is built with 50 units - confirm which value the grid search supports.
model = Sequential([
    Input(shape=(X.shape[1], X.shape[2])),
    # return_sequences=False matches the models used in the grid searches and
    # makes the output (batch, 1), the same shape as the targets `y`. With a
    # single time step the computed values are the same as with True.
    LSTM(50, activation='relu', dropout=0.2, return_sequences=False),
    Dense(1)
])

# Epoch count taken from the initial grid search
model.compile(optimizer='adam', loss='mse')
model.fit(X, y, epochs=50, batch_size=32, verbose=0)

# Now test the model on the test set, first create the features for the test set
X_test, y_test = create_features(X_test_list, number_of_lags)
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
y_test = y_test.reshape((y_test.shape[0], 1))

# Recursively predict the test set.
# NOTE(review): the forecast is seeded with X_test[0], i.e. the first
# `number_of_lags` observed test values - confirm this is intended rather than
# seeding with the tail of the training series.
current_input = X_test[0].reshape((1, 1, X_test.shape[2]))
y_test_pred = []

for i in range(len(y_test)):
    new_y = model.predict(current_input, verbose=0)
    # ravel() makes this independent of whether the model returns (1, 1) or (1, 1, 1)
    y_test_pred.append(new_y.ravel()[0])
    # create_features orders the columns 'lag 1', 'lag 2', ... (newest first),
    # so the prediction goes in front as the new 'lag 1' and the oldest lag at
    # the end is dropped.
    current_input = np.append(new_y.reshape((1, 1, 1)), current_input[:, :, :-1], axis=2)

# Column vector, the same shape as y_test, so the metrics compare element-wise
y_test_pred = np.array(y_test_pred).reshape(-1, 1)

from sklearn.metrics import mean_squared_error, mean_absolute_error

# Score the recursive forecast against the test targets
mse = mean_squared_error(y_true=y_test, y_pred=y_test_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_test_pred)

print(f"Mean Squared Error (MSE): {mse:.2f}", f"Mean Absolute Error (MAE): {mae:.2f}", sep="\n")


# Plot the predictions vs actual values
import plotly.express as px
import plotly.graph_objects as go

# Overlay the actual test values and the recursive predictions in one figure
fig = go.Figure(data=[
    go.Scatter(y=y_test.flatten(), mode='lines', name='Actual Values'),
    go.Scatter(y=y_test_pred.flatten(), mode='lines', name='Predicted Values'),
])
fig.update_layout(
    title='LSTM Predictions vs Actual Values',
    xaxis_title='Index',
    yaxis_title='Value',
)
fig.show()




In [None]:
# Same actual-vs-predicted comparison, with a boxed legend in the top-right corner
fig = go.Figure()
for series, label in ((y_test, 'Actual Values'), (y_test_pred, 'Predicted Values')):
    fig.add_trace(go.Scatter(y=series.flatten(), mode='lines', name=label))

legend_style = dict(
    x=0.98,
    y=0.98,
    xanchor='right',
    yanchor='top',
    bgcolor='rgba(255, 255, 255, 0.7)',  # translucent white background
    bordercolor='black',
    borderwidth=1,
)
fig.update_layout(xaxis_title='Time', yaxis_title='Value', legend=legend_style)

fig.show()
