# How to use PyTorch LSTMs/GRUs for time series regression
We aim to forecast a stock's opening price using LSTM and GRU models in PyTorch.

credit: https://github.com/CrosstabKite/lstm-forecasting/blob/master/lstm_forecasting.ipynb

modified on (12/02/2023)

# Data

In [None]:
!pip install torchinfo

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchinfo
  Downloading torchinfo-1.7.2-py3-none-any.whl (22 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.7.2


In [None]:
!wget https://github.com/pvateekul/2110446_DS_2022s2/raw/main/code/Week05_Intro_Deep_learning/data/GOOG.csv

--2023-02-13 13:55:56--  https://github.com/pvateekul/2110446_DS_2022s2/raw/main/code/Week05_Intro_Deep_learning/data/GOOG.csv
Resolving github.com (github.com)... 140.82.121.4
Connecting to github.com (github.com)|140.82.121.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/pvateekul/2110446_DS_2022s2/main/code/Week05_Intro_Deep_learning/data/GOOG.csv [following]
--2023-02-13 13:55:56--  https://raw.githubusercontent.com/pvateekul/2110446_DS_2022s2/main/code/Week05_Intro_Deep_learning/data/GOOG.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 284248 (278K) [text/plain]
Saving to: ‘GOOG.csv.1’


2023-02-13 13:55:57 (25.0 MB/s) - ‘GOOG.csv.1’ saved [284248/284248]



In [None]:
import pandas as pd

# Load the daily GOOG price history keyed by its "Date" column and discard the
# adjusted close, leaving Open/High/Low/Close/Volume.
df = pd.read_csv('GOOG.csv', index_col="Date").drop(columns=['Adj Close'])
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2004-08-19,49.813286,51.835709,47.800831,49.982655,44871300
2004-08-20,50.316402,54.336334,50.062355,53.952770,22942800
2004-08-23,55.168217,56.528118,54.321388,54.495735,18342800
2004-08-24,55.412300,55.591629,51.591621,52.239193,15319700
2004-08-25,52.284027,53.798351,51.746044,52.802086,9232100
...,...,...,...,...,...
2019-09-30,1220.969971,1226.000000,1212.300049,1219.000000,1404100
2019-10-01,1219.000000,1231.229980,1203.579956,1205.099976,1273500
2019-10-02,1196.979980,1196.979980,1171.290039,1176.630005,1615100
2019-10-03,1180.000000,1189.060059,1162.430054,1187.829956,1621200


In [None]:
df.loc['2019-02-26']

Open      1.105750e+03
High      1.119510e+03
Low       1.099920e+03
Close     1.115130e+03
Volume    1.471300e+06
Name: 2019-02-26, dtype: float64

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "plotly_white"

# Shared layout overrides (larger base and axis-title fonts) passed as `template`
# to every figure drawn in this notebook.
plot_template = dict(
    layout=go.Layout({
        "font_size": 18,
        "xaxis_title_font_size": 24,
        "yaxis_title_font_size": 24})
)

# Line chart of the daily opening price over the whole history.
# NOTE(review): the `created_at` and "Sensor" labels look like leftovers from the
# credited source notebook; the index here is named "Date" — confirm they are needed.
fig = px.line(df['Open'], labels=dict(
    created_at="Date", value="Open", variable="Sensor"
))
fig.update_layout(
  template=plot_template, legend=dict(orientation='h', y=1.02, title_text="")
)
fig.show()

## Create the target variable

In [None]:
target_col = "Open"
forecast_lead = 1
target = f"{target_col}_lead{forecast_lead}"

# Model inputs: every column except the current open and any lead column created
# by an earlier run of this cell, so a re-run cannot leak the target into the
# feature list.
lead_prefix = f"{target_col}_lead"
features = [
    col for col in df.columns.difference([target_col])
    if not col.startswith(lead_prefix)
]

# The target is the open `forecast_lead` rows ahead. The guard keeps the cell
# idempotent: without it each re-run would shift again and trim more rows.
if target not in df.columns:
    df[target] = df[target_col].shift(-forecast_lead)
    # Drop the trailing rows that have no future value. An explicit length is used
    # because `iloc[:-0]` would return an empty frame when forecast_lead is 0.
    df = df.iloc[:len(df) - forecast_lead]

In [None]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Open_lead1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2004-08-19,49.813286,51.835709,47.800831,49.982655,44871300,50.316402
2004-08-20,50.316402,54.336334,50.062355,53.952770,22942800,55.168217
2004-08-23,55.168217,56.528118,54.321388,54.495735,18342800,55.412300
2004-08-24,55.412300,55.591629,51.591621,52.239193,15319700,52.284027
2004-08-25,52.284027,53.798351,51.746044,52.802086,9232100,52.279045
...,...,...,...,...,...,...
2019-09-27,1243.010010,1244.020020,1214.449951,1225.089966,1353900,1220.969971
2019-09-30,1220.969971,1226.000000,1212.300049,1219.000000,1404100,1219.000000
2019-10-01,1219.000000,1231.229980,1203.579956,1205.099976,1273500,1196.979980
2019-10-02,1196.979980,1196.979980,1171.290039,1176.630005,1615100,1180.000000


## Create a hold-out test set and preprocess the data

In [None]:
test_start = "2019-01-01"
val_start = "2018-01-01"

# Half-open date masks: train < val_start <= validation < test_start <= test.
# Label slices such as df.loc[:val_start] include both endpoints, so a boundary
# date that exists in the index would land in two splits at once.
in_test = df.index >= test_start
in_val = (df.index >= val_start) & ~in_test
in_train = df.index < val_start

df_train = df.loc[in_train].copy()
df_val = df.loc[in_val].copy()
df_test = df.loc[in_test].copy()

print("Test set fraction:", len(df_test) / len(df))

Test set fraction: 0.050157563025210086


## Standardize the features and target, based on the training set

In [None]:
# Keep the target's training-set statistics so forecasts can be mapped back to
# price units later on.
target_mean, target_stdev = df_train[target].mean(), df_train[target].std()

# Z-score every column of all three splits using training-set statistics only.
# The statistics are read before df_train's column is overwritten.
for col in df_train.columns:
    col_mean, col_stdev = df_train[col].mean(), df_train[col].std()
    for split_frame in (df_train, df_val, df_test):
        split_frame[col] = (split_frame[col] - col_mean) / col_stdev

## Create datasets that PyTorch `DataLoader` can work with

In [None]:
import torch
from torch.utils.data import Dataset

class SequenceDataset(Dataset):
    """Sliding-window view over a DataFrame for sequence-to-one regression.

    Item ``i`` is the pair ``(x, y)`` where ``x`` holds the ``sequence_length``
    feature rows ending at row ``i`` (inclusive) and ``y`` is the target value of
    row ``i``. Windows that would start before the first row are left-padded with
    copies of the first feature row, so every item has the same shape.

    Args:
        dataframe: source frame containing the target and feature columns.
        target: name of the target column.
        features: list of feature column names.
        sequence_length: number of rows in each window.
    """

    def __init__(self, dataframe, target, features, sequence_length=5):
        self.features = features
        self.target = target
        self.sequence_length = sequence_length
        self.y = torch.tensor(dataframe[self.target].values).float()
        self.X = torch.tensor(dataframe[self.features].values).float()

    def __len__(self):
        """Number of rows, which is also the number of windows."""
        return self.X.shape[0]

    def __getitem__(self, i):
        """Return ``(window, target)`` for row ``i``; negative ``i`` counts from the end.

        Raises:
            IndexError: if ``i`` is outside ``[-len(self), len(self))``.
        """
        n_rows = len(self)
        # Normalise negative indices first; otherwise they take the padding branch
        # and produce a window made only of copies of the first row.
        if i < 0:
            i += n_rows
        if not 0 <= i < n_rows:
            raise IndexError(f"index {i} is out of range for dataset of length {n_rows}")

        window_start = i - self.sequence_length + 1
        if window_start >= 0:
            x = self.X[window_start:(i + 1), :]
        else:
            # Not enough history: repeat the first row to fill the missing steps.
            padding = self.X[0].repeat(-window_start, 1)
            x = torch.cat((padding, self.X[0:(i + 1), :]), 0)

        return x, self.y[i]

In [None]:
# Sanity check of the windowing with a short window (4 rows) ending at row 27.
# `i` and `sequence_length` are read again by the inspection cells that follow.
i = 27
sequence_length = 4

train_dataset = SequenceDataset(
    df_train,
    target=target,
    features=features,
    sequence_length=sequence_length
)

# Each item is a (window, target) pair; only the window is printed here.
X, y = train_dataset[i]
print(X)

tensor([[-1.4005, -1.4044, -1.4018,  0.0794],
        [-1.4026, -1.4013, -1.3959,  0.1501],
        [-1.4059, -1.4081, -1.4001, -0.0974],
        [-1.3878, -1.3944, -1.3950,  1.0892]])


In [None]:
X, y = train_dataset[i + 1]
print(X)

tensor([[-1.4026, -1.4013, -1.3959,  0.1501],
        [-1.4059, -1.4081, -1.4001, -0.0974],
        [-1.3878, -1.3944, -1.3950,  1.0892],
        [-1.3789, -1.3784, -1.3822,  2.7240]])


In [None]:
print(df_train[features].iloc[(i - sequence_length + 1): (i + 1)])

               Close      High       Low    Volume
Date                                              
2004-09-23 -1.400509 -1.404409 -1.401750  0.079391
2004-09-24 -1.402597 -1.401323 -1.395935  0.150113
2004-09-27 -1.405909 -1.408082 -1.400095 -0.097414
2004-09-28 -1.387767 -1.394397 -1.394980  1.089247


In [None]:
from torch.utils.data import DataLoader
# Seed the RNG before drawing from the shuffled loader below.
torch.manual_seed(99)

train_loader = DataLoader(train_dataset, batch_size=3, shuffle=True)

# Draw one batch to inspect its layout: (batch, window rows, features).
X, y = next(iter(train_loader))
print(X.shape)
print(X)

torch.Size([3, 4, 4])
tensor([[[-1.0283e+00, -1.0363e+00, -1.0355e+00,  3.3244e+00],
         [-1.0139e+00, -1.0227e+00, -1.0264e+00,  3.3321e+00],
         [-1.0183e+00, -1.0127e+00, -1.0091e+00,  3.6286e+00],
         [-1.0379e+00, -1.0229e+00, -1.0300e+00,  3.4512e+00]],

        [[-3.0608e-01, -3.1554e-01, -3.1614e-01,  2.0066e+00],
         [-2.9534e-01, -2.7960e-01, -2.8488e-01,  2.8479e+00],
         [-2.8260e-01, -2.8693e-01, -2.9963e-01,  6.5448e-01],
         [-2.2982e-01, -2.3949e-01, -2.4929e-01,  6.8556e-01]],

        [[-4.5368e-03, -1.7926e-03,  1.3788e-03, -4.5791e-01],
         [-3.9661e-02, -1.2372e-02, -3.4364e-02, -1.4888e-01],
         [ 3.1979e-02,  2.4656e-02, -2.3751e-02,  4.4746e-01],
         [ 3.2486e-02,  2.5747e-02, -5.2009e-03, -2.5444e-01]]])


## Create the datasets and data loaders for real

In this tutorial we will
use sequences of length 60 (60 days) to forecast 1 day ahead.

The PyTorch `DataLoader` is a very convenient way to iterate through these datasets. For
the training set we'll shuffle (the rows *within* each training sequence are not
shuffled, only the order in which we draw those blocks). For the test set, shuffling
isn't necessary.

In [None]:
torch.manual_seed(101)

batch_size = 32
sequence_length = 60

# One windowed dataset per split, all sharing the same target, features and
# window length.
train_dataset, val_dataset, test_dataset = [
    SequenceDataset(
        split_frame,
        target=target,
        features=features,
        sequence_length=sequence_length,
    )
    for split_frame in (df_train, df_val, df_test)
]

# Only the training loader shuffles; validation and test keep chronological order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
eval_loader_options = dict(batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, **eval_loader_options)
test_loader = DataLoader(test_dataset, **eval_loader_options)

# Peek at one training batch to confirm the tensor shapes.
X, y = next(iter(train_loader))

print("Features shape:", X.shape)
print("Target shape:", y.shape)

Features shape: torch.Size([32, 60, 4])
Target shape: torch.Size([32])


# LSTM

## The model and learning algorithm

![picture](https://i.stack.imgur.com/SjnTl.png)

Credit : https://stackoverflow.com/questions/48302810/whats-the-difference-between-hidden-and-output-in-pytorch-lstm 

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
from torch import nn

class ShallowRegressionLSTM(nn.Module):
    """Stacked LSTM followed by a linear head that emits one value per sequence.

    Args:
        num_features: number of input features per time step.
        hidden_units: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers (default 4, the value that was
            previously hard-coded).
    """

    def __init__(self, num_features, hidden_units, num_layers=4):
        super().__init__()
        self.num_features = num_features  # this is the number of features
        self.hidden_units = hidden_units
        self.num_layers = num_layers

        self.lstm = nn.LSTM(
            input_size=num_features,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers
        )

        self.linear = nn.Linear(in_features=self.hidden_units, out_features=1)

    def forward(self, x):
        """Map a ``(batch, seq_len, num_features)`` tensor to a ``(batch,)`` tensor."""
        # With no initial state given, nn.LSTM starts from zero hidden and cell
        # states on the input's device. That equals the explicit zeros built
        # before, without depending on a global `device` or tracking gradients
        # for constant tensors.
        _, (hn, _) = self.lstm(x)
        # hn[-1] is the final hidden state of the top layer.
        out = self.linear(hn[-1]).flatten()
        return out


In [None]:
# Optimiser step size and LSTM hidden-state width.
learning_rate = 5e-4
num_hidden_units = 60

# Build the LSTM regressor with one input per feature column, move it to the
# selected device, and train it with mean-squared error under Adam.
model = ShallowRegressionLSTM(num_features=len(features), hidden_units=num_hidden_units)
model.to(device)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
from torchinfo import summary
# Derive the example input shape from the notebook settings rather than repeating
# the literals, so the summary stays correct when they change.
summary(model, input_size=(batch_size, sequence_length, len(features)))

Layer (type:depth-idx)                   Output Shape              Param #
ShallowRegressionLSTM                    [32]                      --
├─LSTM: 1-1                              [32, 60, 60]              103,680
├─Linear: 1-2                            [32, 1]                   61
Total params: 103,741
Trainable params: 103,741
Non-trainable params: 0
Total mult-adds (M): 199.07
Input size (MB): 0.03
Forward/backward pass size (MB): 0.92
Params size (MB): 0.41
Estimated Total Size (MB): 1.37

## Train

In [None]:
from tqdm.notebook import tqdm

In [None]:
def train_model(data_loader, model, loss_function, optimizer):
    """Run one training epoch and report the mean per-batch loss.

    Args:
        data_loader: iterable of ``(X, y)`` batches that supports ``len()``.
        model: module being trained; batches are moved to its device.
        loss_function: callable ``(output, y) -> scalar loss tensor``.
        optimizer: optimizer bound to ``model``'s parameters.

    Returns:
        The average batch loss as a float (``nan`` when the loader is empty).
    """
    num_batches = len(data_loader)
    total_loss = 0

    # Follow the model's own device instead of relying on a global `device`.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()

    for X, y in data_loader:
        X = X.to(model_device)
        y = y.to(model_device)
        output = model(X)
        loss = loss_function(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Guard the division so an empty loader reports nan instead of raising.
    avg_loss = total_loss / num_batches if num_batches else float('nan')
    print(f"Train loss: {avg_loss}")
    return avg_loss

def test_model(data_loader, model, loss_function, best_val_loss):
    
    num_batches = len(data_loader)
    total_loss = 0

    model.eval()
    with torch.no_grad():
        for X, y in data_loader:
            X = X.to(device)
            y = y.to(device)
            output = model(X)
            total_loss += loss_function(output, y).item()

    avg_loss = total_loss / num_batches
    print(f"Test loss: {avg_loss}")
    if avg_loss < best_val_loss:
        best_val_loss = avg_loss
        torch.save(model.state_dict(), 'model.pth')
        print('Save new best model')
    return best_val_loss

In [None]:
# Train for a fixed 100 epochs. After each epoch the validation loader is scored
# and test_model saves a checkpoint whenever the validation loss improves.
best_val_loss = torch.inf
for ix_epoch in tqdm(range(100)):
    print(f"Epoch {ix_epoch}\n---------")
    train_model(train_loader, model, loss_function, optimizer=optimizer)
    best_val_loss = test_model(val_loader, model, loss_function, best_val_loss)
    print()

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch 0
---------
Train loss: 0.2915588092621205
Test loss: 0.4108197595924139
Save new best model

Epoch 1
---------
Train loss: 0.007845104992705977
Test loss: 0.11720327101647854
Save new best model

Epoch 2
---------
Train loss: 0.005768874090138542
Test loss: 0.10972362151369452
Save new best model

Epoch 3
---------
Train loss: 0.005003157642863269
Test loss: 0.09268538013566285
Save new best model

Epoch 4
---------
Train loss: 0.004642315307755093
Test loss: 0.07954309927299619
Save new best model

Epoch 5
---------
Train loss: 0.004899778800911077
Test loss: 0.10576264630071819

Epoch 6
---------
Train loss: 0.004388660702081221
Test loss: 0.07900401763617992
Save new best model

Epoch 7
---------
Train loss: 0.0038102213952268633
Test loss: 0.06791550852358341
Save new best model

Epoch 8
---------
Train loss: 0.0034644222815020257
Test loss: 0.057397327735088766
Save new best model

Epoch 9
---------
Train loss: 0.0037725965694506776
Test loss: 0.06026554189156741

Epoch 10


## Evaluation

In [None]:
def predict(data_loader, model):
    """Run the model over every batch and return all outputs as one CPU tensor.

    Mirrors the evaluation loop of `test_model`, but collects the model outputs
    instead of accumulating a loss.

    Args:
        data_loader: iterable of ``(X, y)`` batches; ``y`` is ignored.
        model: module to evaluate; batches are moved to its device.

    Returns:
        Outputs concatenated along dim 0 in loader order (an empty tensor when the
        loader yields no batches).
    """
    # Follow the model's own device instead of relying on a global `device`.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    batch_outputs = []
    model.eval()
    with torch.no_grad():
        for X, _ in data_loader:
            y_star = model(X.to(model_device))
            batch_outputs.append(y_star.detach().cpu())

    if not batch_outputs:
        return torch.tensor([])
    # Concatenate once at the end; growing the result inside the loop re-copies
    # everything accumulated so far on each batch.
    return torch.cat(batch_outputs, 0)

In [None]:
# Restore the best checkpoint written during training. map_location remaps the
# saved tensors onto the current device, so a checkpoint saved on a GPU still
# loads on a CPU-only runtime.
PATH = './model.pth'
model.load_state_dict(torch.load(PATH, map_location=device))

<All keys matched successfully>

In [None]:
# Unshuffled pass over the training set so predictions line up with df_train rows.
train_eval_loader = DataLoader(train_dataset, shuffle=False, batch_size=batch_size)

ystar_col = "Model forecast"
for split_frame, split_loader in (
    (df_train, train_eval_loader),
    (df_val, val_loader),
    (df_test, test_loader),
):
    split_frame[ystar_col] = predict(split_loader, model).numpy()

# Stack the three splits and convert both columns from standardized units back
# to prices using the training-set target statistics.
df_out = pd.concat((df_train, df_val, df_test))[[target, ystar_col]]

for col in df_out.columns:
    df_out[col] = df_out[col] * target_stdev + target_mean

print(df_out)

             Open_lead1  Model forecast
Date                                   
2004-08-19    50.316402       52.646362
2004-08-20    55.168217       52.799988
2004-08-23    55.412300       54.844299
2004-08-24    52.284027       53.112579
2004-08-25    52.279045       52.587830
...                 ...             ...
2019-09-27  1220.969971     1203.750610
2019-09-30  1219.000000     1200.191162
2019-10-01  1196.979980     1195.909912
2019-10-02  1180.000000     1181.464722
2019-10-03  1191.890015     1176.163696

[3808 rows x 2 columns]


In [None]:
import numpy as np
import math
from sklearn.metrics import mean_squared_error

def MAPE(Y_actual,Y_Predicted):
    """Mean absolute percentage error of the predictions, in percent.

    Each error is taken relative to the corresponding actual value.
    """
    relative_error = (Y_actual - Y_Predicted) / Y_actual
    return np.mean(np.abs(relative_error)) * 100

# df_test still holds standardized values (only df_out was rescaled), so map both
# columns back to price units before scoring. Otherwise MAPE divides by z-scores
# and RMSE is reported in standard deviations rather than dollars.
test_actual = df_test[target] * target_stdev + target_mean
test_forecast = df_test[ystar_col] * target_stdev + target_mean

print( 'MAPE =', MAPE(test_actual, test_forecast) )
print( 'RMSE =', math.sqrt(mean_squared_error(test_actual, test_forecast)) )

MPAE = 1.9260358169336265
RMSE = 0.08594003132834233


In [None]:
# Plot the actual next-day open against the model forecast over the full history.
# The dashed vertical lines mark where the validation and test periods begin.
# NOTE(review): the `created_at` label key looks like a leftover from the credited
# source notebook; the index here is named "Date" — confirm it is needed.
fig = px.line(df_out, labels={'value': "Open", 'created_at': 'Date'})
fig.add_vline(x=val_start, line_width=4, line_dash="dash")
fig.add_vline(x=test_start, line_width=4, line_dash="dash")
# fig.add_annotation(xref="paper", x=0.75, yref="paper", y=0.8, text="Test set start", showarrow=False)
fig.update_layout(
  template=plot_template, legend=dict(orientation='h', y=1.02, title_text="")
)
fig.show()

# GRU

## The model and learning algorithm

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
from torch import nn

class ShallowRegressionGRU(nn.Module):
    """Stacked GRU followed by a linear head that emits one value per sequence.

    Args:
        num_features: number of input features per time step.
        hidden_units: size of the GRU hidden state.
        num_layers: number of stacked GRU layers (default 4, the value that was
            previously hard-coded).
    """

    def __init__(self, num_features, hidden_units, num_layers=4):
        super().__init__()
        self.num_features = num_features  # this is the number of features
        self.hidden_units = hidden_units
        self.num_layers = num_layers

        self.gru = nn.GRU(
            input_size=num_features,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers
        )

        self.linear = nn.Linear(in_features=self.hidden_units, out_features=1)

    def forward(self, x):
        """Map a ``(batch, seq_len, num_features)`` tensor to a ``(batch,)`` tensor."""
        # A GRU has only a hidden state (no cell state). With no initial state
        # given, nn.GRU starts from zeros on the input's device, which equals the
        # explicit zeros built before without depending on a global `device`.
        _, hn = self.gru(x)
        # hn[-1] is the final hidden state of the top layer.
        out = self.linear(hn[-1]).flatten()
        return out


In [None]:
# Optimiser step size and GRU hidden-state width.
learning_rate = 5e-4
num_hidden_units = 60

# Rebind `model`, `loss_function` and `optimizer` to a fresh GRU regressor; the
# LSTM objects created earlier are no longer reachable through these names.
model = ShallowRegressionGRU(num_features=len(features), hidden_units=num_hidden_units)
model.to(device)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
from torchinfo import summary
# Derive the example input shape from the notebook settings rather than repeating
# the literals, so the summary stays correct when they change.
summary(model, input_size=(batch_size, sequence_length, len(features)))

Layer (type:depth-idx)                   Output Shape              Param #
ShallowRegressionGRU                     [32]                      --
├─GRU: 1-1                               [32, 60, 60]              77,760
├─Linear: 1-2                            [32, 1]                   61
Total params: 77,821
Trainable params: 77,821
Non-trainable params: 0
Total mult-adds (M): 149.30
Input size (MB): 0.03
Forward/backward pass size (MB): 0.92
Params size (MB): 0.31
Estimated Total Size (MB): 1.26

## Train

In [None]:
from tqdm.notebook import tqdm

In [None]:
def train_model(data_loader, model, loss_function, optimizer):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        data_loader: iterable of ``(X, y)`` batches that supports ``len()``.
        model: module being trained; batches are moved to its device.
        loss_function: callable ``(output, y) -> scalar loss tensor``.
        optimizer: optimizer bound to ``model``'s parameters.

    Returns:
        The mean per-batch loss as a float (``nan`` when the loader is empty).
    """
    num_batches = len(data_loader)
    total_loss = 0

    # Use whatever device the model lives on rather than a global `device`.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()

    for X, y in data_loader:
        X = X.to(model_device)
        y = y.to(model_device)
        output = model(X)
        loss = loss_function(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Guard the division so an empty loader reports nan instead of raising.
    avg_loss = total_loss / num_batches if num_batches else float('nan')
    print(f"Train loss: {avg_loss}")
    return avg_loss

def test_model(data_loader, model, loss_function, best_val_loss):
    
    num_batches = len(data_loader)
    total_loss = 0

    model.eval()
    with torch.no_grad():
        for X, y in data_loader:
            X = X.to(device)
            y = y.to(device)
            output = model(X)
            total_loss += loss_function(output, y).item()

    avg_loss = total_loss / num_batches
    print(f"Test loss: {avg_loss}")
    if avg_loss < best_val_loss:
        best_val_loss = avg_loss
        torch.save(model.state_dict(), 'model_gru.pth')
        print('Save new best model')
    return best_val_loss

In [None]:
# Train the GRU for a fixed 100 epochs. The best-loss tracker is reset so that
# checkpoints are chosen on this model's validation loss alone.
best_val_loss = torch.inf
for ix_epoch in tqdm(range(100)):
    print(f"Epoch {ix_epoch}\n---------")
    train_model(train_loader, model, loss_function, optimizer=optimizer)
    best_val_loss = test_model(val_loader, model, loss_function, best_val_loss)
    print()

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch 0
---------
Train loss: 0.13306787341639822
Test loss: 0.3699864372611046
Save new best model

Epoch 1
---------
Train loss: 0.007901101493944396
Test loss: 0.19157686922699213
Save new best model

Epoch 2
---------
Train loss: 0.004264305023924571
Test loss: 0.11873267078772187
Save new best model

Epoch 3
---------
Train loss: 0.002774550795845055
Test loss: 0.03328921762295067
Save new best model

Epoch 4
---------
Train loss: 0.0023553188426753964
Test loss: 0.030856473778840154
Save new best model

Epoch 5
---------
Train loss: 0.0017743898733554161
Test loss: 0.04075582302175462

Epoch 6
---------
Train loss: 0.0015879208957864288
Test loss: 0.03599705174565315

Epoch 7
---------
Train loss: 0.001559625197430405
Test loss: 0.026783364999573678
Save new best model

Epoch 8
---------
Train loss: 0.0013402292940365853
Test loss: 0.031298818765208125

Epoch 9
---------
Train loss: 0.001172321058923976
Test loss: 0.02011057111667469
Save new best model

Epoch 10
---------
Train 

## Evaluation

In [None]:
def predict(data_loader, model):
    """Collect the model's outputs for every batch into a single CPU tensor.

    Uses the same evaluation loop as `test_model`, but keeps the outputs instead
    of accumulating a loss.

    Args:
        data_loader: iterable of ``(X, y)`` batches; ``y`` is ignored.
        model: module to evaluate; batches are moved to its device.

    Returns:
        Outputs concatenated along dim 0 in loader order (an empty tensor when the
        loader yields no batches).
    """
    # Use whatever device the model lives on rather than a global `device`.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    batch_outputs = []
    model.eval()
    with torch.no_grad():
        for X, _ in data_loader:
            y_star = model(X.to(model_device))
            batch_outputs.append(y_star.detach().cpu())

    if not batch_outputs:
        return torch.tensor([])
    # A single concatenation avoids re-copying the accumulated tensor per batch.
    return torch.cat(batch_outputs, 0)

In [None]:
# Restore the best GRU checkpoint. map_location remaps the saved tensors onto the
# current device, so a checkpoint saved on a GPU still loads on a CPU-only runtime.
PATH = './model_gru.pth'
model.load_state_dict(torch.load(PATH, map_location=device))

<All keys matched successfully>

In [None]:
# Ordered (unshuffled) loader over the training set for row-aligned predictions.
train_eval_loader = DataLoader(train_dataset, shuffle=False, batch_size=batch_size)

ystar_col = "Model forecast"
split_loaders = (
    (df_train, train_eval_loader),
    (df_val, val_loader),
    (df_test, test_loader),
)
for split_frame, split_loader in split_loaders:
    split_frame[ystar_col] = predict(split_loader, model).numpy()

# Combine the splits and rescale target and forecast from standardized units to
# prices using the training-set target statistics.
df_out = pd.concat((df_train, df_val, df_test))[[target, ystar_col]]

for col in df_out.columns:
    df_out[col] = df_out[col] * target_stdev + target_mean

print(df_out)

             Open_lead1  Model forecast
Date                                   
2004-08-19    50.316402       47.450989
2004-08-20    55.168217       45.702698
2004-08-23    55.412300       48.377167
2004-08-24    52.284027       45.704132
2004-08-25    52.279045       45.685181
...                 ...             ...
2019-09-27  1220.969971     1199.030273
2019-09-30  1219.000000     1195.956421
2019-10-01  1196.979980     1191.044434
2019-10-02  1180.000000     1175.459839
2019-10-03  1191.890015     1174.728760

[3808 rows x 2 columns]


In [None]:
import numpy as np
import math
from sklearn.metrics import mean_squared_error

def MAPE(Y_actual,Y_Predicted):
    """Return the mean absolute percentage error (in percent).

    Errors are measured relative to the actual values.
    """
    abs_relative_error = np.abs((Y_actual - Y_Predicted) / Y_actual)
    return np.mean(abs_relative_error) * 100

# Score both metrics on the test split (RMSE previously used the validation
# frame). df_test still holds standardized values, so map both columns back to
# price units first; otherwise MAPE divides by z-scores and RMSE is not in dollars.
test_actual = df_test[target] * target_stdev + target_mean
test_forecast = df_test[ystar_col] * target_stdev + target_mean

print( 'MAPE =', MAPE(test_actual, test_forecast) )
print( 'RMSE =', math.sqrt(mean_squared_error(test_actual, test_forecast)) )

MPAE = 1.9992088811782285
RMSE = 0.08901433084995505


In [None]:
# Plot the actual next-day open against the GRU forecast over the full history.
# The dashed vertical lines mark where the validation and test periods begin.
# NOTE(review): the `created_at` label key looks like a leftover from the credited
# source notebook; the index here is named "Date" — confirm it is needed.
fig = px.line(df_out, labels={'value': "Open", 'created_at': 'Date'})
fig.add_vline(x=val_start, line_width=4, line_dash="dash")
fig.add_vline(x=test_start, line_width=4, line_dash="dash")
# fig.add_annotation(xref="paper", x=0.75, yref="paper", y=0.8, text="Test set start", showarrow=False)
fig.update_layout(
  template=plot_template, legend=dict(orientation='h', y=1.02, title_text="")
)
fig.show()