In [None]:
import numpy as np
import pandas as pd
import torch

from pathlib import Path
from sklearn.preprocessing import MinMaxScaler

from models.StackedAutoEncoder import SimpleAutoEncoder, AutoEncoder, StackedAutoEncoder
from models.LSTM import LSTM, GRU

In [None]:
# Load the "S&P500 Index Data" sheet of the raw workbook.
data = pd.read_excel(Path("model_data/RawData.xlsx"), sheet_name="S&P500 Index Data")
# Every column from the third one onward is treated as a feature.
features = list(data.columns[2:])
# Disabled alternative: replace each feature by its ratio to the previous row.
# data[features] = data[features] / data[features].shift(1)
# Drop incomplete rows by rebinding instead of mutating the loaded frame in place.
data = data.dropna()

# Split on the numeric 'Ntime' column; the bounds look like YYYYMMDD dates.
# The splits are taken from the full frame, so they keep every column
# (including 'Ntime') and do not contain the 'target' column created below.
train = data.loc[(data['Ntime'] >= 20120101) & (data['Ntime'] < 20140101)]
val = data.loc[(data['Ntime'] >= 20140101) & (data['Ntime'] < 20140401)]
test = data.loc[(data['Ntime'] >= 20140401) & (data['Ntime'] < 20140701)]

# NOTE(review): this feature/target frame is built after the splits, so the
# target never reaches train/val/test — confirm whether that is intended.
# .copy() makes the column subset an independent frame, so adding 'target'
# does not trigger SettingWithCopyWarning.
data = data[features].copy()
# Target is the 'Close Price' of the following row (NaN on the last row).
data['target'] = data['Close Price'].shift(-1)
data.head()

In [None]:
# Fit the scaler on the training split only, so no statistics from the
# validation or test periods influence the scaling.
scaler = MinMaxScaler()
scaler.fit(train)
# Apply the training-fitted scaling to every split. Previously only `train`
# was transformed, which left `val` and `test` on their raw scale.
train = scaler.transform(train)
val = scaler.transform(val)
test = scaler.transform(test)

In [None]:
# Persist each split to its own .npy file in the current working directory.
outputs = {"train.npy": train, "val.npy": val, "test.npy": test}
for filename, split in outputs.items():
    np.save(filename, split)

In [None]:
def split_data(stock, lookback):
    """Cut a 2-D series into overlapping windows and split each into input/target.

    Window ``i`` covers rows ``i .. i + lookback - 1``. The first
    ``lookback - 1`` rows of a window form the input sequence and its last
    row (all columns) is the target.

    Args:
        stock: 2-D array-like of shape ``(n_rows, n_columns)``.
        lookback: Number of rows per window; must be at least 1 and smaller
            than ``n_rows``.

    Returns:
        A tuple ``(x, y)`` of ``torch.float32`` tensors with shapes
        ``(n_rows - lookback, lookback - 1, n_columns)`` and
        ``(n_rows - lookback, n_columns)``.

    Raises:
        ValueError: If ``stock`` is not 2-D, ``lookback`` is smaller than 1,
            or ``stock`` has too few rows to form a single window.
    """
    values = np.asarray(stock)
    if values.ndim != 2:
        raise ValueError(
            f"stock must be 2-D (rows x columns), got {values.ndim} dimension(s)"
        )
    if lookback < 1:
        raise ValueError(f"lookback must be at least 1, got {lookback}")

    n_windows = len(values) - lookback
    if n_windows < 1:
        raise ValueError(
            f"need more than {lookback} rows to build a window, got {len(values)}"
        )

    # Row i of `row_index` lists the source rows of window i, so one fancy
    # indexing operation builds every window: (n_windows, lookback, n_columns).
    row_index = np.arange(n_windows)[:, None] + np.arange(lookback)[None, :]
    windows = values[row_index]

    x = torch.tensor(windows[:, :-1], dtype=torch.float32)
    y = torch.tensor(windows[:, -1, :], dtype=torch.float32)
    return x, y

# Window the scaled training data; each window spans LOOKBACK consecutive rows.
LOOKBACK = 20
x_train, y_train = split_data(train, lookback=LOOKBACK)
x_train.shape

In [None]:
# Hyperparameters shared by the recurrent models trained in this notebook.
input_size = 21
hidden_size = 32
num_layers = 2
output_size = 1
dropout = 0.2
# NOTE(review): num_epochs is not passed to anything in this cell — confirm
# whether fit() is supposed to receive it.
num_epochs = 10_000

# Pass the input_size variable rather than repeating the literal, so changing
# the hyperparameter above actually takes effect.
model = LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, output_size=output_size, dropout=dropout)
model.fit(x_train, y_train)

In [None]:
# Train a GRU with the same hyperparameters as the LSTM. `input_size` comes
# from the shared variable instead of a repeated literal. Rebinding `model`
# replaces the previously trained LSTM instance.
model = GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, output_size=output_size, dropout=dropout)
model.fit(x_train, y_train)