## Modeling

In [3]:
import pandas as pd
import numpy as np

# Read the preprocessed train and test frames from their pickle files.
# NOTE: unpickling executes arbitrary code, so only load files you trust.
train_df, test_df = (
    pd.read_pickle(path) for path in ("train_df.pkl", "test_df.pkl")
)

In [8]:
# Build sequences

# Columns stacked, in this order, along the last axis of every input window.
features = ["Open", "High", "Low", "Close", "Volume"]


def make_sequences(df, window=30, horizon=1):
    """
    Convert per-stock time-series rows into fixed-length sequences for an RNN.

    Rows are grouped by the "Stock" column so that no window spans two
    different stocks. Within each stock, rows are used in the order they
    appear in ``df`` (no sorting is done here).

    For every index ``i`` in ``range(window, len(stock_rows) - horizon)``:
      * the input is rows ``i - window .. i - 1`` of the ``features`` columns
      * the label is the "Return" value at row ``i + horizon``

    NOTE(review): with this indexing the label sits ``horizon + 1`` rows after
    the last row of the window. If ``horizon=1`` is meant to be the row
    immediately following the window, the label index would need to be
    ``i + horizon - 1`` -- confirm against how "Return" is defined upstream.

    Args:
        df: DataFrame with a "Stock" column, a "Return" column and every
            column listed in ``features``.
        window: how many past rows the model sees per sample (must be >= 1).
        horizon: offset of the label row relative to ``i`` (e.g. 1, 5, 10).

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_samples, window, len(features))``
        and ``y`` has shape ``(n_samples,)``. When no stock has enough rows
        the arrays are empty but keep those shapes.

    Raises:
        ValueError: if ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")

    X, y = [], []

    # One pass over the frame instead of one boolean mask per stock.
    # sort=False keeps stocks in order of first appearance; observed=True
    # skips unused categories when "Stock" is categorical.
    for _, s in df.groupby("Stock", sort=False, observed=True):
        data = s[features].values
        target = s["Return"].values

        starts = range(window, len(s) - horizon)
        X.extend(data[i - window:i] for i in starts)
        y.extend(target[i + horizon] for i in starts)

    if not X:
        # np.array([]) would collapse to shape (0,); keep the 3-D layout so
        # downstream code can rely on X.shape[1:] even with no samples.
        return np.empty((0, window, len(features))), np.empty((0,))

    return np.array(X), np.array(y)

In [None]:
# Sequence settings shared by the train and test splits.
WINDOW, HORIZON = 30, 1

X_train, y_train = make_sequences(train_df, window=WINDOW, horizon=HORIZON)
X_test, y_test = make_sequences(test_df, window=WINDOW, horizon=HORIZON)

# Show the input and label array shapes for each split.
print(*(arr.shape for arr in (X_train, y_train)))
print(*(arr.shape for arr in (X_test, y_test)))

(144799, 30, 5) (144799,)
(640, 30, 5) (640,)
