Ethan Bartiromo Final Project Notebook
4-15-2025

Here we import all the needed libraries:

In [38]:
from statsmodels.tsa.seasonal import seasonal_decompose
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_squared_error
import xgboost as xgb
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import MinMaxScaler

Here we load in the datasets and combined them:

In [2]:
df_aapl = pd.read_csv("AAPL.csv")
df_googl = pd.read_csv("GOOGL.csv")
df_meta = pd.read_csv("META.csv")
df_nvda = pd.read_csv("NVDA.csv")
df_tsla = pd.read_csv("TSLA.csv")
df_aapl["Date"] = pd.to_datetime(df_aapl["Date"], format="%Y-%m-%d")
df_googl["Date"] = pd.to_datetime(df_googl["Date"], format="%Y-%m-%d")
df_meta["Date"] = pd.to_datetime(df_meta["Date"], format="%Y-%m-%d")
df_nvda["Date"] = pd.to_datetime(df_nvda["Date"], format="%Y-%m-%d")
df_tsla["Date"] = pd.to_datetime(df_tsla["Date"], format="%Y-%m-%d")
df_aapl["Volume"] = df_aapl["Volume"].astype(float)
df_googl["Volume"] = df_googl["Volume"].astype(float)
df_meta["Volume"] = df_meta["Volume"].astype(float)
df_nvda["Volume"] = df_nvda["Volume"].astype(float)
df_tsla["Volume"] = df_tsla["Volume"].astype(float)
df = pd.merge(df_aapl, df_googl, on="Date", suffixes=("", "_googl"))
df = pd.merge(df, df_meta, on="Date", suffixes=("", "_meta"))
df = pd.merge(df, df_nvda, on="Date", suffixes=("", "_nvda"))
df = pd.merge(df, df_tsla, on="Date", suffixes=("_aapl", "_tsla"))

!!!DEPRECATED!!!  This is where I make all of the helper functions I need in order to create and use any model easily:

In [None]:
### DO NOT RUN BLOCK DEPRECATED ###
# 
#
#
#
#
###################################
splits = [df["Date"][(len(df["Date"])*i)//20] for i in range(10,16)]
names = df.columns
y_names = ["Date"]
for val in names:
    if val[0] == "O" or val[0] == "C":
        y_names.append(val)
no_date = names[1:]
no_date_y = y_names[1:]
X = df.copy()
y = df.copy()[y_names]


pd.options.mode.chained_assignment = None  # default='warn'

def create_model(num_layers, num_neurons, drop_rate, activations, optim):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=(25,)))
    model.add(tf.keras.layers.BatchNormalization())
    for i in range(num_layers):
        model.add(tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=0)))
        model.add(tf.keras.layers.SimpleRNN(num_neurons[i], activation=activations[i]))
    if drop_rate > 0.0:
        model.add(tf.keras.layers.Dropout(rate=drop_rate))
    model.add(tf.keras.layers.Dense(100))
    model.compile(optimizer=optim, loss="mse")
    return model

def use_model(epochs=100, split=splits[0], num_layers=1, num_neurons=[10], drop_rate=0.0, activations=["relu"], optim="Adam"):
    model = create_model(num_layers, num_neurons, drop_rate, activations, optim)
    X_train = X[X["Date"] < split]
    X_train
    y_train = pd.DataFrame(columns=y.columns)
    y_train["X_date"] = pd.Series(dtype=X["Date"].dtype)
    for val in X_train["Date"]:
        i = 0
        num = 0
        while num != 10:
            if not y[y["Date"] == val + pd.DateOffset(days=i)].empty:
                y_train_new = y[y["Date"] == val + pd.DateOffset(days=i)]
                y_train_new["X_date"] = val
                y_train = pd.concat([y_train, y_train_new])
                num += 1
            i += 1
    y_train = y_train.groupby("X_date").agg(lambda x: list(x))
    X_test = X[X["Date"] >= split]
    y_test = pd.DataFrame(columns=y.columns)
    y_test["X_date"] = pd.Series(dtype=X["Date"].dtype)
    for val in X_test["Date"][:-10]:
        i = 0
        num = 0
        while num != 10:
            if not y[y["Date"] == val + pd.DateOffset(days=i)].empty:
                y_test_new = y[y["Date"] == val + pd.DateOffset(days=i)]
                y_test_new["X_date"] = val
                y_test = pd.concat([y_test, y_test_new])
                num += 1
            i += 1
    y_test = y_test.groupby("X_date").agg(lambda x: list(x))
    history = fit_mod(X_train, y_train, model, epochs)
    loss = evaluate_mod(X_test, y_test, model)
    return (model, history, loss)

def fit_mod(X_train, y_train, model, epochs):
    y_train_used = np.array(y_train[:][no_date_y].values.tolist())
    y_train_used = y_train_used.reshape((len(y_train_used), 100))
    return model.fit(X_train[no_date], y_train_used, epochs=epochs)

def predict_mod(X_test, model):
    return model.predict(X_test[no_date][:-10], batch_size=1)

def evaluate_mod(X_test, y_test, model):
    y_test_used = np.array(y_test[:][no_date_y].values.tolist())
    y_test_used = y_test_used.reshape((len(y_test_used), 100))
    return mean_squared_error(predict_mod(X_test, model), y_test_used) 


def grid_search(epochs_lst = [100], splits_lst=splits, num_layers_lst=[1], num_neurons_lst = [[10]], drop_rate_lst = [0.0], activations_lst = [["relu"]], optim_lst = ["Adam"]):
    models = []
    histories = []
    losses = []
    min_loss = float("inf")
    min_loss_index = -1
    min_loss_params = [None, None, None, None, None, None, None]
    i = 0
    for epochs in epochs_lst:
        for split in splits_lst:
            for num_layers in num_layers_lst:
                for num_neurons in num_neurons_lst:
                    if len(num_neurons) == num_layers:    
                        for activations in activations_lst:
                            if len(activations) == num_layers:
                                for drop_rate in drop_rate_lst:
                                    for optim in optim_lst:
                                        model, history, loss = use_model(epochs, split, num_layers, num_neurons, drop_rate, activations, optim)
                                        models.append(model)
                                        histories.append(history)
                                        losses.append(loss)
                                        if loss < min_loss:
                                            min_loss = loss
                                            min_loss_index = i
                                            min_loss_params = [epochs, split, num_layers, num_neurons, drop_rate, activations, optim]
                                        i += 1
    return (models[min_loss_index], min_loss_params)              

### DEPRECATED ### 


This optimizes the above data, and uses 100 previous day input data points instead of only 1:

In [None]:
splits = [df["Date"][(len(df["Date"])*i)//20] for i in range(10,16)]
names = df.columns
y_names = ["Date"]
for val in names:
    if val[0] == "O" or val[0] == "C":
        y_names.append(val)
no_date = names[1:]
no_date_y = y_names[1:]
X = df.copy()
y = df.copy()[y_names]


pd.options.mode.chained_assignment = None  # default='warn'

def create_model(num_neurons, drop_rate, activations, optim):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=(2500,)))
    model.add(tf.keras.layers.BatchNormalization())
    for i in range(min(len(num_neurons), len(activations))):
        model.add(tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=0)))
        model.add(tf.keras.layers.SimpleRNN(num_neurons[i],  activation=activations[i]))
    if drop_rate > 0.0:
        model.add(tf.keras.layers.Dropout(rate=drop_rate))
    model.add(tf.keras.layers.Dense(100))
    model.compile(optimizer=optim, loss="mse")
    return model

def use_model(epochs=100, split=splits[0], num_neurons=[10], drop_rate=0.0, activations=["relu"], optim="Adam"):
    model = create_model(num_neurons, drop_rate, activations, optim)
    X_train = pd.DataFrame(columns=X.columns)
    X_test = pd.DataFrame(columns=X.columns)
    X_test["X_date"] = pd.Series(dtype=X["Date"].dtype)
    X_train = X[:100]
    X_train["X_date"] = X["Date"][99]
    prev_val = X["Date"][99]
    for val in X["Date"][100:-10]:
        if val < split:
            X_train_new = X_train[X_train["X_date"] == prev_val][1:]
            X_train_new = pd.concat([X_train_new, X[X["Date"] == val]])
            X_train_new["X_date"] = val
            X_train = pd.concat([X_train, X_train_new])
            prev_val = val
        elif val == split:
            X_test = X_train[X_train["X_date"] == prev_val][1:]
            X_test = pd.concat([X_test, X[X["Date"] == val]])
            X_test["X_date"] = val
            prev_val = val
        else:
            X_test_new = X_test[X_test["X_date"] == prev_val][1:]
            X_test_new = pd.concat([X_test_new, X[X["Date"] == val]])
            X_test_new["X_date"] = val
            X_test = pd.concat([X_test, X_test_new])
            prev_val = val
    X_train = X_train.groupby("X_date").agg(lambda x: list(x))
    X_test = X_test.groupby("X_date").agg(lambda x: list(x))
    y_train = pd.DataFrame(columns=y.columns)
    y_train["X_date"] = pd.Series(dtype=y["Date"].dtype)
    y_test = pd.DataFrame(columns=y.columns)
    y_test["X_date"] = pd.Series(dtype=y["Date"].dtype)
    y_train = y[100:110]
    y_train["X_date"] = y["Date"][99]
    prev_val = y["Date"][99]
    for val in y["Date"][100:-10]:
        if val < split:
            y_train_new = y_train[y_train["X_date"] == prev_val][1:]
            y_train_new = pd.concat([y_train_new, y[y["Date"] == val]])
            y_train_new["X_date"] = val
            y_train = pd.concat([y_train, y_train_new])
            prev_val = val
        elif val == split:
            y_test = y_train[y_train["X_date"] == prev_val][1:]
            y_test = pd.concat([y_test, y[y["Date"] == val]])
            y_test["X_date"] = val
            prev_val = val
        else:
            y_test_new = y_test[y_test["X_date"] == prev_val][1:]
            y_test_new = pd.concat([y_test_new, y[y["Date"] == val]])
            y_test_new["X_date"] = val
            y_test = pd.concat([y_test, y_test_new])
            prev_val = val
    y_train = y_train.groupby("X_date").agg(lambda x: list(x))
    y_test = y_test.groupby("X_date").agg(lambda x: list(x))
    history = fit_mod(X_train, y_train, model, epochs)
    loss = evaluate_mod(X_test, y_test, model)
    return (model, history, loss)

def fit_mod(X_train, y_train, model, epochs):
    y_train_used = np.array(y_train[:][no_date_y].values.tolist())
    y_train_used = y_train_used.reshape((len(y_train_used), 100))
    X_train_used = np.array(X_train[:][no_date].values.tolist())
    X_train_used = X_train_used.reshape((len(X_train_used), 2500))
    return model.fit(X_train_used, y_train_used, epochs=epochs)

def predict_mod(X_test, model):
    X_test_used = np.array(X_test[:][no_date].values.tolist())
    X_test_used = X_test_used.reshape((len(X_test_used), 2500))
    return model.predict(X_test_used, batch_size=1)

def evaluate_mod(X_test, y_test, model):
    y_test_used = np.array(y_test[:][no_date_y].values.tolist())
    y_test_used = y_test_used.reshape((len(y_test_used), 100))
    return mean_squared_error(predict_mod(X_test, model), y_test_used) 


def grid_search(epochs_lst = [100], splits_lst=splits, num_neurons_lst = [[10]], drop_rate_lst = [0.0], activations_lst = [["relu"]], optim_lst = ["Adam"]):
    models = []
    histories = []
    losses = []
    min_loss = float("inf")
    min_loss_index = -1
    min_loss_params = [None, None, None, None, None, None]
    i = 0
    for epochs in epochs_lst:
        for split in splits_lst:
            for num_neurons in num_neurons_lst:  
                for activations in activations_lst:
                    if len(activations) == len(num_neurons):
                        for drop_rate in drop_rate_lst:
                            for optim in optim_lst:
                                model, history, loss = use_model(epochs, split, num_neurons, drop_rate, activations, optim)
                                models.append(model)
                                histories.append(history)
                                losses.append(loss)
                                if loss < min_loss:
                                    min_loss = loss
                                    min_loss_index = i
                                    min_loss_params = [epochs, split, num_neurons, drop_rate, activations, optim]
                                i += 1
    return (models, histories, losses, min_loss_index, min_loss_params)

Here is where I complete the grid search:

In [None]:
### DO NOT RUN!!! TAKES OVER THREE HOURS!
# values = grid_search(splits_lst=splits, num_neurons_lst=[[10], [25], [50], [100], [10, 10], [25, 25], [50, 50], [25, 25, 25]], activations_lst=[["relu"], ["relu", "relu"], ['relu', 'relu', 'relu']])


Epoch 1/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 2846.3721
Epoch 2/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 2119.8428
Epoch 3/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1211.5654
Epoch 4/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 808.3501
Epoch 5/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 692.4478
Epoch 6/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 637.2757
Epoch 7/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 627.6744
Epoch 8/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 609.7730
Epoch 9/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 585.4927
Epoch 10/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5m

The best with each split for neuron numbers for each layer: [10], [25], [50], [100], [10, 10], [25, 25], [50, 50], [25, 25, 25]:
50/50 split: Overall best for everything: [10,10], Overall second best for everything: [25, 25, 25]
55/45 split: Overall third best for everything: [25, 25, 25]
60/40 split: [25]
65/35 split: [25, 25, 25]
70/30 split: [100]
75/25 split: Overall fourth best for everything: [10]

I do not plan on running that many options at once anymore, it took over 3 hours to run the code, I will be more optimized with it later.

However, here I am just putting the information I gained from it.

I will focus on three or more layers splits, as that tends to be the mode of top values.  I will focus on different numbers of neurons.

I originally forgot to implement early stopping, which is probably why this took so long, however I also will take the train test splitting
process out of the model creation, and after completing a few more small tests just choose the best train test split from our options.

The following code is a new version to work on, with early stopping, an implicit number of epochs,
an implicit optimizer, and a random seed for repeatability:

In [None]:
splits = [df["Date"][(len(df["Date"])*i)//20] for i in range(10,16)]
names = df.columns
y_names = ["Date"]
for val in names:
    if val[0] == "O" or val[0] == "C":
        y_names.append(val)
no_date = names[1:]
no_date_y = y_names[1:]
X = df.copy()
y = df.copy()[y_names]


pd.options.mode.chained_assignment = None  # default='warn'

def create_model(num_neurons, drop_rate, activations):
    tf.random.set_seed(42)
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=(2500,)))
    model.add(tf.keras.layers.BatchNormalization())
    for i in range(min(len(num_neurons), len(activations))):
        model.add(tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=0)))
        model.add(tf.keras.layers.SimpleRNN(num_neurons[i],  activation=activations[i]))
    if drop_rate > 0.0:
        model.add(tf.keras.layers.Dropout(rate=drop_rate))
    model.add(tf.keras.layers.Dense(100))
    model.compile(optimizer="Adam", loss="mse")
    return model

def use_model(split=splits[0], num_neurons=[10], drop_rate=0.0, activations=["relu"]):
    model = create_model(num_neurons, drop_rate, activations)
    X_train = pd.DataFrame(columns=X.columns)
    X_test = pd.DataFrame(columns=X.columns)
    X_test["X_date"] = pd.Series(dtype=X["Date"].dtype)
    X_train = X[:100]
    X_train["X_date"] = X["Date"][99]
    prev_val = X["Date"][99]
    for val in X["Date"][100:-10]:
        if val < split:
            X_train_new = X_train[X_train["X_date"] == prev_val][1:]
            X_train_new = pd.concat([X_train_new, X[X["Date"] == val]])
            X_train_new["X_date"] = val
            X_train = pd.concat([X_train, X_train_new])
            prev_val = val
        elif val == split:
            X_test = X_train[X_train["X_date"] == prev_val][1:]
            X_test = pd.concat([X_test, X[X["Date"] == val]])
            X_test["X_date"] = val
            prev_val = val
        else:
            X_test_new = X_test[X_test["X_date"] == prev_val][1:]
            X_test_new = pd.concat([X_test_new, X[X["Date"] == val]])
            X_test_new["X_date"] = val
            X_test = pd.concat([X_test, X_test_new])
            prev_val = val
    X_train = X_train.groupby("X_date").agg(lambda x: list(x))
    X_test = X_test.groupby("X_date").agg(lambda x: list(x))
    y_train = pd.DataFrame(columns=y.columns)
    y_train["X_date"] = pd.Series(dtype=y["Date"].dtype)
    y_test = pd.DataFrame(columns=y.columns)
    y_test["X_date"] = pd.Series(dtype=y["Date"].dtype)
    y_train = y[100:110]
    y_train["X_date"] = y["Date"][99]
    prev_val = y["Date"][99]
    for val in y["Date"][100:-10]:
        if val < split:
            y_train_new = y_train[y_train["X_date"] == prev_val][1:]
            y_train_new = pd.concat([y_train_new, y[y["Date"] == val]])
            y_train_new["X_date"] = val
            y_train = pd.concat([y_train, y_train_new])
            prev_val = val
        elif val == split:
            y_test = y_train[y_train["X_date"] == prev_val][1:]
            y_test = pd.concat([y_test, y[y["Date"] == val]])
            y_test["X_date"] = val
            prev_val = val
        else:
            y_test_new = y_test[y_test["X_date"] == prev_val][1:]
            y_test_new = pd.concat([y_test_new, y[y["Date"] == val]])
            y_test_new["X_date"] = val
            y_test = pd.concat([y_test, y_test_new])
            prev_val = val
    y_train = y_train.groupby("X_date").agg(lambda x: list(x))
    y_test = y_test.groupby("X_date").agg(lambda x: list(x))
    history = fit_mod(X_train, y_train, model)
    loss = evaluate_mod(X_test, y_test, model)
    return (model, history, loss)

def fit_mod(X_train, y_train, model):
    y_train_used = np.array(y_train[:][no_date_y].values.tolist())
    y_train_used = y_train_used.reshape((len(y_train_used), 100))
    X_train_used = np.array(X_train[:][no_date].values.tolist())
    X_train_used = X_train_used.reshape((len(X_train_used), 2500))
    return model.fit(X_train_used, y_train_used, epochs=100, callbacks=[tf.keras.callbacks.EarlyStopping(monitor="loss", patience=5)])

def predict_mod(X_test, model):
    X_test_used = np.array(X_test[:][no_date].values.tolist())
    X_test_used = X_test_used.reshape((len(X_test_used), 2500))
    return model.predict(X_test_used, batch_size=1)

def evaluate_mod(X_test, y_test, model):
    y_test_used = np.array(y_test[:][no_date_y].values.tolist())
    y_test_used = y_test_used.reshape((len(y_test_used), 100))
    return mean_squared_error(predict_mod(X_test, model), y_test_used) 


def grid_search(splits_lst=splits, num_neurons_lst = [[10]], drop_rate_lst = [0.0], activations_lst = [["relu"]]):
    models = []
    histories = []
    losses = []
    min_loss = float("inf")
    min_loss_index = -1
    min_loss_params = [None, None, None, None]
    i = 0
    for split in splits_lst:
        for num_neurons in num_neurons_lst:  
            for activations in activations_lst:
                if len(activations) == len(num_neurons):
                    for drop_rate in drop_rate_lst:
                            model, history, loss = use_model(split, num_neurons, drop_rate, activations)
                            models.append(model)
                            histories.append(history)
                            losses.append(loss)
                            if loss < min_loss:
                                min_loss = loss
                                min_loss_index = i
                                min_loss_params = [split, num_neurons, drop_rate, activations]
                            i += 1
    return (models, histories, losses, min_loss_index, min_loss_params)

Here is time to start to try out different numbers of neurons for three and four layers:

In [107]:
values_2 = grid_search([splits[0], splits[-1]], num_neurons_lst=[[25, 25, 25], [50, 50, 50], [100, 100, 100], [50, 50, 50, 50]], activations_lst=[["relu", "relu", "relu"], ["relu", "relu", "relu", "relu"]])

Epoch 1/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - loss: 2843.9304
Epoch 2/100
[1m26/47[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 6ms/step - loss: 1477.0122

  current = self.get_monitor_value(logs)


[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1341.4680
Epoch 3/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 639.8492
Epoch 4/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 650.8766
Epoch 5/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 608.3795
Epoch 6/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 584.3402
Epoch 7/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 566.5021
Epoch 8/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 560.5222
Epoch 9/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 557.1893
Epoch 10/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 555.2924
Epoch 11/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss

  current = self.get_monitor_value(logs)


[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 736.2753
Epoch 3/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 595.7421
Epoch 4/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 589.4708
Epoch 5/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 576.0062
Epoch 6/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 570.2522
Epoch 7/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 574.3104
Epoch 8/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 603.7280
Epoch 9/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 620.1796
Epoch 10/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 610.8008
Epoch 11/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/st

  current = self.get_monitor_value(logs)


[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 664.0690
Epoch 3/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 590.0923
Epoch 4/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 575.4103
Epoch 5/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 567.6309
Epoch 6/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 573.4537
Epoch 7/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 686.0989
Epoch 8/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 640.3965
Epoch 9/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 636.5583
Epoch 10/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 596.2815
Epoch 11/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/st

  current = self.get_monitor_value(logs)


[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 881.3040
Epoch 3/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 591.4207
Epoch 4/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 571.3105
Epoch 5/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 569.5321
Epoch 6/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 563.7057
Epoch 7/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 558.4181
Epoch 8/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 562.0679
Epoch 9/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 564.2046
Epoch 10/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 568.4003
Epoch 11/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/st

  current = self.get_monitor_value(logs)


[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 3315.0029
Epoch 3/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 3125.2361
Epoch 4/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 3088.3438
Epoch 5/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 3075.1782
Epoch 6/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 3068.9070
Epoch 7/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 3067.1323
Epoch 8/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 3061.2454
Epoch 9/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 3070.9392
Epoch 10/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 3101.8364
Epoch 11/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/ste

  current = self.get_monitor_value(logs)


[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3292.6401
Epoch 3/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3176.6970
Epoch 4/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 3127.2261 
Epoch 5/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3131.7000
Epoch 6/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3105.4363
Epoch 7/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3075.6660
Epoch 8/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3083.1716
Epoch 9/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3072.2766
Epoch 10/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3084.5786
Epoch 11/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0

  current = self.get_monitor_value(logs)


[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 3258.2344
Epoch 3/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 3237.1870
Epoch 4/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 3159.4856
Epoch 5/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 3154.6768
Epoch 6/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 3095.3140
Epoch 7/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 3096.2061
Epoch 8/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 3102.4641
Epoch 9/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 3118.5017
Epoch 10/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 3288.5630
Epoch 11/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0

  current = self.get_monitor_value(logs)


[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3195.2561
Epoch 3/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3113.6541
Epoch 4/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3137.4795
Epoch 5/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3216.5808
Epoch 6/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3157.9180
Epoch 7/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3130.3770
Epoch 8/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3105.9688
Epoch 9/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3131.9912
Epoch 10/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3086.9414
Epoch 11/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0

In [122]:
#print(use_model(split=splits[0], num_neurons=[10,10], activations=["relu", "relu"])[2])
print(values_2[2][np.argmin(values_2[2][:7])])

15302.049917464645


The same best model parameters as before gave us a mean squared error of ~10950.77 with a random seed of 42
The original mean squared error for the original model is ~9219.05
The mean squared error for our model of 4 layers of 50 neurons on the 75/25 split is ~13300.97, which is best in this case, with a random seed of 42
The second best is our model of 3 layers of 100 neurons on the 75/25 split has mean squared error of ~15302.05
The third best is very bad, but is again on the 75/25 split.

The following code isolates a single split as the best, and performs it once for all models:

In [3]:
pd.options.mode.chained_assignment = None  # default='warn'
split = df["Date"][(len(df["Date"])*15)//20]
names = df.columns
y_names = ["Date"]
for val in names:
    if val[0] == "O" or val[0] == "C":
        y_names.append(val)
no_date = names[1:]
no_date_y = y_names[1:]
X = df.copy()
y = df.copy()[y_names]
X_train = pd.DataFrame(columns=X.columns)
X_test = pd.DataFrame(columns=X.columns)
X_test["X_date"] = pd.Series(dtype=X["Date"].dtype)
X_train = X[:100]
X_train["X_date"] = X["Date"][99]
prev_val = X["Date"][99]
for val in X["Date"][100:-10]:
    if val < split:
        X_train_new = X_train[X_train["X_date"] == prev_val][1:]
        X_train_new = pd.concat([X_train_new, X[X["Date"] == val]])
        X_train_new["X_date"] = val
        X_train = pd.concat([X_train, X_train_new])
        prev_val = val
    elif val == split:
        X_test = X_train[X_train["X_date"] == prev_val][1:]
        X_test = pd.concat([X_test, X[X["Date"] == val]])
        X_test["X_date"] = val
        prev_val = val
    else:
        X_test_new = X_test[X_test["X_date"] == prev_val][1:]
        X_test_new = pd.concat([X_test_new, X[X["Date"] == val]])
        X_test_new["X_date"] = val
        X_test = pd.concat([X_test, X_test_new])
        prev_val = val
X_train = X_train.groupby("X_date").agg(lambda x: list(x))
X_test = X_test.groupby("X_date").agg(lambda x: list(x))
y_train = pd.DataFrame(columns=y.columns)
y_train["X_date"] = pd.Series(dtype=y["Date"].dtype)
y_test = pd.DataFrame(columns=y.columns)
y_test["X_date"] = pd.Series(dtype=y["Date"].dtype)
y_train = y[100:110]
y_train["X_date"] = y["Date"][99]
prev_val = y["Date"][99]
for val in y["Date"][100:-10]:
    if val < split:
        y_train_new = y_train[y_train["X_date"] == prev_val][1:]
        y_train_new = pd.concat([y_train_new, y[y["Date"] == val]])
        y_train_new["X_date"] = val
        y_train = pd.concat([y_train, y_train_new])
        prev_val = val
    elif val == split:
        y_test = y_train[y_train["X_date"] == prev_val][1:]
        y_test = pd.concat([y_test, y[y["Date"] == val]])
        y_test["X_date"] = val
        prev_val = val
    else:
        y_test_new = y_test[y_test["X_date"] == prev_val][1:]
        y_test_new = pd.concat([y_test_new, y[y["Date"] == val]])
        y_test_new["X_date"] = val
        y_test = pd.concat([y_test, y_test_new])
        prev_val = val
y_train = y_train.groupby("X_date").agg(lambda x: list(x))
y_test = y_test.groupby("X_date").agg(lambda x: list(x))

This adds a random seed cv aspect to the grid search with no split in order to get multiple models with the same parameters,
and I changed the patience of early stopping to 10 instead of 5, because it was always ending way too early.

In [4]:
def create_model(seed, num_neurons, drop_rate, activations):
    tf.random.set_seed(seed)
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=(2500,)))
    model.add(tf.keras.layers.BatchNormalization())
    for i in range(min(len(num_neurons), len(activations))):
        model.add(tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=0)))
        model.add(tf.keras.layers.SimpleRNN(num_neurons[i],  activation=activations[i]))
    if drop_rate > 0.0:
        model.add(tf.keras.layers.Dropout(rate=drop_rate))
    model.add(tf.keras.layers.Dense(100))
    model.compile(optimizer="Adam", loss="mse")
    return model

def use_model(seed, num_neurons=[10], drop_rate=0.0, activations=["relu"]):
    model = create_model(seed, num_neurons, drop_rate, activations)
    history = fit_mod(X_train, y_train, model)
    loss = evaluate_mod(X_test, y_test, model)
    return (model, history, loss)

def fit_mod(X_train, y_train, model):
    y_train_used = np.array(y_train[:][no_date_y].values.tolist())
    y_train_used = y_train_used.reshape((len(y_train_used), 100))
    X_train_used = np.array(X_train[:][no_date].values.tolist())
    X_train_used = X_train_used.reshape((len(X_train_used), 2500))
    return model.fit(X_train_used, y_train_used, epochs=100, callbacks=[tf.keras.callbacks.EarlyStopping(monitor="loss", patience=15)])

def predict_mod(X_test, model):
    X_test_used = np.array(X_test[:][no_date].values.tolist())
    X_test_used = X_test_used.reshape((len(X_test_used), 2500))
    return model.predict(X_test_used, batch_size=1)

def evaluate_mod(X_test, y_test, model):
    y_test_used = np.array(y_test[:][no_date_y].values.tolist())
    y_test_used = y_test_used.reshape((len(y_test_used), 100))
    return mean_squared_error(predict_mod(X_test, model), y_test_used) 


def grid_search(num_seeds=2, num_neurons_lst = [[10]], drop_rate_lst = [0.0], activations_lst = [["relu"]]):
    models = []
    histories = []
    losses = []
    min_loss = float("inf")
    min_loss_index = -1
    min_loss_params = [None, None, None, None]
    i = 0
    np.random.seed = 42
    for i in range(num_seeds):
        seed = np.random.randint(1,100)
        for num_neurons in num_neurons_lst:  
            for activations in activations_lst:
                if len(activations) == len(num_neurons):
                    for drop_rate in drop_rate_lst:
                        model, history, loss = use_model(seed, num_neurons, drop_rate, activations)
                        models.append(model)
                        histories.append(history)
                        losses.append(loss)
                        if loss < min_loss:
                            min_loss = loss
                            min_loss_index = i
                            min_loss_params = [seed, num_neurons, drop_rate, activations]
                        i += 1
    return (models, histories, losses, min_loss_index, min_loss_params)

In [137]:
values_3 = grid_search(num_neurons_lst=[[50, 50, 50, 50], [100,100,100,100], [150,150,150,150], [200, 100, 50, 25]], activations_lst=[["relu", "relu", "relu", "relu"]])

Epoch 1/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - loss: 7501.8672
Epoch 2/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3378.2500
Epoch 3/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3195.0527
Epoch 4/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3124.6042
Epoch 5/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3101.6387
Epoch 6/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3085.1560
Epoch 7/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3160.5239
Epoch 8/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 3324.2212
Epoch 9/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 3456.6240
Epoch 10/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

This code ran much much faster for 8 models it only took less than 6 minutes, however, none of the models are that great.

The best is:
With a seed of 61: ~11278.00 MSE for [200, 100, 50, 25]
Second best is: ~14436.42 MSE for [100, 100, 100, 100]
Third best is: ~16720.52 MSE for [50, 50, 50, 50]
Other seed was bad for all models

In [151]:
values_4 = grid_search(num_seeds=4, num_neurons_lst=[[500, 250, 125], [200, 100, 50, 25], [400, 200, 100, 50, 25]], activations_lst=[["relu", "relu", "relu"], ["relu", "relu", "relu", "relu"], ["relu", "relu", "relu", "relu", "relu"]])

Epoch 1/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 106ms/step - loss: 6941.6450
Epoch 2/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 105ms/step - loss: 4034.9634
Epoch 3/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 106ms/step - loss: 3258.5430
Epoch 4/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 106ms/step - loss: 3212.2524
Epoch 5/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 117ms/step - loss: 3129.9316
Epoch 6/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 109ms/step - loss: 3115.8821
Epoch 7/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 106ms/step - loss: 3129.9736
Epoch 8/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 105ms/step - loss: 3103.4360
Epoch 9/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 105ms/step - loss: 2987.0688
Epoch 10/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [169]:
print(values_4[3])
print(values_4[2])

5
[24409.24967065799, 14837.110409115394, 14235.29810093636, 13525.622497113407, 15525.364003277167, 13691.684386206536, 12137.749482639187, 11535.195215788997, 21736.483894325876, 15370.682958623998, 10513.745198144925, 10144.52878687827]


To be completely honest, I believe my PC might just be burnt out with this last answer.  I've been running it at full power for around 12 or more hours, so I think it made it mistake, since clearly the index which minimizes the loss is 11, not 5.

In [5]:
values_5 = grid_search(num_seeds=6, num_neurons_lst=[[400, 200, 100, 50, 25]], activations_lst=[["relu", "relu", "relu", "relu", "relu"]])


Epoch 1/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 96ms/step - loss: 7716.7222
Epoch 2/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 96ms/step - loss: 4204.3682
Epoch 3/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 97ms/step - loss: 3304.2146
Epoch 4/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 96ms/step - loss: 3436.5271
Epoch 5/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 95ms/step - loss: 3144.9871
Epoch 6/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 96ms/step - loss: 3073.5613
Epoch 7/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 94ms/step - loss: 3062.6086
Epoch 8/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 95ms/step - loss: 3122.4783
Epoch 9/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 95ms/step - loss: 3195.3350
Epoch 10/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [8]:
print(values_5[2])
print(values_5[3])
print(values_5[4])

[11887.991889803914, 10173.96339245577, 18634.997095683466, 9352.203088718588, 9795.990391562054, 12038.151420980897]
3
[72, [400, 200, 100, 50, 25], 0.0, ['relu', 'relu', 'relu', 'relu', 'relu']]


With the previous test I was able to find our best model yet with a MSE of ~9352.20, with a seed of 72

In the following test I will check various dropout rates and various uses of sigmoid and relu combined:

In [10]:
def grid_search(num_neurons_lst = [[10]], drop_rate_lst = [0.0], activations_lst = [["relu"]]):
    models = []
    histories = []
    losses = []
    min_loss = float("inf")
    min_loss_index = -1
    min_loss_params = [None, None, None, None]
    i = 0
    for num_neurons in num_neurons_lst:  
        for activations in activations_lst:
            if len(activations) == len(num_neurons):
                for drop_rate in drop_rate_lst:
                    model, history, loss = use_model(72, num_neurons, drop_rate, activations)
                    models.append(model)
                    histories.append(history)
                    losses.append(loss)
                    if loss < min_loss:
                        min_loss = loss
                        min_loss_index = i
                        min_loss_params = [72, num_neurons, drop_rate, activations]
                    i += 1
    return (models, histories, losses, min_loss_index, min_loss_params)

In [11]:
values_6 = grid_search(drop_rate_lst=[0.0, 0.1, 0.25], num_neurons_lst=[[400, 200, 100, 50, 25]], activations_lst=[["sigmoid", "sigmoid", "sigmoid", "sigmoid", "relu"], ["relu", "relu", "relu", "sigmoid", "sigmoid"] , ["relu", "relu", "relu", "relu", "relu"]])

Epoch 1/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 81ms/step - loss: 8723.0430
Epoch 2/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 81ms/step - loss: 7799.5796
Epoch 3/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 81ms/step - loss: 5960.2964
Epoch 4/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 79ms/step - loss: 4465.0869
Epoch 5/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 80ms/step - loss: 3658.9785
Epoch 6/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 80ms/step - loss: 3262.9622
Epoch 7/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 80ms/step - loss: 3128.9290
Epoch 8/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 80ms/step - loss: 3100.2285
Epoch 9/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 80ms/step - loss: 3096.2664
Epoch 10/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

In [12]:
print(values_6[2])

[25506.527838840513, 25713.392327821537, 25662.77817955751, 26166.36536442484, 26407.98987918284, 27013.12964934643, 10017.893357509696, 12469.129361099061, 20330.3110486283]


In this previous test I was able to see that even with the same random seed, the model does not work the same.  The same random seed and the same parameters before gave us ~9352.20 MSE on test set, this time it gave us ~10017.89 MSE.  So I will hopefully be able to export the original model to a file.

Also, using sigmoid at all made the models perform much much worse, and also using any droppout made the models much much worse, so it seems like I'm sticking with the original model.

Next I plan to implement XG boosting to test it against the RNN model.


In [13]:
values_5[0][3].save("my_model_1.keras")

In [None]:
def create_regressor(n_estimators=10, max_depth=15, eta=0.1):
    return MultiOutputRegressor(xgb.XGBRegressor(objective="reg:squarederror", n_estimators=n_estimators, max_depth=max_depth, eta=eta))

def fit_regress(X_train, y_train, model):
    y_train_used = np.array(y_train[:][no_date_y].values.tolist())
    y_train_used = y_train_used.reshape((len(y_train_used), 100))
    X_train_used = np.array(X_train[:][no_date].values.tolist())
    X_train_used = X_train_used.reshape((len(X_train_used), 2500))
    return model.fit(X_train_used, y_train_used)

def predict_regress(X_test, model):
    X_test_used = np.array(X_test[:][no_date].values.tolist())
    X_test_used = X_test_used.reshape((len(X_test_used), 2500))
    return model.predict(X_test_used)

def evaluate_regress(X_test, y_test, model):
    y_test_used = np.array(y_test[:][no_date_y].values.tolist())
    y_test_used = y_test_used.reshape((len(y_test_used), 100))
    return mean_squared_error(predict_regress(X_test, model), y_test_used) 

def regressor_grid(n_estimators_lst=[1000], max_depth_lst=[15], eta_lst=[0.1]):
    models = []
    histories = []
    losses = []
    min_loss = float("inf")
    min_loss_index = -1
    min_loss_params = [None, None, None]
    i = 0
    for num in n_estimators_lst:
        for depth in max_depth_lst:
            for eta in eta_lst:
                models.append(create_regressor(num, depth, eta))
                histories.append(fit_regress(X_train, y_train, models[-1]))
                loss = evaluate_regress(X_test, y_test, models[-1])
                losses.append(loss)
                if loss < min_loss:
                    min_loss = loss
                    min_loss_index = i
                    min_loss_params = [num, depth, eta]
                i += 1
    return (models, histories, losses, min_loss_index, min_loss_params)

I realized while running this on a single XG boost model that it takes an extremely long time to run.  My cpu was having trouble, so I went into google Colab and ran the code, first for 100 estimators, then when that took too long as well, I just ran it for 1 estimator.  From what it looks like, it takes apprioximately 3 or so minutes to run for a single estimator with maximum depth of 10, so it will take quite a while to run, but I plan to test it with 3, 5, and 10 estimators, and maximum depths of 10, 15, and 25, and also with the eta of 0.1, 0.3, and 0.05.  This will take an extremely long time to run, so I'll do only a few at a time and get rid of lower performing parameters as I continue.  I will also start using Google Colab to run my code, instead of my cpu, and then I will export and import the model into this document, rather than running it here. 

In [36]:
#values_7 = regressor_grid([3, 5, 10], [10, 15, 25], [0.1, 0.3, 0.05])

I first ran a single xgboost with parameters 10, 15, and 0.1 respectively, which had the best error so far with ~6971.42 MSE, however then I ran the above code on Google Colab for a grid search and it took 4 hours and 48 minutes to run, and it found the best model by far so far with ~2878.04 MSE.  I will export that from colab and import it here, HOWEVER, I plan to adjust the code by standardizing the y variables in order to optimize performance, and instead of being approximately $53 off of each day open and close for each stock, I want to use a minmax scalar for each stock from 0 to 1, based solely on their stock minimum and maximum overall, then fit the thing total, so for example if the minimum value of the aapl stock is 1 cent, the maximum value is $800, and for tsla the minimum is $50 and maximum is $750, then we transform 0.01 to approximately 0, and 800 to approximately 1 for apple, and then similarly 50 and 750 to 0 and 1, and basically fit each variable on it's own.  That way if we have a mse of approximately 0.0004 (completely arbitray example) then it would only be off by 2% rather than being off by $53, which is a much bigger deal for one stock than the other.

The learning rate of 0.3 was by far the best in all cases, which is surprising, the model did not care how much depth really if there was 10 regressors, it was almost the same, with the MSE's between 10, 15, and 25 with 0.3 learning rate all within 0.03 of each other.

And now after saving the models, Google Colab has deleted the files, so I am dying inside, since it took over 4 hours and 48 minutes to complete the grid search.  Thankfully, I wrote here which parameters I used though.  So I am just recreating those three models, but they might not be as good as before.

In [None]:
"""pd.options.mode.chained_assignment = None  # default='warn'
split = df["Date"][(len(df["Date"])*15)//20]

names = df.columns
y_names = ["Date"]
for val in names:
    if val[0] == "O" or val[0] == "C":
        y_names.append(val)
no_date = names[1:]
no_date_y = y_names[1:]


each_names = df_aapl.columns
y_each_names = ["Date"]
for val in each_names:
    if val[0] == "O" or val[0] == "C":
        y_each_names.append(val)
no_date_each = each_names[1:]
no_date_y_each = y_each_names[1:]
X_aapl = df_aapl.copy()
X_googl = df_googl.copy()
X_meta = df_meta.copy()
X_nvda = df_nvda.copy()
X_tsla = df_tsla.copy()
y_aapl = df_aapl.copy()[y_each_names]
y_googl = df_googl.copy()[y_each_names]
y_meta = df_meta.copy()[y_each_names]
y_nvda = df_nvda.copy()[y_each_names]
y_tsla = df_tsla.copy()[y_each_names]
dfs = [X_aapl, X_googl, X_meta, X_nvda, X_tsla, y_aapl, y_googl, y_meta, y_nvda, y_tsla]
suffix = ["", "googl", "meta", "nvda", "tsla", "aapl"]
min_max = []


for i in range(len(dfs)):
    min_max.append(MinMaxScaler())
    if i < 5:
        min_max[-1].fit_transform(dfs[i][no_date_each])
        X_nvda
    else:
        min_max[-1].fit_transform(dfs[i][no_date_y_each])


X_scaled = None
y_scaled = None


for i in range(len(dfs) - 2):
    if i < 4:
        X_val_scaled = pd.concat([pd.DataFrame(min_max[i].transform(dfs[i][no_date_each])), dfs[i]["Date"]])
        X_val_scaled_2 = pd.concat([pd.DataFrame(min_max[i + 1].transform(dfs[i + 1][no_date_each])), dfs[i + 1]["Date"]])
        if i < 3:
            X_scaled = pd.merge(X_val_scaled, X_val_scaled_2, on="Date", suffixes=(suffix[0], suffix[i + 1]))
        else:
            X_scaled = pd.merge(X_val_scaled, X_val_scaled_2, on="Date", suffixes=(suffix[i + 1], suffix[i]))
    else:
        y_val_scaled = pd.concat([pd.DataFrame(min_max[i + 1].transform(dfs[i + 1][no_date_y_each])), dfs[i + 1]["Date"]])
        y_val_scaled_2 = pd.concat([pd.DataFrame(min_max[i + 2].transform(dfs[i + 2][no_date_y_each])), dfs[i + 2]["Date"]])
        if i < 3:
            y_scaled = pd.merge(y_val_scaled, y_val_scaled_2, on="Date", suffixes=(suffix[0], suffix[i - 3]))
        else:
            y_scaled = pd.merge(y_val_scaled, y_val_scaled_2, on="Date", suffixes=(suffix[i - 3], suffix[i - 4]))


X_scaled_train = pd.DataFrame(columns=X_scaled.columns)
X_scaled_test = pd.DataFrame(columns=X_scaled.columns)
X_scaled_test["X_date"] = pd.Series(dtype=X_scaled["Date"].dtype)
X_scaled_train = X_scaled[:100]
X_scaled_train["X_date"] = X_scaled["Date"][99]
prev_val = X_scaled["Date"][99]
for val in X_scaled["Date"][100:-10]:
    if val < split:
        X_train_new = X_scaled_train[X_scaled_train["X_date"] == prev_val][1:]
        X_train_new = pd.concat([X_train_new, X_scaled[X_scaled["Date"] == val]])
        X_train_new["X_date"] = val
        X_scaled_train = pd.concat([X_scaled_train, X_train_new])
        prev_val = val
    elif val == split:
        X_scaled_test = X_scaled_train[X_scaled_train["X_date"] == prev_val][1:]
        X_scaled_test = pd.concat([X_scaled_test, X_scaled[X_scaled["Date"] == val]])
        X_scaled_test["X_date"] = val
        prev_val = val
    else:
        X_test_new = X_scaled_test[X_scaled_test["X_date"] == prev_val][1:]
        X_test_new = pd.concat([X_test_new, X[X["Date"] == val]])
        X_test_new["X_date"] = val
        X_scaled_test = pd.concat([X_scaled_test, X_test_new])
        prev_val = val
X_scaled_train = X_scaled_train.groupby("X_date").agg(lambda x: list(x))
X_scaled_test = X_scaled_test.groupby("X_date").agg(lambda x: list(x))
y_scaled_train = pd.DataFrame(columns=y_scaled.columns)
y_scaled_train["X_date"] = pd.Series(dtype=y_scaled["Date"].dtype)
y_scaled_test = pd.DataFrame(columns=y_scaled.columns)
y_scaled_test["X_date"] = pd.Series(dtype=y_scaled["Date"].dtype)
y_scaled_train = y_scaled[100:110]
y_scaled_train["X_date"] = y_scaled["Date"][99]
prev_val = y_scaled["Date"][99]
for val in y_scaled["Date"][100:-10]:
    if val < split:
        y_train_new = y_scaled_train[y_scaled_train["X_date"] == prev_val][1:]
        y_train_new = pd.concat([y_train_new, y_scaled[y_scaled["Date"] == val]])
        y_train_new["X_date"] = val
        y_scaled_train = pd.concat([y_scaled_train, y_train_new])
        prev_val = val
    elif val == split:
        y_scaled_test = y_scaled_train[y_scaled_train["X_date"] == prev_val][1:]
        y_scaled_test = pd.concat([y_scaled_test, y_scaled[y_scaled["Date"] == val]])
        y_scaled_test["X_date"] = val
        prev_val = val
    else:
        y_test_new = y_scaled_test[y_scaled_test["X_date"] == prev_val][1:]
        y_test_new = pd.concat([y_test_new, y_scaled[y_scaled["Date"] == val]])
        y_test_new["X_date"] = val
        y_scaled_test = pd.concat([y_scaled_test, y_test_new])
        prev_val = val
y_scaled_train = y_scaled_train.groupby("X_date").agg(lambda x: list(x))
y_scaled_test = y_scaled_test.groupby("X_date").agg(lambda x: list(x))
"""

KeyboardInterrupt: 

In [53]:
X_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()

y_train_used = np.array(y_train[:][no_date_y].values.tolist())
y_train_used = y_train_used.reshape((len(y_train_used), 100))
X_train_used = np.array(X_train[:][no_date].values.tolist())
X_train_used = X_train_used.reshape((len(X_train_used), 2500))
y_test_used = np.array(y_test[:][no_date_y].values.tolist())
y_test_used = y_test_used.reshape((len(y_test_used), 100))
X_test_used = np.array(X_test[:][no_date].values.tolist())
X_test_used = X_test_used.reshape((len(X_test_used), 2500))

X_scaler.fit_transform(X_train_used)
y_scaler.fit_transform(y_train_used)

X_scaled = X_scaler.transform(X_train_used)
y_scaled = y_scaler.transform(y_train_used)

X_test_scaled = X_scaler.transform(X_test_used)
y_test_scaled = y_scaler.transform(y_test_used)

def fit_regress_scaled(X_train_scaled, y_train_scaled, model):
    return model.fit(X_train_scaled, y_train_scaled)

def predict_regress_scaled(X_test_scaled, model):
    return model.predict(X_test_scaled)

def evaluate_regress_scaled(X_test_scaled, y_test_scaled, model):
    return mean_squared_error(predict_regress_scaled(X_test_scaled, model), y_test_scaled) 


def regressor_grid_scaled(n_estimators_lst=[1000], max_depth_lst=[15], eta_lst=[0.1]):
    models = []
    histories = []
    losses = []
    min_loss = float("inf")
    min_loss_index = -1
    min_loss_params = [None, None, None]
    i = 0
    for num in n_estimators_lst:
        for depth in max_depth_lst:
            for eta in eta_lst:
                models.append(create_regressor(num, depth, eta))
                histories.append(fit_regress_scaled(X_scaled, y_scaled, models[-1]))
                loss = evaluate_regress_scaled(X_test_scaled, y_test_scaled, models[-1])
                losses.append(loss)
                if loss < min_loss:
                    min_loss = loss
                    min_loss_index = i
                    min_loss_params = [num, depth, eta]
                i += 1
                print(i)
    return (models, histories, losses, min_loss_index, min_loss_params)

def inverse_scaler_evaluate(model, X_test_scaled, y_test):
    return mean_squared_error(y_scaler.inverse_transform(predict_regress_scaled(X_test_scaled, model)), y_test_used)

In [54]:
values_8 = regressor_grid_scaled([10, 15], [25, 30], eta_lst=[0.3, 0.5])

Exception ignored on calling ctypes callback function: <bound method DataIter._next_wrapper of <xgboost.data.SingleBatchInternalIter object at 0x000001696EE06F60>>
Traceback (most recent call last):
  File "C:\Users\barti\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\xgboost\core.py", line 585, in _next_wrapper
    def _next_wrapper(self, this: None) -> int:  # pylint: disable=unused-argument

KeyboardInterrupt: 


: 