In [84]:
import numpy as np
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

from tensorflow import keras
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from sklearn.model_selection import GridSearchCV
from tensorflow.keras import regularizers

import tensorflow as tf
from scikeras.wrappers import KerasRegressor

# On avge, 252 trading days a year
NUM_TRADING_DAYS = 252

# Generate random w (diff. portfolios)
NUM_PORTFOLIOS = 10000

# Our portfolio stocks
stocks = ['AAPL', 'NVDA', 'MAR', 'LMT', 'BA']

# Historical data - define START, END dates
start_date = '2010-01-01'
end_date = '2023-10-01'

In [85]:
merged = pd.read_excel("merged.xlsx", index_col=0)

In [86]:
import numpy as np
import pandas as pd

stock_dict = {}
for stock in stocks:
    new_df = merged.filter(regex=f'{stock}_')
    stock_dict[f"{stock}"] = new_df
stock_dict


{'AAPL':             AAPL_Adj Close  AAPL_Close  AAPL_Daily_Return   AAPL_High  \
 Date                                                                    
 2010-01-04        6.487534    7.643214           0.000000    7.660714   
 2010-01-05        6.498749    7.656429           0.001729    7.699643   
 2010-01-06        6.395380    7.534643          -0.015906    7.686786   
 2010-01-07        6.383554    7.520714          -0.001849    7.571429   
 2010-01-08        6.425996    7.570714           0.006649    7.571429   
 ...                    ...         ...                ...         ...   
 2023-10-13      178.850006  178.850006          -0.010293  181.929993   
 2023-10-16      178.720001  178.720001          -0.000727  179.080002   
 2023-10-17      177.149994  177.149994          -0.008785  178.419998   
 2023-10-18      175.839996  175.839996          -0.007395  177.580002   
 2023-10-19      175.460007  175.460007          -0.002161  177.839996   
 
               AAPL_Low  AAP

In [87]:
stock_names = ['AAPL_', 'NVDA_', 'MAR_', 'LMT_', 'BA_']

trunc_stock_dict = {}
for value in stock_dict.values():
    for stock in stock_names:
        value.columns = value.columns.str.replace(stock, "")
        trunc_stock_dict[stock[0:-1]] = value

vertical_concat = pd.concat([trunc_stock_dict["AAPL"], trunc_stock_dict["NVDA"], trunc_stock_dict["MAR"], trunc_stock_dict["LMT"], trunc_stock_dict["BA"]])
print(vertical_concat.columns)

Index(['Adj Close', 'Close', 'Daily_Return', 'High', 'Low', 'Lower_Bollinger',
       'MACD', 'Open', 'RSI', 'SMA', 'Sentiment_score', 'Upper_Bollinger',
       'Volatility', 'Volume'],
      dtype='object')


In [88]:
training_list = ["Daily_Return", "Adj Close", "Volume", "Sentiment_score"]
    # return training_list
trunc_df = vertical_concat.loc[:, training_list]
stock_values = vertical_concat.loc[:, training_list].values
# return stock_df

sc = MinMaxScaler(feature_range=(0,1))
stock_values = sc.fit_transform(stock_values)

X = []
y = []
length = len(stock_values)
sequence_length = 60

for i in range(length - sequence_length):
    X.append(stock_values[i:i + sequence_length])
    y.append(stock_values[i + sequence_length])

X, y = np.array(X), np.array(y)

train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

print(y_test.shape, X_test.shape)

(3462, 4) (3462, 60, 4)


In [89]:
!pip install scikeras



In [90]:
def create_lstm_model(units=50, learning_rate=0.001):
    model = Sequential()
    model.add(LSTM(units, kernel_regularizer=regularizers.l2(learning_rate), input_shape=(sequence_length, 4)))
    model.add(Dense(4))
    model.compile(optimizer='adam', loss='mean_squared_error')
    print("Creating model in function...")
    return model

lstm_model = KerasRegressor(model=create_lstm_model, learning_rate=0.001, units=50, verbose=0)

In [57]:
# Specify hyperparameter values to search
param_grid = {
    'units': [50, 100],
    'learning_rate': [0.001, 0.01],
    # from the loss curve earlier, early stopping
    # optimal number of epoch < 10, else overfit
    'epochs': [5, 10],
    'batch_size': [30, 50]
}

# Perform grid search
grid_search = GridSearchCV(estimator=lstm_model, param_grid=param_grid, cv=2, verbose=0)
print("Initialised GridSearchCV")
grid_search.fit(X_train, y_train, verbose = 0) 

# Get the best hyperparameters
best_units = grid_search.best_params_['units']
best_learning_rate = grid_search.best_params_['learning_rate']
best_epochs = grid_search.best_params_['epochs']
best_batch_size = grid_search.best_params_['batch_size']

print(best_units, best_learning_rate, best_epochs, best_batch_size)

Initialised GridSearchCV
Creating model in function...


2023-11-04 20:52:25.723266: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-04 20:52:25.841113: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-04 20:52:25.925143: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


KeyboardInterrupt: 

In [91]:
# Train the final model with the best hyperparameters

# best hyperparameters:
# best_units, best_learning_rate, best_epochs, best_batch_size
# 100, 0.001, 5, 30

best_units = 100
best_learning_rate = 0.001
best_epochs = 5
best_batch_size = 30

final_model = create_lstm_model(units=best_units, learning_rate=best_learning_rate)

Creating model in function...


In [92]:
final_model.fit(X_train,y_train, epochs=best_epochs, batch_size=best_batch_size)
# y_pred = final_model.predict(X_test)

# # only interested in daily returns
# y_test_slice = y_test[:,1] #MinMaxScaled
# y_pred_slice = y_pred[:,1] #MinMaxScaled

# # AAPL_test = sc.inverse_transform(AAPL_test)
# y_pred = sc.inverse_transform(y_pred)

Epoch 1/5


2023-11-04 22:18:56.889615: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-04 22:18:56.998099: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-04 22:18:57.070529: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x2e8d90510>

In [93]:
merged = pd.read_excel("merged.xlsx", index_col=0)

stock_dict = {}
for stock in stocks:
    new_df = merged.filter(regex=f'{stock}_')
    stock_dict[f"{stock}"] = new_df


In [94]:
def LSTM_model(key, stock_df):
    training_list = [f"{key}_Daily_Return", f"{key}_Adj Close", f"{key}_Volume", f"{key}_Sentiment_score"]
    # return training_list
    trunc_df = stock_df.loc[:, training_list]
    stock_values = stock_df.loc[:, training_list].values
    # return stock_df

    sc = MinMaxScaler(feature_range=(0,1))
    stock_values = sc.fit_transform(stock_values)

    X = []
    y = []
    length = len(stock_values)
    sequence_length = 60

    for i in range(length - sequence_length):
        X.append(stock_values[i:i + sequence_length])
        y.append(stock_values[i + sequence_length])

    X, y = np.array(X), np.array(y)
    # return(X.shape, y.shape)

    X_test = X

    y_pred = final_model.predict(X_test)
    y_pred = sc.inverse_transform(y_pred)

    return y_pred[:, 1]




In [95]:
# stock_dict
pred_dict = {}
for key in stock_dict.keys():
    stock_df = stock_dict[key]
    # print(key)
    y_pred = LSTM_model(key, stock_df)

    df2 = pd.DataFrame(index=stock_dict["AAPL"].index.copy())
    trunc = [0] * 60
    y_pred = y_pred.tolist()
    pred_dict[f"{key}"] = trunc + y_pred





2023-11-04 22:19:38.605662: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-04 22:19:38.640556: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




{'AAPL': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  12.560555458068848,
  12.208791732788086,
  12.332592010498047,
  12.741918563842773,
  12.658163070678711,
  12.770834922790527,
  12.475645065307617,
  12.4623384475708,
  12.357783317565918,
  12.224438667297363,
  12.743170738220215,
  13.200685501098633,
  13.026241302490234,
  12.653188705444336,
  11.995138168334961,
  14.46095085144043,
  15.406685829162598,
  15.808772087097168,
  15.008910179138184,
  13.46839427947998,
  13.027726173400879,
  13.777364730834961,
  12.343846321105957,
  12.961576461791992,
  11.815936088562012,
  11.41165828704834,
  9.989869117736816,
  8.380529403686523,
  11.262121200561523,
  11.746953964233398,
  12.823148727416992,
  12.540963172912

In [96]:
df = pd.DataFrame(data=pred_dict)
df.to_excel('predictions.xlsx')