<a href="https://colab.research.google.com/github/MPrazzoli/AI_driven_investment_strategy/blob/main/LSTM_single_stock_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!python --version

Python 3.7.10


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import csv
from datetime import date, timedelta
import tensorflow as tf

# multivariate lstm example
from numpy import array
from numpy import hstack
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense

In [4]:
class CorrelationClass(object):
    # Initialization of the CorrelationClass object with the ticker symbol
    def __init__(self, ticker):
        self.principal_ticker = ticker
        self.correlated_tickers_for_open_prediction = None # we compute the correlation between open and all other stock close/adjclose price
        self.correlated_tickers_for_close_prediction = None # we compute the correlation between close/adjclose and all other stock open price

In [5]:
# split a multivariate sequence into samples
def split_sequences(sequences, n_steps):
  X, y = list(), list()
  for i in range(len(sequences)):
    # find the end of this pattern
    end_ix = i + n_steps
    # check if we are beyond the dataset
    if end_ix > len(sequences):
        break
    # gather input and output parts of the pattern
    seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
    X.append(seq_x)
    y.append(seq_y)
  return array(X), array(y)

In [6]:
open_df = pd.read_csv('/content/drive/MyDrive/_8_0_exported_dataframe/open', index_col='date')
adjclose_df = pd.read_csv('/content/drive/MyDrive/_8_0_exported_dataframe/adjclose', index_col='date')

In [7]:
ticker_list = open_df.columns

In [8]:
correlation_object_dictionary = {'{0}'.format(ticker): CorrelationClass(ticker=ticker) for ticker in ticker_list}

In [9]:
with open('/content/drive/MyDrive/_8_1_correlation_array/open_corr_prediction', 'r', newline='') as f:
    reader = csv.reader(f)
    for i, row in enumerate(reader):
      correlation_object_dictionary['{0}'.format(ticker_list[i])].correlated_tickers_for_open_prediction = row

In [10]:
with open('/content/drive/MyDrive/_8_1_correlation_array/close_corr_prediction', 'r', newline='') as f:
    reader = csv.reader(f)
    for i, row in enumerate(reader):
      correlation_object_dictionary['{0}'.format(ticker_list[i])].correlated_tickers_for_close_prediction = row

In [11]:
time = np.arange(len(open_df.index))
time = np.delete(time, -1)

# parameters
split_time = round(len(open_df.index) * .80)
# choose a number of time steps
n_steps = 10 # numero di osservazioni da tenere in conto per una previsione... in questo caso 10 giorni
# choose batch size
batch_size = 32
# choose shuffle buffer size
shuffle_buffer_size = 500

In [12]:
principal = 'ABC'

# df_for_principal_and_indipendent = pd.concat([adjclose_df[correlation_object_dictionary['{0}'.format(principal)].correlated_tickers_for_open_prediction], stock_object_dictionary['{0}'.format(principal)].history['Open'].shift(-1)], axis=1)[:-1]
dataset = pd.concat([adjclose_df[correlation_object_dictionary['{0}'.format(principal)].correlated_tickers_for_open_prediction[1:]], open_df[principal].shift(-1)], axis=1)[:-1].to_numpy()
# convert into input/output
X, y = split_sequences(dataset, n_steps)
# splitting between train and validation
time_train = time[:split_time]
x_train = X[:split_time]
y_train = y[:split_time]
time_valid = time[split_time:]
x_valid = X[split_time:]
y_valid = y[split_time:]

# THE MODEL
# the dataset knows the number of features, e.g. 2 or use the second dimension of X
n_features = X.shape[2]

# define model
LSTMmodel = Sequential()
LSTMmodel.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
LSTMmodel.add(Dense(1))
LSTMmodel.compile(optimizer='adam', loss = tf.keras.losses.Huber(), metrics=["mae"])
# fit model
history = LSTMmodel.fit(X, y, epochs=200, verbose=0)

prediction trial

In [13]:
LSTMmodel.predict(X[0].reshape((1, n_steps, n_features)), verbose=0)

array([[98.72611]], dtype=float32)