# Multivariate LSTM Model Development (Local)
The purpose of this notebook is to construct a multivariate LSTM model to predict future stock prices. Feature construction and data integrity checks have already been completed on the data used to construct this model.

## Load training and testing data


In [1]:
# Import Libraries

import os

import pandas as pd
import psycopg2
from psycopg2 import sql

In [73]:
# Functions and variables to query local postgresql server

# Connection settings for the local PostgreSQL server.
# Every value can be overridden through an environment variable so that real
# credentials never have to be written into the notebook; the fallbacks are
# the local development defaults.
USER = os.environ.get('TRADINGANALYTICS_DB_USER', 'postgres')
PASSWORD = os.environ.get('TRADINGANALYTICS_DB_PASSWORD', 'password')
HOST = os.environ.get('TRADINGANALYTICS_DB_HOST', 'localhost')
PORT = os.environ.get('TRADINGANALYTICS_DB_PORT', '5433')
DATABASE = os.environ.get('TRADINGANALYTICS_DB_NAME', 'tradinganalytics_db')

# Lower-case aliases passed to psycopg2.connect() in the data-loading cell
host = HOST
username = USER
password = PASSWORD
port = PORT
database = DATABASE

def query(query, connection, params=None):
    """Execute a SQL statement and return all rows of its result set.

    Args:
        query: The statement to run (anything ``cursor.execute`` accepts).
        connection: An open database connection exposing ``cursor()``.
        params: Optional values for placeholders in ``query``. When given,
            they are handed to ``cursor.execute`` so the driver performs the
            quoting instead of the caller concatenating strings.

    Returns:
        Whatever ``cursor.fetchall()`` returns, or ``None`` if executing or
        fetching raised; in that case the error is printed, not re-raised.
    """
    cursor = connection.cursor()
    try:
        if params is None:
            cursor.execute(query)
        else:
            cursor.execute(query, params)
        return cursor.fetchall()
    except Exception as e:
        # Best-effort contract: report the failure and signal it with None.
        print("Error executing query:", e)
        return None
    finally:
        # Release the cursor on the failure path as well as on success.
        cursor.close()

In [80]:
# Get ml data from postgresql local and convert to dataframe

ticker = "AMD"

# Compose the statement with psycopg2.sql so the ticker value is quoted by the
# driver instead of being concatenated into the SQL text. The explicit
# ORDER BY makes the rows chronological, which the positional train/test split
# and the sliding-window construction further down depend on.
query_string = sql.SQL(
    "select datetime, close, open_close_diff, cumulative_volume "
    "from dm_tradinganalytics_sch.view_ml_training_data "
    "where ticker = {ticker} "
    "order by datetime"
).format(ticker=sql.Literal(ticker))

conn = psycopg2.connect(
    host = host,
    database = database,
    user = username,
    port = port,
    password = password
)
try:
    result = query(query_string,conn)
finally:
    # Always release the connection, even if the query step raises
    conn.close()

# query() prints the error and returns None on failure; stop here rather than
# silently building an empty DataFrame that breaks later cells.
if result is None:
    raise RuntimeError("Query for ticker " + repr(ticker) + " failed; see the error printed above")

df = pd.DataFrame(result,columns=["datetime","close","open_close_diff","cumulative_volume"])

In [81]:
# Preview the first rows of the queried data
df.head()

Unnamed: 0,datetime,close,open_close_diff,cumulative_volume
0,2023-12-27 09:30:00,144.31390380859375,-0.40609741210937,-3441430
1,2023-12-27 09:35:00,143.9654998779297,-0.34449768066405,-5166410
2,2023-12-27 09:40:00,144.44180297851562,0.49180603027342,-3880099
3,2023-12-27 09:45:00,144.09060668945312,-0.37629699707032,-5034796
4,2023-12-27 09:50:00,143.7899932861328,-0.2900085449219,-5967734


## Construct MultiVariate LSTM Model

In [70]:
# Import ml Libraries
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
import numpy as np

In [82]:
# Keep the timestamps aside, parsed as pandas datetimes, for future plotting
stock_dates = pd.to_datetime(df["datetime"])

# Training inputs: every column except the timestamp
df_for_modeling = df.drop(columns=["datetime"])

In [83]:
# Preview the modelling frame to confirm the datetime column was dropped
df_for_modeling.head()

Unnamed: 0,close,open_close_diff,cumulative_volume
0,144.31390380859375,-0.40609741210937,-3441430
1,143.9654998779297,-0.34449768066405,-5166410
2,144.44180297851562,0.49180603027342,-3880099
3,144.09060668945312,-0.37629699707032,-5034796
4,143.7899932861328,-0.2900085449219,-5967734


In [84]:
# Positional 80/20 split point: the first 80% of rows train, the rest test
percent80index = int(len(df_for_modeling)*.80)

# Data normalization
# Fit the scaler on the training rows only, so the mean and variance of the
# held-out test rows do not leak into the features the model is trained on.
scaler = StandardScaler()
scaler = scaler.fit(df_for_modeling.iloc[:percent80index])
scaled_df = scaler.transform(df_for_modeling)

# Parse data to train and test
train_set_scaled = scaled_df[:percent80index]
test_set_scaled = scaled_df[percent80index:]

In [123]:
# Look-back window: 5 trading days of 5-minute bars
days_past = 5
# 6.5 trading hours per day (pre and post market excluded), in 5-minute steps
num5minperday = (6.5 * 60) / 5
n_past = int(days_past * num5minperday)  # window length, in 5-minute steps
n_future = 1  # forecast horizon: the next 5 minutes

# Empty containers for the LSTM-shaped training inputs and targets
x_train, y_train = [], []


In [124]:
# Construct x and y training sets

# Start from empty lists so re-running this cell does not append duplicate
# windows (or fail because the names already hold numpy arrays).
x_train = []
y_train = []

# Each sample pairs the n_past rows before index i (all feature columns) with
# the scaled close (column 0) n_future steps ahead, i.e. row i + n_future - 1.
# The previous target slice [i+1:i+n_future] was empty for n_future = 1, so
# every target came out empty.
for i in range(n_past, len(train_set_scaled) - n_future + 1):
    x_train.append(train_set_scaled[i-n_past:i, :])
    y_train.append(train_set_scaled[i+n_future-1:i+n_future, 0])

In [67]:
# Turn the Python lists of windows and targets into numpy arrays
x_train = np.array(x_train)
y_train = np.array(y_train)

In [69]:
# Shape the inputs as (samples, timesteps, features).
# The feature count comes from the scaled data instead of a hard-coded 8, which
# does not match the three modelling columns and makes the reshape raise.
x_train = np.reshape(x_train,(x_train.shape[0],x_train.shape[1],train_set_scaled.shape[1]))