## LSTM Network Training

In [None]:
import os
import warnings
from datetime import timedelta

# Logging/warning configuration has to be in place BEFORE TensorFlow is first
# imported: TF_CPP_MIN_LOG_LEVEL is only honoured if it is set ahead of the
# import. The project modules imported below presumably import TensorFlow
# themselves (confirm), so these settings are applied first.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # '3' suppresses all messages

# Suppress UserWarnings raised from tensorflow modules
warnings.filterwarnings("ignore", category=UserWarning, module="tensorflow")

import pandas as pd

from src.StockDataProcessor import StockDataProcessor
from src.LongShortTermMemory import LSTMModel

In [None]:
# for combined per-day and per-minute interval data
# FOLDER_PREFIX = "data/comb/"
# TIME_STEPS = 60
# PREDICTION_RANGE = 60 # *24

In [None]:
# for per-minute interval data
# FOLDER_PREFIX = "data/min/"
# TIME_STEPS = 60
# PREDICTION_RANGE = 60

In [None]:
# Active configuration: per-day interval data.
FOLDER_PREFIX: str = "data/day/"  # data folder, relative to the working directory
TIME_STEPS: int = 1  # forwarded to the data loader and to inference
PREDICTION_RANGE: int = 5  # number of days added to the forecast start date

In [None]:
# Optional unique suffix for the run folder (currently disabled). Uncommenting
# the lines below would tag each run with today's date and a random hex token.
# NOTE(review): the commented code needs `datetime` and `DATE_FORMAT`, neither
# of which is defined in the visible cells.
# import secrets
# TODAY_RUN = datetime.today().strftime(DATE_FORMAT)
# TOKEN = '_' + TODAY_RUN + '_' + secrets.token_hex(16)
TOKEN = ""  # empty suffix: the run folder is just FOLDER_PREFIX + STOCK_TICKER

In [None]:
# Run configuration for a single ticker.
STOCK_TICKER = "GOOG"
EPOCHS = 100
BATCH_SIZE = 10
TRAIN_DATA_SPLIT = 0.65  # fraction of CSV rows that precede the validation date

# RUN_FOLDER keeps its trailing "/" on purpose: WORK_DIR is concatenated
# directly with file names (e.g. f"{WORK_DIR}data.csv").
RUN_FOLDER = f"{FOLDER_PREFIX}{STOCK_TICKER}{TOKEN}/"
WORK_DIR = os.path.join(os.getcwd(), RUN_FOLDER)
CSV_FILE = f"{WORK_DIR}data.csv"

# PROJECT_FOLDER is the same directory as WORK_DIR; both names are kept
# because later cells use each of them.
PROJECT_FOLDER = WORK_DIR

# exist_ok=True creates the folder only when missing, without a separate
# exists() check that could race with another process.
os.makedirs(PROJECT_FOLDER, exist_ok=True)

In [None]:
# Derive the start, validation, and end dates from the CSV itself.
# Fixed-date alternative, kept for reference:
# STOCK_START_DATE = pd.to_datetime("2017-06-07")
# STOCK_VALIDATION_DATE = pd.to_datetime("2022-06-07")
# STOCK_END_DATE = pd.to_datetime("2024-06-07")

data = pd.read_csv(CSV_FILE)
first_and_last_rows = data.iloc[[0, -1]]
total_number_of_rows = len(data)

# The validation date is the timestamp of the row located at the
# TRAIN_DATA_SPLIT fraction of the file.
validation_date_index = int(total_number_of_rows * TRAIN_DATA_SPLIT)

boundary_dates = first_and_last_rows['Datetime']
STOCK_START_DATE = pd.to_datetime(boundary_dates.iloc[0])
STOCK_VALIDATION_DATE = pd.to_datetime(data['Datetime'].iloc[validation_date_index])
STOCK_END_DATE = pd.to_datetime(boundary_dates.iloc[1])

In [None]:
# Print a summary of the run configuration.
# To show dates only, the timestamps could be formatted with
# .strftime(DATE_FORMAT), e.g. DATE_FORMAT = "%Y-%m-%d".
print(f'Ticker: {STOCK_TICKER}')
print(f'Start Date: {STOCK_START_DATE!s}')
print(f'Validation Date: {STOCK_VALIDATION_DATE!s}')
print(f'End Date: {STOCK_END_DATE!s}')
print(f'Test Run Folder: {RUN_FOLDER}')

### Prepare the data for training and testing

In [None]:
# Load the CSV and unpack the three pairs returned by the data processor:
# model inputs/targets for train and test, plus the corresponding data frames.
# The split presumably happens at STOCK_VALIDATION_DATE (see StockDataProcessor).
(x_train, y_train), (x_test, y_test), (training_data, test_data) = (
    StockDataProcessor.load_csv_transform_to_numpy(
        TIME_STEPS,
        CSV_FILE,
        STOCK_VALIDATION_DATE,
    )
)

### Train

In [None]:
# Initialize the model class with the training-input shape, the training
# hyperparameters, and the folder used for this run.
lstm = LSTMModel(x_train.shape, EPOCHS, BATCH_SIZE, PROJECT_FOLDER)

In [None]:
# Execute model training. Both splits and the data frames returned by the data
# preparation step are handed over; how train() uses each argument is defined
# in src.LongShortTermMemory (not visible here).
lstm.train(x_train, y_train, x_test, y_test, training_data, test_data)

### Prediction

In [None]:
# Re-read the CSV and use its final row as the anchor for the forecast.
data = pd.read_csv(CSV_FILE)

close_prices = data['Close']
timestamps = data['Datetime']
latest_close_price = close_prices.iloc[-1]
latest_date = timestamps.iloc[-1]

print(f"{latest_close_price}: latest_close_price")
print(f"{latest_date}: latest_date")

# The forecast starts the day after the last known row and spans
# PREDICTION_RANGE days from there.
one_day = timedelta(days=1)
forecast_span = timedelta(days=PREDICTION_RANGE)
start_date = pd.to_datetime(latest_date) + one_day
end_date = pd.to_datetime(start_date) + forecast_span

print(f"{start_date}: start_date")
print(f"{end_date}: end_date")

In [None]:
# Run inference over [start_date, end_date], passing the latest close price,
# the run directory, and the configured time steps. What infer() produces is
# defined in src.LongShortTermMemory (not visible here).
lstm.infer(start_date, end_date, latest_close_price, WORK_DIR, TIME_STEPS)

### Unseen data

In [None]:
# Load the held-out ("unseen") data; the first CSV column is used as the index.
UNSEEN_CSV = f"{WORK_DIR}unseen.csv"
unseen_data = pd.read_csv(UNSEEN_CSV, index_col=0)

unseen_close = unseen_data['Close']
unseen_timestamps = unseen_data['Datetime']
latest_close_price = unseen_close.iloc[-1]
latest_date = unseen_timestamps.iloc[-1]
first_date = unseen_timestamps.iloc[0]

print(f"{latest_close_price}: latest_close_price")
print(f"{latest_date}: latest_date")
print(f"{first_date}: first_date")

# The evaluation window covers the whole unseen file, first row to last row.
start_date = pd.to_datetime(first_date)
end_date = pd.to_datetime(latest_date)

print(f"{start_date}: start_date")
print(f"{end_date}: end_date")

In [None]:
import tensorflow as tf
from src.StockDataVisualizer import StockDataVisualizer

# Predict one value per row of the unseen data with the saved model and plot
# the predictions against the unseen prices.
if unseen_data.shape[0] > 0:

    # Drop every price/volume column except Close and index the frame by timestamp.
    unseen_data.drop(columns=["Open", "High", "Low", "Adj Close", "Volume"], errors='ignore', inplace=True)
    unseen_data.reset_index(drop=True, inplace=True)
    unseen_data.set_index('Datetime', inplace=True)

    model_path = os.path.join(PROJECT_FOLDER, 'model_weights.keras')
    model = tf.keras.models.load_model(model_path)
    model.summary()

    # Use the number of unseen rows, not the calendar span between the first
    # and last date: any gap in the dates (or intraday data) makes the span
    # differ from the row count, and the predictions could then not be
    # aligned with the unseen index.
    sample_count = len(unseen_data)
    if sample_count > len(x_test):
        raise ValueError(
            f"unseen data has {sample_count} rows but x_test only provides "
            f"{len(x_test)} samples"
        )
    sample_data = x_test[-sample_count:]

    predicted_data = model.predict(sample_data)
    # Map the predictions back through the scaler held by StockDataProcessor.
    predicted_data = StockDataProcessor.min_max.inverse_transform(predicted_data)
    # Index the predictions by the unseen timestamps (index name 'Datetime').
    predicted_data = pd.DataFrame(
        predicted_data, columns=['Predicted'], index=unseen_data.index
    )

    # Plotting predictions
    StockDataVisualizer.plot_future(unseen_data, predicted_data, PROJECT_FOLDER)