## LSTM Network Training

In [1]:
# --- Standard library --------------------------------------------------------
import os
import warnings
from datetime import timedelta, datetime

# Silence TensorFlow's native (C++) log output.
# This must happen BEFORE TensorFlow is imported for the first time, otherwise
# start-up messages (e.g. the cpu_feature_guard notice) are still printed.
# The project modules imported below presumably import TensorFlow themselves,
# so the variable has to be set ahead of them as well.
# Levels: '0' = everything, '1' = hide INFO, '2' = hide INFO and WARNING,
#         '3' = hide INFO, WARNING and ERROR.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Suppress Python-level UserWarnings raised from tensorflow modules
warnings.filterwarnings("ignore", category=UserWarning, module="tensorflow")

# --- Third-party -------------------------------------------------------------
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

# --- Project modules (kept last so the logging setup above is already active) -
from src.stock_prediction_class import StockClass
from src.stock_prediction_numpy import DataClass
from src.stock_prediction_deep_learning import train_LSTM_network

2024-06-02 22:50:43.604501: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# import secrets
# TOKEN = STOCK_TICKER + '_' + TODAY_RUN + '_' + secrets.token_hex(16)

In [6]:
# --- Run configuration: everything a reader might want to tune ---------------
STOCK_TICKER = "GOOG"
STOCK_START_DATE = pd.to_datetime("2018-05-30")
STOCK_VALIDATION_DATE = pd.to_datetime("2021-05-30")
STOCK_END_DATE = pd.to_datetime("2024-05-30")
EPOCHS = 100
BATCH_SIZE = 10
TIME_STEPS = 1
TODAY_RUN = datetime.today().strftime("%Y%m%d")

# Fail early on an inconsistent date configuration instead of deep inside the loader.
assert STOCK_START_DATE < STOCK_VALIDATION_DATE < STOCK_END_DATE, \
    "Expected STOCK_START_DATE < STOCK_VALIDATION_DATE < STOCK_END_DATE"

# The run token names the data folder. Derive it from the ticker so that
# changing STOCK_TICKER cannot silently keep pointing at another ticker's data.
TOKEN = STOCK_TICKER
FOLDER_PREFIX = "data/day/"
RUN_FOLDER = f"{FOLDER_PREFIX}{TOKEN}/"

# Absolute working directory of this run. PROJECT_FOLDER is the same location;
# both names are kept because both are used further down in the notebook.
WORK_DIR = os.path.join(os.getcwd(), RUN_FOLDER)
PROJECT_FOLDER = WORK_DIR
# os.path.join instead of string concatenation: does not depend on WORK_DIR
# ending with a path separator.
CSV_FILE = os.path.join(WORK_DIR, "data.csv")

print('Ticker: ' + STOCK_TICKER)
print('Start Date: ' + STOCK_START_DATE.strftime("%Y-%m-%d"))
print('Validation Date: ' + STOCK_VALIDATION_DATE.strftime("%Y-%m-%d"))
print('Test Run Folder: ' + RUN_FOLDER)

# exist_ok=True makes the cell idempotent and avoids the check-then-create race.
os.makedirs(PROJECT_FOLDER, exist_ok=True)

Ticker: GOOG
Start Date: 2018-05-30
Validation Date: 2021-05-30
Test Run Folder: data/day/GOOG/


In [7]:
# Hand the run configuration to StockClass.
# All arguments are positional, so their order must match StockClass's signature.
stock_prediction = StockClass(
    STOCK_TICKER,
    STOCK_START_DATE,
    STOCK_END_DATE,
    STOCK_VALIDATION_DATE,
    PROJECT_FOLDER,
    EPOCHS,
    TIME_STEPS,
    TOKEN,
    BATCH_SIZE,
)

In [8]:
# Data loading: build the train/test splits from the CSV at CSV_FILE.
stock_data = DataClass()

# Disabled alternative, kept for reference: download the data instead of
# reading it from CSV_FILE.
# (x_train, y_train), (x_test, y_test), (training_data, test_data) = data.download_transform_to_numpy(
    # stock.ticker,
    # stock.time_steps,
    # stock.project_folder,
    # stock.start_date,
    # stock.end_date,
    # stock.validation_date)

# NOTE(review): the saved output of this cell ends in
#   ValueError: cannot reshape array of size 1510 into shape (755,1,1)
# and the printed mean / std dev have two entries. Presumably the CSV yields
# two value columns while the reshape inside DataClass expects one -- confirm
# in load_csv_transform_to_numpy before relying on the cells below.
(x_train, y_train), (x_test, y_test), (training_data, test_data) = (
    stock_data.load_csv_transform_to_numpy(
        TIME_STEPS,
        CSV_FILE,
        STOCK_VALIDATION_DATE,
    )
)

mean: [0.5        0.29170237]
max 1.0
min 0.0
Std dev: [0.28905723 0.23739201]


ValueError: cannot reshape array of size 1510 into shape (755,1,1)

In [None]:
# Execute Deep Learning model: train the LSTM on the splits loaded above.
train_LSTM_network(
    stock_data,
    stock_prediction,
    x_train,
    y_train,
    x_test,
    y_test,
    training_data,
    test_data,
)

## Prediction

In [None]:
def infer_data(stock_data, start_date, end_date, latest_close_price, work_dir):
    """Predict prices for a future date range and plot them next to the generated data.

    The function asks ``stock_data`` for future data, loads the saved Keras
    model from ``work_dir``, runs a prediction on the generated inputs, and
    saves and shows a comparison plot. It reads the notebook-level constants
    ``TIME_STEPS`` and ``STOCK_TICKER``.

    Args:
        stock_data: Object providing ``generate_future_data(...)`` and a
            ``min_max`` attribute with ``inverse_transform`` (a ``DataClass``
            instance in this notebook).
        start_date: Start of the range; forwarded to ``generate_future_data``.
        end_date: End of the range; forwarded to ``generate_future_data``.
        latest_close_price: Forwarded to ``generate_future_data``.
        work_dir: Directory that contains ``model_weights.keras``; the plot is
            written there as ``future_comparison.png``.

    Returns:
        None. If no future data is generated, an error message is printed and
        nothing is loaded or plotted.
    """
    # The generated targets are not needed for inference.
    x_test, _, test_data = stock_data.generate_future_data(TIME_STEPS, start_date, end_date, latest_close_price)

    # Guard clause: nothing to predict on.
    if x_test.shape[0] == 0:
        print("Error: Future data is empty.")
        return

    # load_model restores the complete saved model (not only its weights)
    model = tf.keras.models.load_model(os.path.join(work_dir, 'model_weights.keras'))
    model.summary()

    # Predict, then map the model output back through the scaler.
    # NOTE(review): assumes stock_data.min_max has been fitted by the time we
    # get here (presumably inside generate_future_data) -- confirm.
    scaled_predictions = model.predict(x_test)
    predicted_prices = stock_data.min_max.inverse_transform(scaled_predictions)
    predictions = pd.DataFrame(predicted_prices, columns=['Predicted_Price'])

    # One date per prediction, starting at the first index entry of test_data
    # (pd.date_range defaults to daily frequency).
    predictions['Date'] = pd.date_range(start=test_data.index[0], periods=len(predictions))

    # Turn the index into a regular column WITHOUT mutating the frame returned
    # by generate_future_data (no inplace=True).
    # NOTE(review): the plot below expects the resulting columns to include
    # 'Date' and 'Close' -- i.e. the index is presumably named 'Date'; confirm.
    simulated = test_data.reset_index()

    # Row-wise concatenation: simulated rows first, prediction rows after;
    # columns missing on either side are filled with NaN.
    combined_data = pd.concat([simulated, predictions], ignore_index=True)

    # Plotting predictions (explicit figure/axes instead of pyplot global state)
    fig, ax = plt.subplots(figsize=(14, 5))
    ax.plot(combined_data['Date'], combined_data.Close, color='green', label='Simulated [' + STOCK_TICKER + '] price')
    ax.plot(combined_data['Date'], combined_data['Predicted_Price'], color='red', label='Predicted [' + STOCK_TICKER + '] price')
    ax.set_xlabel('Time')
    ax.set_ylabel('Price [USD]')
    ax.legend()
    ax.set_title('Simulated vs Predicted Prices')
    fig.savefig(os.path.join(work_dir, 'future_comparison.png'))
    plt.show()

In [None]:
# Fresh DataClass instance for the prediction step.
# NOTE(review): this replaces any `stock_data` created earlier in the notebook;
# infer_data reads `stock_data.min_max`, so confirm that generate_future_data
# fits the scaler on this new instance.
stock_data = DataClass()

data = pd.read_csv(CSV_FILE)

# The last CSV row is taken as the most recent observation
# (assumes rows are in chronological order -- TODO confirm).
latest_date = data['Date'].iloc[-1]
latest_close_price = data['Close'].iloc[-1]

print(f"{latest_close_price}: latest_close_price")
print(f"{latest_date}: latest_date")

# Specify the next X days
X = 6

# Prediction window: from the day after the last observation up to
# TIME_STEPS * X days after it.
start_date = pd.to_datetime(latest_date) + timedelta(days=1)
end_date = pd.to_datetime(latest_date) + timedelta(days=TIME_STEPS * X)

print(f"{start_date}: start_date")
print(f"{end_date}: end_date")

infer_data(
    stock_data,
    start_date,
    end_date,
    latest_close_price,
    WORK_DIR,
)