In [1]:
import logging
import tensorflow as tf


from src.common.plots import Visualize as V
from src.data.get_data import CSVsLoader
from src.common.logs import setup_logging, log_model_info
from src.features.build_features import FeatureEngineering as FE

from src.models_service.models_service import TensorflowDataPreparation as TFDataPrep
from src.models_service.models_service import TensorflowModelService as TFModelService
from src.models_service.errors import ErrorsCalculation as ErrorCalc
from env import Env


# Notebook-wide logger; INFO is requested for both the console and the log-file level.
logger = setup_logging(
    logger_name=__name__,
    console_level=logging.INFO,
    log_file_level=logging.INFO,
)

# Directory handed to CSVsLoader as the location of the processed daily data.
DATA_DIR_PROCESSED = f'{Env.PROJECT_ROOT}/data/03_processed/daily_full'

# Single source of truth for the experiment; the cells below read everything from here.
config = {
    # Data-source settings ('ticker' selects which CSV the loader reads).
    'AV': {
        'key': '',
        'ticker': 'MSFT',
        'outputsize': 'full',
        'key_adjusted_close': 'Adj Close',
        'key_volume': 'Volume',
    },
    # Value passed to split_train_test as the test size.
    'data': {
        'test_size': 0.05,
    },
    # Model / training hyper-parameters.
    'model': {
        'name': 'LSTM',
        # List of candidate window sizes; the training cell iterates over it.
        'window': [10, 20, 40, 60, 120, 240],
        'batch_size': 32,
        'epochs': 500,
        # https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle
        'shuffle_buffer_size': 5500,
        'optimizer': 'adam',
        'loss': 'huber_loss',
    },
    # Plot settings.
    'plots': {
        # Fraction of the recorded epochs (taken from the end) shown in the loss plot.
        'loss_zoom': 0.9,
        # Forwarded as `show=` to the plotting helper.
        'show': False,
    },
}

2023-10-09 15:49:28.563523: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-10-09 15:49:28.563587: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-10-09 15:49:28.563615: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-10-09 15:49:28.570702: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Window-size sweep: for every candidate in config['model']['window'] build the
# windowed training dataset, train a fresh LSTM, save the model and its scalers,
# forecast on the test split and store the error metrics.
for window_size in config['model']['window']:
    # Per-iteration config that does NOT alias the shared one.
    # `config.copy()` is shallow: writing config_in_loop['model']['window'] would also
    # overwrite config['model']['window'] (the list being swept) with an int, so a
    # second run of this cell would fail with "'int' object is not iterable".
    # Building a new outer dict and a new 'model' dict keeps `config` intact.
    config_in_loop = {**config, 'model': {**config['model'], 'window': window_size}}

    # A new model is created on every iteration; release the Keras global state left
    # behind by the previous one so memory does not accumulate across the sweep.
    tf.keras.backend.clear_session()

    # -----------------------------Data----------------------------------------
    target_column = 'Adj Close'

    df = CSVsLoader(ticker=config_in_loop['AV']['ticker'], directory=DATA_DIR_PROCESSED)
    df = FE.create_features(df, logger)
    df_train, df_test = TFDataPrep.split_train_test(df, config_in_loop['data']['test_size'], logger)

    df_train_X = df_train.drop(columns=[target_column])
    df_train_y = df_train[target_column]

    df_test_X = df_test.drop(columns=[target_column])
    df_test_X = FE.rename_shifted_columns(df_test_X)
    df_test_y = df_test[target_column]

    train_dataset_X, scalers_X = TFDataPrep.windowed_dataset_X(
        df_train_X,
        window_size=window_size,
        logger=logger,
        verbose=False,
    )
    train_dataset_y = TFDataPrep.windowed_dataset_y(
        df_train_y,
        window_size=window_size,
        logger=logger,
        verbose=False,
    )
    # NOTE(review): helpers that receive a config presumably read the current window
    # from it (the logged model names contain W10/W20). They previously only saw the
    # right value because of the accidental mutation of `config`; they now get the
    # per-iteration config explicitly.
    train_dataset = TFDataPrep.combine_datasets(
        train_dataset_X, train_dataset_y, config_in_loop, logger, verbose=False
    )

    # -----------------------------Model Architecture--------------------------
    # input_shape=(None, 2): variable-length sequences with 2 features per step
    # (the logged feature frame has 'Adj Close - 1' and 'Volume - 1' besides the target).
    model = tf.keras.models.Sequential([
        tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(None, 2)),
        tf.keras.layers.LSTM(32),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dense(64, activation="relu"),
        tf.keras.layers.Dense(1),
    ])

    model = TFModelService.name_model(model, config_in_loop)
    log_model_info(config_in_loop, model, logger)

    # -----------------------------Model Training-------------------------------
    model.compile(
        loss=config_in_loop['model']['loss'],
        optimizer=config_in_loop['model']['optimizer'],
        metrics=['mae', 'mape'],
    )

    history = model.fit(train_dataset, epochs=config_in_loop['model']['epochs'])

    # Plot MAE and Loss for the last `loss_zoom` fraction of the recorded epochs.
    # Named train_* so they are not clobbered by the test-set `mae` computed below.
    train_mae = history.history['mae']
    train_loss = history.history['loss']
    zoom = int(len(train_mae) * config_in_loop['plots']['loss_zoom'])

    # The x-axis is derived from the recorded history length (not the configured epoch
    # count) so x and y always have the same number of points.
    V.plot_series(
        x=range(len(train_mae))[-zoom:],
        y=(train_mae[-zoom:], train_loss[-zoom:]),
        model_name=config_in_loop['model']['name'],
        title='MAE_and_Loss',
        xlabel='Epochs',
        ylabel='MAE and Loss',
        legend=['MAE', f'Loss - {config_in_loop["model"]["loss"]}'],
        show=config_in_loop['plots']['show'],
    )

    # Save the model and the feature scalers under the model's generated name.
    TFModelService.save_model(model=model, logger=logger)
    TFModelService.save_scalers(scalers=scalers_X, model_name=model._name, logger=logger)

    # #------------------------Load the model if necessary--------------------------
    # model_scaler_name = 'LSTM_42113_2023_10_03__04_26'
    # model = TFModelService.load_model(model_name=model_scaler_name, logger=logger)
    # scalers_X = TFModelService.load_scalers(model_name=model_scaler_name, logger=logger)

    # -----------------------------Predictions-----------------------------------
    results = TFModelService.model_forecast(
        model=model,
        df=df_test_X,
        window_size=window_size,
        scalers=scalers_X,
        verbose=False,
    )

    df_test_plot_y = TFModelService.prep_test_df_shape(df_test_y, config_in_loop)

    V.plot_series(
        x=df_test_plot_y.index,  # as dates
        y=(df_test_plot_y, results),
        model_name=model._name,
        title='Preds',
        xlabel='Date',
        ylabel='Price',
        legend=['Actual', 'Predicted'],
        show=config_in_loop['plots']['show'],
    )

    # -----------------------Calculate Errors----------------------------------
    # Restrict the naive forecast to the same index as the plotted test targets.
    naive_forecast = ErrorCalc.get_naive_forecast(df).loc[df_test_plot_y.index]
    rmse, mae, mape, mase = ErrorCalc.calc_errors(df_test_plot_y, results, naive_forecast)
    ErrorCalc.save_errors_to_table(model._name, {'rmse': rmse, 'mae': mae, 'mape': mape, 'mase': mase})

2023-10-09 15:49:31 - src.data.get_data - INFO - Loaded "../p7-trading-bot/data/03_processed/daily_full". Number data points 5995. From "1999-11-01 00:00:00" to "2023-08-29 00:00:00"
2023-10-09 15:49:31 - __main__ - INFO - df.shape: (5994, 3)
2023-10-09 15:49:31 - __main__ - INFO - df.columns: Index(['Adj Close', 'Adj Close - 1', 'Volume - 1'], dtype='object')
2023-10-09 15:49:31 - __main__ - INFO - df_train.shape: (5695, 3), df_test.shape: (299, 3)


2023-10-09 15:49:31.470679: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-10-09 15:49:31.475060: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-10-09 15:49:31.475092: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-10-09 15:49:31.476132: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-10-09 15:49:31.476164: I tensorflow/compile

Epoch 1/500


2023-10-09 15:49:38.173485: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8600
2023-10-09 15:49:39.150663: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f54240d5cc0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-10-09 15:49:39.150707: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce GTX 1650, Compute Capability 7.5
2023-10-09 15:49:39.155560: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-10-09 15:49:39.240923: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/500


2023-10-09 15:49:42.263384: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 7111026389408225104


Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 

2023-10-09 16:15:03 - src.common.plots - INFO - Plot "LSTM-MAE_and_Loss-2023-10-09_16-15-03-2.png" saved to "/workspaces/p7-trading-bot/figures/LSTM-MAE_and_Loss-2023-10-09_16-15-03-2.png"
2023-10-09 16:15:03 - __main__ - INFO - Model saved as MSFT_LSTM_W10_SBS5500_B32_E500_P42113_2023_10_09__15_49.keras
2023-10-09 16:15:03 - __main__ - INFO - Scalers saved: "/workspaces/p7-trading-bot/models_trained/MSFT_LSTM_W10_SBS5500_B32_E500_P42113_2023_10_09__15_49_scalers.pkl"




2023-10-09 16:15:05.589824: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 7111026389408225104
2023-10-09 16:15:06 - src.common.plots - INFO - Plot "LSTM-Predictions-2023-10-09_16-15-06-6.png" saved to "/workspaces/p7-trading-bot/figures/LSTM-Predictions-2023-10-09_16-15-06-6.png"
2023-10-09 16:15:06 - src.models_service.errors - INFO - Test RMSE: $ 4.71
2023-10-09 16:15:06 - src.models_service.errors - INFO - Test MAE : $ 4.644
2023-10-09 16:15:06 - src.models_service.errors - INFO - Test MAPE:   0.017
2023-10-09 16:15:06 - src.models_service.errors - INFO - Test MASE:   1.18
2023-10-09 16:15:06 - src.models_service.errors - INFO - Errors saved to for MSFT_LSTM_W10_SBS5500_B32_E500_P42113_2023_10_09__15_49 model to "logs/models_table.csv" file.
2023-10-09 16:15:06 - src.data.get_data - INFO - Loaded "../p7-trading-bot/data/03_processed/daily_full". Number data points 5995. From "1999-11-01 00:00:00" to "2023-08-29 00:00:00"
2023-10

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

2023-10-09 16:40:59 - src.common.plots - INFO - Plot "LSTM-MAE_and_Loss-2023-10-09_16-40-59-1.png" saved to "/workspaces/p7-trading-bot/figures/LSTM-MAE_and_Loss-2023-10-09_16-40-59-1.png"
2023-10-09 16:40:59 - __main__ - INFO - Model saved as MSFT_LSTM_W20_SBS5500_B32_E500_P42113_2023_10_09__16_15.keras
2023-10-09 16:40:59 - __main__ - INFO - Scalers saved: "/workspaces/p7-trading-bot/models_trained/MSFT_LSTM_W20_SBS5500_B32_E500_P42113_2023_10_09__16_15_scalers.pkl"




2023-10-09 16:41:01.542635: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 7111026389408225104
2023-10-09 16:41:01 - src.common.plots - INFO - Plot "LSTM-Predictions-2023-10-09_16-41-01-6.png" saved to "/workspaces/p7-trading-bot/figures/LSTM-Predictions-2023-10-09_16-41-01-6.png"
2023-10-09 16:41:01 - src.models_service.errors - INFO - Test RMSE: $ 0.954
2023-10-09 16:41:01 - src.models_service.errors - INFO - Test MAE : $ 0.625
2023-10-09 16:41:01 - src.models_service.errors - INFO - Test MAPE:   0.002
2023-10-09 16:41:01 - src.models_service.errors - INFO - Test MASE:   0.152
2023-10-09 16:41:01 - src.models_service.errors - INFO - Errors saved to for MSFT_LSTM_W20_SBS5500_B32_E500_P42113_2023_10_09__16_15 model to "logs/models_table.csv" file.
2023-10-09 16:41:01 - src.data.get_data - INFO - Loaded "../p7-trading-bot/data/03_processed/daily_full". Number data points 5995. From "1999-11-01 00:00:00" to "2023-08-29 00:00:00"
2023-

Epoch 1/500
Epoch 2/500


2023-10-09 16:41:09.283096: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 7111026389408225104


Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 

In [None]:
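# NOTE: the commented-out cells below are a single-run variant of the window-sweep loop above.
# They pass config['model']['window'] straight through as window_size, so it must be a single int there.
# # -----------------------------Data----------------------------------------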
# df = CSVsLoader(ticker=config['AV']['ticker'], directory=DATA_DIR_PROCESSED)
# df = FE.create_features(df, logger)
# df_train, df_test = TFDataPrep.split_train_test(df, config['data']['test_size'], logger)

# df_train_X = df_train.drop(columns=['Adj Close'])
# df_train_y = df_train['Adj Close']

# df_test_X = df_test.drop(columns=['Adj Close'])
# df_test_X = FE.rename_shifted_columns(df_test_X)
# df_test_y = df_test['Adj Close']


# train_dataset_X, scalers_X = TFDataPrep.windowed_dataset_X(df_train_X, 
#                                                             window_size=config['model']['window'], 
#                                                             logger=logger,
#                                                             verbose=False)
# train_dataset_y = TFDataPrep.windowed_dataset_y(df_train_y, 
#                                     window_size=config['model']['window'], 
#                                     logger=logger,
#                                     verbose=False)
# train_dataset = TFDataPrep.combine_datasets(train_dataset_X, train_dataset_y, config, logger, verbose=True)

In [None]:
# # -----------------------------Model Architecture--------------------------
# model = tf.keras.models.Sequential([
#         tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(None, 2)),
#         tf.keras.layers.LSTM(32),
#         tf.keras.layers.Dense(128, activation="relu"),
#         tf.keras.layers.Dense(64, activation="relu"),
#         tf.keras.layers.Dense(1),
#         ]
#     )

# model = TFModelService.name_model(model, config)
# log_model_info(config, model, logger)


# # -----------------------------Model Training-------------------------------
# model.compile(loss=config['model']['loss'], 
#             optimizer=config['model']['optimizer'], 
#             metrics=['mae','mape'],
#             )    

# history = model.fit(train_dataset, epochs=config['model']['epochs'])

# # Plot MAE and Loss
# mae=history.history['mae']
# loss=history.history['loss']
# zoom = int(len(mae) * config['plots']['loss_zoom'])

# V.plot_series(x=range(config['model']['epochs'])[-zoom:],
#                 y=(mae[-zoom:],loss[-zoom:]),
#                 model_name=config['model']['name'],
#                 title='MAE_and_Loss',
#                 xlabel='Epochs',
#                 ylabel=f'MAE and Loss',
#                 legend=['MAE', f'Loss - {config["model"]["loss"]}'],
#                 show=config['plots']['show'],
#             )

In [None]:
# # # Save the model
# TFModelService.save_model(model=model, logger=logger)    
# TFModelService.save_scalers(scalers=scalers_X, model_name=model._name ,logger=logger)


# # #------------------------Load the model if necessary--------------------------
# # model_skaler_name = 'LSTM_42113_2023_10_03__04_26'
# # model = TFModelService.load_model(model_name=model_skaler_name, logger=logger)
# # scalers_X = TFModelService.load_scalers(model_name=model_skaler_name, logger=logger)

In [None]:
# # -----------------------------Predictions-----------------------------------
# results = TFModelService.model_forecast(model=model, 
#                                         df=df_test_X,
#                                         window_size=config['model']['window'],
#                                         scalers=scalers_X,
#                                         verbose=False)

# df_test_plot_y = TFModelService.prep_test_df_shape(df_test_y, config)

# V.plot_series(  x=df_test_plot_y.index,  # as dates
#                 y=(df_test_plot_y, results),
#                 model_name=config['model']['name'],
#                 title='Predictions',
#                 xlabel='Date',
#                 ylabel='Price',
#                 legend=['Actual', 'Predicted'],
#                 show=config['plots']['show'],)

In [None]:
# # -----------------------Calculate Errors----------------------------------
# naive_forecast = ErrorCalc.get_naive_forecast(df).loc[df_test_plot_y.index] # Getting same days as results
# rmse, mae, mape, mase = ErrorCalc.calc_errors(df_test_plot_y, results, naive_forecast)
# ErrorCalc.save_errors_to_table(model._name, {'rmse': rmse, 'mae': mae, 'mape': mape, 'mase': mase})