In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
import joblib
import tabulate as tb
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.losses import Huber
from tensorflow.keras import Sequential, layers, optimizers, losses
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import os, random, numpy as np, tensorflow as tf
from model import FinancialLSTMModel



In [16]:
# --- Data source and column names -------------------------------------------
CSV_PATH = './../data/GOOGL_1h.csv'
DATE_COL = 'Datetime'
TARGET = 'direction'

# --- Windowing ---------------------------------------------------------------
# Number of rows per input sequence (assumed from the name and the recorded
# train stats, where row count = samples x 24 -- confirm in FinancialLSTMModel).
SEQ_LENGTH = 24

# --- Optimisation ------------------------------------------------------------
BATCH_SIZE = 32
LEARNING_RATE = 0.001
EPOCHS = 100

# --- Hold-out fractions ------------------------------------------------------
# NOTE(review): how the two ratios compose (val taken from the non-test part?)
# is decided inside FinancialLSTMModel -- confirm there.
TEST_RATIO = 0.2
VAL_SPLIT = 0.2

# --- Input features ----------------------------------------------------------
# Each entry is (column name, scaler key). The scaler keys seen here are
# "minmax", "standard" and "none"; their handling lives in FinancialLSTMModel.
# Uncomment entries below to add them to the model inputs.
FEATURES = [
    ("Close", "minmax"),
    ("Volume", "minmax"),
    # ("High", "minmax"),
    # ("Low", "minmax"),
    # ("Open", "minmax"),
    # ("rsi_14", "minmax"),
    # ("rsi_28", "minmax"),
    # ("rsi_7", "minmax"),
    # ("macd", "standard"),
    # ("ema_20", "standard"),
    # ("ema_50", "standard"),
    # ("stoch_k", "minmax"),
    # ("stoch_d", "minmax"),
    # ("roc", "standard"),
    # ("adx", "minmax"),
    # ("di_plus", "minmax"),
    # ("di_minus", "minmax"),
    # ("atr", "standard"),
    # ("close_pos", "none"),
    # ("body_range_ratio", "none"),
]

def build_hidden_layers(lstm_units=96, dense_units=(128, 64), dropout=0.1,
                        recurrent_dropout=0.1):
    """Return a fresh list of Keras layers forming the hidden part of the network.

    Layout:
      * two stacked LSTM layers (the first returns full sequences, the second
        only its last output), each followed by ``LayerNormalization``;
      * one ``Dense(relu)`` + ``BatchNormalization`` block per entry of
        ``dense_units``, with a ``Dropout`` layer inserted between consecutive
        dense blocks (none after the last one).

    Called with no arguments this reproduces the original hard-coded stack:
    LSTM(96) -> LN -> LSTM(96) -> LN -> Dense(128) -> BN -> Dropout(0.1)
    -> Dense(64) -> BN.

    Args:
        lstm_units: Width of both LSTM layers.
        dense_units: Iterable of widths, one per dense block.
        dropout: Rate for the Dropout layers between dense blocks.
        recurrent_dropout: ``recurrent_dropout`` passed to both LSTM layers.

    Returns:
        list: Newly constructed layer objects; every call creates new instances.
    """
    keras_layers = tf.keras.layers
    dense_units = tuple(dense_units)

    stack = [
        keras_layers.LSTM(lstm_units, return_sequences=True,
                          recurrent_dropout=recurrent_dropout),
        keras_layers.LayerNormalization(),
        keras_layers.LSTM(lstm_units, return_sequences=False,
                          recurrent_dropout=recurrent_dropout),
        keras_layers.LayerNormalization(),
    ]

    last_idx = len(dense_units) - 1
    for idx, width in enumerate(dense_units):
        stack.append(keras_layers.Dense(width, activation='relu'))
        stack.append(keras_layers.BatchNormalization())
        # Dropout only separates dense blocks; the final block has none.
        if idx < last_idx:
            stack.append(keras_layers.Dropout(dropout))
    return stack

In [17]:
SEED = 42

# Determinism-related environment switches.
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here likely only affects child processes -- confirm.
os.environ.update({
    "PYTHONHASHSEED": str(SEED),
    "TF_DETERMINISTIC_OPS": "1",
    "TF_CUDNN_DETERMINISTIC": "1",
})

# Seed each random number generator in use: TensorFlow, NumPy, stdlib random.
tf.random.set_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Restrict TensorFlow to a single thread both across and within ops.
tf.config.threading.set_intra_op_parallelism_threads(1)
tf.config.threading.set_inter_op_parallelism_threads(1)


In [18]:
# Build, train and evaluate the LSTM classifier using the values from the
# configuration cell.
model = FinancialLSTMModel(
    csv_path=CSV_PATH,
    features_scales=FEATURES,
    # Use the config constants instead of repeating the literals, so that
    # changing TARGET / DATE_COL in the configuration cell takes effect here.
    target_col=TARGET,
    datetime_col=DATE_COL,

    seq_length=SEQ_LENGTH,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    epochs=EPOCHS,
    test_ratio=TEST_RATIO,
    val_split=VAL_SPLIT,
)

model.prepare_data()
model.build_model(build_hidden_layers())
model.train()

# `ev` is the metrics dict returned by evaluate(); a later cell tabulates it.
# NOTE(review): in the recorded run the confusion matrix was
# [[297, 0], [369, 0]] -- the model never predicted the positive class, so
# the reported accuracy equals the share of class-0 samples (297/666).
ev = model.evaluate()
print(ev)

Data prepared: 2134 train samples, 533 val samples, 666 test samples.
Train data feature stats:
              Close        Volume
count  51216.000000  51216.000000
mean       0.477980      0.103941
std        0.226943      0.084689
min        0.000000      0.000000
25%        0.316482      0.055945
50%        0.470360      0.078828
75%        0.623163      0.122293
max        1.000000      1.000000


Epoch 1/100


2025-11-29 16:25:36.309913: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.5086 - loss: 0.7957

2025-11-29 16:25:49.693198: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 93ms/step - accuracy: 0.4930 - loss: 0.7839 - val_accuracy: 0.5310 - val_loss: 0.6937 - learning_rate: 0.0010
Epoch 2/100
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 87ms/step - accuracy: 0.5047 - loss: 0.7163 - val_accuracy: 0.4934 - val_loss: 0.7069 - learning_rate: 0.0010
Epoch 3/100
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - accuracy: 0.5098 - loss: 0.7091 - val_accuracy: 0.4934 - val_loss: 0.7392 - learning_rate: 0.0010
Epoch 4/100
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 85ms/step - accuracy: 0.4850 - loss: 0.7118 - val_accuracy: 0.4953 - val_loss: 0.7465 - learning_rate: 0.0010
Epoch 5/100
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step - accuracy: 0.4842 - loss: 0.7160
Epoch 5: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 104ms/s

2025-11-29 16:28:53.298554: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}


{'first_prediction_correct': np.True_, 'accuracy': 0.44594594594594594, 'f1_score': 0.0, 'precision': 0.0, 'recall': 0.0, 'auc_roc': 0.5367632508277893, 'confusion_matrix': [[297, 0], [369, 0]], 'last epoch num': 33}


In [19]:
# Wrap the evaluation metrics dict in a one-row DataFrame and print it as a
# GitHub-flavoured Markdown table.
ev_df = pd.DataFrame(data=[ev])
print(
    tb.tabulate(
        ev_df,
        tablefmt='github',
        headers='keys',
    )
)

|    | first_prediction_correct   |   accuracy |   f1_score |   precision |   recall |   auc_roc | confusion_matrix     |   last epoch num |
|----|----------------------------|------------|------------|-------------|----------|-----------|----------------------|------------------|
|  0 | True                       |   0.445946 |          0 |           0 |        0 |  0.536763 | [[297, 0], [369, 0]] |               33 |
