In [1]:
import sys
import os

# Treat the parent of the current working directory as the project root so that
# the `src` package can be imported from this notebook.
project_dir = os.path.abspath("..")

# Register the root only once; re-running the cell must not grow sys.path.
if not any(entry == project_dir for entry in sys.path):
    sys.path.append(project_dir)
    
from src.predictionModule.TreeTimeML import TreeTimeML

import pandas as pd
import numpy as np
import polars as pl
import datetime
import logging
# Configure the root logger: emit everything from DEBUG upwards and print only
# the bare message text (no timestamp, level or logger name).
logging.basicConfig(format="%(message)s", level=logging.DEBUG)

# Logger for this notebook/module.
logger = logging.getLogger(__name__)

In [None]:
# Configuration dictionary handed to TreeTimeML below.
# Keys are grouped by their name prefix; the order of the keys is kept as-is.
params = {
    # General settings
    "daysAfterPrediction": 7,
    "timesteps": 8,
    "target_option": "last",

    # Filtering settings (TreeTime_isFiltered / quantile-style "_q" keys)
    "TreeTime_isFiltered": True,
    "TreeTime_RSIExt_q": 0.2,
    "TreeTime_FourierRSME_q": None,

    # LSTM settings (TreeTime_lstm_* keys)
    "TreeTime_lstm_units": 8,
    "TreeTime_lstm_num_layers": 1,
    "TreeTime_lstm_dropout": 0.5,
    "TreeTime_lstm_recurrent_dropout": 0.2,
    "TreeTime_lstm_learning_rate": 0.1,
    "TreeTime_lstm_optimizer": "rmsprop",
    "TreeTime_lstm_bidirectional": True,
    "TreeTime_lstm_batch_size": 64,
    "TreeTime_lstm_epochs": 1,

    # LGB settings (TreeTime_lgb_* keys) -- presumably LightGBM; confirm in TreeTimeML
    "TreeTime_lgb_num_boost_round": 1000,
    "TreeTime_lgb_lambda_l1": 0.5,
    "TreeTime_lgb_lambda_l2": 0.5,
    "TreeTime_lgb_feature_fraction": 0.6,
    "TreeTime_lgb_num_leaves": 280,
    "TreeTime_lgb_max_depth": 12,
    "TreeTime_lgb_learning_rate": 0.5,
    "TreeTime_lgb_min_data_in_leaf": 120,
    "TreeTime_lgb_min_gain_to_split": 0.1,
    "TreeTime_lgb_path_smooth": 0.6,
    "TreeTime_lgb_min_sum_hessian_in_leaf": 0.6,

    # MatchFeatures numeric settings
    "TreeTime_MatchFeatures_minWeight": 0.1,
    "TreeTime_MatchFeatures_truncation": 2,

    # MatchFeatures boolean flags -- presumably per-feature-group toggles; verify against TreeTimeML
    "TreeTime_MatchFeatures_Pricediff": True,
    "TreeTime_MatchFeatures_FinData_quar": False,
    "TreeTime_MatchFeatures_FinData_metrics": False,
    "TreeTime_MatchFeatures_Fourier_RSME": False,
    "TreeTime_MatchFeatures_Fourier_Sign": False,
    "TreeTime_MatchFeatures_TA_trend": False,
    "TreeTime_MatchFeatures_FeatureGroup_VolGrLvl": False,
    "TreeTime_MatchFeatures_LSTM_Prediction": True,

    # Size of the "top N" selection (the recorded run reports "top 10" statistics)
    "TreeTime_top_highest": 10,
}

# Run configuration: which stock group to use, where training starts and the
# single date that is evaluated.
stock_group = "group_debug"
start_train_date = datetime.date(2023, 1, 1)
eval_date = datetime.date(2024, 12, 13)

# Build the model wrapper; the evaluation date is passed as a one-element list
# because the constructor takes `test_dates` (plural).
treetimeML = TreeTimeML(
    params=params,
    group=stock_group,
    train_start_date=start_train_date,
    test_dates=[eval_date],
)

In [3]:
# Load (and filter) the train/test sets from the feature directory, given
# relative to the notebook's working directory.
treetimeML.load_and_filter_sets(
    main_path="../src/featureAlchemy/bin/",
)

In [4]:
# Sanity checks on the loaded sets.
# Each entry is (left attribute, right attribute, failure message); the two
# attributes of `treetimeML` must have the same number of rows (shape[0]).
_row_count_checks = (
    ("train_Xtree", "train_ytree", "Train Tree X and y shape mismatch"),
    ("train_Xtime", "train_ytime", "Train Time X and y shape mismatch"),
    ("test_Xtree", "test_ytree", "Test Tree X and y shape mismatch"),
    ("test_Xtime", "test_ytime", "Test Time X and y shape mismatch"),
    ("meta_pl_train", "train_Xtree", "Meta Train and X Tree shape mismatch"),
    ("meta_pl_test", "test_Xtree", "Meta Test and X Tree shape mismatch"),
    ("meta_pl_train", "train_Xtime", "Meta Train and X Time shape mismatch"),
    ("meta_pl_test", "test_Xtime", "Meta Test and X Time shape mismatch"),
    ("meta_pl_train", "train_ytree", "Meta Train and y Tree shape mismatch"),
    ("meta_pl_test", "test_ytree", "Meta Test and y Tree shape mismatch"),
    ("meta_pl_train", "train_ytime", "Meta Train and y Time shape mismatch"),
    ("meta_pl_test", "test_ytime", "Meta Test and y Time shape mismatch"),
)
for _left, _right, _message in _row_count_checks:
    assert (
        getattr(treetimeML, _left).shape[0] == getattr(treetimeML, _right).shape[0]
    ), _message

# The feature-name lists must match the feature axis of the training inputs:
# axis 1 for the tree input, axis 2 for the time input.
assert (
    len(treetimeML.featureTreeNames) == treetimeML.train_Xtree.shape[1]
), "Feature Tree names and X Tree shape mismatch"
assert (
    len(treetimeML.featureTimeNames) == treetimeML.train_Xtime.shape[2]
), "Feature Time names and X Time shape mismatch"

In [5]:
# Every value of the time-series training input must lie in the closed
# interval [0, 1]; combine both bounds element-wise, then reduce once.
assert np.all(
    (treetimeML.train_Xtime >= 0.0) & (treetimeML.train_Xtime <= 1.0)
), "Train X Time values out of range [0, 1]"

In [6]:
# Run the analysis on the loaded sets. Judging by the recorded output of this cell,
# the call fits the LSTM, then the LGB model, prints label distributions and test
# metrics, and returns a 2-tuple whose first element matches the printed
# "Mean value of top 10".
# NOTE(review): the meaning of the second tuple element is not visible here --
# confirm in TreeTimeML.analyze.
treetimeML.analyze()

Number of time features: 60
Overall Training Label Distribution:
  Label False: Count = 3990, Frequency = 0.69
  Label True: Count = 1834, Frequency = 0.31



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - loss: 0.2741 - mean_squared_error: 0.2741 - val_loss: 0.0337 - val_mean_squared_error: 0.0337


LSTM completed in 0:00:04.297485.


[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 410ms/step


LSTM Prediction completed in 0:00:01.022442.
Weights established in 0:00:00.
Iteration 0: valid_0's rmse: 0.040808133352613185
Iteration 100: valid_0's rmse: 0.040808133352613185
LGB completed in 0:00:01.741861.
LGB Prediction completed in 0:00:00.025441.
Predicted Training Label Distribution:
  Label True: Count = 5824, Frequency = 1.00

Testing Masked Classification Metrics:

  Overall Accuracy: 1.00

  Metrics per Class:
    Class False:
      TPR: 1.00, FPR: 0.00, TNR: 0.00, FNR: 0.00

Testing Errors:
Mean Squared Error: 0.0527
Accuracy of top 10 to be over 5%: 0.00%
Mean value of top 10: 0.9686114192008972
Min value of top 10: 0.8792195320129395
Max value of top 10: 1.004068374633789
DataFrame:
shape: (10, 7)
+------------+--------+------------+-------------+--------------+--------------+--------------+
| date       | ticker | Close      | target_date | target_price | target_close | target_ratio |
| ---        | ---    | ---        | ---         | ---          | ---          | ---

(np.float32(0.9686114), np.float64(1.0088483485857327))

In [7]:
na = treetimeML.featureTreeNames