### IMPORTS

In [1]:
from dotenv import load_dotenv
import os

load_dotenv()
root = os.environ.get("ROOT_PATH", ".")
os.chdir(root)
from datetime import datetime, timedelta
from data_preprocessing.fetch_data import fetch_ohlcv_to_df
from data_preprocessing.training_helpers import evaluate_token_strategy
from data_preprocessing.training_helpers import create_merged_feature
import tensorflow as tf
from data_preprocessing.training_helpers import create_all_features
from tensorflow.keras import layers, models, optimizers, regularizers
from tensorflow.keras.utils import to_categorical
import warnings
from sklearn.decomposition import PCA
from scikeras.wrappers import KerasClassifier
from sklearn.pipeline import Pipeline
from tensorflow.keras.callbacks import EarlyStopping

warnings.filterwarnings("ignore")

2025-07-05 22:45:14.134133: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-05 22:45:14.138548: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-05 22:45:14.152396: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751735714.174533  689860 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751735714.180779  689860 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1751735714.198685  689860 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

### CONSTATNTS

In [2]:
EXCHANGE = "binance"
TRAIN_START_DATE = "2024-07-01 00:00:00"
TRAIN_END_DATE = "2025-02-15 00:01:00"
SET_OVERLAPPING_DAYS = 20
VAL_START_DATE = (datetime.strptime(TRAIN_END_DATE, "%Y-%m-%d %H:%M:%S") - timedelta(days=SET_OVERLAPPING_DAYS)).strftime("%Y-%m-%d %H:%M:%S")
VAL_END_DATE = "2025-03-15 00:01:00"
TEST_START_DATE = (datetime.strptime(VAL_END_DATE, "%Y-%m-%d %H:%M:%S") - timedelta(days=SET_OVERLAPPING_DAYS)).strftime("%Y-%m-%d %H:%M:%S")
TEST_END_DATE = "2025-06-10 00:01:00"
ALL_START_DATE = "2024-10-20 00:00:00"
ALL_END_DATE = "2025-06-10 00:01:00"
LONG_PROBA = 0.6
SHORT_PROBA = 0.6
TAKE_PROFIT = 2
STOP_LOSS = 0.8
TAKE_PROFIT_PCT = TAKE_PROFIT/100
STOP_LOSS_PCT = STOP_LOSS / 100
CLASS_WEIGHT = { 0:0.3, 1: 1, 2:1 }

### LOAD DATA

In [3]:
df_train_ada = await fetch_ohlcv_to_df("ADA/USDT", TRAIN_START_DATE, TRAIN_END_DATE)
df_val_ada = await fetch_ohlcv_to_df("ADA/USDT", VAL_START_DATE, VAL_END_DATE)
df_test_ada = await fetch_ohlcv_to_df("ADA/USDT", TEST_START_DATE, TEST_END_DATE)

df_train_btc = await fetch_ohlcv_to_df("BTC/USDT", TRAIN_START_DATE, TRAIN_END_DATE)
df_val_btc = await fetch_ohlcv_to_df("BTC/USDT", VAL_START_DATE, VAL_END_DATE)
df_test_btc = await fetch_ohlcv_to_df("BTC/USDT", TEST_START_DATE, TEST_END_DATE)

### PREPROCESS DATA

In [4]:
train_data_btc = create_all_features(df=df_train_btc, token="btc", target_type="pct", tp=2, sl=0.8, max_bars=60*6, atr_period=60*6)
val_data_btc = create_all_features(df=df_val_btc, token="btc", target_type="pct", tp=2, sl=0.8, max_bars=60*6, atr_period=60*6)
test_data_btc = create_all_features(df=df_test_btc, token="btc", target_type="pct", tp=2, sl=0.8, max_bars=60*6, atr_period=60*6)

train_data_ada = create_all_features(df=df_train_ada, token="ada", target_type="pct", tp=2, sl=0.8, max_bars=60*6, atr_period=60*6)
val_data_ada = create_all_features(df=df_val_ada, token="ada", target_type="pct", tp=2, sl=0.8, max_bars=60*6, atr_period=60*6)
test_data_ada = create_all_features(df=df_test_ada, token="ada", target_type="pct", tp=2, sl=0.8, max_bars=60*6, atr_period=60*6)

In [5]:
train_data = create_merged_feature(
    main_feature_df=df_train_ada,
    complementry_data_df=df_train_btc,
    main_token_name="ada",
    complementry_token_name="btc",
    target_type="pct",
    tp=2,
    sl=0.8,
    max_bars=60 * 6,
    atr_period=60 * 6,
)

val_data = create_merged_feature(
    main_feature_df=df_val_ada,
    complementry_data_df=df_val_btc,
    main_token_name="ada",
    complementry_token_name="btc",
    target_type="pct",
    tp=2,
    sl=0.8,
    max_bars=60 * 6,
    atr_period=60 * 6,
)

test_data = create_merged_feature(
    main_feature_df=df_test_ada,
    complementry_data_df=df_test_btc,
    main_token_name="ada",
    complementry_token_name="btc",
    target_type="pct",
    tp=2,
    sl=0.8,
    max_bars=60 * 6,
    atr_period=60 * 6,
)

In [6]:
target_y = "y_combined_1h_2h_4h"
target_x = "X_combined_1h_2h_4h"

### DEFINE MODEL

In [7]:
def get_model(input_shape, num_classes=3):
    inputs = layers.Input(shape=input_shape, name="features")
    x = layers.BatchNormalization(name="batchnorm_input")(inputs)
    x = layers.Dense(
        128, activation="relu", kernel_regularizer=regularizers.l2(1e-4), name="dense_1"
    )(x)
    x = layers.Dropout(0.3, name="dropout_1")(x)
    x = layers.Dense(
        64, activation="relu", kernel_regularizer=regularizers.l2(1e-4), name="dense_2"
    )(x)
    x = layers.Dropout(0.3, name="dropout_2")(x)
    x = layers.Dense(
        32, activation="relu", kernel_regularizer=regularizers.l2(1e-4), name="dense_3"
    )(x)
    x = layers.Dropout(0.2, name="dropout_3")(x)
    outputs = layers.Dense(num_classes, activation="softmax", name="output")(x)
    model = models.Model(inputs=inputs, outputs=outputs, name="mlp_3class")
    model.compile(
        optimizer=optimizers.Adam(learning_rate=0.003),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

In [8]:
def create_model():
    return get_model(input_shape=(250,), num_classes=3)

clf = KerasClassifier(
    model=create_model,
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["precision"],
    epochs=50,
    batch_size=32,
    verbose=0,
)

nn_pipeline = Pipeline([
    ('pca', PCA(n_components=250)),
    ('clf', clf),
])

nn_ada = get_model(input_shape=(train_data_btc[target_x].shape[1],), num_classes=3)
nn_btc = get_model(input_shape=(train_data_btc[target_x].shape[1],), num_classes=3)

es = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True
)


E0000 00:00:1751735788.558026  689860 cuda_executor.cc:1228] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1751735788.559176  689860 gpu_device.cc:2341] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


### TRAIN MODEL ( EXPERIMENT )

In [9]:
nn_pipeline.fit(
    train_data[target_x],
    train_data[target_y],
    callbacks=[es],
    class_weight=CLASS_WEIGHT,
)
nn_pipeline.score(val_data[target_x], val_data[target_y])

ValueError: Pipeline.fit does not accept the callbacks parameter. You can pass parameters to specific steps of your pipeline using the stepname__parameter format, e.g. `Pipeline.fit(X, y, logisticregression__sample_weight=sample_weight)`.

In [None]:
X_train = train_data_btc[target_x].values
y_int = train_data_btc[target_y].values.reshape(-1)
y_ohe = to_categorical(y_int, num_classes=3)

nn_btc.fit(
    X_train,
    y_ohe,
    epochs=25,
    batch_size=32,
    verbose=0,
    callbacks=[es],
    class_weight=CLASS_WEIGHT,
)

y_int_val = val_data_btc[target_y].values.reshape(-1)
y_ohe_val = to_categorical(y_int_val, num_classes=3)
nn_btc.evaluate(val_data_btc[target_x], y_ohe_val)

y_int_test = test_data_btc[target_y].values.reshape(-1)
y_ohe_test = to_categorical(y_int_test, num_classes=3)
print(nn_btc.evaluate(val_data_btc[target_x], y_ohe_test))

[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8015 - loss: 0.8369


[1.2032915353775024, 0.7593032717704773]

In [None]:
X_train = train_data_ada[target_x].values
y_int = train_data_ada[target_y].values.reshape(-1)
y_ohe = to_categorical(y_int, num_classes=3)

nn_ada.fit(
    X_train,
    y_ohe,
    epochs=25,
    batch_size=32,
    verbose=0,
    callbacks=[es],
    class_weight=CLASS_WEIGHT,
)

y_int_val = val_data_ada[target_y].values.reshape(-1)
y_ohe_val = to_categorical(y_int_val, num_classes=3)
nn_ada.evaluate(val_data_ada[target_x], y_ohe_val)

y_int_test = test_data_ada[target_y].values.reshape(-1)
y_ohe_test = to_categorical(y_int_test, num_classes=3)
print(nn_ada.evaluate(val_data_ada[target_x], y_ohe_test))

[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5686 - loss: 1.0928


[1.3256561756134033, 0.5038268566131592]

### EVALUATION

##### GROUPED FEATURE MODEL

In [None]:
break_even_precision = 1/(1+(TAKE_PROFIT/STOP_LOSS))
print(f"MINIMUM PRECISION TO BREAK EVEN (BEFORE FEES) - {break_even_precision}")

MINIMUM PRECISION TO BREAK EVEN (BEFORE FEES) - 0.2857142857142857


In [None]:
position_val_backtest, vectorbt_val_backtest = evaluate_token_strategy(
    model=nn_pipeline,
    data=val_data,
    data_1m=df_val_ada,
    target_x=target_x,
    target_y=target_y,
    val_end_date=TRAIN_END_DATE,
    token_name="ADA",
    classes=(1,2),
    long_proba=LONG_PROBA,
    short_proba=SHORT_PROBA,
    trade_size=1,
    use_proba=True,
    atr_target=False,
    tp = TAKE_PROFIT,
    sl = STOP_LOSS,
    atr_period = 60 * 5,
    tp_pct = TAKE_PROFIT_PCT,
    sl_pct = STOP_LOSS_PCT,
    nn = False
)

overall_winrate = position_val_backtest['metrics']['overall']['trade_metrics']['win_rate']
long_win_rate = position_val_backtest['metrics']['long']['trade_metrics']['win_rate']
short_win_rate = position_val_backtest['metrics']['short']['trade_metrics']['win_rate']

print(f"""
overall winrate => {overall_winrate}
long winrate    => {long_win_rate}
short winrate    => {short_win_rate}
""")

vectorbt_val_backtest.stats()

|   class |    ADA_precision        |
|---------|-------------------------|
|       1 | 0.25 (570 | 637 | 2832) |
|       2 | 0.22 (265 | 681 | 2832) |
Prediction time - 0.29
Start Date => 2025-02-15 00:30:00+00:00, End Date => 2025-04-15 00:00:00+00:00

overall winrate => 0.28811524609843936
long winrate    => 0.29525483304042177
short winrate    => 0.2727272727272727



Start                         2025-02-15 00:01:00+00:00
End                           2025-04-15 00:00:00+00:00
Period                                 59 days 00:00:00
Start Value                                       100.0
End Value                                     99.094886
Total Return [%]                              -0.905114
Benchmark Return [%]                         -20.308387
Max Gross Exposure [%]                         1.027346
Total Fees Paid                                0.458459
Max Drawdown [%]                               0.923145
Max Drawdown Duration                  58 days 15:26:00
Total Trades                                        459
Total Closed Trades                                 458
Total Open Trades                                     1
Open Trade PnL                                  -0.0007
Win Rate [%]                                  30.131004
Best Trade [%]                                 2.430139
Worst Trade [%]                               -2

In [None]:
position_test_backtest, vectorbt_test_backtest = evaluate_token_strategy(
    model=nn_pipeline,
    data=test_data,
    data_1m=df_test_ada,
    target_x=target_x,
    target_y=target_y,
    val_end_date=VAL_END_DATE,
    token_name="ADA",
    classes=(1,2),
    long_proba=LONG_PROBA,
    short_proba=SHORT_PROBA,
    trade_size=1,
    use_proba=True,
    atr_target=False,
    tp = TAKE_PROFIT,
    sl = STOP_LOSS,
    atr_period = 60 * 5,
    tp_pct = TAKE_PROFIT_PCT,
    sl_pct = STOP_LOSS_PCT,
    nn = False
)

overall_winrate = position_test_backtest['metrics']['overall']['trade_metrics']['win_rate']
long_win_rate = position_test_backtest['metrics']['long']['trade_metrics']['win_rate']
short_win_rate = position_test_backtest['metrics']['short']['trade_metrics']['win_rate']
print(f"""
overall winrate => {overall_winrate}
long winrate    => {long_win_rate}
short winrate    => {short_win_rate}
""")

vectorbt_test_backtest.stats()

|   class |    ADA_precision        |
|---------|-------------------------|
|       1 | 0.24 (396 | 586 | 2688) |
|       2 | 0.20 (556 | 533 | 2688) |
Prediction time - 0.35
Start Date => 2025-04-15 00:30:00+00:00, End Date => 2025-06-10 00:00:00+00:00

overall winrate => 0.3280757097791798
long winrate    => 0.3484848484848485
short winrate    => 0.31351351351351353



Start                         2025-04-15 00:01:00+00:00
End                           2025-06-10 00:00:00+00:00
Period                                 56 days 00:00:00
Start Value                                       100.0
End Value                                     99.700923
Total Return [%]                              -0.299077
Benchmark Return [%]                          11.062156
Max Gross Exposure [%]                         1.021882
Total Fees Paid                                0.415562
Max Drawdown [%]                               0.434053
Max Drawdown Duration                  33 days 02:28:00
Total Trades                                        416
Total Closed Trades                                 415
Total Open Trades                                     1
Open Trade PnL                                -0.003827
Win Rate [%]                                  40.240964
Best Trade [%]                                 2.160799
Worst Trade [%]                               -1

##### INDIVIDUAL FEATURE MODEL

In [None]:
position_val_backtest, vectorbt_val_backtest = evaluate_token_strategy(
    model=nn_ada,
    data=val_data_ada,
    data_1m=df_val_ada,
    target_x=target_x,
    target_y=target_y,
    val_end_date=TRAIN_END_DATE,
    token_name="ADA",
    classes=(1,2),
    long_proba=LONG_PROBA,
    short_proba=SHORT_PROBA,
    trade_size=1,
    use_proba=True,
    atr_target=False,
    tp = TAKE_PROFIT,
    sl = STOP_LOSS,
    atr_period = 60 * 5,
    tp_pct = TAKE_PROFIT_PCT,
    sl_pct = STOP_LOSS_PCT,
    nn = True
)

overall_winrate = position_val_backtest['metrics']['overall']['trade_metrics']['win_rate']
long_win_rate = position_val_backtest['metrics']['long']['trade_metrics']['win_rate']
short_win_rate = position_val_backtest['metrics']['short']['trade_metrics']['win_rate']

print(f"""
overall winrate => {overall_winrate}
long winrate    => {long_win_rate}
short winrate    => {short_win_rate}
""")

vectorbt_val_backtest.stats()

[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
|   class |    ADA_precision        |
|---------|-------------------------|
|       1 | 0.24 (161 | 637 | 2832) |
|       2 | 0.18 (363 | 681 | 2832) |
Prediction time - 0.19
Start Date => 2025-02-15 00:30:00+00:00, End Date => 2025-04-15 00:00:00+00:00
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  

overall winrate => 0.26145038167938933
long winrate    => 0.2857142857142857
short winrate    => 0.25068870523415976



Start                         2025-02-15 00:01:00+00:00
End                           2025-04-15 00:00:00+00:00
Period                                 59 days 00:00:00
Start Value                                       100.0
End Value                                     99.656176
Total Return [%]                              -0.343824
Benchmark Return [%]                         -20.308387
Max Gross Exposure [%]                         1.022845
Total Fees Paid                                0.204952
Max Drawdown [%]                                0.44742
Max Drawdown Duration                  54 days 15:00:00
Total Trades                                        205
Total Closed Trades                                 205
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  30.243902
Best Trade [%]                                 2.216323
Worst Trade [%]                               -1

In [None]:
position_test_backtest, vectorbt_test_backtest = evaluate_token_strategy(
    model=nn_ada,
    data=test_data_ada,
    data_1m=df_test_ada,
    target_x=target_x,
    target_y=target_y,
    val_end_date=VAL_END_DATE,
    token_name="ADA",
    classes=(1,2),
    long_proba=LONG_PROBA,
    short_proba=SHORT_PROBA,
    trade_size=1,
    use_proba=True,
    atr_target=False,
    tp = TAKE_PROFIT,
    sl = STOP_LOSS,
    atr_period = 60 * 5,
    tp_pct = TAKE_PROFIT_PCT,
    sl_pct = STOP_LOSS_PCT,
    nn = True
)

overall_winrate = position_test_backtest['metrics']['overall']['trade_metrics']['win_rate']
long_win_rate = position_test_backtest['metrics']['long']['trade_metrics']['win_rate']
short_win_rate = position_test_backtest['metrics']['short']['trade_metrics']['win_rate']
print(f"""
overall winrate => {overall_winrate}
long winrate    => {long_win_rate}
short winrate    => {short_win_rate}
""")

vectorbt_test_backtest.stats()

[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
|   class |    ADA_precision        |
|---------|-------------------------|
|       1 | 0.28 (141 | 586 | 2688) |
|       2 | 0.22 (412 | 533 | 2688) |
Prediction time - 0.21
Start Date => 2025-04-15 00:30:00+00:00, End Date => 2025-06-10 00:00:00+00:00
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

overall winrate => 0.321880650994575
long winrate    => 0.3404255319148936
short winrate    => 0.3155339805825243



Start                         2025-04-15 00:01:00+00:00
End                           2025-06-10 00:00:00+00:00
Period                                 56 days 00:00:00
Start Value                                       100.0
End Value                                     99.913934
Total Return [%]                              -0.086066
Benchmark Return [%]                          11.062156
Max Gross Exposure [%]                         1.019869
Total Fees Paid                                0.197955
Max Drawdown [%]                               0.186897
Max Drawdown Duration                  25 days 00:36:00
Total Trades                                        198
Total Closed Trades                                 198
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  33.333333
Best Trade [%]                                 2.958968
Worst Trade [%]                               -1