### IMPORTS

In [None]:
from dotenv import load_dotenv
import os

load_dotenv()
root = os.environ.get("ROOT_PATH", ".")
os.chdir(root)
from datetime import datetime, timedelta
from data_preprocessing.fetch_data import fetch_ohlcv_to_df
from data_preprocessing.training_helpers import evaluate_token_strategy
from data_preprocessing.training_helpers import create_merged_feature
import tensorflow as tf
from data_preprocessing.training_helpers import create_all_features
from tensorflow.keras import layers, models, optimizers, regularizers
from tensorflow.keras.utils import to_categorical
import warnings
from sklearn.decomposition import PCA
from scikeras.wrappers import KerasClassifier
from sklearn.pipeline import Pipeline
from tensorflow.keras.callbacks import EarlyStopping

warnings.filterwarnings("ignore")

2025-07-05 22:10:56.975259: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-05 22:10:56.979728: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-05 22:10:56.991216: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751733657.010984  633798 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751733657.016804  633798 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1751733657.031573  633798 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

### CONSTATNTS

In [None]:
EXCHANGE = "binance"
TRAIN_START_DATE = "2024-07-01 00:00:00"
TRAIN_END_DATE = "2025-02-15 00:01:00"
SET_OVERLAPPING_DAYS = 20
VAL_START_DATE = (datetime.strptime(TRAIN_END_DATE, "%Y-%m-%d %H:%M:%S") - timedelta(days=SET_OVERLAPPING_DAYS)).strftime("%Y-%m-%d %H:%M:%S")
VAL_END_DATE = "2025-03-15 00:01:00"
TEST_START_DATE = (datetime.strptime(VAL_END_DATE, "%Y-%m-%d %H:%M:%S") - timedelta(days=SET_OVERLAPPING_DAYS)).strftime("%Y-%m-%d %H:%M:%S")
TEST_END_DATE = "2025-06-10 00:01:00"
ALL_START_DATE = "2024-10-20 00:00:00"
ALL_END_DATE = "2025-06-10 00:01:00"
LONG_PROBA = 0.6
SHORT_PROBA = 0.6
TAKE_PROFIT = 2
STOP_LOSS = 0.8
TAKE_PROFIT_PCT = TAKE_PROFIT/100
STOP_LOSS_PCT = STOP_LOSS / 100
CLASS_WEIGHT = { 0:0.3, 1: 1, 2:1 }

### LOAD DATA

In [3]:
df_train_sol = await fetch_ohlcv_to_df("SOL/USDT", TRAIN_START_DATE, TRAIN_END_DATE)
df_val_sol = await fetch_ohlcv_to_df("SOL/USDT", VAL_START_DATE, VAL_END_DATE)
df_test_sol = await fetch_ohlcv_to_df("SOL/USDT", TEST_START_DATE, TEST_END_DATE)

df_train_btc = await fetch_ohlcv_to_df("BTC/USDT", TRAIN_START_DATE, TRAIN_END_DATE)
df_val_btc = await fetch_ohlcv_to_df("BTC/USDT", VAL_START_DATE, VAL_END_DATE)
df_test_btc = await fetch_ohlcv_to_df("BTC/USDT", TEST_START_DATE, TEST_END_DATE)

### PREPROCESS DATA

In [4]:
train_data_btc = create_all_features(df=df_train_btc, token="btc", target_type="pct", tp=2, sl=0.8, max_bars=60*6, atr_period=60*6)
val_data_btc = create_all_features(df=df_val_btc, token="btc", target_type="pct", tp=2, sl=0.8, max_bars=60*6, atr_period=60*6)
test_data_btc = create_all_features(df=df_test_btc, token="btc", target_type="pct", tp=2, sl=0.8, max_bars=60*6, atr_period=60*6)

train_data_sol = create_all_features(df=df_train_sol, token="sol", target_type="pct", tp=2, sl=0.8, max_bars=60*6, atr_period=60*6)
val_data_sol = create_all_features(df=df_val_sol, token="sol", target_type="pct", tp=2, sl=0.8, max_bars=60*6, atr_period=60*6)
test_data_sol = create_all_features(df=df_test_sol, token="sol", target_type="pct", tp=2, sl=0.8, max_bars=60*6, atr_period=60*6)

In [5]:
train_data = create_merged_feature(
    main_feature_df=df_train_sol,
    complementry_data_df=df_train_btc,
    main_token_name="sol",
    complementry_token_name="btc",
    target_type="pct",
    tp=2,
    sl=0.8,
    max_bars=60 * 6,
    atr_period=60 * 6,
)

val_data = create_merged_feature(
    main_feature_df=df_val_sol,
    complementry_data_df=df_val_btc,
    main_token_name="sol",
    complementry_token_name="btc",
    target_type="pct",
    tp=2,
    sl=0.8,
    max_bars=60 * 6,
    atr_period=60 * 6,
)

test_data = create_merged_feature(
    main_feature_df=df_test_sol,
    complementry_data_df=df_test_btc,
    main_token_name="sol",
    complementry_token_name="btc",
    target_type="pct",
    tp=2,
    sl=0.8,
    max_bars=60 * 6,
    atr_period=60 * 6,
)

In [6]:
target_y = "y_combined_1h_2h_4h"
target_x = "X_combined_1h_2h_4h"

### DEFINE MODEL

In [7]:
def get_model(input_shape, num_classes=3):
    inputs = layers.Input(shape=input_shape, name="features")
    x = layers.BatchNormalization(name="batchnorm_input")(inputs)
    x = layers.Dense(
        128, activation="relu", kernel_regularizer=regularizers.l2(1e-4), name="dense_1"
    )(x)
    x = layers.Dropout(0.3, name="dropout_1")(x)
    x = layers.Dense(
        64, activation="relu", kernel_regularizer=regularizers.l2(1e-4), name="dense_2"
    )(x)
    x = layers.Dropout(0.3, name="dropout_2")(x)
    x = layers.Dense(
        32, activation="relu", kernel_regularizer=regularizers.l2(1e-4), name="dense_3"
    )(x)
    x = layers.Dropout(0.2, name="dropout_3")(x)
    outputs = layers.Dense(num_classes, activation="softmax", name="output")(x)
    model = models.Model(inputs=inputs, outputs=outputs, name="mlp_3class")
    model.compile(
        optimizer=optimizers.Adam(learning_rate=0.003),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

In [None]:
def create_model():
    return get_model(input_shape=(250,), num_classes=3)

clf = KerasClassifier(
    model=create_model,
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["precision"],
    epochs=50,
    batch_size=32,
    verbose=0,
)

nn_pipeline = Pipeline([
    ('pca', PCA(n_components=250)),
    ('clf', clf),
])

nn_sol = get_model(input_shape=(train_data_btc[target_x].shape[1],), num_classes=3)
nn_btc = get_model(input_shape=(train_data_btc[target_x].shape[1],), num_classes=3)

es = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True
)


E0000 00:00:1751733836.817643  633798 cuda_executor.cc:1228] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1751733836.818536  633798 gpu_device.cc:2341] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


### TRAIN MODEL ( EXPERIMENT )

In [None]:
nn_pipeline.fit(
    train_data[target_x],
    train_data[target_y],
)
nn_pipeline.score(val_data[target_x], val_data[target_y])

0.588545790446028

In [None]:
X_train = train_data_btc[target_x].values
y_int = train_data_btc[target_y].values.reshape(-1)
y_ohe = to_categorical(y_int, num_classes=3)

nn_btc.fit(
    X_train,
    y_ohe,
    epochs=25,
    batch_size=32,
    verbose=0,
    callbacks=[es],
    class_weight=CLASS_WEIGHT,
)

y_int_val = val_data_btc[target_y].values.reshape(-1)
y_ohe_val = to_categorical(y_int_val, num_classes=3)
print(nn_btc.evaluate(val_data_btc[target_x], y_ohe_val))

y_int_test = test_data_btc[target_y].values.reshape(-1)
y_ohe_test = to_categorical(y_int_test, num_classes=3)
print(nn_btc.evaluate(test_data_btc[target_x], y_ohe_test))

[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8069 - loss: 0.8403


[1.240455985069275, 0.7635260224342346]

In [None]:
X_train = train_data_sol[target_x].values
y_int = train_data_sol[target_y].values.reshape(-1)
y_ohe = to_categorical(y_int, num_classes=3)

nn_sol.fit(
    X_train,
    y_ohe,
    epochs=25,
    batch_size=32,
    verbose=0,
    callbacks=[es],
    class_weight=CLASS_WEIGHT,
)

y_int_val = val_data_sol[target_y].values.reshape(-1)
y_ohe_val = to_categorical(y_int_val, num_classes=3)
print(nn_sol.evaluate(val_data_sol[target_x], y_ohe_val))

y_int_test = test_data_sol[target_y].values.reshape(-1)
y_ohe_test = to_categorical(y_int_test, num_classes=3)
print(nn_sol.evaluate(test_data_sol[target_x], y_ohe_test))

[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6124 - loss: 1.2367


[1.5435606241226196, 0.5420955419540405]

### EVALUATION

##### GROUPED FEATURE MODEL

In [12]:
break_even_precision = 1/(1+(TAKE_PROFIT/STOP_LOSS))
print(f"MINIMUM PRECISION TO BREAK EVEN (BEFORE FEES) - {break_even_precision}")

MINIMUM PRECISION TO BREAK EVEN (BEFORE FEES) - 0.2857142857142857


In [13]:
position_val_backtest, vectorbt_val_backtest = evaluate_token_strategy(
    model=nn_pipeline,
    data=val_data,
    data_1m=df_val_sol,
    target_x=target_x,
    target_y=target_y,
    val_end_date=TRAIN_END_DATE,
    token_name="SOL",
    classes=(1,2),
    long_proba=LONG_PROBA,
    short_proba=SHORT_PROBA,
    trade_size=1,
    use_proba=True,
    atr_target=False,
    tp = TAKE_PROFIT,
    sl = STOP_LOSS,
    atr_period = 60 * 5,
    tp_pct = TAKE_PROFIT_PCT,
    sl_pct = STOP_LOSS_PCT,
    nn = False
)

overall_winrate = position_val_backtest['metrics']['overall']['trade_metrics']['win_rate']
long_win_rate = position_val_backtest['metrics']['long']['trade_metrics']['win_rate']
short_win_rate = position_val_backtest['metrics']['short']['trade_metrics']['win_rate']

print(f"""
overall winrate => {overall_winrate}
long winrate    => {long_win_rate}
short winrate    => {short_win_rate}
""")

vectorbt_val_backtest.stats()

|   class |    SOL_precision        |
|---------|-------------------------|
|       1 | 0.24 (290 | 609 | 2832) |
|       2 | 0.28 (130 | 689 | 2832) |
Prediction time - 0.15
Start Date => 2025-02-15 00:30:00+00:00, End Date => 2025-04-15 00:00:00+00:00

overall winrate => 0.29523809523809524
long winrate    => 0.2689655172413793
short winrate    => 0.35384615384615387



Start                         2025-02-15 00:01:00+00:00
End                           2025-04-15 00:00:00+00:00
Period                                 59 days 00:00:00
Start Value                                       100.0
End Value                                     99.689043
Total Return [%]                              -0.310957
Benchmark Return [%]                         -34.827274
Max Gross Exposure [%]                         1.023055
Total Fees Paid                                 0.24985
Max Drawdown [%]                               0.513929
Max Drawdown Duration                  48 days 09:45:00
Total Trades                                        250
Total Closed Trades                                 250
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                       31.2
Best Trade [%]                                 3.332987
Worst Trade [%]                               -2

In [14]:
position_test_backtest, vectorbt_test_backtest = evaluate_token_strategy(
    model=nn_pipeline,
    data=test_data,
    data_1m=df_test_sol,
    target_x=target_x,
    target_y=target_y,
    val_end_date=VAL_END_DATE,
    token_name="SOL",
    classes=(1,2),
    long_proba=LONG_PROBA,
    short_proba=SHORT_PROBA,
    trade_size=1,
    use_proba=True,
    atr_target=False,
    tp = TAKE_PROFIT,
    sl = STOP_LOSS,
    atr_period = 60 * 5,
    tp_pct = TAKE_PROFIT_PCT,
    sl_pct = STOP_LOSS_PCT,
    nn = False
)

overall_winrate = position_test_backtest['metrics']['overall']['trade_metrics']['win_rate']
long_win_rate = position_test_backtest['metrics']['long']['trade_metrics']['win_rate']
short_win_rate = position_test_backtest['metrics']['short']['trade_metrics']['win_rate']
print(f"""
overall winrate => {overall_winrate}
long winrate    => {long_win_rate}
short winrate    => {short_win_rate}
""")

vectorbt_test_backtest.stats()

|   class |    SOL_precision        |
|---------|-------------------------|
|       1 | 0.21 (407 | 564 | 2688) |
|       2 | 0.15 (121 | 468 | 2688) |
Prediction time - 0.20
Start Date => 2025-04-15 00:30:00+00:00, End Date => 2025-06-10 00:00:00+00:00

overall winrate => 0.2727272727272727
long winrate    => 0.28746928746928746
short winrate    => 0.2231404958677686



Start                         2025-04-15 00:01:00+00:00
End                           2025-06-10 00:00:00+00:00
Period                                 56 days 00:00:00
Start Value                                       100.0
End Value                                     99.819329
Total Return [%]                              -0.180671
Benchmark Return [%]                          24.599799
Max Gross Exposure [%]                         1.020223
Total Fees Paid                                0.219114
Max Drawdown [%]                                0.30369
Max Drawdown Duration                  28 days 12:07:00
Total Trades                                        219
Total Closed Trades                                 219
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  35.616438
Best Trade [%]                                 2.676052
Worst Trade [%]                               -1

##### INDIVIDUAL FEATURE MODEL

In [15]:
position_val_backtest, vectorbt_val_backtest = evaluate_token_strategy(
    model=nn_sol,
    data=val_data_sol,
    data_1m=df_val_sol,
    target_x=target_x,
    target_y=target_y,
    val_end_date=TRAIN_END_DATE,
    token_name="SOL",
    classes=(1,2),
    long_proba=LONG_PROBA,
    short_proba=SHORT_PROBA,
    trade_size=1,
    use_proba=True,
    atr_target=False,
    tp = TAKE_PROFIT,
    sl = STOP_LOSS,
    atr_period = 60 * 5,
    tp_pct = TAKE_PROFIT_PCT,
    sl_pct = STOP_LOSS_PCT,
    nn = True
)

overall_winrate = position_val_backtest['metrics']['overall']['trade_metrics']['win_rate']
long_win_rate = position_val_backtest['metrics']['long']['trade_metrics']['win_rate']
short_win_rate = position_val_backtest['metrics']['short']['trade_metrics']['win_rate']

print(f"""
overall winrate => {overall_winrate}
long winrate    => {long_win_rate}
short winrate    => {short_win_rate}
""")

vectorbt_val_backtest.stats()

[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
|   class |    SOL_precision        |
|---------|-------------------------|
|       1 | 0.15 (108 | 609 | 2832) |
|       2 | 0.16 (32 | 689 | 2832)  |
Prediction time - 0.05
Start Date => 2025-02-15 00:30:00+00:00, End Date => 2025-04-15 00:00:00+00:00
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  

overall winrate => 0.2028985507246377
long winrate    => 0.20754716981132076
short winrate    => 0.1875



Start                         2025-02-15 00:01:00+00:00
End                           2025-04-15 00:00:00+00:00
Period                                 59 days 00:00:00
Start Value                                       100.0
End Value                                     99.810922
Total Return [%]                              -0.189078
Benchmark Return [%]                         -34.827274
Max Gross Exposure [%]                         1.021277
Total Fees Paid                                0.085483
Max Drawdown [%]                               0.217386
Max Drawdown Duration                  54 days 21:31:00
Total Trades                                         86
Total Closed Trades                                  85
Total Open Trades                                     1
Open Trade PnL                                -0.000082
Win Rate [%]                                  27.058824
Best Trade [%]                                 2.167312
Worst Trade [%]                               -1

In [16]:
position_test_backtest, vectorbt_test_backtest = evaluate_token_strategy(
    model=nn_sol,
    data=test_data_sol,
    data_1m=df_test_sol,
    target_x=target_x,
    target_y=target_y,
    val_end_date=VAL_END_DATE,
    token_name="SOL",
    classes=(1,2),
    long_proba=LONG_PROBA,
    short_proba=SHORT_PROBA,
    trade_size=1,
    use_proba=True,
    atr_target=False,
    tp = TAKE_PROFIT,
    sl = STOP_LOSS,
    atr_period = 60 * 5,
    tp_pct = TAKE_PROFIT_PCT,
    sl_pct = STOP_LOSS_PCT,
    nn = True
)

overall_winrate = position_test_backtest['metrics']['overall']['trade_metrics']['win_rate']
long_win_rate = position_test_backtest['metrics']['long']['trade_metrics']['win_rate']
short_win_rate = position_test_backtest['metrics']['short']['trade_metrics']['win_rate']
print(f"""
overall winrate => {overall_winrate}
long winrate    => {long_win_rate}
short winrate    => {short_win_rate}
""")

vectorbt_test_backtest.stats()

[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
|   class |    SOL_precision        |
|---------|-------------------------|
|       1 | 0.23 (115 | 564 | 2688) |
|       2 | 0.11 (123 | 468 | 2688) |
Prediction time - 0.09
Start Date => 2025-04-15 00:30:00+00:00, End Date => 2025-06-10 00:00:00+00:00
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step

overall winrate => 0.2773109243697479
long winrate    => 0.3130434782608696
short winrate    => 0.24390243902439024



Start                         2025-04-15 00:01:00+00:00
End                           2025-06-10 00:00:00+00:00
Period                                 56 days 00:00:00
Start Value                                       100.0
End Value                                     99.729503
Total Return [%]                              -0.270497
Benchmark Return [%]                          24.599799
Max Gross Exposure [%]                         1.022727
Total Fees Paid                                0.109106
Max Drawdown [%]                               0.323153
Max Drawdown Duration                  55 days 14:25:00
Total Trades                                        109
Total Closed Trades                                 109
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  24.770642
Best Trade [%]                                 2.629401
Worst Trade [%]                               -1