### IMPORTS

In [None]:
# Environment bootstrap: load variables from a .env file, then move into the
# project root before the project-local imports below are executed.
from dotenv import load_dotenv
import os

load_dotenv()
# ROOT_PATH falls back to the current directory when the variable is not set.
# NOTE(review): the chdir presumably exists so that the local
# `data_preprocessing` package resolves from the project root — confirm, and
# keep it ahead of those imports.
root = os.environ.get("ROOT_PATH", ".")
os.chdir(root)
from datetime import datetime, timedelta
from data_preprocessing.fetch_data import fetch_ohlcv_to_df
from data_preprocessing.training_helpers import evaluate_token_strategy
from data_preprocessing.training_helpers import create_merged_feature
import tensorflow as tf
from data_preprocessing.training_helpers import create_all_features
from tensorflow.keras import layers, models, optimizers, regularizers
from tensorflow.keras.utils import to_categorical
import warnings
from sklearn.decomposition import PCA
from scikeras.wrappers import KerasClassifier
from sklearn.pipeline import Pipeline
from tensorflow.keras.callbacks import EarlyStopping

# Suppresses all warnings for the rest of the session, including ones that may
# point at real problems (e.g. a callback monitoring a metric that is missing).
warnings.filterwarnings("ignore")

### CONSTANTS

In [None]:
# --- Data source --------------------------------------------------------------
EXCHANGE = "binance"

# --- Split windows ------------------------------------------------------------
# Every timestamp is a string in this format.
_DATE_FMT = "%Y-%m-%d %H:%M:%S"

# The validation and test windows begin this many days before the previous
# split ends.
SET_OVERLAPPING_DAYS = 20


def _start_with_overlap(previous_end):
    """Return the timestamp SET_OVERLAPPING_DAYS before `previous_end`, as a string."""
    shifted = datetime.strptime(previous_end, _DATE_FMT) - timedelta(
        days=SET_OVERLAPPING_DAYS
    )
    return shifted.strftime(_DATE_FMT)


TRAIN_START_DATE = "2024-07-01 00:00:00"
TRAIN_END_DATE = "2025-02-15 00:01:00"

VAL_START_DATE = _start_with_overlap(TRAIN_END_DATE)
VAL_END_DATE = "2025-03-15 00:01:00"

TEST_START_DATE = _start_with_overlap(VAL_END_DATE)
TEST_END_DATE = "2025-06-10 00:01:00"

ALL_START_DATE = "2024-10-20 00:00:00"
ALL_END_DATE = "2025-06-10 00:01:00"

# --- Trading rule -------------------------------------------------------------
# Probability thresholds handed to the backtest for long and short entries.
LONG_PROBA = 0.6
SHORT_PROBA = 0.6

# Take-profit / stop-loss expressed in percent, plus the same values as fractions.
TAKE_PROFIT = 2
STOP_LOSS = 0.8
TAKE_PROFIT_PCT = TAKE_PROFIT / 100
STOP_LOSS_PCT = STOP_LOSS / 100

# --- Training -----------------------------------------------------------------
# Per-class loss weights used when fitting the networks; class 0 is down-weighted.
# NOTE(review): presumably class 0 is "no trade" and 1/2 are long/short — confirm.
CLASS_WEIGHT = {0: 0.3, 1: 1, 2: 1}

### LOAD DATA

In [None]:
# Fetch OHLCV frames for the main token (TRX/USDT), one per split window.
# The calls are awaited one after another at the top level of the cell.
# NOTE(review): EXCHANGE is not passed to fetch_ohlcv_to_df here; presumably the
# helper has its own default exchange — confirm.
df_train_trx = await fetch_ohlcv_to_df("TRX/USDT", TRAIN_START_DATE, TRAIN_END_DATE)
df_val_trx = await fetch_ohlcv_to_df("TRX/USDT", VAL_START_DATE, VAL_END_DATE)
df_test_trx = await fetch_ohlcv_to_df("TRX/USDT", TEST_START_DATE, TEST_END_DATE)

# Same three windows for the complementary token (BTC/USDT).
df_train_btc = await fetch_ohlcv_to_df("BTC/USDT", TRAIN_START_DATE, TRAIN_END_DATE)
df_val_btc = await fetch_ohlcv_to_df("BTC/USDT", VAL_START_DATE, VAL_END_DATE)
df_test_btc = await fetch_ohlcv_to_df("BTC/USDT", TEST_START_DATE, TEST_END_DATE)

### PREPROCESS DATA

In [None]:
# Arguments shared by every per-token feature build. tp/sl come from the
# configuration constants so that the targets built here always use the same
# take-profit / stop-loss as the backtests, instead of separately hard-coded
# copies that can drift apart when the constants are edited.
feature_kwargs = dict(
    target_type="pct",
    tp=TAKE_PROFIT,
    sl=STOP_LOSS,
    max_bars=60 * 6,
    atr_period=60 * 6,
)

# BTC features for the train / validation / test windows.
train_data_btc = create_all_features(df=df_train_btc, token="btc", **feature_kwargs)
val_data_btc = create_all_features(df=df_val_btc, token="btc", **feature_kwargs)
test_data_btc = create_all_features(df=df_test_btc, token="btc", **feature_kwargs)

# TRX features for the same three windows.
train_data_trx = create_all_features(df=df_train_trx, token="trx", **feature_kwargs)
val_data_trx = create_all_features(df=df_val_trx, token="trx", **feature_kwargs)
test_data_trx = create_all_features(df=df_test_trx, token="trx", **feature_kwargs)

In [None]:
# Arguments shared by the three merged-feature builds (TRX as the main token,
# BTC as the complementary one). tp/sl are taken from the configuration
# constants so the targets stay in sync with the backtest settings rather than
# relying on separately hard-coded copies of the same numbers.
# NOTE(review): `main_feature_df` receives the raw TRX OHLCV frame, not the
# per-token feature output — presumably the helper builds features itself; confirm.
merged_feature_kwargs = dict(
    main_token_name="trx",
    complementry_token_name="btc",
    target_type="pct",
    tp=TAKE_PROFIT,
    sl=STOP_LOSS,
    max_bars=60 * 6,
    atr_period=60 * 6,
)

train_data = create_merged_feature(
    main_feature_df=df_train_trx,
    complementry_data_df=df_train_btc,
    **merged_feature_kwargs,
)

val_data = create_merged_feature(
    main_feature_df=df_val_trx,
    complementry_data_df=df_val_btc,
    **merged_feature_kwargs,
)

test_data = create_merged_feature(
    main_feature_df=df_test_trx,
    complementry_data_df=df_test_btc,
    **merged_feature_kwargs,
)

In [None]:
# Keys used throughout the notebook to select the feature matrix (X) and the
# matching target (y) from each prepared dataset.
target_x = "X_combined_1h_2h_4h"
target_y = "y_combined_1h_2h_4h"

### DEFINE MODEL

In [None]:
def get_model(input_shape, num_classes=3):
    """Build and compile the feed-forward classifier used in this notebook.

    The network is: input -> batch normalisation -> three (Dense + Dropout)
    stages of 128, 64 and 32 ReLU units -> softmax output.

    Args:
        input_shape: Shape tuple of one sample, forwarded to `layers.Input`.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A Keras model named "mlp_3class", compiled with Adam (learning rate
        0.003), categorical cross-entropy loss and the accuracy metric.
    """
    # (units, dropout rate) of each hidden stage, in order.
    hidden_stages = ((128, 0.3), (64, 0.3), (32, 0.2))

    features = layers.Input(shape=input_shape, name="features")
    hidden = layers.BatchNormalization(name="batchnorm_input")(features)

    for stage, (units, drop_rate) in enumerate(hidden_stages, start=1):
        hidden = layers.Dense(
            units,
            activation="relu",
            kernel_regularizer=regularizers.l2(1e-4),
            name=f"dense_{stage}",
        )(hidden)
        hidden = layers.Dropout(drop_rate, name=f"dropout_{stage}")(hidden)

    probabilities = layers.Dense(
        num_classes, activation="softmax", name="output"
    )(hidden)

    network = models.Model(inputs=features, outputs=probabilities, name="mlp_3class")
    network.compile(
        optimizer=optimizers.Adam(learning_rate=0.003),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return network

In [None]:
# Number of principal components kept by the pipeline. It is also the width of
# the network input behind the PCA step, so both are derived from this value.
PCA_COMPONENTS = 250


def create_model():
    """Build the compiled MLP for the PCA pipeline (input width = PCA_COMPONENTS)."""
    return get_model(input_shape=(PCA_COMPONENTS,), num_classes=3)


# NOTE(review): get_model() already returns a compiled model (Adam, accuracy).
# Whether SciKeras still applies the `optimizer` / `metrics` given here should
# be confirmed against the SciKeras documentation.
clf = KerasClassifier(
    model=create_model,
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["precision"],
    epochs=50,
    batch_size=32,
    verbose=0,
)

# Merged-feature model: PCA down to PCA_COMPONENTS columns, then the classifier.
nn_pipeline = Pipeline([
    ('pca', PCA(n_components=PCA_COMPONENTS)),
    ('clf', clf),
])

# Per-token models, each sized from its OWN training feature matrix.
# (nn_trx was previously sized from the BTC features, which only works while
# both tokens happen to produce the same number of columns.)
nn_trx = get_model(input_shape=(train_data_trx[target_x].shape[1],), num_classes=3)
nn_btc = get_model(input_shape=(train_data_btc[target_x].shape[1],), num_classes=3)

# Early stopping on validation loss. "val_loss" is only reported when fit() is
# given validation_data or validation_split; otherwise this callback cannot act.
es = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True
)


### TRAIN MODEL ( EXPERIMENT )

In [None]:
# Fit PCA + classifier on the merged training features, then show the
# pipeline's score on the merged validation features as the cell output.
nn_pipeline.fit(train_data[target_x], train_data[target_y])
nn_pipeline.score(val_data[target_x], val_data[target_y])

0.6661175178473366

In [None]:
# Training set: feature matrix plus one-hot targets (the network is compiled
# with categorical cross-entropy).
X_train = train_data_btc[target_x].values
y_int = train_data_btc[target_y].values.reshape(-1)
y_ohe = to_categorical(y_int, num_classes=3)

# The validation set is prepared BEFORE fitting so it can drive early stopping.
# NOTE(review): the validation window starts SET_OVERLAPPING_DAYS before
# TRAIN_END_DATE; if the feature builder does not drop that overlap, some of
# these rows also appear in the training period — confirm.
X_val = val_data_btc[target_x].values
y_int_val = val_data_btc[target_y].values.reshape(-1)
y_ohe_val = to_categorical(y_int_val, num_classes=3)

nn_btc.fit(
    X_train,
    y_ohe,
    epochs=25,
    batch_size=32,
    verbose=0,
    # `es` monitors "val_loss". Without validation data that metric is never
    # produced, so early stopping and restore_best_weights would do nothing.
    validation_data=(X_val, y_ohe_val),
    callbacks=[es],
    class_weight=CLASS_WEIGHT,
)

# [loss, accuracy] on the validation window.
print(nn_btc.evaluate(X_val, y_ohe_val))

# [loss, accuracy] on the held-out test window.
X_test = test_data_btc[target_x].values
y_int_test = test_data_btc[target_y].values.reshape(-1)
y_ohe_test = to_categorical(y_int_test, num_classes=3)
print(nn_btc.evaluate(X_test, y_ohe_test))

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8742 - loss: 0.5659 


[1.091492772102356, 0.7924217581748962]

In [None]:
# Training set: feature matrix plus one-hot targets (the network is compiled
# with categorical cross-entropy).
X_train = train_data_trx[target_x].values
y_int = train_data_trx[target_y].values.reshape(-1)
y_ohe = to_categorical(y_int, num_classes=3)

# The validation set is prepared BEFORE fitting so it can drive early stopping.
# NOTE(review): the validation window starts SET_OVERLAPPING_DAYS before
# TRAIN_END_DATE; if the feature builder does not drop that overlap, some of
# these rows also appear in the training period — confirm.
X_val = val_data_trx[target_x].values
y_int_val = val_data_trx[target_y].values.reshape(-1)
y_ohe_val = to_categorical(y_int_val, num_classes=3)

nn_trx.fit(
    X_train,
    y_ohe,
    epochs=25,
    batch_size=32,
    verbose=0,
    # `es` monitors "val_loss". Without validation data that metric is never
    # produced, so early stopping and restore_best_weights would do nothing.
    validation_data=(X_val, y_ohe_val),
    callbacks=[es],
    class_weight=CLASS_WEIGHT,
)

# [loss, accuracy] on the validation window.
print(nn_trx.evaluate(X_val, y_ohe_val))

# [loss, accuracy] on the held-out test window.
X_test = test_data_trx[target_x].values
y_int_test = test_data_trx[target_y].values.reshape(-1)
y_ohe_test = to_categorical(y_int_test, num_classes=3)
print(nn_trx.evaluate(X_test, y_ohe_test))

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7132 - loss: 0.8610 


[1.147325873374939, 0.6507413387298584]

### EVALUATION

##### GROUPED FEATURE MODEL

In [None]:
# Win rate at which a fixed take-profit / stop-loss rule breaks even before
# fees: p * TAKE_PROFIT = (1 - p) * STOP_LOSS, i.e. p = 1 / (1 + TAKE_PROFIT / STOP_LOSS).
break_even_precision = 1 / (1 + TAKE_PROFIT / STOP_LOSS)
print(f"MINIMUM PRECISION TO BREAK EVEN (BEFORE FEES) - {break_even_precision}")

MINIMUM PRECISION TO BREAK EVEN (BEFORE FEES) - 0.27586206896551724


In [None]:
# Backtest the merged-feature pipeline on the validation window.
# NOTE(review): `val_end_date` receives the end of the PREVIOUS split
# (TRAIN_END_DATE); presumably the helper uses it as the cutoff from which
# evaluation starts — confirm in evaluate_token_strategy.
position_val_backtest, vectorbt_val_backtest = evaluate_token_strategy(
    model=nn_pipeline,
    data=val_data,
    data_1m=df_val_trx,
    target_x=target_x,
    target_y=target_y,
    val_end_date=TRAIN_END_DATE,
    token_name="TRX",
    classes=(1, 2),
    long_proba=LONG_PROBA,
    short_proba=SHORT_PROBA,
    trade_size=1,
    use_proba=True,
    atr_target=False,
    tp=TAKE_PROFIT,
    sl=STOP_LOSS,
    atr_period=60 * 5,
    tp_pct=TAKE_PROFIT_PCT,
    sl_pct=STOP_LOSS_PCT,
    nn=False,
)

# Win rate of each trade side, read from the position-level report.
overall_winrate, long_win_rate, short_win_rate = (
    position_val_backtest["metrics"][side]["trade_metrics"]["win_rate"]
    for side in ("overall", "long", "short")
)

print(f"""
overall winrate => {overall_winrate}
long winrate    => {long_win_rate}
short winrate    => {short_win_rate}
""")

# Portfolio-level statistics are the cell output.
vectorbt_val_backtest.stats()

|   class |    BTC_precision       |
|---------|------------------------|
|       1 | 0.09 (33 | 208 | 1344) |
|       2 | 0.26 (38 | 269 | 1344) |
Prediction time - 0.05
Start Date => 2025-02-15 00:30:00+00:00, End Date => 2025-03-15 00:00:00+00:00

overall winrate => 0.19718309859154928
long winrate    => 0.12121212121212122
short winrate    => 0.2631578947368421



Start                         2025-02-15 00:01:00+00:00
End                           2025-03-15 00:00:00+00:00
Period                                 28 days 00:00:00
Start Value                                       100.0
End Value                                     99.767538
Total Return [%]                              -0.232462
Benchmark Return [%]                          -29.81089
Max Gross Exposure [%]                         1.020448
Total Fees Paid                                0.043976
Max Drawdown [%]                               0.233207
Max Drawdown Duration                  25 days 10:55:00
Total Trades                                         44
Total Closed Trades                                  44
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  15.909091
Best Trade [%]                                 2.020386
Worst Trade [%]                               -1

In [None]:
# Backtest the merged-feature pipeline on the test window.
# NOTE(review): `val_end_date` receives the end of the PREVIOUS split
# (VAL_END_DATE); presumably the helper uses it as the cutoff from which
# evaluation starts — confirm in evaluate_token_strategy.
position_test_backtest, vectorbt_test_backtest = evaluate_token_strategy(
    model=nn_pipeline,
    data=test_data,
    data_1m=df_test_trx,
    target_x=target_x,
    target_y=target_y,
    val_end_date=VAL_END_DATE,
    token_name="TRX",
    classes=(1, 2),
    long_proba=LONG_PROBA,
    short_proba=SHORT_PROBA,
    trade_size=1,
    use_proba=True,
    atr_target=False,
    tp=TAKE_PROFIT,
    sl=STOP_LOSS,
    atr_period=60 * 5,
    tp_pct=TAKE_PROFIT_PCT,
    sl_pct=STOP_LOSS_PCT,
    nn=False,
)

# Win rate of each trade side, read from the position-level report.
overall_winrate, long_win_rate, short_win_rate = (
    position_test_backtest["metrics"][side]["trade_metrics"]["win_rate"]
    for side in ("overall", "long", "short")
)
print(f"""
overall winrate => {overall_winrate}
long winrate    => {long_win_rate}
short winrate    => {short_win_rate}
""")

# Portfolio-level statistics are the cell output.
vectorbt_test_backtest.stats()

|   class |    BTC_precision        |
|---------|-------------------------|
|       1 | 0.12 (52 | 633 | 4176)  |
|       2 | 0.10 (159 | 542 | 4176) |
Prediction time - 0.05
Start Date => 2025-03-15 00:30:00+00:00, End Date => 2025-06-10 00:00:00+00:00

overall winrate => 0.24170616113744076
long winrate    => 0.19230769230769232
short winrate    => 0.2578616352201258



Start                         2025-03-15 00:01:00+00:00
End                           2025-06-10 00:00:00+00:00
Period                                 87 days 00:00:00
Start Value                                       100.0
End Value                                     99.609413
Total Return [%]                              -0.390587
Benchmark Return [%]                           40.36924
Max Gross Exposure [%]                         1.022657
Total Fees Paid                                0.113104
Max Drawdown [%]                               0.433094
Max Drawdown Duration                  79 days 00:22:00
Total Trades                                        113
Total Closed Trades                                 113
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  22.123894
Best Trade [%]                                 2.545638
Worst Trade [%]                               -1

##### INDIVIDUAL FEATURE MODEL

In [None]:
# Backtest the TRX-only network on the validation window (nn=True marks the
# model as a raw Keras network rather than the scikit-learn pipeline).
# NOTE(review): `val_end_date` receives the end of the PREVIOUS split
# (TRAIN_END_DATE); presumably the helper uses it as the cutoff from which
# evaluation starts — confirm in evaluate_token_strategy.
position_val_backtest, vectorbt_val_backtest = evaluate_token_strategy(
    model=nn_trx,
    data=val_data_trx,
    data_1m=df_val_trx,
    target_x=target_x,
    target_y=target_y,
    val_end_date=TRAIN_END_DATE,
    token_name="TRX",
    classes=(1, 2),
    long_proba=LONG_PROBA,
    short_proba=SHORT_PROBA,
    trade_size=1,
    use_proba=True,
    atr_target=False,
    tp=TAKE_PROFIT,
    sl=STOP_LOSS,
    atr_period=60 * 5,
    tp_pct=TAKE_PROFIT_PCT,
    sl_pct=STOP_LOSS_PCT,
    nn=True,
)

# Win rate of each trade side, read from the position-level report.
overall_winrate, long_win_rate, short_win_rate = (
    position_val_backtest["metrics"][side]["trade_metrics"]["win_rate"]
    for side in ("overall", "long", "short")
)

print(f"""
overall winrate => {overall_winrate}
long winrate    => {long_win_rate}
short winrate    => {short_win_rate}
""")

# Portfolio-level statistics are the cell output.
vectorbt_val_backtest.stats()

[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
|   class |    BTC_precision       |
|---------|------------------------|
|       1 | 0.00 (3 | 208 | 1344)  |
|       2 | 0.33 (3 | 269 | 1344)  |
Prediction time - 0.00
Start Date => 2025-02-15 00:30:00+00:00, End Date => 2025-03-15 00:00:00+00:00
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 

overall winrate => 0.16666666666666666
long winrate    => 0.0
short winrate    => 0.3333333333333333



Start                         2025-02-15 00:01:00+00:00
End                           2025-03-15 00:00:00+00:00
Period                                 28 days 00:00:00
Start Value                                       100.0
End Value                                     99.979481
Total Return [%]                              -0.020519
Benchmark Return [%]                          -29.81089
Max Gross Exposure [%]                         1.004569
Total Fees Paid                                0.004989
Max Drawdown [%]                               0.028557
Max Drawdown Duration                  24 days 17:53:00
Total Trades                                          5
Total Closed Trades                                   5
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                       20.0
Best Trade [%]                                 1.750352
Worst Trade [%]                               -1

In [None]:
# Backtest the TRX-only network on the test window (nn=True marks the model as
# a raw Keras network rather than the scikit-learn pipeline).
# NOTE(review): `val_end_date` receives the end of the PREVIOUS split
# (VAL_END_DATE); presumably the helper uses it as the cutoff from which
# evaluation starts — confirm in evaluate_token_strategy.
position_test_backtest, vectorbt_test_backtest = evaluate_token_strategy(
    model=nn_trx,
    data=test_data_trx,
    data_1m=df_test_trx,
    target_x=target_x,
    target_y=target_y,
    val_end_date=VAL_END_DATE,
    token_name="TRX",
    classes=(1, 2),
    long_proba=LONG_PROBA,
    short_proba=SHORT_PROBA,
    trade_size=1,
    use_proba=True,
    atr_target=False,
    tp=TAKE_PROFIT,
    sl=STOP_LOSS,
    atr_period=60 * 5,
    tp_pct=TAKE_PROFIT_PCT,
    sl_pct=STOP_LOSS_PCT,
    nn=True,
)

# Win rate of each trade side, read from the position-level report.
overall_winrate, long_win_rate, short_win_rate = (
    position_test_backtest["metrics"][side]["trade_metrics"]["win_rate"]
    for side in ("overall", "long", "short")
)
print(f"""
overall winrate => {overall_winrate}
long winrate    => {long_win_rate}
short winrate    => {short_win_rate}
""")

# Portfolio-level statistics are the cell output.
vectorbt_test_backtest.stats()

[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
|   class |    BTC_precision       |
|---------|------------------------|
|       1 | 0.25 (4 | 633 | 4176)  |
|       2 | 0.00 (11 | 542 | 4176) |
Prediction time - 0.00
Start Date => 2025-03-15 00:30:00+00:00, End Date => 2025-06-10 00:00:00+00:00
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  

overall winrate => 0.06666666666666667
long winrate    => 0.25
short winrate    => 0.0



Start                         2025-03-15 00:01:00+00:00
End                           2025-06-10 00:00:00+00:00
Period                                 87 days 00:00:00
Start Value                                       100.0
End Value                                      99.94216
Total Return [%]                               -0.05784
Benchmark Return [%]                           40.36924
Max Gross Exposure [%]                           1.0208
Total Fees Paid                                0.008018
Max Drawdown [%]                               0.061359
Max Drawdown Duration                  80 days 08:45:00
Total Trades                                          8
Total Closed Trades                                   8
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                       12.5
Best Trade [%]                                 1.856659
Worst Trade [%]                               -1