In [None]:
import os

import numpy as np
from jesse import helpers, research

# Fetch "1m" BTC-USDT candles from "Binance Spot" between the 2020-01-01 and
# 2025-04-30 timestamps, with no warm-up candles and caching disabled.
# get_candles returns a pair; only the second element is kept here
# (presumably the trading candles, the first being warm-up candles -- TODO confirm).
_, trading_1m = research.get_candles(
    "Binance Spot",
    "BTC-USDT",
    "1m",
    helpers.date_to_timestamp("2020-01-01"),
    helpers.date_to_timestamp("2025-04-30"),
    warmup_candles_num=0,
    caching=False,
    is_for_jesse=False,
)

print(trading_1m.shape)

# np.save does not create missing parent directories; make sure "data/" exists
# so the download above is not lost to a FileNotFoundError at the very end.
os.makedirs("data", exist_ok=True)
np.save("data/btc_1m.npy", trading_1m)

In [None]:
import numpy as np

# Reload the candle array from the same path the download cell saved it to,
# so the following cells work from the file on disk.
candles = np.load("data/btc_1m.npy")
# Display the array's shape as a quick sanity check.
candles.shape


In [None]:
import pandas as pd
from jesse.utils import numpy_candles_to_dataframe

# Turn the raw candle array into a DataFrame and derive the model features.
df = numpy_candles_to_dataframe(candles)

# Log of each bar's high/low ratio.
df["hlret"] = np.log(df["high"] / df["low"])

# Log return of the close over every look-back from 1 to 149 rows,
# each series named ret1 ... ret149.
lagged_returns = [
    np.log(df["close"] / df["close"].shift(lag)).rename(f"ret{lag}")
    for lag in range(1, 150)
]
df = pd.concat([df, pd.concat(lagged_returns, axis=1)], axis=1)
del lagged_returns

# Keep only the rows for which the longest look-back return is defined.
df = df.loc[df["ret149"].notna()]
df.head()


In [None]:
from scipy.stats import gaussian_kde, norm

# Fit a Gaussian kernel density estimate to the 149-step log returns, and
# record the mean and population (ddof=0) standard deviation of the same
# column for the normal reference distribution used below.
kde = gaussian_kde(df["ret149"])
mu, sigma = df["ret149"].mean(), df["ret149"].std(ddof=0)


def f(t):
    """Return the KDE density at ``t`` minus the normal pdf at ``t``.

    The normal reference uses ``mu`` and ``sigma`` computed from
    ``df["ret149"]``; a positive result means the KDE density is higher
    than the normal density at that point.
    """
    return kde(t) - norm.pdf(t, loc=mu, scale=sigma)


# Density gap evaluated at every observed return, one value per call.
y = [f(i) for i in df["ret149"].tolist()]
# Show only a short preview: `y` has one entry per DataFrame row, and dumping
# all of it would flood the cell output and bloat the saved notebook.
y[:5]

In [None]:
from gplearn.fitness import make_fitness
from gplearn.genetic import SymbolicRegressor
from scipy import stats

from custom_indicators.toolbox.bar.build import (
    build_bar_by_threshold_greater_than,
    build_bar_by_threshold_less_than,
)


def gp_kurtosis(y, y_pred, w):
    """Fitness metric for gplearn: kurtosis of the bars induced by a program.

    The program output is squashed with a sigmoid and used, against a 0.5
    threshold, to merge the candles into two bar series (one per builder).
    The metric is the smaller of the two kurtosis values of the bars'
    log returns.

    Parameters
    ----------
    y : array-like
        Target values supplied by gplearn. Unused.
    y_pred : numpy.ndarray
        Raw program output, one value per row of ``candles[149:]``.
    w : array-like
        Sample weights supplied by gplearn. Unused.

    Returns
    -------
    number
        ``1000`` (a penalty) when the prediction lies entirely on one side of
        the threshold or when either bar series has fewer than 3000 bars;
        otherwise ``min`` of the two ``scipy.stats.kurtosis`` results.

    Raises
    ------
    AssertionError
        If a non-degenerate ``y_pred`` does not have the same length as
        ``candles[149:]``.
    """
    # sigmoid y_pred
    sigmoid_y_pred = 1 / (1 + np.exp(-y_pred))

    # A prediction entirely on one side of the threshold cannot split the
    # candles, so it gets the penalty. This check deliberately comes before
    # the length assertion: gplearn's make_fitness is understood to probe the
    # metric with tiny dummy arrays (confirm against the installed version),
    # and such a probe must return a number rather than trip the assertion.
    if np.all(sigmoid_y_pred > 0.5) or np.all(sigmoid_y_pred < 0.5):
        return 1000

    # Skip the first 149 candles so the rows line up with the feature frame,
    # whose first 149 rows were dropped for lacking a ret149 value.
    candles_in_metrics = candles[149:]
    assert len(candles_in_metrics) == len(y_pred), (
        f"{len(candles_in_metrics)} != {len(y_pred)}"
    )

    merged_bar_1 = build_bar_by_threshold_greater_than(
        candles_in_metrics,
        sigmoid_y_pred,
        0.5,
        reverse=False,
    )
    merged_bar_2 = build_bar_by_threshold_less_than(
        candles_in_metrics,
        sigmoid_y_pred,
        0.5,
        reverse=True,
    )

    # Too few bars: reject before computing any statistics on them.
    if len(merged_bar_1) < 3000 or len(merged_bar_2) < 3000:
        return 1000

    # Log returns between consecutive bars, taken from column 2 of each bar
    # array (presumably the close price -- TODO confirm the builders' layout),
    # then standardised to zero mean and unit standard deviation.
    log_ret_1 = np.log(merged_bar_1[1:, 2] / merged_bar_1[:-1, 2])
    std_log_ret_1 = (log_ret_1 - log_ret_1.mean()) / log_ret_1.std()
    log_ret_2 = np.log(merged_bar_2[1:, 2] / merged_bar_2[:-1, 2])
    std_log_ret_2 = (log_ret_2 - log_ret_2.mean()) / log_ret_2.std()

    kurtosis_1 = stats.kurtosis(std_log_ret_1)
    kurtosis_2 = stats.kurtosis(std_log_ret_2)
    return min(kurtosis_1, kurtosis_2)


# Wrap the metric for gplearn; greater_is_better=False makes the search
# minimise the value returned by gp_kurtosis.
custom_kurtosis_loss = make_fitness(function=gp_kurtosis, greater_is_better=False)

# Model inputs: the 149 lagged log-return columns plus "hlret" and "volume".
cols = [f"ret{i}" for i in range(1, 150)] + ["hlret", "volume"]

est_gp = SymbolicRegressor(
    metric=custom_kurtosis_loss,
    population_size=5000,
    generations=50,
    tournament_size=20,
    stopping_criteria=0,
    function_set=["add", "sub", "sqrt", "log", "abs", "neg", "max", "min"],
    p_crossover=0.7,
    p_subtree_mutation=0.15,
    p_hoist_mutation=0.01,
    p_point_mutation=0.1,
    # Presumably keeps every row in each evaluation, which the length
    # assertion inside gp_kurtosis relies on -- confirm against gplearn docs.
    max_samples=1,
    parsimony_coefficient="auto",
    feature_names=cols,
    verbose=1,
    n_jobs=-1,
    # Fixed seed so the evolutionary search is reproducible on a re-run;
    # the value itself is arbitrary.
    random_state=42,
)

# The target is a constant placeholder: gp_kurtosis never reads `y`, it scores
# each program only through its predictions.
est_gp.fit(df[cols], [1] * len(df))