In [1]:
import numpy as np
from jesse import helpers, research

# One-off export of BTC-USDT 1-minute candles: fetch the requested date range,
# report the array shape, and persist the array to data/btc_1m.npy.
start_ts = helpers.date_to_timestamp("2020-01-01")
finish_ts = helpers.date_to_timestamp("2025-07-01")

_, candles = research.get_candles(
    "Binance Perpetual Futures",
    "BTC-USDT",
    "1m",
    start_ts,
    finish_ts,
    warmup_candles_num=0,
    caching=False,
    is_for_jesse=False,
)

print(candles.shape)

np.save("data/btc_1m.npy", candles)

(2891520, 6)


In [1]:
import numpy as np

# Reload the saved candle array and drop every row whose column 5 is not
# strictly positive, then report the remaining shape.
candles = np.load("data/btc_1m.npy")
has_positive_col5 = candles[:, 5] > 0
candles = candles[has_positive_col5]
print(candles.shape)

(2891336, 6)


In [2]:
import pandas as pd
from jesse.utils import numpy_candles_to_dataframe
from joblib import Parallel, delayed

from custom_indicators.toolbox.entropy.apen_sampen import sample_entropy_numba
from custom_indicators.utils.math_tools import log_ret_from_candles

df = numpy_candles_to_dataframe(candles)

# Lookback windows (in rows) shared by the return, volume and entropy features.
RANGE = [60, 120, 240]

# Placeholder target: the original author notes that the loss function never
# reads y, so an all-zero column is used.
feature_columns = [pd.Series(np.zeros(len(df)), index=df.index, name="label")]

# Log of the high/low ratio of each row.
feature_columns.append(
    pd.Series(np.log(df["high"] / df["low"]), index=df.index, name="hlr")
)

# Log return of the close versus the close `window` rows earlier.
feature_columns.extend(
    pd.Series(
        np.log(df["close"] / df["close"].shift(window)),
        index=df.index,
        name=f"r{window}",
    )
    for window in RANGE
)

# Log change of the volume versus the volume `window` rows earlier.
feature_columns.extend(
    pd.Series(
        np.log(df["volume"] / df["volume"].shift(window)),
        index=df.index,
        name=f"vol{window}",
    )
    for window in RANGE
)

# Sample entropy of each item returned by log_ret_from_candles, computed in
# parallel on all available cores.
for window in RANGE:
    log_ret_items = log_ret_from_candles(candles, [window] * len(candles))
    entropies = list(
        Parallel(n_jobs=-1)(
            delayed(sample_entropy_numba)(item) for item in log_ret_items
        )
    )
    # The entropy list can be shorter than the frame; left-pad with NaN so the
    # values line up with the last rows of df.
    missing_head = len(df) - len(entropies)
    padded_entropies = [np.nan] * missing_head + entropies
    feature_columns.append(
        pd.Series(padded_entropies, index=df.index, name=f"r{window}_en")
    )

df_feat_and_label = pd.concat(feature_columns, axis=1)
print(df_feat_and_label.shape)
df_feat_and_label

(2891336, 11)


Unnamed: 0,label,hlr,r60,r120,r240,vol60,vol120,vol240,r60_en,r120_en,r240_en
2020-01-01 00:00:00,0.0,0.001882,,,,,,,,,
2020-01-01 00:01:00,0.0,0.000514,,,,,,,,,
2020-01-01 00:02:00,0.0,0.000524,,,,,,,,,
2020-01-01 00:03:00,0.0,0.000780,,,,,,,,,
2020-01-01 00:04:00,0.0,0.000858,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
2025-06-30 23:55:00,0.0,0.000169,9.342856e-07,-0.000922,-0.004325,-0.639182,-1.633861,-2.590498,0.281723,0.118130,0.041178
2025-06-30 23:56:00,0.0,0.000159,-1.588411e-04,0.000836,-0.004484,-0.248488,-2.339388,-0.703709,0.280828,0.116760,0.040295
2025-06-30 23:57:00,0.0,0.000211,-1.289294e-04,0.000287,-0.004803,2.343435,-1.407833,0.470337,0.303329,0.166472,0.037730
2025-06-30 23:58:00,0.0,0.000551,2.773823e-04,-0.000255,-0.005565,0.108565,0.273464,-1.633023,0.308496,0.176016,0.037504


In [3]:
from pathlib import Path

# Absolute location of the candle file, derived from the same relative path the
# other cells use, so the notebook is not tied to one user's home directory.
# as_posix() keeps forward slashes, which avoids backslashes ending up inside
# the Julia string literal on Windows.
DATA_PATH = Path("data/btc_1m.npy").resolve().as_posix()

# Read the Julia loss template and substitute the bare file name with the
# absolute path. The template contains non-ASCII comments, so the encoding is
# stated explicitly instead of relying on the platform default.
with open("data/pysr_loss.jl", "r", encoding="utf-8") as f:
    custom_julia_loss = f.read().replace("btc_1m.npy", DATA_PATH)

print(custom_julia_loss)

using NPZ
using Statistics

function kurtosis_loss(tree, dataset::Dataset{T,L}, options) where {T,L}
    raw_candles = npzread("/Users/yangqiuyu/Github/jesse-trade/data/btc_1m.npy")
    raw_candles = raw_candles[raw_candles[:, 6].>0, :]

    # build bar function
    function build_bar_by_cumsum(candles, condition, threshold)
        n = size(candles, 1)
        @assert n>0 "no candles"
        bars = zeros(Float64, n, 6)
        bar_index = 1

        # 初始化第一个bar
        bar_timestamp = candles[1, 1]
        bar_open = candles[1, 2]
        bar_close = candles[1, 3]
        bar_high = candles[1, 4]
        bar_low = candles[1, 5]
        bar_volume = candles[1, 6]
        bar_cumsum = condition[1]

        for i in 2:n
            if bar_cumsum <= threshold
                bar_cumsum += condition[i]
                bar_timestamp = max(bar_timestamp, candles[i, 1])
                bar_volume += candles[i, 6]
                bar_high = max(bar_high, candles[i, 4])
                bar_low

In [4]:
from pysr import PySRRegressor

# Common multiplier applied to most mutation weights below.
SCALE_FACTOR = 1.0

# Stopping rule, annealing and iteration budget.
search_settings = dict(
    early_stop_condition="f(loss, complexity) = (loss < 2.5) && (complexity < 10)",
    annealing=True,
    alpha=3.17,
    niterations=30,
    ncycles_per_iteration=1000,
)

# Population layout and migration (exchange between islands).
population_settings = dict(
    populations=31,
    population_size=650,
    migration=True,  # migration enabled
    fraction_replaced=0.00036,  # migration rate (noted as the default by the author)
    fraction_replaced_hof=0.0614,  # migration from the hall of fame
    topn=12,  # migrate the 12 best individuals each time
)

# Operators the expressions may use, and the custom Julia loss.
expression_settings = dict(
    binary_operators=["+", "-", "max", "min"],
    unary_operators=["abs", "neg"],
    loss_function=custom_julia_loss,
)

# Complexity control.
complexity_settings = dict(
    parsimony=0.009,
    maxsize=50,
)

# Mutation weights. weight_do_nothing is divided by SCALE_FACTOR so that a
# larger factor lowers it while raising the scaled weights; weight_optimize is
# left unscaled.
mutation_settings = dict(
    crossover_probability=0.7,
    weight_add_node=2.47 * SCALE_FACTOR,
    weight_insert_node=0.0112 * SCALE_FACTOR,
    weight_delete_node=0.870 * SCALE_FACTOR,
    weight_do_nothing=0.273 / SCALE_FACTOR,
    weight_mutate_constant=0.0346 * SCALE_FACTOR,
    weight_mutate_operator=0.293 * SCALE_FACTOR,
    weight_swap_operands=0.198 * SCALE_FACTOR,
    weight_rotate_tree=4.26 * SCALE_FACTOR,
    weight_randomize=0.000502 * SCALE_FACTOR,
    weight_simplify=0.00209 * SCALE_FACTOR,
    weight_optimize=0.001,
)

# Tournament selection.
selection_settings = dict(
    tournament_selection_n=50,
    tournament_selection_p=0.95,
)

# Constant optimization.
optimizer_settings = dict(
    should_optimize_constants=True,
    optimizer_algorithm="BFGS",
    optimize_probability=0.14,
)

model = PySRRegressor(
    **search_settings,
    **population_settings,
    **expression_settings,
    **complexity_settings,
    **mutation_settings,
    **selection_settings,
    **optimizer_settings,
)

Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


In [None]:
# Count NaNs per column and take the largest count, then drop that many rows
# from the top of the frame so the remaining rows stay contiguous.
# NOTE(review): this only removes every NaN if they all sit at the head of each
# column — confirm the entropy features never produce NaN further down.
warmup_rows = df_feat_and_label.isna().sum(axis=0).max()
df_feat_and_label = df_feat_and_label.iloc[warmup_rows:]

# "label" is the target; every other column is a feature.
y = df_feat_and_label["label"]
X = df_feat_and_label.drop(columns="label")
model.fit(X, y)

Compiling Julia backend...
