In [1]:
import numpy as np
from jesse.helpers import timestamp_to_time

candles = np.load("data/btc_1m.npy")
print(timestamp_to_time(candles[0, 0]))
candles.shape

2024-01-01T00:00:00+00:00


(807840, 6)

In [2]:
import pandas as pd
from jesse.utils import numpy_candles_to_dataframe
from joblib import Parallel, delayed

from custom_indicators.toolbox.entropy.apen_sampen import sample_entropy_numba
from custom_indicators.utils.math_tools import log_ret_from_candles

# label and features
candles = candles[candles[:, 5] > 0]

df = numpy_candles_to_dataframe(candles)

feature_and_label = []

# label
label = np.log(df["close"].shift(-1) / df["close"]).fillna(0)
label.name = "label"
feature_and_label.append(label)

# high low range
hl_range = np.log(df["high"] / df["low"])
hl_range.name = "hl_range"
feature_and_label.append(hl_range)

# price position
price_pos = (df["close"] - df["low"]) / (df["high"] - df["low"] + 1e-10)
price_pos.name = "price_position"
feature_and_label.append(price_pos)

RANGE = [30, 60, 120]

# log return
for i in RANGE:
    series = np.log(df["close"] / df["close"].shift(i))
    series.name = f"r{i}"
    feature_and_label.append(series)

# volume features
for i in RANGE:
    vol_series = np.log(df["volume"] / df["volume"].shift(i))
    vol_series.name = f"vol{i}"
    feature_and_label.append(vol_series)

# sample entropy
for i in RANGE:
    log_ret_list = log_ret_from_candles(candles, [i] * len(candles))
    entropy_array = Parallel(n_jobs=-1)(
        delayed(sample_entropy_numba)(i) for i in log_ret_list
    )
    len_gap = len(df) - len(entropy_array)

    entropy_array = [np.nan] * len_gap + entropy_array
    entropy_series = pd.Series(entropy_array, index=df.index) / 10
    entropy_series.name = f"r{i}_en"

df_features_and_label = pd.concat(feature_and_label, axis=1)

NA_MAX_NUM = df_features_and_label.isna().sum().max()
df_features_and_label = df_features_and_label.iloc[NA_MAX_NUM:]

cols = [col for col in df_features_and_label.columns if col != "label"]
X = df_features_and_label[cols].values.astype(np.float32)

In [3]:
from bar_research.symbolic_regression_deap_advanced import (
    AdvancedSymbolicRegressionDEAP,
)

sr_model = AdvancedSymbolicRegressionDEAP(
    population_size=10000,
    generations=100,
    tournament_size=7,
    crossover_prob=0.85,
    mutation_prob=0.15,
    max_depth=17,
    init_depth=(2, 8),
    elite_size=20,
    n_islands=10,
    migration_rate=0.05,
    local_search_prob=0.05,
    adaptive_mutation=True,
    n_jobs=-1,
)

sr_model.fit(X, cols, NA_MAX_NUM)

岛屿权重分配: [(-1.0, -0.1), (-1.0, -0.1), (-1.0, -0.1), (-1.0, -1.0), (-1.0, -1.0), (-1.0, -1.0), (-1.0, -1.0), (-0.5, -1.0), (-0.5, -1.0), (-0.5, -1.0)]
开始多目标符号回归进化...
种群大小: 10000, 进化代数: 100
岛屿数量: 10, 迁移率: 0.05
使用多进程加速: 10 个进程
Generation 0/100 completed
Generation 20: 清理缓存完成
Generation 20/100 completed
Generation 40: 清理缓存完成
Generation 40/100 completed
Generation 60: 清理缓存完成
Generation 60/100 completed
Generation 80: 清理缓存完成
Generation 80/100 completed
注意：去重后只有 13 个唯一表达式（期望 20 个）

进化完成! Pareto前沿大小: 9950
最佳峰度偏差: 2.993345
对应复杂度: 21
缓存命中率: 0.00%


In [4]:
num_indiv = 0
for lst in sr_model.best_individuals:
    num_indiv += len(lst)
num_indiv

91

In [5]:
equtions = set()

for i in sr_model.get_best_expressions(50):
    if i["expression"] not in equtions:
        print(i)
        equtions.add(i["expression"])


{'rank': 1, 'expression': 'min(sub(vol60, min(price_position, vol60)), min(min(price_position, abs(r60)), abs(r120)))', 'kurtosis_deviation': 2.9933453408610666, 'complexity': 21.0, 'height': 4, 'size': 13, 'num_bars': 6488, 'actual_kurtosis': 2.9933453408610675, 'skewness': 0.014284077952855688, 'sharpe_ratio': 0.045430378731937934}
{'rank': 2, 'expression': 'min(sub(vol60, min(price_position, vol60)), min(price_position, min(abs(r60), abs(r120))))', 'kurtosis_deviation': 2.9933453408610666, 'complexity': 21.0, 'height': 4, 'size': 13, 'num_bars': 6488, 'actual_kurtosis': 2.9933453408610675, 'skewness': 0.014284077952855688, 'sharpe_ratio': 0.045430378731937934}
{'rank': 3, 'expression': 'min(sub(vol60, min(0.2037783268893829, vol60)), min(price_position, abs(add(r120, r60))))', 'kurtosis_deviation': 3.028258539441368, 'complexity': 20.0, 'height': 4, 'size': 12, 'num_bars': 6565, 'actual_kurtosis': 3.028258539441368, 'skewness': -0.047615405042699094, 'sharpe_ratio': 0.04529430576750

In [None]:
for i in sr_model.get_best_expressions(20):
    print(i)


In [None]:
for i in sr_model.get_best_expressions(20):
    print(i)

In [None]:
sr_model.visualize_pareto_front()