In [None]:
import numpy as np

# Display NumPy's build/configuration report so the numerical backend used by
# the rest of the notebook is on record.
np.show_config()

In [None]:
import numpy as np
from jesse import helpers, research

# Bounds of the download window, converted with jesse's date helper.
start_ts = helpers.date_to_timestamp("2020-01-01")
finish_ts = helpers.date_to_timestamp("2025-06-16")

# `get_candles` returns a pair; only its second element is kept as `candles`.
_, candles = research.get_candles(
    "Binance Perpetual Futures",
    "BTC-USDT",
    "1m",
    start_ts,
    finish_ts,
    warmup_candles_num=0,
    caching=False,
    is_for_jesse=False,
)

candles.shape

In [2]:
# Persist the downloaded candles so later cells can reload them from disk.
# `np.save` does not create missing parent directories, so make sure the
# target folder exists first (a fresh checkout would otherwise fail here).
import os

os.makedirs("data", exist_ok=True)
np.save("data/btc_1m.npy", candles)

In [1]:
import numpy as np

# Reload the candle array cached on disk; every later cell that reads
# `candles` starts from this file.
candles = np.load("data/btc_1m.npy")
# The recorded output of this cell shows a shape of (2869920, 6).
candles.shape

(2869920, 6)

In [7]:
from custom_indicators.prod_indicator.diff.fracdiff import (
    frac_diff_candles,
)

# Run the project's fractional-differencing indicator on the last 10,000 rows
# only, using the "close" source and returning the whole series
# (sequential=True).
# NOTE(review): `window` and `d` are presumably the filter length and the
# differencing order — confirm against `frac_diff_candles`.
res = frac_diff_candles(
    candles[-10000:], source_type="close", window=100, d=0.5, sequential=True
)
# The recorded output starts with NaN entries.
res

array([          nan,           nan,           nan, ..., 6004.25018203,
       5995.85352277, 6022.51133832])

In [None]:
import pandas as pd
from jesse.utils import numpy_candles_to_dataframe

# Base frame from the raw candle array, plus the per-bar high/low log range.
df = numpy_candles_to_dataframe(candles)
df["hlret"] = np.log(df["high"] / df["low"])

# One column per look-back i in 1..149: log(close_t / close_{t-i}), named "ret{i}".
close = df["close"]
lagged_returns = pd.concat(
    [np.log(close / close.shift(i)).rename(f"ret{i}") for i in range(1, 150)],
    axis=1,
)
df = pd.concat([df, lagged_returns], axis=1)
del lagged_returns

# The longest look-back is NaN for the first rows; drop them so every
# return column is populated.
df = df[df["ret149"].notna()]
df.head()

In [None]:
from custom_indicators.utils.plot import find_kde_cross

# Feed the 149-bar log returns to the project's KDE helper.
# NOTE(review): `roots` is indexed with [0] and [-1] in the labelling cell, so
# it is presumably an ordered sequence of crossing points — confirm against
# `find_kde_cross`.
target = df["ret149"].to_numpy()
roots = find_kde_cross(target)

In [None]:
# Binary label: 1 when the 149-bar log return is at or beyond the outermost
# KDE crossing points (<= roots[0] or >= roots[-1]), otherwise 0.
# NOTE(review): assumes `roots` is ordered ascending — confirm against
# `find_kde_cross`.
#
# The comparison is element-wise, so the series is deliberately left in `df`
# row order: sorting by value and then re-sorting by index only cost two
# O(n log n) passes, and the classifier is later fitted positionally against
# `df[cols]`, which needs `label` row-aligned with `df`.
series = df["ret149"].copy()
label = ((series <= roots[0]) | (series >= roots[-1])).astype(int)
label.value_counts()

In [None]:
import numpy as np
from gplearn.fitness import make_fitness
from gplearn.genetic import SymbolicClassifier
from sklearn.metrics import f1_score


def gp_f1(y, y_pred, w):
    """Weighted F1 score used as a custom gplearn fitness function.

    Args:
        y: Ground-truth labels.
        y_pred: Program outputs; values strictly above 0.5 count as class 1.
            NOTE(review): presumably already squashed to [0, 1] by the
            classifier's transformer — confirm against gplearn.
        w: Per-sample weights forwarded to ``f1_score``.

    Returns:
        The F1 score of the thresholded predictions.
    """
    predicted_positive = y_pred > 0.5
    return f1_score(y, predicted_positive.astype(int), sample_weight=w)


# Wrap the F1 helper as a gplearn fitness object; larger values are better.
my_custom_f1_fitness = make_fitness(function=gp_f1, greater_is_better=True)

# Feature columns: the 149 lagged log returns plus the high/low range and volume.
cols = [f"ret{i}" for i in range(1, 150)] + ["hlret", "volume"]

est_gp = SymbolicClassifier(
    metric=my_custom_f1_fitness,
    population_size=5000,
    # For a greater-is-better metric gplearn stops as soon as the best fitness
    # is >= stopping_criteria. F1 is never negative, so the previous value of 0
    # ended evolution after the very first generation. 1.0 (a perfect score)
    # lets the run use its full generation budget.
    stopping_criteria=1.0,
    function_set=["add", "sub", "sqrt", "log", "abs", "neg", "max", "min"],
    p_crossover=0.7,
    p_subtree_mutation=0.05,
    p_hoist_mutation=0.05,
    p_point_mutation=0.05,
    max_samples=1,
    parsimony_coefficient=0.005,
    class_weight="balanced",
    feature_names=cols,
    verbose=1,
    n_jobs=-1,
    # Fixed seed so the stochastic search is reproducible across kernel restarts.
    random_state=42,
)


# Fit on the full feature frame; `label` must be row-aligned with `df`.
est_gp.fit(df[cols], label)

In [6]:
# Predict on the same rows the classifier was fitted on, so these are
# in-sample predictions, not a held-out evaluation.
label_pred = est_gp.predict(df[cols])

In [None]:
# Distinct predicted labels together with how often each one occurs.
np.unique(label_pred, return_counts=True)

In [None]:
from scipy import stats

# Column 2 of the candle array is the price this notebook uses for returns.
price = candles[:, 2]

# For every look-back from 10 to 299 bars, report the (Pearson) kurtosis of
# the standardized lag-bar log returns.
for lag in range(10, 300):
    lagged_log_ret = np.log(price[lag:] / price[:-lag])
    z_scores = (lagged_log_ret - lagged_log_ret.mean()) / lagged_log_ret.std()
    kurtosis = stats.kurtosis(z_scores, axis=None, fisher=False, nan_policy="omit")
    print(f"{lag = }: {kurtosis = }")

In [None]:
import os
from multiprocessing import Pool

from tqdm.auto import tqdm

from custom_indicators.toolbox.entropy.apen_sampen import sample_entropy

LAG = 159

# For each bar i >= LAG: log ratio of the current price (column 2) to each of
# the LAG preceding prices, giving one window of length LAG per bar.
log_ret = [
    np.log(candles[i, 2] / candles[i - LAG : i, 2]) for i in range(LAG, len(candles))
]

# Leave one core free, but never ask for fewer than one worker:
# `os.cpu_count()` may be 1 (-> 0 processes, which Pool rejects) or None.
n_workers = max(1, (os.cpu_count() or 1) - 1)
with Pool(processes=n_workers) as pool:
    # Ship windows in chunks; with the default chunksize of 1 each window is
    # dispatched as its own task. Result order is unchanged.
    entropy_array = list(
        tqdm(
            pool.imap(sample_entropy, log_ret, chunksize=1024),
            total=len(log_ret),
            desc="计算样本熵",
        )
    )

# Drop the first LAG bars so `candles` lines up one-to-one with `entropy_array`.
# NOTE: this rebinds `candles`; re-running the cell trims another LAG rows.
candles = candles[LAG:]

In [None]:
# Compute the threshold
DURATION = 298

# Total entropy divided by the number of whole DURATION-bar spans in the data.
# NOTE(review): if `entropy_array` can contain NaN/inf the sum is not finite —
# confirm against `sample_entropy`.
span_count = len(candles) // DURATION
threshold = np.sum(entropy_array) / span_count
threshold

In [None]:
from bar import build_bar_by_cumsum

# Hand the trimmed candles, their per-bar entropy values and the threshold to
# the project's bar builder.
# NOTE(review): presumably a bar is closed whenever the running entropy sum
# reaches `threshold` — confirm against `bar.build_bar_by_cumsum`.
merged_bar = build_bar_by_cumsum(candles, entropy_array, threshold)
merged_bar.shape

In [None]:
from custom_indicators.utils.plot import plot_kde

# Plot the project's KDE view for column 2 of the merged bars.
plot_kde(merged_bar[:, 2])

In [None]:
import jesse.indicators as ta
import numpy as np

# Reload the cached 1-minute candles and derive Heikin-Ashi values from them.
raw_candles = np.load("data/btc_1m.npy")
print(raw_candles.shape)
# NOTE: `candles` is rebound to the indicator's return value here. Elsewhere
# in this notebook that result is read through .open/.close/.high/.low, so it
# is presumably not a 2-D array; cells that slice `candles[:, 2]` need the
# array to be reloaded first.
candles = ta.heikin_ashi_candles(raw_candles, sequential=True)

In [None]:
import os
from multiprocessing import Pool

import jesse.indicators as ta
import optuna
from scipy import stats

from bar import build_bar_by_cumsum
from custom_indicators.toolbox.entropy.apen_sampen import sample_entropy


def objective(trial):
    """Optuna objective: kurtosis of log returns of entropy-merged bars.

    Samples ``duration`` (60-360), ``lag`` (20-360), ``use_weight`` and
    ``use_heikin_ashi``, rebuilds the merged bars for that configuration and
    scores the result.

    Args:
        trial: The Optuna trial used to sample the hyper-parameters.

    Returns:
        Pearson kurtosis (``fisher=False``) of the standardized bar-to-bar
        log returns of column 2 of the merged bars.
    """
    duration = trial.suggest_int("duration", 60, 360)
    lag = trial.suggest_int("lag", 20, 360)
    use_weight = trial.suggest_categorical("use_weight", [True, False])
    use_heikin_ashi = trial.suggest_categorical("use_heikin_ashi", [True, False])

    # Load a fresh copy per trial: the Heikin-Ashi branch overwrites columns
    # in place.
    candles = np.load("data/btc_1m.npy")
    if use_heikin_ashi:
        ha = ta.heikin_ashi_candles(candles, sequential=True)
        candles[:, 1] = ha.open
        candles[:, 2] = ha.close
        candles[:, 3] = ha.high
        candles[:, 4] = ha.low
        candles = candles[1:]

    # One window per bar i >= lag: log ratio of the current close to each of
    # the `lag` preceding closes.
    log_ret = [
        np.log(candles[i, 2] / candles[i - lag : i, 2])
        for i in range(lag, len(candles))
    ]

    if use_weight:
        # Linearly decaying weights from 1 (first/oldest entry of the window)
        # down to 1/lag (last entry).
        weight = np.flip(np.arange(1, lag + 1) / lag)
        log_ret = [window * weight for window in log_ret]
    # Without weighting the windows are used as-is; multiplying by an all-ones
    # vector would only copy every window.

    # Leave one core free, but never request fewer than one worker:
    # `os.cpu_count()` may be 1 (-> 0 processes, which Pool rejects) or None.
    n_workers = max(1, (os.cpu_count() or 1) - 1)
    with Pool(processes=n_workers) as pool:
        entropy_array = pool.map(sample_entropy, log_ret)
    # Align candles with the entropy values (the first `lag` bars have none).
    candles = candles[lag:]

    threshold = np.sum(entropy_array) / (len(candles) // duration)
    merged_bar = build_bar_by_cumsum(candles, entropy_array, threshold)
    close_arr = merged_bar[:, 2]
    ret = np.log(close_arr[1:] / close_arr[:-1])
    standard = (ret - ret.mean()) / ret.std()
    kurtosis = stats.kurtosis(standard, axis=None, fisher=False, nan_policy="omit")
    return kurtosis


# Minimize the kurtosis returned by `objective`.
# The sampler is seeded so the sequence of sampled hyper-parameters is
# reproducible across kernel restarts (n_jobs=1 keeps the trial order fixed).
# NOTE: `objective` never reports intermediate values, so the pruner
# configured here has nothing to act on.
study = optuna.create_study(
    direction="minimize",
    pruner=optuna.pruners.HyperbandPruner(),
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=500, n_jobs=1)

In [None]:
import numpy as np

from bar import (
    build_dollar_bar,
    estimate_dollar_bar_threshold,
)

# Start again from the cached 1-minute candles (earlier cells rebind `candles`).
candles = np.load("data/btc_1m.npy")
print(candles.shape)

# NOTE(review): the second argument (300) is presumably the target bar
# duration in 1-minute candles — confirm against
# `bar.estimate_dollar_bar_threshold`.
threshold = estimate_dollar_bar_threshold(candles, 300)
print(threshold)

# Build bars from the candles using the estimated threshold.
dollar_bar = build_dollar_bar(candles, threshold)
dollar_bar.shape

In [None]:
from custom_indicators.utils.plot import plot_kde

# Plot the project's KDE view for column 2 of the dollar bars.
plot_kde(dollar_bar[:, 2])

In [None]:
from pathlib import Path

from bar import np_merge_bars

# Bar count passed to `np_merge_bars`: one bar per 240 one-minute candles.
N = candles.shape[0] // 240

# Build (and cache on disk) one merged-bar array per look-back `lag`;
# lags whose file already exists are skipped.
for lag in (100, 140, 145, 146, 147, 148, 149, 150, 151, 160, 200, 250):
    print(f"------------------{lag = }---------------------")
    out_path = Path(f"data/btc_1m_m4h_lag_{lag}.npy")
    if out_path.exists():
        continue
    new_candles = np_merge_bars(candles, N, lag=lag)
    print(new_candles.shape)
    np.save(out_path, new_candles)

In [None]:
from pathlib import Path

from bar import np_merge_bars

# Same merge at lag 149, but with the per-bar duration shifted by `t` minutes
# around four hours; existing output files are left untouched.
for t in (-10, 10):
    print(f"------------------{t = }---------------------")
    minutes_per_bar = 4 * 60 + t
    N = candles.shape[0] // minutes_per_bar
    path = Path(f"data/btc_1m_m4h{t}_lag_149.npy")
    if path.exists():
        continue
    new_candles = np_merge_bars(candles, N, lag=149)
    print(new_candles.shape)
    np.save(path, new_candles)

In [None]:
import numpy as np
from jesse.utils import numpy_candles_to_dataframe

# Load the merged bars cached for lag 149 and convert them to a DataFrame
# for inspection and charting.
merged_bar = np.load("data/btc_1m_m4h_lag_149.npy")
df_merged_bar = numpy_candles_to_dataframe(merged_bar)
df_merged_bar.head()

In [None]:
from lightweight_charts import Chart

# Render the merged bars as an interactive candlestick chart.
chart = Chart()
chart.set(df_merged_bar[["date", "open", "high", "low", "close", "volume"]])
# NOTE(review): block=True presumably keeps this cell running until the chart
# window is closed — confirm against lightweight_charts.
chart.show(block=True)

In [None]:
# Recorded kurtosis per merge lag, kept as a bare string for reference;
# lag 149 has the lowest value among those listed.
"""
lag 100: kurtosis = 46.53
lag 140: kurtosis = 21.92
lag 147: kurtosis = 21.76
lag 148: kurtosis = 21.01
lag 149: kurtosis = 20.64
lag 150: kurtosis = 20.95
lag 160: kurtosis = 24.14
lag 200: kurtosis = 33.81
"""

# Reload the lag-149 bars and plot the KDE view of column 2.
# `plot_kde` is imported in an earlier cell of this notebook.
merged_bar = np.load("data/btc_1m_m4h_lag_149.npy")
print(merged_bar.shape)
plot_kde(merged_bar[:, 2], lag=1)