In [1]:
# Integrated labeler

from research.model_pick.candle_fetch import FusionCandles
from research.labeler.gmm_labeler import GMMLabeler

# Candle source for the BTC-USDT market on Binance Perpetual Futures,
# requested with a "1m" timeframe.
candle_container = FusionCandles(
    exchange="Binance Perpetual Futures", symbol="BTC-USDT", timeframe="1m"
)
# Fetch the candles for the given date range and print the array shape
# as a quick sanity check.
# NOTE(review): whether the end date is inclusive is not visible here — confirm
# against FusionCandles.get_candles.
candles = candle_container.get_candles("2022-08-01", "2025-12-20")
print(candles.shape)

# Build the project labeler from the candles.
# NOTE(review): the positional 4 presumably corresponds to the lag `L = 4` used
# by the teaching-version cells further down — confirm against GMMLabeler's
# signature. The seed 369 is the same value passed as `random_state` to the
# GMMHMM fit later in this notebook.
labeler = GMMLabeler(candles, 4, random_seed=369)

PyTorch configured: device=cpu, dtype=torch.float32


  import pkg_resources


(38467, 6)


In [8]:
# Display the labeler's double-probability labels. A later cell in this notebook
# describes its `state_0_prob` / `state_1_prob` columns as the equivalent of this
# attribute, so the two outputs can be compared.
labeler.label_double_prob

array([[1.00000000e+00, 6.14300728e-24],
       [9.88899610e-01, 1.11003896e-02],
       [7.42651538e-01, 2.57348462e-01],
       ...,
       [5.42170737e-02, 9.45782926e-01],
       [3.13179785e-01, 6.86820215e-01],
       [3.14694889e-01, 6.85305111e-01]])

In [3]:
# Original teaching version of the labeler

import pandas as pd
import numpy as np

L = 4  # lag in bars: features use both the 1-bar and the L-bar log return
mix = 3  # `n_mix` value handed to the GMM-HMM in the fitting cell

# Split the candle matrix into named columns (names follow the variables:
# column 0 is the bar timestamp in milliseconds, see `unit="ms"` below).
# `high` and `low` are trimmed by the first L bars so they line up with the
# L-bar return.
eob = candles[:, 0]
openn = candles[:, 1]
close = candles[:, 2]
high = candles[L:, 3]
low = candles[L:, 4]
datelist = pd.to_datetime(eob[L:], unit="ms")

# Take the log of the close column once, then form both returns by slicing.
log_close = np.log(np.array(close))
logreturn = log_close[L:] - log_close[L - 1 : -1]  # 1-bar log return
logreturnX = log_close[L:] - log_close[:-L]  # L-bar log return
HLdiff = np.log(np.array(high)) - np.log(np.array(low))  # log high/low range

closeidx = close[L:]

# Feature matrix: one row per bar (from bar L onwards), three columns.
X = np.column_stack((logreturnX, logreturn, HLdiff))

In [4]:
from hmmlearn.hmm import GMMHMM

# Model settings collected in one place. Two components are used because the
# later labeling cell compares exactly "state 0_return" against "state 1_return".
hmm_kwargs = dict(
    n_components=2,
    n_mix=mix,
    covariance_type="diag",
    n_iter=369,
    random_state=369,
)
gmm = GMMHMM(**hmm_kwargs).fit(X)

latent_states_sequence = gmm.predict(X)  # hard labels
state_probabilities = gmm.predict_proba(X)  # probability labels

In [5]:
# Label table indexed by bar timestamp: 1-bar log return plus the raw HMM state.
data = pd.DataFrame(
    {"datelist": datelist, "logreturn": logreturn, "state": latent_states_sequence}
)
data = data.set_index("datelist")

# For each state, keep the return of every bar whose *previous* bar was in that
# state (the membership mask is shifted forward by one bar, padded with 0).
for i in range(gmm.n_components):
    state = latent_states_sequence == i
    idx = np.append(0, state[:-1])
    data["state %d_return" % i] = data["logreturn"] * idx

# Hard label: `state`.
# Decide automatically which raw state is the buy state, then remap the labels
# so that buy is 1 and sell is 0.
total_state0_return = sum(data["state 0_return"])
total_state1_return = sum(data["state 1_return"])
# buy_state is the *raw* HMM state treated as long.
buy_state = 0 if total_state0_return > total_state1_return else 1
if buy_state == 0:
    # Raw state 0 is the long state, so flip 0 <-> 1.
    data["state"] = (data["state"] - 1).abs()

# Double-probability labels: `state_0_prob` and `state_1_prob`, the equivalent
# of label_double_prob. When the raw buy state is 0 the two probability columns
# are exchanged, so `state_1_prob` always belongs to the buy state.
data["state_0_prob"] = state_probabilities[:, 1 - buy_state]
data["state_1_prob"] = state_probabilities[:, buy_state]

# Standalone signed raw-probability label: `state_p1`
# (+buy probability on buy bars, -sell probability on sell bars).
is_buy = data["state"] == 1
data["state_p1"] = np.where(is_buy, data["state_1_prob"], -data["state_0_prob"])

# Standalone probability-difference label: `state_p2`.
data["state_p2"] = data["state_1_prob"] - data["state_0_prob"]

In [6]:
# Display the assembled label table (hard label, per-state returns, and the
# probability-based labels) for visual inspection.
data

Unnamed: 0_level_0,logreturn,state,state 0_return,state 1_return,state_0_prob,state_1_prob,state_p1,state_p2
datelist,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-08-01 11:42:00,0.006114,0,0.000000,0.000000,1.000000,6.143007e-24,-1.000000,-1.000000
2022-08-01 13:04:00,-0.006356,0,-0.006356,-0.000000,0.988900,1.110039e-02,-0.988900,-0.977799
2022-08-01 13:41:00,0.003095,0,0.003095,0.000000,0.742652,2.573485e-01,-0.742652,-0.485303
2022-08-01 14:00:00,0.006302,1,0.006302,0.000000,0.012393,9.876067e-01,0.987607,0.975213
2022-08-01 14:25:00,0.001691,1,0.000000,0.001691,0.028348,9.716516e-01,0.971652,0.943303
...,...,...,...,...,...,...,...,...
2025-12-19 20:35:00,0.002470,1,0.000000,0.002470,0.000170,9.998296e-01,0.999830,0.999659
2025-12-19 20:47:00,-0.002175,1,-0.000000,-0.002175,0.020525,9.794748e-01,0.979475,0.958950
2025-12-19 20:54:00,-0.000220,1,-0.000000,-0.000220,0.054217,9.457829e-01,0.945783,0.891566
2025-12-19 22:00:00,-0.001877,1,-0.000000,-0.001877,0.313180,6.868202e-01,0.686820,0.373640
