In [1]:
from provenquant.core.bars import get_dollar_bars
from provenquant.core.labeling import filtrate_tripple_label_barrier, fit_ou_ols, get_tripple_label_barrier, optimize_triple_barriers
from provenquant.core.sample_weight import compute_abs_return_uniqueness, compute_average_uniqueness
from provenquant.utils import get_volatility
import os
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
current_dir = os.getcwd()
dataframe_path = os.path.join(current_dir, '..', '..', 'data', 'btc_usdt.feather')
dataframe = pd.read_feather(dataframe_path)
dataframe

Unnamed: 0,date,open,high,low,close,volume
0,2025-12-01 00:00:00+00:00,90360.01,90417.00,90268.77,90408.34,59.95250
1,2025-12-01 00:01:00+00:00,90408.34,90408.34,90300.00,90300.01,9.99673
2,2025-12-01 00:02:00+00:00,90300.01,90340.25,90278.33,90305.55,31.79438
3,2025-12-01 00:03:00+00:00,90305.55,90305.55,90194.27,90194.27,57.04552
4,2025-12-01 00:04:00+00:00,90194.28,90194.28,89510.00,89816.17,503.41775
...,...,...,...,...,...,...
43196,2025-12-30 23:56:00+00:00,88506.00,88532.55,88505.99,88532.55,1.28329
43197,2025-12-30 23:57:00+00:00,88532.55,88532.55,88523.10,88523.11,1.29438
43198,2025-12-30 23:58:00+00:00,88523.11,88523.11,88523.10,88523.10,0.65562
43199,2025-12-30 23:59:00+00:00,88523.10,88523.11,88485.49,88485.49,6.39077


# Create Tripple Barrier Labeling

In [3]:
volatility = get_volatility(
    dataframe,
    window=100,
)
threshold = volatility.mean() * 10
threshold

np.float64(0.005408195057124239)

In [4]:
filtered_dataframe = filtrate_tripple_label_barrier(
    dataframe,
    cusum_threshold=threshold,
    vertical_barrier=20,
    datetime_col='date'
)
filtered_dataframe

Unnamed: 0,t1,open,high,low,close,volume
2025-12-01 00:04:00+00:00,2025-12-01 00:24:00+00:00,90194.28,90194.28,89510.00,89816.17,503.41775
2025-12-01 00:07:00+00:00,2025-12-01 00:27:00+00:00,89480.01,89536.00,89200.20,89208.76,101.52469
2025-12-01 00:19:00+00:00,2025-12-01 00:39:00+00:00,88808.35,88808.35,88626.04,88719.99,67.58118
2025-12-01 00:32:00+00:00,2025-12-01 00:52:00+00:00,88319.58,88319.58,88044.17,88083.84,167.45610
2025-12-01 00:45:00+00:00,2025-12-01 01:05:00+00:00,87737.65,87775.51,87620.34,87650.09,60.11226
...,...,...,...,...,...,...
2025-12-30 16:18:00+00:00,2025-12-30 16:38:00+00:00,89133.39,89284.68,89117.38,89268.32,44.64159
2025-12-30 16:35:00+00:00,2025-12-30 16:55:00+00:00,88960.99,89040.00,88852.56,88883.34,80.35831
2025-12-30 17:45:00+00:00,2025-12-30 18:05:00+00:00,88533.43,88533.43,88333.34,88373.43,92.94989
2025-12-30 20:53:00+00:00,2025-12-30 21:13:00+00:00,87978.37,87978.38,87920.00,87920.01,31.21459


In [5]:
close_series = pd.Series(data=dataframe['close'].values, index=dataframe['date'])

labeld_dataframe = get_tripple_label_barrier(
    dataframe=filtered_dataframe,
    close_series=close_series,
    tp=0.02,
    sl=0.01,
)
labeld_dataframe

Unnamed: 0,t1,open,high,low,close,volume,label,return,max_return,min_return,mapped_label
2025-12-01 00:04:00+00:00,2025-12-01 00:24:00+00:00,90194.28,90194.28,89510.00,89816.17,503.41775,-1,-0.011333,0.000000,-0.015403,0
2025-12-01 00:07:00+00:00,2025-12-01 00:27:00+00:00,89480.01,89536.00,89200.20,89208.76,101.52469,0,-0.009077,0.000350,-0.009077,1
2025-12-01 00:19:00+00:00,2025-12-01 00:39:00+00:00,88808.35,88808.35,88626.04,88719.99,67.58118,-1,-0.010330,0.000000,-0.011336,0
2025-12-01 00:32:00+00:00,2025-12-01 00:52:00+00:00,88319.58,88319.58,88044.17,88083.84,167.45610,0,-0.007788,0.000679,-0.008540,1
2025-12-01 00:45:00+00:00,2025-12-01 01:05:00+00:00,87737.65,87775.51,87620.34,87650.09,60.11226,0,-0.002773,0.000000,-0.007417,1
...,...,...,...,...,...,...,...,...,...,...,...
2025-12-30 16:18:00+00:00,2025-12-30 16:38:00+00:00,89133.39,89284.68,89117.38,89268.32,44.64159,0,-0.004433,0.001374,-0.004821,1
2025-12-30 16:35:00+00:00,2025-12-30 16:55:00+00:00,88960.99,89040.00,88852.56,88883.34,80.35831,0,-0.002063,0.000000,-0.003442,1
2025-12-30 17:45:00+00:00,2025-12-30 18:05:00+00:00,88533.43,88533.43,88333.34,88373.43,92.94989,0,-0.000135,0.000557,-0.001921,1
2025-12-30 20:53:00+00:00,2025-12-30 21:13:00+00:00,87978.37,87978.38,87920.00,87920.01,31.21459,0,0.001379,0.001968,-0.000351,1


# Create TBL with Dollar Bars

In [6]:
dollar_bars = get_dollar_bars(
    dataframe=dataframe,
    threshold=10000000,
    datetime_col='date',
)
dollar_bars

Unnamed: 0_level_0,start_date,open,high,low,close,volume,cum_ticks,cum_dollar,buy_volume,sell_volume
end_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2025-12-01 00:03:00+00:00,2025-12-01 00:00:00+00:00,90360.01,90417.00,90194.27,90194.27,158.78913,4,1.433930e+07,61.77063,97.01850
2025-12-01 00:04:00+00:00,2025-12-01 00:04:00+00:00,90194.28,90194.28,89510.00,89816.17,503.41775,1,4.521505e+07,0.00000,503.41775
2025-12-01 00:06:00+00:00,2025-12-01 00:05:00+00:00,89816.18,89831.89,89436.98,89480.01,185.68060,2,1.662368e+07,0.00000,185.68060
2025-12-01 00:08:00+00:00,2025-12-01 00:07:00+00:00,89480.01,89536.00,88965.00,89112.00,274.29586,2,2.445288e+07,0.00000,274.29586
2025-12-01 00:10:00+00:00,2025-12-01 00:09:00+00:00,89112.00,89289.12,88901.00,89059.69,178.54982,2,1.590925e+07,89.82282,88.72700
...,...,...,...,...,...,...,...,...,...,...
2025-12-30 22:43:00+00:00,2025-12-30 22:19:00+00:00,88369.07,88455.51,88310.00,88334.83,113.59457,25,1.003878e+07,49.61665,63.97792
2025-12-30 23:06:00+00:00,2025-12-30 22:44:00+00:00,88334.84,88495.89,88288.02,88458.55,113.49570,23,1.003203e+07,54.33101,59.16469
2025-12-30 23:22:00+00:00,2025-12-30 23:07:00+00:00,88458.55,88556.00,88418.37,88477.94,114.15011,16,1.010125e+07,84.90525,29.24486
2025-12-30 23:59:00+00:00,2025-12-30 23:23:00+00:00,88477.95,88573.65,88475.04,88485.49,115.92175,37,1.026309e+07,65.04439,50.87736


In [7]:
volatility = get_volatility(
    dollar_bars,
    window=100,
)
threshold = volatility.mean()
threshold

np.float64(0.0020728432209691566)

In [8]:
filtered_dataframe = filtrate_tripple_label_barrier(
    dollar_bars,
    cusum_threshold=threshold,
    vertical_barrier=20,
    datetime_col='index'
)
filtered_dataframe

Unnamed: 0,t1,start_date,open,high,low,close,volume,cum_ticks,cum_dollar,buy_volume,sell_volume
2025-12-01 00:06:00+00:00,2025-12-01 00:53:00+00:00,2025-12-01 00:05:00+00:00,89816.18,89831.89,89436.98,89480.01,185.68060,2,1.662368e+07,0.00000,185.68060
2025-12-01 00:08:00+00:00,2025-12-01 00:55:00+00:00,2025-12-01 00:07:00+00:00,89480.01,89536.00,88965.00,89112.00,274.29586,2,2.445288e+07,0.00000,274.29586
2025-12-01 00:18:00+00:00,2025-12-01 01:03:00+00:00,2025-12-01 00:17:00+00:00,88988.90,88988.90,88647.48,88808.35,169.67246,2,1.506728e+07,65.40721,104.26525
2025-12-01 00:21:00+00:00,2025-12-01 01:06:00+00:00,2025-12-01 00:19:00+00:00,88808.35,88848.04,88460.11,88556.98,289.25623,3,2.563154e+07,0.00000,289.25623
2025-12-01 00:30:00+00:00,2025-12-01 01:17:00+00:00,2025-12-01 00:28:00+00:00,88398.99,88444.48,88221.96,88293.75,131.61826,3,1.162675e+07,41.50545,90.11281
...,...,...,...,...,...,...,...,...,...,...,...
2025-12-30 19:05:00+00:00,2025-12-31 00:00:00+00:00,2025-12-30 18:49:00+00:00,88281.47,88350.00,88132.59,88181.84,123.06211,17,1.085564e+07,33.13694,89.92517
2025-12-30 19:37:00+00:00,2025-12-31 00:00:00+00:00,2025-12-30 19:17:00+00:00,88084.04,88422.22,88030.51,88299.99,115.72593,21,1.021458e+07,88.87143,26.85450
2025-12-30 20:45:00+00:00,2025-12-31 00:00:00+00:00,2025-12-30 20:32:00+00:00,88137.28,88180.22,88000.36,88000.37,133.49437,14,1.175883e+07,50.56595,82.92842
2025-12-30 21:17:00+00:00,2025-12-31 00:00:00+00:00,2025-12-30 20:59:00+00:00,87922.17,88150.00,87887.14,88150.00,119.59775,19,1.052753e+07,80.39326,39.20449


In [9]:
labeled_dataframe = get_tripple_label_barrier(
    dataframe=filtered_dataframe,
    close_series=close_series,
    tp=0.02,
    sl=0.01,
)
labeled_dataframe

Unnamed: 0,t1,start_date,open,high,low,close,volume,cum_ticks,cum_dollar,buy_volume,sell_volume,label,return,max_return,min_return,mapped_label
2025-12-01 00:06:00+00:00,2025-12-01 00:53:00+00:00,2025-12-01 00:05:00+00:00,89816.18,89831.89,89436.98,89480.01,185.68060,2,1.662368e+07,0.00000,185.68060,-1,-0.010315,0.000000,-0.025299,0
2025-12-01 00:08:00+00:00,2025-12-01 00:55:00+00:00,2025-12-01 00:07:00+00:00,89480.01,89536.00,88965.00,89112.00,274.29586,2,2.445288e+07,0.00000,274.29586,-1,-0.011538,0.001437,-0.023081,0
2025-12-01 00:18:00+00:00,2025-12-01 01:03:00+00:00,2025-12-01 00:17:00+00:00,88988.90,88988.90,88647.48,88808.35,169.67246,2,1.506728e+07,65.40721,104.26525,-1,-0.011314,0.000000,-0.020362,0
2025-12-01 00:21:00+00:00,2025-12-01 01:06:00+00:00,2025-12-01 00:19:00+00:00,88808.35,88848.04,88460.11,88556.98,289.25623,3,2.563154e+07,0.00000,289.25623,-1,-0.010241,0.000572,-0.017582,0
2025-12-01 00:30:00+00:00,2025-12-01 01:17:00+00:00,2025-12-01 00:28:00+00:00,88398.99,88444.48,88221.96,88293.75,131.61826,3,1.162675e+07,41.50545,90.11281,-1,-0.010897,0.000292,-0.014653,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-12-30 19:05:00+00:00,2025-12-31 00:00:00+00:00,2025-12-30 18:49:00+00:00,88281.47,88350.00,88132.59,88181.84,123.06211,17,1.085564e+07,33.13694,89.92517,0,0.002988,0.004443,-0.003320,1
2025-12-30 19:37:00+00:00,2025-12-31 00:00:00+00:00,2025-12-30 19:17:00+00:00,88084.04,88422.22,88030.51,88299.99,115.72593,21,1.021458e+07,88.87143,26.85450,0,0.001646,0.003099,-0.004653,1
2025-12-30 20:45:00+00:00,2025-12-31 00:00:00+00:00,2025-12-30 20:32:00+00:00,88137.28,88180.22,88000.36,88000.37,133.49437,14,1.175883e+07,50.56595,82.92842,0,0.005056,0.006515,-0.001264,1
2025-12-30 21:17:00+00:00,2025-12-31 00:00:00+00:00,2025-12-30 20:59:00+00:00,87922.17,88150.00,87887.14,88150.00,119.59775,19,1.052753e+07,80.39326,39.20449,0,0.003350,0.004806,-0.000017,1


In [10]:
labeled_dataframe['uniqueness'] = compute_average_uniqueness(
    dataframe=labeled_dataframe,
    t1_col='t1',
    
)
sample_weights = compute_abs_return_uniqueness(
    dataframe=labeled_dataframe,
    return_col='return',
)
sample_weights

2025-12-01 00:06:00+00:00    3.501664
2025-12-01 00:08:00+00:00    2.749353
2025-12-01 00:18:00+00:00    2.187352
2025-12-01 00:21:00+00:00    1.724007
2025-12-01 00:30:00+00:00    1.650478
                               ...   
2025-12-30 19:05:00+00:00    0.465496
2025-12-30 19:37:00+00:00    0.251519
2025-12-30 20:45:00+00:00    0.769105
2025-12-30 21:17:00+00:00    0.507551
2025-12-30 22:18:00+00:00    0.134810
Length: 1496, dtype: float64

# Optimal TBL Config

In [11]:
kappa, theta, sigma = fit_ou_ols(
    series=dataframe['close'].pct_change().dropna(),
    dt=1,
)

print(f'kappa: {kappa}, theta: {theta}, sigma: {sigma}')

tp, sl, vb = optimize_triple_barriers(
    kappa,
    theta,
    sigma,
    n_paths=100000,
    n_steps=1440 * 5,
    T=5.0,
    n_trials=100,
    show_progress=True
)

kappa: 1.0159798721817328, theta: -2.8912013189644216e-07, sigma: 0.0006188923540446823


Best trial: 24. Best value: -1.48463e-05: 100%|██████████| 100/100 [00:30<00:00,  3.24it/s]


Optimal: TP=0.0106 (17.14σ), SL=-0.0051 (-8.24σ), VB=80 bars, Avg hold=80.0 bars
Best Sharpe: -0.0000


In [12]:
filtered_dataframe = filtrate_tripple_label_barrier(
    dataframe,
    cusum_threshold=threshold,
    vertical_barrier=vb,
    datetime_col='date'
)
filtered_dataframe

Unnamed: 0,t1,open,high,low,close,volume
2025-12-01 00:04:00+00:00,2025-12-01 01:24:00+00:00,90194.28,90194.28,89510.00,89816.17,503.41775
2025-12-01 00:05:00+00:00,2025-12-01 01:25:00+00:00,89816.18,89831.89,89522.84,89596.94,76.81928
2025-12-01 00:07:00+00:00,2025-12-01 01:27:00+00:00,89480.01,89536.00,89200.20,89208.76,101.52469
2025-12-01 00:12:00+00:00,2025-12-01 01:32:00+00:00,89240.01,89276.00,88963.10,88963.10,50.80782
2025-12-01 00:17:00+00:00,2025-12-01 01:37:00+00:00,88988.90,88988.90,88712.40,88798.29,104.26525
...,...,...,...,...,...,...
2025-12-30 20:53:00+00:00,2025-12-30 22:13:00+00:00,87978.37,87978.38,87920.00,87920.01,31.21459
2025-12-30 21:08:00+00:00,2025-12-30 22:28:00+00:00,88036.58,88095.77,88036.58,88093.05,9.80037
2025-12-30 21:31:00+00:00,2025-12-30 22:51:00+00:00,88204.60,88275.56,88204.60,88269.47,27.77848
2025-12-30 22:13:00+00:00,2025-12-30 23:33:00+00:00,88369.06,88458.92,88369.06,88447.85,13.62043


In [13]:
labeled_dataframe = get_tripple_label_barrier(
    dataframe=filtered_dataframe,
    close_series=close_series,
    tp=tp,
    sl=sl,
)

In [14]:
labeled_dataframe.value_counts('label')

label
 0    1840
-1     973
 1     350
Name: count, dtype: int64