In [1]:
import numpy as np
import pandas as pd
from mlfinlab.util import volatility
from mlfinlab.filters import filters
from mlfinlab.labeling import labeling
from mlfinlab.sampling import bootstrapping
from mlfinlab.sampling import concurrent
# Load the example dollar-bar dataset that the indicator matrix is built from.
data = pd.read_csv("https://raw.githubusercontent.com/hudson-and-thames/example-data/main/dollar_bars.csv")
data = data.iloc[:2000, :]  # keep only the first 2000 bars so the example runs quickly
# Index the bars by their timestamp and drop the now-redundant text column.
data.index = pd.to_datetime(data["date_time"])
data = data.drop("date_time", axis=1)
# Keep the bars from 1st September 2011 onwards. Take an explicit copy: columns
# are added to `data` later in this script, and assigning into a slice of
# another DataFrame is what triggers pandas' SettingWithCopyWarning.
data = data.loc["2011-09-01":].copy()
# Primary signal: a simple moving-average cross-over on the close price.
fast_window = 20
slow_window = 50

# Each average requires a full window of bars (min_periods equals the window
# length), so the leading rows of both columns are NaN.
fast_roll = data["close"].rolling(window=fast_window, min_periods=fast_window, center=False)
slow_roll = data["close"].rolling(window=slow_window, min_periods=slow_window, center=False)
data["fast_mavg"] = fast_roll.mean()
data["slow_mavg"] = slow_roll.mean()

# Side of the bet: +1 where the fast average is at or above the slow one,
# -1 where it is below. Rows where an average is still NaN satisfy neither
# comparison and therefore keep a NaN side.
long_signals = data["fast_mavg"] >= data["slow_mavg"]
short_signals = data["fast_mavg"] < data["slow_mavg"]
data["side"] = np.select([long_signals, short_signals], [1.0, -1.0], default=np.nan)

# Remove look-ahead bias by lagging the signal by one bar.
data["side"] = data["side"].shift(1)
# Keep an untouched snapshot of the frame before any rows are removed.
raw_data = data.copy()

# Discard every row that still has a NaN in any column.
data = data.dropna(axis=0, how="any")

# Daily volatility computed from the close prices with a lookback of 50.
daily_vol = volatility.get_daily_vol(close=data["close"], lookback=50)

# Symmetric CUSUM filter: returns the timestamps used as events below.
# Only the CUSUM filter needs a single point estimate of volatility; here it is
# half of the mean daily volatility between 2011-09-01 and 2018-01-01.
vol_point_estimate = daily_vol["2011-09-01":"2018-01-01"].mean()
cusum_events = filters.cusum_filter(data["close"], threshold=vol_point_estimate * 0.5)
# Triple-barrier labeling, step 1: vertical barriers for the CUSUM events
# (num_days=1).
vertical_barriers = labeling.add_vertical_barrier(
    t_events=cusum_events,
    close=data["close"],
    num_days=1,
)

# Triple-barrier labeling, step 2: build the events, using the daily
# volatility as the target and the lagged cross-over signal as the side.
# NOTE(review): pt_sl is presumably the [profit-taking, stop-loss] pair and
# min_ret a minimum-return cut-off — confirm against the mlfinlab docs.
pt_sl = [1, 2]
min_ret = 0.005
barrier_events = labeling.get_events(
    close=data["close"],
    t_events=cusum_events,
    target=daily_vol,
    pt_sl=pt_sl,
    min_ret=min_ret,
    vertical_barrier_times=vertical_barriers,
    side_prediction=data["side"],
    num_threads=3,
)
# Bare expression kept from the original cell; it has no effect here because
# it is not the last statement of the cell.
barrier_events
# Price bars for the indicator matrix: the same dollar-bar CSV, this time read
# in full with its first column as the index.
# NOTE(review): the captured cell output shows a warning pointing at this
# read_csv call; requesting date parsing of column 2 is a likely cause —
# confirm that column 2 really holds dates.
price_bars_url = "https://raw.githubusercontent.com/hudson-and-thames/example-data/main/dollar_bars.csv"
close_prices = pd.read_csv(price_bars_url, index_col=0, parse_dates=[0, 2])

# Build the indicator matrix from the triple-barrier events and the price bars.
triple_barrier_ind_mat = bootstrapping.get_ind_matrix(barrier_events, close_prices)

# Average label uniqueness, computed two ways: from the indicator matrix and
# directly from the triple-barrier events.
ind_mat_uniqueness = bootstrapping.get_ind_mat_average_uniqueness(triple_barrier_ind_mat)
av_unique = concurrent.get_av_uniqueness_from_triple_barrier(
    pd.DataFrame(barrier_events),
    close_prices,
    num_threads=1,
)

# Draw a sequential bootstrap of length 4, warmed up with sample 1. This stays
# the last expression so the notebook displays the drawn sample indices.
bootstrapping.seq_bootstrap(triple_barrier_ind_mat, sample_length=4, warmup_samples=[1])

2025-11-03 08:42:50.754711 100.0% apply_pt_sl_on_t1 done after 0.07 minutes. Remaining 0.0 minutes..
  close_prices = pd.read_csv(


[1, 83, 172, 199]