In [None]:
import numpy as np
import pandas as pd
from mlfinlab.util import volatility
from mlfinlab.filters import filters
from mlfinlab.labeling import labeling
from mlfinlab.sampling import bootstrapping
from mlfinlab.sampling import concurrent
from mlfinlab.data_structures import standard_data_structures

# Load the raw S&P tick file and keep only the rows dated 2010-01-01 or later.
# The `date` column is parsed once here, solely to build the row filter.
sp_read = pd.read_csv('../data/SP.csv')
trade_dates = pd.to_datetime(sp_read['date'].astype(str))
sp = sp_read.loc[trade_dates >= pd.Timestamp('2010-01-01')].copy()

# Combine the separate `date` and `time` columns into one timestamp, then
# reduce `date` to its midnight-normalized form.
# NOTE(review): the previous version rebuilt `date_time` a second time from
# the normalized date; that gives the same value as long as `time` holds a
# plain time of day -- confirm against the CSV.
sp['date_time'] = pd.to_datetime(sp['date'].astype(str) + ' ' + sp['time'].astype(str))
sp['date'] = sp['date_time'].dt.normalize()

# Only these three columns are passed on to the bar construction step.
# The selection already carries the desired column names, so no rename is needed.
sp_processed = sp[['date_time', 'price', 'volume']].copy()

# Aggregate the tick data into dollar bars.
data = standard_data_structures.get_dollar_bars(
    sp_processed, threshold=1_000_000, batch_size=100_000, verbose=False
)

# Slice the dataset so the example doesn't run too long.
data = data.iloc[:2000, :]

# Snapshot of the (sliced) bars taken before the date filter below; it is
# used later as the price-bar source for the indicator matrix.
pdata1 = data.copy()

# Select the data from 1st September 2011 onwards.
# NOTE(review): label slicing with a date string needs a DatetimeIndex. If
# get_dollar_bars returns `date_time` as a regular column instead, set it as
# the index (pd.to_datetime(data["date_time"])) before this line -- confirm.
# An explicit copy is taken because new columns are assigned to `data` and it
# is mutated in place further down; working on a slice of the parent frame
# would trigger pandas' SettingWithCopyWarning.
data = data.loc["2011-09-01":].copy()
# Primary model: simple moving-average cross-over on the bar closes.
fast_window, slow_window = 20, 50

# Rolling means; min_periods equals the window, so the first (window - 1)
# rows of each average are NaN.
data["fast_mavg"] = (
    data["close"].rolling(fast_window, min_periods=fast_window, center=False).mean()
)
data["slow_mavg"] = (
    data["close"].rolling(slow_window, min_periods=slow_window, center=False).mean()
)

# Side of the bet: +1 when the fast average is at or above the slow one,
# -1 when it is below. Rows where either average is NaN satisfy neither
# comparison and therefore stay NaN.
long_signals = data["fast_mavg"] >= data["slow_mavg"]
short_signals = data["fast_mavg"] < data["slow_mavg"]
data["side"] = np.select([long_signals, short_signals], [1.0, -1.0], default=np.nan)

# Remove look-ahead bias by lagging the signal one bar.
data["side"] = data["side"].shift(1)

# Keep an untouched copy (NaN rows included) before cleaning.
raw_data = data.copy()

# Drop every row that still contains a NaN in any column.
data.dropna(how="any", axis=0, inplace=True)
# Daily volatility of the close series, using a lookback of 50.
daily_vol = volatility.get_daily_vol(close=data["close"], lookback=50)

# The symmetric CUSUM filter takes a single point estimate as its threshold,
# so half of the mean daily volatility over the sample window is used.
cusum_threshold = 0.5 * daily_vol["2011-09-01":"2018-01-01"].mean()
cusum_events = filters.cusum_filter(data["close"], threshold=cusum_threshold)

# Vertical (time) barrier for triple-barrier labeling: one day per event.
vertical_barriers = labeling.add_vertical_barrier(
    t_events=cusum_events,
    close=data["close"],
    num_days=1,
)

# Barrier settings passed to get_events.
# NOTE(review): pt_sl is presumably [profit-taking, stop-loss] multiples of
# the target -- confirm against the mlfinlab documentation.
pt_sl, min_ret = [1, 2], 0.005
barrier_events = labeling.get_events(
    close=data["close"],
    t_events=cusum_events,
    vertical_barrier_times=vertical_barriers,
    side_prediction=data["side"],
    target=daily_vol,
    pt_sl=pt_sl,
    min_ret=min_ret,
    num_threads=3,
)
# Bare expression: it only renders when it is the last statement of a cell.
barrier_events

# Diagnostics: report the size of the events table, how many NaNs each
# column holds, and a preview of the first rows.
print(f"Shape of barrier_events: {barrier_events.shape}")
print(f"NaN counts in barrier_events:\n{barrier_events.isna().sum()}")
print(f"\nFirst few rows:\n{barrier_events.head()}")

# Discard every event row that has a NaN in any column, then report the
# size of what is left.
barrier_events_clean = barrier_events.dropna(axis=0, how="any")
print(f"\nShape after dropping NaN: {barrier_events_clean.shape}")

# Price bars for the indicator matrix: the `close` column (kept as a
# one-column DataFrame) of the dollar-bar snapshot taken before date filtering.
close_prices = pdata1.loc[:, ["close"]].copy()

# Build the indicator matrix from the cleaned events and the price bars.
triple_barrier_ind_mat = bootstrapping.get_ind_matrix(
    barrier_events_clean,
    close_prices,
)

# Average label uniqueness, computed two ways: from the indicator matrix ...
ind_mat_uniqueness = bootstrapping.get_ind_mat_average_uniqueness(triple_barrier_ind_mat)
# ... and directly from the triple-barrier events.
av_unique = concurrent.get_av_uniqueness_from_triple_barrier(
    pd.DataFrame(barrier_events_clean),
    close_prices,
    num_threads=1,
)

# Draw a sequential bootstrap of length 4, warm-started with sample index 1.
# This stays the last expression, so its result is what the cell displays.
bootstrapping.seq_bootstrap(
    triple_barrier_ind_mat,
    sample_length=4,
    warmup_samples=[1],
)

2025-11-03 14:20:02.546317 100.0% apply_pt_sl_on_t1 done after 0.03 minutes. Remaining 0.0 minutes..


Shape of barrier_events: (96, 5)
NaN counts in barrier_events:
t1      1
trgt    0
side    0
pt      0
sl      0
dtype: int64

First few rows:
                                             t1      trgt  side  pt  sl
2011-09-25 18:14:00.972 2011-09-26 02:08:27.034  0.014822  -1.0   1   2
2011-09-26 02:08:27.034 2011-09-26 21:36:21.122  0.017268  -1.0   1   2
2011-09-26 05:43:18.372 2011-09-27 07:20:54.166  0.019288  -1.0   1   2
2011-09-26 21:36:21.122 2011-09-28 02:14:52.078  0.020637  -1.0   1   2
2011-09-27 05:14:37.147 2011-09-28 08:00:46.833  0.023376  -1.0   1   2

Shape after dropping NaN: (95, 5)


[1, 62, 68, 2]