# Chapter 4: Sampling, Uniqueness, and Sequential Bootstrap
AFML (*Advances in Financial Machine Learning*) focus: overlap-aware sampling for robust model training.


In [1]:
import math
import matplotlib.pyplot as plt
import polars as pl
import openquant
import sys
from pathlib import Path
sys.path.insert(0, str(Path('notebooks/python/scripts').resolve()))
from afml_chapter_utils import (
    fetch_panel,
    simple_returns,
    probs_and_sides_from_momentum,
    timestamps_from_dates,
    lag_corr,
    fracdiff_ffd,
)

# Load the price panel and unpack the two columns used throughout the notebook
# into plain Python lists (same order as before: dates first, then USO prices).
panel = fetch_panel(window=900)
dates, uso = (panel[column].to_list() for column in ('date', 'USO'))

# Series derived from the USO prices / dates via the project helpers.
uso_ret = simple_returns(uso)
probs, sides = probs_and_sides_from_momentum(uso)
timestamps = timestamps_from_dates(dates)

# Row-wise price tuples for the multi-asset basket.
asset_names = ['USO', 'BNO', 'XLE', 'GLD', 'UNG']
asset_prices = panel.select(asset_names).rows()

# Sanity check: number of rows loaded and the first/last date covered.
print(f"rows {panel.height} range {dates[0]} {dates[-1]}")


rows 900 range 2022-07-08 2026-02-06


In [2]:
# Tunable parameters for this demo, named so a reader can adjust them in one place.
CUSUM_THRESHOLD = 0.001        # threshold passed to the CUSUM event filter
LABEL_HORIZON_BARS = 5         # each label ends this many bars after its start bar
MAX_LABELS = 250               # only the first MAX_LABELS events become labels
BOOTSTRAP_SAMPLE_LENGTH = 200  # sample_length requested from seq_bootstrap
HIST_BINS = 30                 # number of histogram bins in the figure

# Event positions flagged by the CUSUM filter on the USO series; they are used
# below as bar positions into `uso`.
idx = openquant.filters.cusum_filter_indices(uso, CUSUM_THRESHOLD)

# Build (start, end) bar pairs; the end is clamped so it never runs past the
# final bar of the series.
last_bar = len(uso) - 1
label_endtime = [(i, min(i + LABEL_HORIZON_BARS, last_bar)) for i in idx[:MAX_LABELS]]
bar_index = list(range(len(uso)))

# Indicator matrix over all bars, its average uniqueness, and a sequential
# bootstrap draw from it.
# NOTE(review): seq_bootstrap presumably draws at random and no seed is set
# here, so the histogram may differ between runs — confirm whether openquant
# exposes a seed and pin it if so.
ind_mat = openquant.sampling.get_ind_matrix(label_endtime, bar_index)
uniq = openquant.sampling.get_ind_mat_average_uniqueness(ind_mat)
boot = openquant.sampling.seq_bootstrap(
    ind_mat,
    sample_length=BOOTSTRAP_SAMPLE_LENGTH,
    warmup_samples=[0, 1],
)

# Explicit fig/ax interface with axis labels so the figure stands on its own.
# NOTE(review): the x-axis label assumes `boot` holds indices of the drawn
# labels — confirm against the openquant API.
fig, ax = plt.subplots(figsize=(10, 4))
ax.hist(boot, bins=HIST_BINS)
ax.set_title('Chapter 4: Sequential Bootstrap Sample Frequency')
ax.set_xlabel('Sampled label index')
ax.set_ylabel('Number of draws')
fig.tight_layout()
plt.show()

print('indicator rows', len(ind_mat), 'labels', len(label_endtime), 'avg uniqueness', uniq)


indicator rows 900 labels 250 avg uniqueness 0.1793333333333329


## Interpretation
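Average uniqueness measures how much each label's holding window overlaps with the windows of other labels: a value of 1.0 means no overlap, and values near 0 mean heavy overlap. The value of about 0.18 reported above therefore indicates heavily overlapping labels. Sequential bootstrap accounts for this by drawing each new observation with probability proportional to its uniqueness relative to the observations already drawn, so it preferentially samples higher-uniqueness observations.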

