In [101]:
import vectorbtpro as vbt
import numpy as np
import pandas as pd
from numba import njit

In [102]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

## Data

Using GBM generated data to ensure consistency

In [103]:
data = vbt.GBMOHLCData.pull(
    symbols=['CHADCOIN'],
    start='2020-01-01 UTC',
    end='2021-01-01 UTC',
    seed=vbt.symbol_dict(BTCUSD=42),
    n_ticks=1000,
)

# Creating Splitters

- Sets / Splits / Ranges

In [104]:
splitter = vbt.Splitter.from_n_rolling(
    index=data.index,
    n=5,
    split=0.5,
    set_labels=["train","test"],
)
splitter.plot()

FigureWidget({
    'data': [{'colorscale': [[0.0, 'rgba(31,119,180,0.8000)'], [1.0,
                             'rgba(31,119,180,0.8000)']],
              'hoverongaps': False,
              'hovertemplate': '%{x}<br>Split: %{y}<br>Set: train',
              'legendgroup': 'train',
              'name': 'train',
              'showlegend': True,
              'showscale': False,
              'type': 'heatmap',
              'uid': 'efcff0d7-cfb5-41e7-b713-d6b33f710841',
              'x': array([datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 2, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 3, 0, 0, tzinfo=datetime.timezone.utc), ...,
                          datetime.datetime(2020, 12, 29, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 12, 30, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020,

In [105]:
splitter = vbt.Splitter.from_n_rolling(
    index=data.index,
    n=7,
    length="180 days",
    split=(
        0.5,
        vbt.RelRange(length=0.2, length_space="all"),
        vbt.RelRange()
    ),
    set_labels=["train","validation","test"],
)
splitter.plot()

FigureWidget({
    'data': [{'colorscale': [[0.0, 'rgba(31,119,180,0.8000)'], [1.0,
                             'rgba(31,119,180,0.8000)']],
              'hoverongaps': False,
              'hovertemplate': '%{x}<br>Split: %{y}<br>Set: train',
              'legendgroup': 'train',
              'name': 'train',
              'showlegend': True,
              'showscale': False,
              'type': 'heatmap',
              'uid': 'd1de91a1-cf33-4957-930f-94c4860ef060',
              'x': array([datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 2, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 3, 0, 0, tzinfo=datetime.timezone.utc), ...,
                          datetime.datetime(2020, 12, 29, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 12, 30, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020,

# Applying Splitters

In [106]:
splitter.take(data)[0, "train"].close # into=

symbol,CHADCOIN
2020-01-01 00:00:00+00:00,98.932718
2020-01-02 00:00:00+00:00,103.115133
2020-01-03 00:00:00+00:00,104.124695
2020-01-04 00:00:00+00:00,101.475360
2020-01-05 00:00:00+00:00,107.728529
...,...
2020-03-26 00:00:00+00:00,127.149680
2020-03-27 00:00:00+00:00,125.433032
2020-03-28 00:00:00+00:00,133.827713
2020-03-29 00:00:00+00:00,131.648345


## Simple UDFs

In [107]:
def buy_and_hold_return(data):
    return vbt.Portfolio.from_holding(data).total_return

In [108]:
results = splitter.apply(
    buy_and_hold_return,
    vbt.Takeable(data),
    merge_func="concat" # column_stack
)
results

split  set         symbol  
0      train       CHADCOIN    0.239386
       validation  CHADCOIN   -0.169171
       test        CHADCOIN   -0.387466
1      train       CHADCOIN    0.086286
       validation  CHADCOIN   -0.371946
       test        CHADCOIN   -0.015134
2      train       CHADCOIN   -0.324001
       validation  CHADCOIN   -0.264697
       test        CHADCOIN   -0.265233
3      train       CHADCOIN   -0.513936
       validation  CHADCOIN    0.042102
       test        CHADCOIN   -0.072892
4      train       CHADCOIN   -0.344568
       validation  CHADCOIN   -0.319716
       test        CHADCOIN   -0.028434
5      train       CHADCOIN   -0.414003
       validation  CHADCOIN    0.303277
       test        CHADCOIN   -0.119211
6      train       CHADCOIN   -0.109621
       validation  CHADCOIN   -0.159280
       test        CHADCOIN   -0.059226
Name: total_return, dtype: float64

## Dynamic Splitter creation

In [109]:
@vbt.split(
    splitter="from_n_rolling",
    splitter_kwargs=dict(n=5, split=0.5, set_labels=["train","test"]),
    takeable_args=["data"],
    merge_func="reset_column_stack",
    attach_bounds="index",
)
def buy_and_hold_return_series(data):
    return data.returns

In [110]:
buy_and_hold_return_series(data)

split,0,0,1,1,2,2,3,3,4,4
set,train,test,train,test,train,test,train,test,train,test
start,2020-01-01 00:00:00+00:00,2020-02-06 00:00:00+00:00,2020-03-14 00:00:00+00:00,2020-04-19 00:00:00+00:00,2020-05-26 00:00:00+00:00,2020-07-01 00:00:00+00:00,2020-08-07 00:00:00+00:00,2020-09-12 00:00:00+00:00,2020-10-19 00:00:00+00:00,2020-11-24 00:00:00+00:00
end,2020-02-06 00:00:00+00:00,2020-03-14 00:00:00+00:00,2020-04-19 00:00:00+00:00,2020-05-26 00:00:00+00:00,2020-07-01 00:00:00+00:00,2020-08-07 00:00:00+00:00,2020-09-12 00:00:00+00:00,2020-10-19 00:00:00+00:00,2020-11-24 00:00:00+00:00,2020-12-31 00:00:00+00:00
symbol,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.042275,0.004489,0.022292,-0.025179,-0.036851,-0.026914,-0.004443,0.047314,-0.009819,0.019245
2,0.009791,0.005861,-0.00018,-0.006299,0.030669,0.037432,-0.0134,-0.00676,-0.001232,-0.00184
3,-0.025444,-0.002955,0.002207,-0.039156,0.020811,0.026583,0.023849,-0.045149,0.013071,0.038482
4,0.061623,-0.028683,0.013372,-0.017012,-0.017798,-0.013095,-0.012649,0.025853,-0.020889,-0.028911
5,-0.020144,-0.020564,0.041158,0.029616,-0.027552,-0.019625,-0.0349,0.018352,0.009491,0.055586
6,-0.03616,-0.021748,-0.026457,-0.067166,-0.067564,-0.017541,-0.028683,0.05142,-0.036156,-0.009867
7,0.005358,0.055876,-0.002044,0.048934,-0.056268,0.020336,-0.026712,0.05329,-0.050817,-0.03548
8,0.013569,-0.045957,-0.071815,0.010424,0.03582,0.065799,-0.033798,-0.011283,-0.00871,0.033994
9,0.039297,0.017553,0.028635,-0.012782,-0.043405,-0.028124,-0.009517,0.021376,-0.010455,0.050444


In [111]:
buy_and_hold_return_series(
    data, 
    _apply_kwargs=dict(set_="train", merge_func="column_stack", attach_bounds=None)
)

split,0,1,2,3,4
symbol,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN
2020-01-01 00:00:00+00:00,0.000000,,,,
2020-01-02 00:00:00+00:00,0.042275,,,,
2020-01-03 00:00:00+00:00,0.009791,,,,
2020-01-04 00:00:00+00:00,-0.025444,,,,
2020-01-05 00:00:00+00:00,0.061623,,,,
...,...,...,...,...,...
2020-11-19 00:00:00+00:00,,,,,0.016409
2020-11-20 00:00:00+00:00,,,,,-0.003083
2020-11-21 00:00:00+00:00,,,,,-0.014457
2020-11-22 00:00:00+00:00,,,,,0.038265


# Pipelines

In [112]:
data = vbt.GBMOHLCData.pull(
    symbols=['CHADCOIN'],
    start='2020-01-01 UTC',
    end='2021-01-01 UTC', # '2030-01-01 UTC'
    seed=vbt.symbol_dict(CHADCOIN=10),
    timeframe="15 min",
    n_ticks=20,
)
#data.close.plot()

In [113]:
@vbt.cv_split(
    merge_func="concat",
    splitter="from_n_rolling",
    splitter_kwargs=dict(n=4, split=0.5, set_labels=["train","test"]),
    takeable_args=["data"],
    return_grid='all',
)
def rsi_pipeline(data, upper_threshold=70, lower_threshold=30):
    rsi = vbt.RSI.run(close=data).rsi
    entries = rsi.vbt.crossed_below(lower_threshold)
    exits = rsi.vbt.crossed_above(upper_threshold)
    pf = vbt.Portfolio.from_signals(
        data,
        long_entries=entries,
        long_exits=exits,
    )

    if len(pf.trades) < 1:
        return vbt.NoResult

    return pf.sharpe_ratio

In [114]:
grid, best = rsi_pipeline(
    data.close["CHADCOIN"],
    vbt.Param(range(70, 80, 1), name="Upper"),
    vbt.Param(range(20, 30, 1), name="Lower"),
)

 25%|##5       | 1/4 [00:04<00:14,  4.88s/it, split=1]

In [115]:
best.xs("test", axis=0, level="set")

split  Upper  Lower
0      71     23       1.856017
1      76     20      -0.012611
2      79     29       0.985265
3      72     25       3.224272
dtype: float64

## Compare to buy and hold

In [116]:
splitter = rsi_pipeline(
    data,
    _return_splitter=True
)
#splitter3.plot()

In [117]:
stacked_data = splitter.take(data.close, into="reset_stacked")
benchmark_sharpe = vbt.Portfolio.from_holding(stacked_data, freq="15m").sharpe_ratio
benchmark_sharpe

split  set    symbol  
0      train  CHADCOIN   -2.459066
       test   CHADCOIN   -1.588828
1      train  CHADCOIN   -0.049191
       test   CHADCOIN    2.743796
2      train  CHADCOIN    3.918398
       test   CHADCOIN   -0.308933
3      train  CHADCOIN    1.149608
       test   CHADCOIN   -1.683509
Name: sharpe_ratio, dtype: float64

In [118]:
benchmark_sharpe.xs("test", axis=0, level="set")

split  symbol  
0      CHADCOIN   -1.588828
1      CHADCOIN    2.743796
2      CHADCOIN   -0.308933
3      CHADCOIN   -1.683509
Name: sharpe_ratio, dtype: float64

In [119]:
best.xs("test", axis=0, level="set")

split  Upper  Lower
0      71     23       1.856017
1      76     20      -0.012611
2      79     29       0.985265
3      72     25       3.224272
dtype: float64

## Custom Split function

Extra context variables:
- split_idx: Current split index, starting at 0
- splits: Nested list of splits appended up to this point
- bounds: Nested list of bounds appended up to this point
- prev_start: Left bound of the previous split
- prev_end: Right bound of the previous split

In [130]:
def custom_split_func(index, prev_start, prev_end, test_weeks=2, initial_offset=14):
    if prev_start is None:
        prev_start = index[0] + index.freq * initial_offset
        if not (prev_start.day == 1 and prev_start.time() == pd.Timestamp("00:00:00").time()):
            prev_start = (prev_start + pd.DateOffset(months=1)).normalize()

        prev_end = prev_start + pd.offsets.MonthBegin(1)

    new_end = prev_end + pd.offsets.MonthBegin(1)
    if new_end > index[-1]:
        return None
    return [
        slice(prev_start, new_end - pd.Timedelta(weeks=test_weeks)),  
        slice(new_end - pd.Timedelta(weeks=test_weeks), new_end)
    ]

In [128]:
vbt.Splitter.from_split_func(
    data.index,
    split_func=custom_split_func,
    split_args=(vbt.Rep("index"), vbt.Rep("prev_start"), vbt.Rep("prev_end")),
    split_kwargs=dict(test_weeks=4, initial_offset=14),
    range_bounds_kwargs=dict(index_bounds=True),
    set_labels=["test","train"]
).plot()

FigureWidget({
    'data': [{'colorscale': [[0.0, 'rgba(31,119,180,0.8000)'], [1.0,
                             'rgba(31,119,180,0.8000)']],
              'hoverongaps': False,
              'hovertemplate': '%{x}<br>Split: %{y}<br>Set: test',
              'legendgroup': 'test',
              'name': 'test',
              'showlegend': True,
              'showscale': False,
              'type': 'heatmap',
              'uid': '3f051d52-a81b-469c-a54e-66e2c15ff958',
              'x': array([datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 1, 0, 15, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 1, 0, 30, tzinfo=datetime.timezone.utc), ...,
                          datetime.datetime(2020, 12, 31, 23, 15, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 12, 31, 23, 30, tzinfo=datetime.timezone.utc),
                          datetime.datetime(20

In [121]:
@vbt.cv_split(
    merge_func="concat",
    splitter="from_split_func",
    splitter_kwargs=dict(
        split_func=custom_split_func,
        split_args=(vbt.Rep("index"), vbt.Rep("prev_start"), vbt.Rep("prev_end")),
        split_kwargs=dict(test_weeks=4, initial_offset=14),
        range_bounds_kwargs=dict(index_bounds=True),
        set_labels=["train","test"]
    ),
    takeable_args=["data"],
    return_grid='all',
)
def rsi_pipeline(data, upper_threshold=70, lower_threshold=30):
    window_length=14
    rsi = vbt.RSI.run(close=data, window=window_length).rsi
    entries = rsi.vbt.crossed_below(lower_threshold)
    exits = rsi.vbt.crossed_above(upper_threshold)
    pf = vbt.Portfolio.from_signals(
        data,
        long_entries=entries,
        long_exits=exits,
    )

    if len(pf.trades) < 1:
        return vbt.NoResult

    return pf.sharpe_ratio

In [122]:
grid, best = rsi_pipeline(
    data.close["CHADCOIN"],
    vbt.Param(range(70, 80, 1), name="Upper"),
    vbt.Param(range(20, 30, 1), name="Lower"),
    _splitter_kwargs=dict(split_kwargs=dict(test_weeks=1, initial_offset=14)),
    #_apply_kwargs=dict(split=1)
)
best

 11%|#1        | 1/9 [00:02<00:23,  2.97s/it, split=1]

split  set    Upper  Lower
0      train  71     21        2.076925
1      train  71     21        2.347230
       test   71     21       16.429512
2      train  73     20        2.077370
3      train  73     20        2.030224
4      train  73     20        1.087867
       test   73     20       -3.029785
5      train  71     21        1.500760
6      train  71     21        0.917148
7      train  73     26        1.329837
       test   73     26       -0.513515
8      train  73     26        1.323866
       test   73     26        1.601448
dtype: float64

## Indicator Warmup

- `range_` is provided in context of `splitter.apply`

In [123]:
@vbt.cv_split(
    merge_func="concat",
    splitter="from_split_func",
    splitter_kwargs=dict(
        split_func=custom_split_func,
        split_args=(vbt.Rep("index"), vbt.Rep("prev_start"), vbt.Rep("prev_end")),
        split_kwargs=dict(test_weeks=4, initial_offset=14),
        range_bounds_kwargs=dict(index_bounds=True),
        set_labels=["train","test"]
    ),
    takeable_args=None, #!
    index_from="data", #!
    return_grid=True,
)
def rsi_pipeline_warmup(range_, data, upper_threshold=70, lower_threshold=30, window_length=14):

    buffered_range = slice(range_.start - window_length, range_.stop)  
    data_buffered = data.iloc[buffered_range]
    
    rsi = vbt.RSI.run(close=data_buffered, window=window_length).rsi
    entries_buffered = rsi.vbt.crossed_below(lower_threshold)
    exits_buffered = rsi.vbt.crossed_above(upper_threshold)

    pf = vbt.Portfolio.from_signals(
        data_buffered.iloc[window_length:],
        long_entries=entries_buffered.iloc[window_length:],
        long_exits=exits_buffered.iloc[window_length:],
    )

    if len(pf.trades) < 1:
        return vbt.NoResult

    return pf.sharpe_ratio

In [124]:
grid, best = rsi_pipeline_warmup(
    vbt.Rep("range_"),
    data.close["CHADCOIN"],
    vbt.Param(range(70, 80, 1), name="Upper"),
    vbt.Param(range(20, 30, 1), name="Lower"),
    _splitter_kwargs=dict(split_kwargs=dict(test_weeks=1, initial_offset=14)),
    _apply_kwargs=dict(split=1)
)
best

set    Upper  Lower
train  71     21        2.347230
test   71     21       16.429512
dtype: float64