In [2]:
import vectorbtpro as vbt
import numpy as np
import pandas as pd
from numba import njit

In [3]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

## Data

Using GBM generated data to ensure consistency

In [4]:
data = vbt.GBMOHLCData.pull(
    symbols=['CHADCOIN'],
    start='2020-01-01 UTC',
    end='2021-01-01 UTC',
    seed=vbt.symbol_dict(BTCUSD=42),
    n_ticks=1000,
)

# Creating Splitters

- Sets / Splits / Ranges

In [5]:
splitter = vbt.Splitter.from_n_rolling(
    index=data.index,
    n=5,
    split=0.5,
    set_labels=["train","test"],
)
splitter.plot()

FigureWidget({
    'data': [{'colorscale': [[0.0, 'rgba(31,119,180,0.8000)'], [1.0,
                             'rgba(31,119,180,0.8000)']],
              'hoverongaps': False,
              'hovertemplate': '%{x}<br>Split: %{y}<br>Set: train',
              'legendgroup': 'train',
              'name': 'train',
              'showlegend': True,
              'showscale': False,
              'type': 'heatmap',
              'uid': '523ad84e-48c6-4d7b-bec7-47c696b87b12',
              'x': array([datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 2, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 3, 0, 0, tzinfo=datetime.timezone.utc), ...,
                          datetime.datetime(2020, 12, 29, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 12, 30, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020,

In [6]:
splitter = vbt.Splitter.from_n_rolling(
    index=data.index,
    n=7,
    length="180 days",
    split=(
        0.5,
        vbt.RelRange(length=0.2, length_space="all"),
        vbt.RelRange()
    ),
    set_labels=["train","validation","test"],
)
splitter.plot()

FigureWidget({
    'data': [{'colorscale': [[0.0, 'rgba(31,119,180,0.8000)'], [1.0,
                             'rgba(31,119,180,0.8000)']],
              'hoverongaps': False,
              'hovertemplate': '%{x}<br>Split: %{y}<br>Set: train',
              'legendgroup': 'train',
              'name': 'train',
              'showlegend': True,
              'showscale': False,
              'type': 'heatmap',
              'uid': '8c119f91-6ead-40f9-9fe8-9fbf161e487c',
              'x': array([datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 2, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 3, 0, 0, tzinfo=datetime.timezone.utc), ...,
                          datetime.datetime(2020, 12, 29, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 12, 30, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020,

# Applying Splitters

In [7]:
splitter.take(data)[0, "train"].close # into=

symbol,CHADCOIN
2020-01-01 00:00:00+00:00,100.772002
2020-01-02 00:00:00+00:00,96.861179
2020-01-03 00:00:00+00:00,100.158066
2020-01-04 00:00:00+00:00,101.096793
2020-01-05 00:00:00+00:00,99.925879
...,...
2020-03-26 00:00:00+00:00,91.902985
2020-03-27 00:00:00+00:00,93.238044
2020-03-28 00:00:00+00:00,92.312437
2020-03-29 00:00:00+00:00,95.095055


## Simple UDFs

In [8]:
def buy_and_hold_return(data):
    return vbt.Portfolio.from_holding(data).total_return

In [9]:
results = splitter.apply(
    buy_and_hold_return,
    vbt.Takeable(data),
    merge_func="concat" # column_stack
)
results

split  set         symbol  
0      train       CHADCOIN   -0.073107
       validation  CHADCOIN    0.235382
       test        CHADCOIN    0.089705
1      train       CHADCOIN    0.141124
       validation  CHADCOIN    0.362168
       test        CHADCOIN   -0.210278
2      train       CHADCOIN    0.673684
       validation  CHADCOIN   -0.197744
       test        CHADCOIN   -0.230603
3      train       CHADCOIN    0.129656
       validation  CHADCOIN    0.058770
       test        CHADCOIN   -0.215346
4      train       CHADCOIN    0.116988
       validation  CHADCOIN   -0.320939
       test        CHADCOIN   -0.176358
5      train       CHADCOIN   -0.342140
       validation  CHADCOIN   -0.125786
       test        CHADCOIN   -0.280652
6      train       CHADCOIN   -0.187940
       validation  CHADCOIN   -0.356044
       test        CHADCOIN    0.183985
Name: total_return, dtype: float64

## Dynamic Splitter creation

In [10]:
@vbt.split(
    splitter="from_n_rolling",
    splitter_kwargs=dict(n=5, split=0.5, set_labels=["train","test"]),
    takeable_args=["data"],
    merge_func="reset_column_stack",
    attach_bounds="index",
)
def buy_and_hold_return_series(data):
    return data.returns

In [11]:
buy_and_hold_return_series(data)

split,0,0,1,1,2,2,3,3,4,4
set,train,test,train,test,train,test,train,test,train,test
start,2020-01-01 00:00:00+00:00,2020-02-06 00:00:00+00:00,2020-03-14 00:00:00+00:00,2020-04-19 00:00:00+00:00,2020-05-26 00:00:00+00:00,2020-07-01 00:00:00+00:00,2020-08-07 00:00:00+00:00,2020-09-12 00:00:00+00:00,2020-10-19 00:00:00+00:00,2020-11-24 00:00:00+00:00
end,2020-02-06 00:00:00+00:00,2020-03-14 00:00:00+00:00,2020-04-19 00:00:00+00:00,2020-05-26 00:00:00+00:00,2020-07-01 00:00:00+00:00,2020-08-07 00:00:00+00:00,2020-09-12 00:00:00+00:00,2020-10-19 00:00:00+00:00,2020-11-24 00:00:00+00:00,2020-12-31 00:00:00+00:00
symbol,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-0.038809,0.007643,-0.030664,0.001956,0.009027,0.007581,0.004259,0.00444,-0.004664,0.024336
2,0.034037,-0.006423,0.005686,-0.027074,0.052083,-0.046735,-0.009089,-0.043182,-0.007885,-0.041078
3,0.009372,0.014913,0.053308,-0.015775,-0.011891,-0.011763,-0.021184,0.029386,-0.008888,-0.051475
4,-0.011582,-0.009771,-0.010204,-0.040726,-0.034687,0.035618,-0.009758,0.012952,-0.034413,-0.003216
5,-0.031233,-0.04601,-0.011632,0.049652,-0.021702,0.010886,-0.024498,0.090064,-0.007874,0.046541
6,-0.031079,0.046718,0.008075,0.031748,-0.005588,0.010817,-0.004803,0.024,0.016766,0.014082
7,-0.021939,0.011766,-0.041223,-0.01129,0.02481,-0.017391,-0.067758,0.018807,0.005866,-0.000538
8,-0.01985,0.018125,0.028067,0.041055,-0.024413,-0.027074,-0.021995,-0.013807,-0.007744,-0.026026
9,0.025605,-0.006178,0.022055,0.064768,0.010291,0.022613,0.000137,-0.001886,0.010638,-0.019397


In [12]:
buy_and_hold_return_series(
    data, 
    _apply_kwargs=dict(set_="train", merge_func="column_stack", attach_bounds=None)
)

split,0,1,2,3,4
symbol,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN
2020-01-01 00:00:00+00:00,0.000000,,,,
2020-01-02 00:00:00+00:00,-0.038809,,,,
2020-01-03 00:00:00+00:00,0.034037,,,,
2020-01-04 00:00:00+00:00,0.009372,,,,
2020-01-05 00:00:00+00:00,-0.011582,,,,
...,...,...,...,...,...
2020-11-19 00:00:00+00:00,,,,,-0.047842
2020-11-20 00:00:00+00:00,,,,,-0.026197
2020-11-21 00:00:00+00:00,,,,,0.039486
2020-11-22 00:00:00+00:00,,,,,-0.014820


# Pipelines

In [13]:
data = vbt.GBMOHLCData.pull(
    symbols=['CHADCOIN'],
    start='2020-01-01 UTC',
    end='2021-01-01 UTC', # '2030-01-01 UTC'
    seed=vbt.symbol_dict(CHADCOIN=10),
    timeframe="15 min",
    n_ticks=20,
)
#data.close.plot()

In [14]:
@vbt.cv_split(
    merge_func="concat",
    splitter="from_n_rolling",
    splitter_kwargs=dict(n=4, split=0.5, set_labels=["train","test"]),
    takeable_args=["data"],
    return_grid='all',
)
def rsi_pipeline(data, upper_threshold=70, lower_threshold=30):
    rsi = vbt.RSI.run(close=data).rsi
    entries = rsi.vbt.crossed_below(lower_threshold)
    exits = rsi.vbt.crossed_above(upper_threshold)
    pf = vbt.Portfolio.from_signals(
        data,
        long_entries=entries,
        long_exits=exits,
    )

    if len(pf.trades) < 1:
        return vbt.NoResult

    return pf.sharpe_ratio

In [15]:
grid, best = rsi_pipeline(
    data.close["CHADCOIN"],
    vbt.Param(range(70, 80, 1), name="Upper"),
    vbt.Param(range(20, 30, 1), name="Lower"),
)

 25%|##5       | 1/4 [00:04<00:13,  4.62s/it, split=1]

In [16]:
best.xs("test", axis=0, level="set")

split  Upper  Lower
0      71     23       1.856017
1      76     20      -0.012611
2      79     29       0.985265
3      72     25       3.224272
dtype: float64

## Compare to buy and hold

In [17]:
splitter = rsi_pipeline(
    data,
    _return_splitter=True
)
#splitter3.plot()

In [18]:
stacked_data = splitter.take(data.close, into="reset_stacked")
benchmark_sharpe = vbt.Portfolio.from_holding(stacked_data, freq="15m").sharpe_ratio
benchmark_sharpe

split  set    symbol  
0      train  CHADCOIN   -2.459066
       test   CHADCOIN   -1.588828
1      train  CHADCOIN   -0.049191
       test   CHADCOIN    2.743796
2      train  CHADCOIN    3.918398
       test   CHADCOIN   -0.308933
3      train  CHADCOIN    1.149608
       test   CHADCOIN   -1.683509
Name: sharpe_ratio, dtype: float64

In [19]:
benchmark_sharpe.xs("test", axis=0, level="set")

split  symbol  
0      CHADCOIN   -1.588828
1      CHADCOIN    2.743796
2      CHADCOIN   -0.308933
3      CHADCOIN   -1.683509
Name: sharpe_ratio, dtype: float64

In [20]:
best.xs("test", axis=0, level="set")

split  Upper  Lower
0      71     23       1.856017
1      76     20      -0.012611
2      79     29       0.985265
3      72     25       3.224272
dtype: float64

## Custom Split function

Extra context variables:
- split_idx: Current split index, starting at 0
- splits: Nested list of splits appended up to this point
- bounds: Nested list of bounds appended up to this point
- prev_start: Left bound of the previous split
- prev_end: Right bound of the previous split

In [21]:
def custom_split_func(index, prev_start, prev_end, test_weeks=2, initial_offset=14):
    if prev_start is None:
        prev_start = index[0] + index.freq * initial_offset
        if not (prev_start.day == 1 and prev_start.time() == pd.Timestamp("00:00:00").time()):
            prev_start = (prev_start + pd.DateOffset(months=1)).normalize()

        prev_end = prev_start + pd.offsets.MonthBegin(1)

    new_end = prev_end + pd.offsets.MonthBegin(1)
    if new_end > index[-1]:
        return None
    return [
        slice(prev_start, new_end - pd.Timedelta(weeks=test_weeks)),  
        slice(new_end - pd.Timedelta(weeks=test_weeks), new_end)
    ]

In [22]:
vbt.Splitter.from_split_func(
    data.index,
    split_func=custom_split_func,
    split_args=(vbt.Rep("index"), vbt.Rep("prev_start"), vbt.Rep("prev_end")),
    split_kwargs=dict(test_weeks=4, initial_offset=14),
    range_bounds_kwargs=dict(index_bounds=True),
    set_labels=["test","train"]
).plot()

FigureWidget({
    'data': [{'colorscale': [[0.0, 'rgba(31,119,180,0.8000)'], [1.0,
                             'rgba(31,119,180,0.8000)']],
              'hoverongaps': False,
              'hovertemplate': '%{x}<br>Split: %{y}<br>Set: test',
              'legendgroup': 'test',
              'name': 'test',
              'showlegend': True,
              'showscale': False,
              'type': 'heatmap',
              'uid': '649e04d9-25f9-4f78-9c18-947f916f48ee',
              'x': array([datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 1, 0, 15, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 1, 0, 30, tzinfo=datetime.timezone.utc), ...,
                          datetime.datetime(2020, 12, 31, 23, 15, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 12, 31, 23, 30, tzinfo=datetime.timezone.utc),
                          datetime.datetime(20

In [23]:
@vbt.cv_split(
    merge_func="concat",
    splitter="from_split_func",
    splitter_kwargs=dict(
        split_func=custom_split_func,
        split_args=(vbt.Rep("index"), vbt.Rep("prev_start"), vbt.Rep("prev_end")),
        split_kwargs=dict(test_weeks=4, initial_offset=14),
        range_bounds_kwargs=dict(index_bounds=True),
        set_labels=["train","test"]
    ),
    takeable_args=["data"],
    return_grid='all',
)
def rsi_pipeline(data, upper_threshold=70, lower_threshold=30):
    window_length=14
    rsi = vbt.RSI.run(close=data, window=window_length).rsi
    entries = rsi.vbt.crossed_below(lower_threshold)
    exits = rsi.vbt.crossed_above(upper_threshold)
    pf = vbt.Portfolio.from_signals(
        data,
        long_entries=entries,
        long_exits=exits,
    )

    if len(pf.trades) < 1:
        return vbt.NoResult

    return pf.sharpe_ratio

In [24]:
grid, best = rsi_pipeline(
    data.close["CHADCOIN"],
    vbt.Param(range(70, 80, 1), name="Upper"),
    vbt.Param(range(20, 30, 1), name="Lower"),
    _splitter_kwargs=dict(split_kwargs=dict(test_weeks=1, initial_offset=14)),
    #_apply_kwargs=dict(split=1)
)
best

 11%|#1        | 1/9 [00:03<00:26,  3.26s/it, split=1]

split  set    Upper  Lower
0      train  71     21        2.076925
1      train  71     21        2.347230
       test   71     21       16.429512
2      train  73     20        2.077370
3      train  73     20        2.030224
4      train  73     20        1.087867
       test   73     20       -3.029785
5      train  71     21        1.500760
6      train  71     21        0.917148
7      train  73     26        1.329837
       test   73     26       -0.513515
8      train  73     26        1.323866
       test   73     26        1.601448
dtype: float64

## Indicator Warmup

- `range_` is provided in context of `splitter.apply`

In [25]:
@vbt.cv_split(
    merge_func="concat",
    splitter="from_split_func",
    splitter_kwargs=dict(
        split_func=custom_split_func,
        split_args=(vbt.Rep("index"), vbt.Rep("prev_start"), vbt.Rep("prev_end")),
        split_kwargs=dict(test_weeks=4, initial_offset=14),
        range_bounds_kwargs=dict(index_bounds=True),
        set_labels=["train","test"]
    ),
    takeable_args=None, #!
    index_from="data", #!
    return_grid=True,
)
def rsi_pipeline_warmup(range_, data, upper_threshold=70, lower_threshold=30, window_length=14):

    buffered_range = slice(range_.start - window_length, range_.stop)  
    data_buffered = data.iloc[buffered_range]
    
    rsi = vbt.RSI.run(close=data_buffered, window=window_length).rsi
    entries_buffered = rsi.vbt.crossed_below(lower_threshold)
    exits_buffered = rsi.vbt.crossed_above(upper_threshold)

    pf = vbt.Portfolio.from_signals(
        data_buffered.iloc[window_length:],
        long_entries=entries_buffered.iloc[window_length:],
        long_exits=exits_buffered.iloc[window_length:],
    )

    if len(pf.trades) < 1:
        return vbt.NoResult

    return pf.sharpe_ratio

In [None]:
grid, best = rsi_pipeline_warmup(
    vbt.Rep("range_"),
    data.close["CHADCOIN"],
    vbt.Param(range(70, 90, 1), name="Upper"),
    vbt.Param(range(20, 40, 1), name="Lower"),
    _splitter_kwargs=dict(split_kwargs=dict(test_weeks=1, initial_offset=14)),
    _apply_kwargs=dict(split=1),
    #_execute_kwargs=dict(n_chunks="auto", distribute="chunks", engine="pathos"),
    # _parameterized_kwargs=dict(  
    #     execute_kwargs=dict(n_chunks="auto", distribute="chunks", engine="pathos")
    # )
)
best

 44%|####4     | 4/9 [00:02<00:02,  1.69it/s, split=4]

split  set    Upper  Lower
0      train  71     21        2.076925
1      train  71     21        2.347230
       test   71     21       16.429512
2      train  73     20        2.077370
3      train  73     20        2.030224
4      train  73     20        1.087867
       test   73     20       -3.029785
5      train  71     21        1.500760
6      train  87     20        0.945586
7      train  73     26        1.329837
       test   73     26       -0.513515
8      train  73     26        1.323866
       test   73     26        1.601448
dtype: float64