In [1]:
import vectorbtpro as vbt
import numpy as np
import pandas as pd
from numba import njit

In [113]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

## Data

Using GBM generated data to ensure consistency

In [3]:
data = vbt.GBMOHLCData.pull(
    symbols=['CHADCOIN'],
    start='2020-01-01 UTC',
    end='2021-01-01 UTC',
    seed=vbt.symbol_dict(BTCUSD=42),
    n_ticks=1000,
)

# Creating Splitters

- Sets / Splits / Ranges

In [121]:
splitter = vbt.Splitter.from_n_rolling(
    index=data.index,
    n=5,
    split=0.5,
    set_labels=["train","test"],
)
splitter.plot()

FigureWidget({
    'data': [{'colorscale': [[0.0, 'rgba(31,119,180,0.8000)'], [1.0,
                             'rgba(31,119,180,0.8000)']],
              'hoverongaps': False,
              'hovertemplate': '%{x}<br>Split: %{y}<br>Set: train',
              'legendgroup': 'train',
              'name': 'train',
              'showlegend': True,
              'showscale': False,
              'type': 'heatmap',
              'uid': '562dab7b-117c-4863-9ceb-145071326dea',
              'x': array([datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 2, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 3, 0, 0, tzinfo=datetime.timezone.utc), ...,
                          datetime.datetime(2020, 12, 29, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 12, 30, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020,

In [30]:
splitter2 = vbt.Splitter.from_n_rolling(
    index=data.index,
    n=5,
    #length="180 days",
    split=(0.5,0.2,0.3),
    set_labels=["train","validation","test"],
)
splitter2.plot()


Message serialization failed with:
Out of range float values are not JSON compliant: nan
Supporting this message is deprecated in jupyter-client 7, please make sure your message is JSON-compliant


Message serialization failed with:
Out of range float values are not JSON compliant: nan
Supporting this message is deprecated in jupyter-client 7, please make sure your message is JSON-compliant


Message serialization failed with:
Out of range float values are not JSON compliant: nan
Supporting this message is deprecated in jupyter-client 7, please make sure your message is JSON-compliant



FigureWidget({
    'data': [{'colorscale': [[0.0, 'rgba(31,119,180,0.8000)'], [1.0,
                             'rgba(31,119,180,0.8000)']],
              'hoverongaps': False,
              'hovertemplate': '%{x}<br>Split: %{y}<br>Set: train',
              'legendgroup': 'train',
              'name': 'train',
              'showlegend': True,
              'showscale': False,
              'type': 'heatmap',
              'uid': 'b5b4973c-4ef5-4e48-8aa7-1a098c5d197b',
              'x': array([datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 2, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 3, 0, 0, tzinfo=datetime.timezone.utc), ...,
                          datetime.datetime(2020, 12, 29, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 12, 30, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020,

# Applying Splitters

In [6]:
splitter2.take(data)[4, "test"].close

symbol,CHADCOIN
2020-09-08 00:00:00+00:00,144.623461
2020-09-09 00:00:00+00:00,134.573582
2020-09-10 00:00:00+00:00,127.158331
2020-09-11 00:00:00+00:00,124.563202
2020-09-12 00:00:00+00:00,122.899317
2020-09-13 00:00:00+00:00,122.432707
2020-09-14 00:00:00+00:00,123.751676
2020-09-15 00:00:00+00:00,127.119288
2020-09-16 00:00:00+00:00,123.857773


## Simple UDFs

In [7]:
def buy_and_hold_return(data):
    return vbt.Portfolio.from_holding(data).total_return

In [8]:
results = splitter2.apply(
    buy_and_hold_return,
    vbt.Takeable(data),
    merge_func="concat"
)
results

split  set         symbol  
0      train       CHADCOIN   -0.141987
       validation  CHADCOIN   -0.054555
       test        CHADCOIN    0.054051
1      train       CHADCOIN    0.077356
       validation  CHADCOIN    0.079714
       test        CHADCOIN    0.017928
2      train       CHADCOIN    0.079998
       validation  CHADCOIN   -0.056170
       test        CHADCOIN    0.089008
3      train       CHADCOIN   -0.032343
       validation  CHADCOIN    0.064377
       test        CHADCOIN   -0.018575
4      train       CHADCOIN    0.218608
       validation  CHADCOIN   -0.009936
       test        CHADCOIN   -0.143585
5      train       CHADCOIN   -0.021707
       validation  CHADCOIN    0.021110
       test        CHADCOIN   -0.106084
Name: total_return, dtype: float64

## Dynamic Splitter creation

In [9]:
@vbt.split(
    splitter="from_n_rolling",
    splitter_kwargs=dict(n=5, split=0.5, set_labels=["train","test"]),
    takeable_args=["data"],
    merge_func="reset_column_stack",
    attach_bounds="index",
)
def buy_and_hold_return_series(data):
    return data.returns

In [10]:
buy_and_hold_return_series(data)

split,0,0,1,1,2,2,3,3,4,4
set,train,test,train,test,train,test,train,test,train,test
start,2020-01-01 00:00:00+00:00,2020-02-06 00:00:00+00:00,2020-03-14 00:00:00+00:00,2020-04-19 00:00:00+00:00,2020-05-26 00:00:00+00:00,2020-07-01 00:00:00+00:00,2020-08-07 00:00:00+00:00,2020-09-12 00:00:00+00:00,2020-10-19 00:00:00+00:00,2020-11-24 00:00:00+00:00
end,2020-02-06 00:00:00+00:00,2020-03-14 00:00:00+00:00,2020-04-19 00:00:00+00:00,2020-05-26 00:00:00+00:00,2020-07-01 00:00:00+00:00,2020-08-07 00:00:00+00:00,2020-09-12 00:00:00+00:00,2020-10-19 00:00:00+00:00,2020-11-24 00:00:00+00:00,2020-12-31 00:00:00+00:00
symbol,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-0.004735,0.010826,-0.018737,-0.015834,0.012009,0.01978,0.047722,-0.003797,0.055951,-0.059017
2,0.005992,-0.003721,0.015123,0.021469,0.01327,0.022999,-0.032806,0.010773,-0.025718,0.019053
3,-0.034626,-0.040257,0.030173,0.040755,0.016942,0.038294,-0.045313,0.027213,0.017976,0.001052
4,-0.026014,0.011202,-0.033697,0.017959,0.014962,-0.038013,0.049291,-0.025657,0.006043,-0.048386
5,-0.040578,-0.004552,0.011672,-0.033955,0.026155,0.035843,-0.05449,-0.020595,-0.028882,-0.056273
6,-0.023394,-0.028222,-0.021777,0.051364,0.026684,0.025026,-0.053233,0.009575,0.020123,0.032198
7,-0.047874,-0.023518,0.044854,-0.042426,-0.013792,-0.01761,0.001261,-0.031485,0.013451,-0.013358
8,-0.053722,0.047355,-0.051222,-0.032862,0.013256,-0.012523,-0.009113,0.003429,-0.004029,-0.021802
9,0.022638,-0.004342,-0.026101,-0.030142,-0.010941,-0.008925,0.055516,0.015803,0.014849,-0.03753


In [11]:
buy_and_hold_return_series(data, _apply_kwargs=dict(set_="train", merge_func="column_stack", attach_bounds=None) )

split,0,1,2,3,4
symbol,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN,CHADCOIN
2020-01-01 00:00:00+00:00,0.000000,,,,
2020-01-02 00:00:00+00:00,-0.004735,,,,
2020-01-03 00:00:00+00:00,0.005992,,,,
2020-01-04 00:00:00+00:00,-0.034626,,,,
2020-01-05 00:00:00+00:00,-0.026014,,,,
...,...,...,...,...,...
2020-11-19 00:00:00+00:00,,,,,0.041713
2020-11-20 00:00:00+00:00,,,,,-0.006273
2020-11-21 00:00:00+00:00,,,,,0.007597
2020-11-22 00:00:00+00:00,,,,,-0.030566


# Pipelines

In [302]:
data2 = vbt.GBMOHLCData.pull(
    symbols=['CHADCOIN'],
    start='2020-01-01 UTC',
    end='2021-01-01 UTC', # '2030-01-01 UTC'
    seed=vbt.symbol_dict(CHADCOIN=10),
    timeframe="15 min",
    n_ticks=20,
)
#data2.close.plot()

In [104]:
@vbt.cv_split(
    merge_func="concat",
    splitter="from_n_rolling",
    splitter_kwargs=dict(n=4, split=0.5, set_labels=["train","test"]),
    takeable_args=["data"],
    return_grid='all',
)
def rsi_pipeline(data, upper_threshold=70, lower_threshold=30):
    rsi = vbt.RSI.run(close=data).rsi
    entries = rsi.vbt.crossed_below(lower_threshold)
    exits = rsi.vbt.crossed_above(upper_threshold)
    pf = vbt.Portfolio.from_signals(
        data,
        long_entries=entries,
        long_exits=exits,
    )

    if len(pf.trades) < 1:
        return vbt.NoResult

    return pf.sharpe_ratio

In [105]:
grid, best = rsi_pipeline(
    data2.close["CHADCOIN"],
    vbt.Param(range(70, 80, 1), name="Upper"),
    vbt.Param(range(20, 30, 1), name="Lower"),
)

 25%|##5       | 1/4 [00:04<00:13,  4.39s/it, split=1]

In [106]:
best.xs("test", axis=0, level="set")

split  Upper  Lower
0      71     23       1.856017
1      76     20      -0.012611
2      79     29       0.985265
3      72     25       3.224272
dtype: float64

## Compare to buy and hold

In [107]:
splitter3 = rsi_pipeline(
    data2,
    _return_splitter=True
)
#splitter3.plot()

In [108]:
stacked_data = splitter3.take(data2.close, into="reset_stacked")
benchmark_sharpe = vbt.Portfolio.from_holding(stacked_data, freq="15m").sharpe_ratio
benchmark_sharpe

split  set    symbol  
0      train  CHADCOIN   -2.459066
       test   CHADCOIN   -1.588828
1      train  CHADCOIN   -0.049191
       test   CHADCOIN    2.743796
2      train  CHADCOIN    3.918398
       test   CHADCOIN   -0.308933
3      train  CHADCOIN    1.149608
       test   CHADCOIN   -1.683509
Name: sharpe_ratio, dtype: float64

In [109]:
benchmark_sharpe.xs("test", axis=0, level="set")

split  symbol  
0      CHADCOIN   -1.588828
1      CHADCOIN    2.743796
2      CHADCOIN   -0.308933
3      CHADCOIN   -1.683509
Name: sharpe_ratio, dtype: float64

In [110]:
best.xs("test", axis=0, level="set")

split  Upper  Lower
0      71     23       1.856017
1      76     20      -0.012611
2      79     29       0.985265
3      72     25       3.224272
dtype: float64

## Indicator Warmup time

In [357]:
def custom_split_func(index, prev_start, prev_end, test_weeks=2, initial_offset=14):
    if prev_start is None:
        prev_start = index[0] + index.freq * initial_offset
        if not (prev_start.day == 1 and prev_start.time() == pd.Timestamp("00:00:00").time()):
            prev_start = (prev_start + pd.DateOffset(months=1)).normalize()

        prev_end = prev_start + pd.offsets.MonthBegin(1)

    new_end = prev_end + pd.offsets.MonthBegin(1)
    if new_end > index[-1]:
        return None
    return [
        slice(prev_start, new_end - pd.Timedelta(weeks=test_weeks)),  
        slice(new_end - pd.Timedelta(weeks=test_weeks), new_end)
    ]

vbt.Splitter.from_split_func(
    data2.index,
    split_func=custom_split_func,
    split_args=(vbt.Rep("index"), vbt.Rep("prev_start"), vbt.Rep("prev_end")),
    split_kwargs=dict(test_weeks=4, initial_offset=14),
    range_bounds_kwargs=dict(index_bounds=True),
    set_labels=["test","train"]
).plot()

FigureWidget({
    'data': [{'colorscale': [[0.0, 'rgba(31,119,180,0.8000)'], [1.0,
                             'rgba(31,119,180,0.8000)']],
              'hoverongaps': False,
              'hovertemplate': '%{x}<br>Split: %{y}<br>Set: test',
              'legendgroup': 'test',
              'name': 'test',
              'showlegend': True,
              'showscale': False,
              'type': 'heatmap',
              'uid': 'af0f5c25-3644-4dc9-abcb-1b3dfab74e7f',
              'x': array([datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 1, 0, 15, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 1, 0, 30, tzinfo=datetime.timezone.utc), ...,
                          datetime.datetime(2020, 12, 31, 23, 15, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 12, 31, 23, 30, tzinfo=datetime.timezone.utc),
                          datetime.datetime(20

In [None]:
@vbt.cv_split(
    merge_func="concat",
    splitter="from_split_func",
    splitter_kwargs=dict(
        split_func=custom_split_func,
        split_args=(vbt.Rep("index"), vbt.Rep("prev_start"), vbt.Rep("prev_end")),
        split_kwargs=dict(test_weeks=4, initial_offset=14),
        range_bounds_kwargs=dict(index_bounds=True),
        set_labels=["train","test"]
    ),
    takeable_args=["data"],
    return_grid='all',
)
def rsi_pipeline(data, upper_threshold=70, lower_threshold=30):
    window_length=14
    rsi = vbt.RSI.run(close=data, window=window_length).rsi
    entries = rsi.vbt.crossed_below(lower_threshold)
    exits = rsi.vbt.crossed_above(upper_threshold)
    pf = vbt.Portfolio.from_signals(
        data,
        long_entries=entries,
        long_exits=exits,
    )



    if len(pf.trades) < 1:
        return vbt.NoResult
    
    print(pf.sharpe_ratio)

    return pf.sharpe_ratio

In [250]:
splitter = rsi_pipeline(
    data2.close["CHADCOIN"],
    vbt.Param(range(70, 80, 1), name="Upper"),
    vbt.Param(range(20, 30, 1), name="Lower"),
    _splitter_kwargs=dict(split_kwargs=dict(test_weeks=2, initial_offset=14)),
    _return_splitter=True
)
splitter.plot()

FigureWidget({
    'data': [{'colorscale': [[0.0, 'rgba(31,119,180,0.8000)'], [1.0,
                             'rgba(31,119,180,0.8000)']],
              'hoverongaps': False,
              'hovertemplate': '%{x}<br>Split: %{y}<br>Set: test',
              'legendgroup': 'test',
              'name': 'test',
              'showlegend': True,
              'showscale': False,
              'type': 'heatmap',
              'uid': '07281e32-a602-4106-ad1c-1e33d5193e82',
              'x': array([datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 1, 0, 15, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 1, 1, 0, 30, tzinfo=datetime.timezone.utc), ...,
                          datetime.datetime(2020, 12, 31, 23, 15, tzinfo=datetime.timezone.utc),
                          datetime.datetime(2020, 12, 31, 23, 30, tzinfo=datetime.timezone.utc),
                          datetime.datetime(20

In [386]:
grid, best = rsi_pipeline(
    data2.close["CHADCOIN"],
    71,
    21,
    _splitter_kwargs=dict(split_kwargs=dict(test_weeks=1, initial_offset=14)),
    _apply_kwargs=dict(split=1)
)
best

2.347230231136428
2.347230231136428
2.347230231136428
16.429512169799573
16.429512169799573


set
train     2.347230
test     16.429512
dtype: float64

## Indicator Warmup

In [396]:
@vbt.cv_split(
    merge_func="concat",
    splitter="from_split_func",
    splitter_kwargs=dict(
        split_func=custom_split_func,
        split_args=(vbt.Rep("index"), vbt.Rep("prev_start"), vbt.Rep("prev_end")),
        split_kwargs=dict(test_weeks=4, initial_offset=14),
        range_bounds_kwargs=dict(index_bounds=True),
        set_labels=["train","test"]
    ),
    takeable_args=None, #!
    index_from="data", #!
    return_grid=True,
)
def rsi_pipeline_warmup(range_, data, upper_threshold=70, lower_threshold=30):

    window_length=14
    buffered_range = slice(range_.start - window_length, range_.stop)  
    data_buffered = data.iloc[buffered_range]
    
    rsi = vbt.RSI.run(close=data_buffered, window=window_length).rsi
    entries_buffered = rsi.vbt.crossed_below(lower_threshold)
    exits_buffered = rsi.vbt.crossed_above(upper_threshold)

    pf = vbt.Portfolio.from_signals(
        data.iloc[window_length:],
        long_entries=entries_buffered.iloc[window_length:],
        long_exits=exits_buffered.iloc[window_length:],
    )

    if len(pf.trades) < 1:
        return vbt.NoResult
    
    print(pf.sharpe_ratio)

    return pf.sharpe_ratio

In [397]:
grid, best = rsi_pipeline_warmup(
    vbt.Rep("range_"),
    data2.close["CHADCOIN"],
    71,
    21,
    _splitter_kwargs=dict(split_kwargs=dict(test_weeks=1, initial_offset=14)),
    _apply_kwargs=dict(split=1)
)
best

1.1179835310478663
1.1179835310478663
2.26568663783191


set
train    1.117984
test     2.265687
dtype: float64