In [1]:
import numpy as np
import pandas as pd
import s2spy.time

Create an example pandas `DataFrame`


In [2]:
# Fixed seed so that "Restart Kernel -> Run All" reproduces the same example values.
SEED = 42
rng = np.random.default_rng(SEED)

# One timestamp every 15 days. 'D' is the canonical pandas alias for calendar days;
# newer pandas releases deprecate the lowercase 'd' spelling.
time_index = pd.date_range('20151020', '20211001', freq='15D')
random_data = rng.random(len(time_index))  # uniform samples in [0, 1)
example_series = pd.Series(random_data, index=time_index)

# Single-column frame named 'data1'; 'data2' is a copy of 'data1', giving the
# frame a second column.
example_dataframe = pd.DataFrame(example_series.rename('data1'))
example_dataframe['data2'] = example_dataframe['data1']


Resample the `DataFrame` to an `AdventCalendar`

In [3]:
# Build an AdventCalendar with freq='30d'; the intervals in the output below are 30 days long.
cal = s2spy.time.AdventCalendar(freq='30d')
# Resample the example DataFrame onto the calendar. Judging by the output below, each row
# is one interval, identified by anchor_year and i_interval, with a boolean `target` flag.
dfr = cal.resample(example_dataframe)
dfr  # bare last expression -> rich display of the resampled frame

Unnamed: 0,anchor_year,i_interval,interval,data1,data2,target
0,2016,0,"(2016-10-31, 2016-11-30]",0.481519,0.481519,True
1,2016,1,"(2016-10-01, 2016-10-31]",0.511295,0.511295,False
2,2016,2,"(2016-09-01, 2016-10-01]",0.185496,0.185496,False
3,2016,3,"(2016-08-02, 2016-09-01]",0.603798,0.603798,False
4,2016,4,"(2016-07-03, 2016-08-02]",0.71413,0.71413,False
5,2016,5,"(2016-06-03, 2016-07-03]",0.571613,0.571613,False
6,2016,6,"(2016-05-04, 2016-06-03]",0.785708,0.785708,False
7,2016,7,"(2016-04-04, 2016-05-04]",0.545363,0.545363,False
8,2016,8,"(2016-03-05, 2016-04-04]",0.665551,0.665551,False
9,2016,9,"(2016-02-04, 2016-03-05]",0.72403,0.72403,False


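Import a splitter class from `sklearn.model_selection` and pass an instance of it, together with the resampled `DataFrame`, to the wrapper function `s2spy.traintest.fold_by_anchor`. The result contains one `fold_<i>` column per split, labelling each row as `train` or `test`.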

Example: `KFold`

In [4]:
from sklearn.model_selection import KFold
import s2spy.traintest

# K-fold splitter with three splits -> three fold columns (fold_0..fold_2) in the output.
cv = KFold(n_splits=3)

# Pass the splitter and the resampled frame to the wrapper. The displayed result shows the
# columns of `dfr` plus the fold columns, each holding 'train'/'test' labels.
# NOTE(review): all rows of anchor year 2016 share the same labels, so the split is
# presumably made per anchor year -- confirm against the s2spy documentation.
s2spy.traintest.fold_by_anchor(cv, dfr)

Unnamed: 0,anchor_year,i_interval,interval,data1,data2,target,fold_0,fold_1,fold_2
0,2016,0,"(2016-10-31, 2016-11-30]",0.481519,0.481519,True,test,train,train
1,2016,1,"(2016-10-01, 2016-10-31]",0.511295,0.511295,False,test,train,train
2,2016,2,"(2016-09-01, 2016-10-01]",0.185496,0.185496,False,test,train,train
3,2016,3,"(2016-08-02, 2016-09-01]",0.603798,0.603798,False,test,train,train
4,2016,4,"(2016-07-03, 2016-08-02]",0.71413,0.71413,False,test,train,train
5,2016,5,"(2016-06-03, 2016-07-03]",0.571613,0.571613,False,test,train,train
6,2016,6,"(2016-05-04, 2016-06-03]",0.785708,0.785708,False,test,train,train
7,2016,7,"(2016-04-04, 2016-05-04]",0.545363,0.545363,False,test,train,train
8,2016,8,"(2016-03-05, 2016-04-04]",0.665551,0.665551,False,test,train,train
9,2016,9,"(2016-02-04, 2016-03-05]",0.72403,0.72403,False,test,train,train


Example: `ShuffleSplit`

In [5]:
from sklearn.model_selection import ShuffleSplit

# ShuffleSplit draws every split independently at random, so -- unlike KFold -- the
# folds are not guaranteed to differ from one another.
# random_state pins the shuffling so that re-running the notebook reproduces the same folds.
cv = ShuffleSplit(n_splits=3, test_size=0.2, random_state=42)

# Same wrapper call as in the KFold example; only the splitter object changes.
s2spy.traintest.fold_by_anchor(cv, dfr)

Unnamed: 0,anchor_year,i_interval,interval,data1,data2,target,fold_0,fold_1,fold_2
0,2016,0,"(2016-10-31, 2016-11-30]",0.481519,0.481519,True,train,train,train
1,2016,1,"(2016-10-01, 2016-10-31]",0.511295,0.511295,False,train,train,train
2,2016,2,"(2016-09-01, 2016-10-01]",0.185496,0.185496,False,train,train,train
3,2016,3,"(2016-08-02, 2016-09-01]",0.603798,0.603798,False,train,train,train
4,2016,4,"(2016-07-03, 2016-08-02]",0.71413,0.71413,False,train,train,train
5,2016,5,"(2016-06-03, 2016-07-03]",0.571613,0.571613,False,train,train,train
6,2016,6,"(2016-05-04, 2016-06-03]",0.785708,0.785708,False,train,train,train
7,2016,7,"(2016-04-04, 2016-05-04]",0.545363,0.545363,False,train,train,train
8,2016,8,"(2016-03-05, 2016-04-04]",0.665551,0.665551,False,train,train,train
9,2016,9,"(2016-02-04, 2016-03-05]",0.72403,0.72403,False,train,train,train


Example: `TimeSeriesSplit`

In [6]:
from sklearn.model_selection import TimeSeriesSplit

# TimeSeriesSplit with four splits -> four fold columns (fold_0..fold_3) in the output.
cv = TimeSeriesSplit(n_splits=4)

# Same wrapper call as for the other splitters; only the splitter object changes.
s2spy.traintest.fold_by_anchor(cv, dfr)

Unnamed: 0,anchor_year,i_interval,interval,data1,data2,target,fold_0,fold_1,fold_2,fold_3
0,2016,0,"(2016-10-31, 2016-11-30]",0.481519,0.481519,True,train,train,train,train
1,2016,1,"(2016-10-01, 2016-10-31]",0.511295,0.511295,False,train,train,train,train
2,2016,2,"(2016-09-01, 2016-10-01]",0.185496,0.185496,False,train,train,train,train
3,2016,3,"(2016-08-02, 2016-09-01]",0.603798,0.603798,False,train,train,train,train
4,2016,4,"(2016-07-03, 2016-08-02]",0.71413,0.71413,False,train,train,train,train
5,2016,5,"(2016-06-03, 2016-07-03]",0.571613,0.571613,False,train,train,train,train
6,2016,6,"(2016-05-04, 2016-06-03]",0.785708,0.785708,False,train,train,train,train
7,2016,7,"(2016-04-04, 2016-05-04]",0.545363,0.545363,False,train,train,train,train
8,2016,8,"(2016-03-05, 2016-04-04]",0.665551,0.665551,False,train,train,train,train
9,2016,9,"(2016-02-04, 2016-03-05]",0.72403,0.72403,False,train,train,train,train
