In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
from cpcv import CombPurgedKFoldCVLocal

In [2]:
def generate_time_series(start, end, num_series, seed=None):
    """Build a DataFrame of uniform random values indexed by date.

    Parameters
    ----------
    start, end : datetime-like
        Bounds handed to ``pd.date_range`` (no explicit frequency is passed,
        so pandas' default applies).
    num_series : int
        Number of columns to generate. Columns are named ``feature_0`` through
        ``feature_{num_series - 1}``.
    seed : int, optional
        When given, values come from a dedicated
        ``np.random.default_rng(seed)`` generator, so the same seed always
        produces the same frame. When omitted, values are drawn from NumPy's
        global random state via ``np.random.rand``.

    Returns
    -------
    pd.DataFrame
        One row per generated date and ``num_series`` columns of floats in
        ``[0, 1)``; the index is named ``date``.
    """
    date_range = pd.date_range(start=start, end=end)
    shape = (len(date_range), num_series)
    if seed is None:
        # Legacy path: keeps honouring any np.random.seed() set by the caller.
        data = np.random.rand(*shape)
    else:
        # Isolated generator: reproducible without touching global RNG state.
        data = np.random.default_rng(seed).random(shape)
    columns = [f'feature_{i}' for i in range(num_series)]
    df = pd.DataFrame(data, index=date_range, columns=columns)
    df.index.name = 'date'
    return df

In [3]:
# Parameters of the synthetic data set: the date span to cover and how many
# random columns to generate.
num_series = 10
start_date, end_date = datetime(2020, 1, 1), datetime(2020, 12, 31)

In [4]:
# Seed NumPy's global RNG right before sampling so this cell produces the same
# frame on every run (including Restart Kernel -> Run All). The seed value
# itself is arbitrary.
np.random.seed(42)
df = generate_time_series(start_date, end_date, num_series)

In [5]:
# Preview the first five rows of the generated frame.
df.head(n=5)

Unnamed: 0_level_0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-01-01,0.73517,0.56145,0.176071,0.90994,0.288086,0.656838,0.027345,0.131999,0.421339,0.595504
2020-01-02,0.308086,0.871461,0.556096,0.277902,0.3342,0.873631,0.786464,0.225314,0.050216,0.577433
2020-01-03,0.826081,0.972999,0.986267,0.03601,0.37186,0.050569,0.692302,0.576769,0.410743,0.76279
2020-01-04,0.19459,0.347619,0.860034,0.757695,0.874374,0.230655,0.369402,0.011118,0.744574,0.438098
2020-01-05,0.96557,0.841828,0.511187,0.505123,0.259248,0.483402,0.85456,0.452118,0.041779,0.527069


In [6]:
# Use feature_0 as the target and every other column as a predictor.
y = df['feature_0']
X = df.drop(columns=['feature_0'])
print(f'{X.shape}, {y.shape}')

(366, 9), (366,)


In [7]:
# Both series map each row's date to itself, so every sample's evaluation time
# equals its prediction time.
# NOTE(review): presumably pred_times is when a prediction is made and
# eval_times is when its outcome becomes known -- confirm against the cpcv docs.
timestamps = df.index
pred_times = pd.Series(timestamps, index=timestamps)
eval_times = pd.Series(timestamps, index=timestamps)

In [8]:
# Splitter configuration: 10 groups, one test group per split, and a two-day
# embargo.
# NOTE(review): the embargo presumably drops training rows that fall right
# after a test block -- confirm against the cpcv docs.
embargo = pd.Timedelta(days=2)
cpcv = CombPurgedKFoldCVLocal(n_splits=10, n_test_splits=1, embargo_td=embargo)

In [12]:
# Print the first and last positional index of each fold's train and test sets.
# The train span is only first:last, not a contiguous range: in the recorded
# output the train span of folds 1-8 encloses the test rows.
folds = enumerate(cpcv.split(X, y, pred_times, eval_times))
for i, (train_idx, test_idx) in folds:
    print(f'(Fold {i}): train rows - ({train_idx[0]}:{train_idx[-1]}); test rows - ({test_idx[0]}:{test_idx[-1]})')

(Fold 0): train rows - (0:329); test rows - (330:365)
(Fold 1): train rows - (0:365); test rows - (294:329)
(Fold 2): train rows - (0:365); test rows - (258:293)
(Fold 3): train rows - (0:365); test rows - (222:257)
(Fold 4): train rows - (0:365); test rows - (185:221)
(Fold 5): train rows - (0:365); test rows - (148:184)
(Fold 6): train rows - (0:365); test rows - (111:147)
(Fold 7): train rows - (0:365); test rows - (74:110)
(Fold 8): train rows - (0:365); test rows - (37:73)
(Fold 9): train rows - (39:365); test rows - (0:36)
