In [None]:
import pandas as pd
import numpy as np
from datetime import datetime as dt
from dateutil.relativedelta import *

In [None]:
class TimeBasedCV(object):
    """
    Time based cross validation class
    
    Parameters
    ----------
    train_period : int
        The number of time units to include in each train set
        default: 30
    test_period : int
        The number of time units to include in each test set
        default: 7
    freq : str
        The frequency of input parameters. Possible values are year, months, days, weeks, hours, minutes, seconds
        default: 'days'
    """
    
    def __init__(self, train_period=30, val_period=7, test_period=7, freq='days'):
        self.train_period = train_period
        self.val_period = val_period
        self.test_period = test_period
        self.freq = freq
        
    def split(self, data, validation_split_data, second_split_data, date_column, gap='0'):
        """
        Generate indices to split data into training and test set

        Parameters
        ----------
        data: pandas Dataframe
            your data; must contain one column for the record date
        validation_split_data: datetime.date()
            first date to perform the splitting on.
            if not provided will set to be the minimum date in the data after the first training set
        date_column: str
            date of each record
        gap: int, default=0
            for cases where the test set does not come right after the train set
            gap days are left between train and test sets
        
        Returns
        -------
        train_index, test_index:
            list of tuples (train index, test index) similar to sklearn model selection
        """
        
        #check that date_column exist in the data
        try:
            data[date_column]  
        except:
            raise ValueError('date_column not found in the data')
        
        train_indices_list =[]
        valid_indicies_list = []
        test_indicies_list = []
        
        start_train = first_split_date - eval('relativedelta')