In [None]:
#define target variable
#df['target'] = df['mood'].shift(-1)

In [None]:
# split data into rolling train/validation folds
def rolling_time_series_split(df, train_size: Union[int, float], val_size: Union[int, float] = 0.2, jumpsize: int = None):
    '''
    Generate rolling-window train/validation splits over the values of df['index'].

    Each fold is a block of `train_size` consecutive entries immediately followed
    by a block of `val_size` entries; the window then moves forward by `jumpsize`.

    df -> object supporting len(df) and df['index'].to_numpy()
        (presumably a pandas DataFrame with an 'index' column).
    train_size -> size of the training set.
        - if int: number of instances
        - if float between 0-1: proportion of dataset
    val_size=0.2 -> size of the validation set.
        - if int: number of instances
        - if float between 0-1: proportion of training size
    jumpsize=None -> number of indices that will be skipped for the next fold:
        - if 1: folds with a single increment,
        - if None (or 0): results in folds without validation repetition (jumpsize=validation size)
        - if equal to train+val sizes: no data will appear double in the next fold.
    returns -> generator object with 2 sets of indices per fold: train, val
    raises -> ValueError if no jumpsize was given and the validation size resolves to 0.

    Fold boundaries start at `train_size` and stop before `n_samples - val_size`
    (exclusive), stepping by `jumpsize`. As this is a generator, nothing is
    computed or printed until the first fold is requested.
    '''

    n_samples = len(df)
    indices = df['index'].to_numpy()

    train_size = translate_sizing(train_size, n_samples)
    val_size = translate_sizing(val_size, train_size)

    # Derive the default step only AFTER val_size has been converted to a row
    # count: a fractional val_size (such as the 0.2 default) is not a valid
    # range() step and used to make the default call fail with TypeError.
    if not jumpsize:
        jumpsize = val_size
    if not jumpsize:
        raise ValueError("val_size resolves to 0 instances; pass an explicit jumpsize or a larger val_size")

    final_training_index = n_samples - val_size

    print(f'With {round((final_training_index-train_size)/jumpsize,3)} folds:')
    print(final_training_index,train_size,jumpsize)

    for i in range(train_size, final_training_index, jumpsize):
        # Training window ends where the validation window begins.
        yield indices[i-train_size:i], indices[i:i+val_size]


def translate_sizing(size, total)->int:
    '''
    Convert a size specification into an absolute number of instances.

    size -> requested size.
        - if integer (including NumPy integer scalars): returned as a plain int
        - if float: interpreted as a proportion of `total`, truncated towards zero
    total -> reference count that a float `size` is a proportion of.
    returns -> the size as an int.
    raises -> TypeError if `size` is neither an integer nor a float (bool is rejected),
              IndexError if `size` is zero or negative.
    '''
    # Imported locally so the function does not depend on a module-level import.
    import numbers

    # bool is a subclass of int, but True/False are not meaningful sizes.
    if isinstance(size, bool) or not isinstance(size, numbers.Real):
        raise TypeError(f"size must be an int or a float, got {type(size).__name__}")

    if size <= 0:
        raise IndexError("Incorrect sizing for train-validation splitting")

    # Check Integral first: every integer is also a Real number.
    if isinstance(size, numbers.Integral):
        return int(size)

    return int(size * total)