In [1]:
from math import floor
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

In [2]:
import numpy as np

In [9]:
def general(dataframe, percent):
    """Split a dataframe into scaled train/test features and flat label arrays.

    Every column whose name is not 'label' is treated as a feature. The first
    ``floor(len(dataframe) * percent)`` rows become the training partition and
    the remaining rows become the test partition; rows are sliced in their
    existing order.

    Args:
        dataframe: Table holding a 'label' column alongside the feature columns.
        percent: Fraction of the rows assigned to the training partition.

    Returns:
        A ``(dataset, scaler)`` tuple. ``dataset`` is a dict with 'train' and
        'test' entries, each containing 'features' (the scaled arrays returned
        by ``normalize``) and 'labels' (a flattened, 1-D copy of the label
        slice). ``scaler`` is the scaler object returned by ``normalize``.
    """
    # EVERY COLUMN THAT IS NOT THE LABEL COUNTS AS A FEATURE
    is_feature = dataframe.columns != 'label'
    feature_matrix = dataframe.loc[:, is_feature].to_numpy()
    label_column = dataframe[['label']].to_numpy()

    # ROW INDEX WHERE TRAIN ENDS AND TEST BEGINS
    cutoff = floor(len(feature_matrix) * percent)

    # SCALE BOTH PARTITIONS
    train_scaled, test_scaled, scaler = normalize(
        feature_matrix[:cutoff],
        feature_matrix[cutoff:],
    )

    # LABELS ARE LEFT UNSCALED, ONLY FLATTENED TO 1-D
    train_part = {
        'features': train_scaled,
        'labels': label_column[:cutoff].flatten(),
    }
    test_part = {
        'features': test_scaled,
        'labels': label_column[cutoff:].flatten(),
    }

    return {'train': train_part, 'test': test_part}, scaler

In [4]:
def normalize(temp_train, temp_test):
    """Min-max scale train and test features with a (0, 1) feature range.

    The scaler is fitted on the training partition only; the test partition is
    then transformed with the parameters learned from train.

    Args:
        temp_train: Unscaled training features.
        temp_test: Unscaled test features.

    Returns:
        A ``(scaled_train, scaled_test, scaler)`` tuple, where ``scaler`` is
        the fitted ``MinMaxScaler`` instance.
    """
    # TARGET RANGE IS 0 TO 1
    minmax = MinMaxScaler(feature_range=(0, 1))

    # LEARN THE RANGE FROM TRAIN ONLY, THEN REUSE IT FOR TEST
    scaled_train = minmax.fit_transform(temp_train)
    scaled_test = minmax.transform(temp_test)

    return scaled_train, scaled_test, minmax

In [5]:
def timeseries(dataset, folds, window=0):
    """Cut a dataset into successive train/test folds with ``TimeSeriesSplit``.

    Args:
        dataset: Dict with 'features' and 'labels' entries that support
            indexing with an index array.
        folds: Number of splits handed to ``TimeSeriesSplit``.
        window: When truthy, the last ``window`` training indices of each fold
            are prepended to that fold's test indices.
            NOTE(review): presumably this gives a sliding-window generator
            enough history to cover the start of the test span -- confirm
            against the caller.

    Returns:
        A list with one dict per fold, each shaped like
        ``{'train': {'features', 'labels'}, 'test': {'features', 'labels'}}``.
    """
    features, labels = dataset['features'], dataset['labels']

    def subset(index):
        # PICK THE SAME ROWS FROM FEATURES & LABELS
        return {'features': features[index], 'labels': labels[index]}

    splitter = TimeSeriesSplit(n_splits=folds)
    splits = []

    for train_index, test_index in splitter.split(features):

        # PREPEND THE TAIL OF TRAIN TO TEST WHEN A SLIDING WINDOW IS GIVEN
        if window:
            test_index = np.concatenate((train_index[-window:], test_index))

        splits.append({
            'train': subset(train_index),
            'test': subset(test_index),
        })

    return splits

In [6]:
def generator(dataset, params, shuffle=True):
    """Wrap a dataset in a Keras ``TimeseriesGenerator``.

    Args:
        dataset: Dict with 'features' and 'labels' entries.
        params: Dict with 'batch' (passed as ``batch_size``) and 'window'
            (passed as ``length``).
        shuffle: Forwarded unchanged to ``TimeseriesGenerator``.

    Returns:
        The constructed ``TimeseriesGenerator``.
    """
    # LOOK UP EVERYTHING INLINE & BUILD THE GENERATOR IN ONE GO
    return TimeseriesGenerator(
        dataset['features'],
        dataset['labels'],
        batch_size=params['batch'],
        length=params['window'],
        shuffle=shuffle,
    )

In [8]:
def grid(dataset):
    """Rejoin the train and test partitions into single arrays.

    Args:
        dataset: Dict with 'train' and 'test' entries, each holding 'features'
            and 'labels' arrays.

    Returns:
        A ``(features, labels)`` tuple in which the train rows are followed by
        the test rows.
    """
    def stacked(key):
        # TRAIN ROWS FIRST, TEST ROWS AFTER
        return np.concatenate((dataset['train'][key], dataset['test'][key]))

    return stacked('features'), stacked('labels')