### IMPORTS

In [20]:
import math
import numpy as np
from sklearn.preprocessing import MinMaxScaler

### MODEL FEATURE EXTRACTION

### TYPE-1 FEATURES

In [1]:
def stochastic_k(dataframe, timeframe):
    """Return the stochastic oscillator %K series.

    For each row, the close is positioned inside the rolling
    ``timeframe``-row range spanned by the lowest 'low' and the highest
    'high', and the ratio is expressed as a percentage. Rows without a
    full window come out as NaN.

    Args:
        dataframe: frame with 'close', 'high' and 'low' columns.
        timeframe: rolling window size passed to ``rolling``.
    """
    # ROLLING EXTREMES OVER THE WINDOW
    lowest_low = dataframe['low'].rolling(timeframe).min()
    highest_high = dataframe['high'].rolling(timeframe).max()

    # POSITION OF THE CLOSE WITHIN THE RANGE, AS A PERCENTAGE
    distance_from_low = dataframe['close'] - lowest_low
    window_range = highest_high - lowest_low
    return (distance_from_low / window_range) * 100

In [2]:
def stochastic_d(dataframe, timeframe):
    """Return the stochastic %D series: the rolling mean of the 'sk' column.

    Args:
        dataframe: frame that already contains an 'sk' column.
        timeframe: rolling window size passed to ``rolling``.
    """
    sk_window = dataframe['sk'].rolling(timeframe)
    return sk_window.mean()

In [3]:
def momentum(dataframe, timeframe):
    """Return the change in 'close' relative to ``timeframe`` rows earlier.

    Args:
        dataframe: frame with a 'close' column.
        timeframe: number of rows to look back, passed to ``diff``.
    """
    closing_prices = dataframe['close']
    return closing_prices.diff(periods=timeframe)

### DATASET SPLITTING & NORMALIZING FUNCTIONS

In [12]:
def reg_split(dataframe, settings):
    """Split a dataframe into min-max normalized train and test sets.

    The last column is treated as the label and every other column as a
    feature. The scaler is fitted on the train rows only and then applied
    to the test rows.

    Args:
        dataframe: numeric frame whose last column holds the label.
        settings: dict in which ``settings['split']['train']`` is the
            fraction of rows (from the top) assigned to the train set.

    Returns:
        Dict with 'train' and 'test' entries, each holding 'features' and
        'labels' arrays, plus the fitted 'scaler'.
    """
    # CONVERT DF TO A FLOAT ARRAY - SCALED VALUES ARE FRACTIONS, SO AN
    # INTEGER ARRAY WOULD TRUNCATE THEM
    rows = dataframe.to_numpy(dtype=float)

    # TRAIN SET INDEX LIMIT
    limit = math.ceil(len(rows) * settings['split']['train'])

    # INSTANTIATE NORMALIZER
    scaler = MinMaxScaler(feature_range=(0, 1))

    # FIT NORMALIZER WITH TRAIN DATA, THEN TRANSFORM TEST DATA.
    # RESULTS GO INTO NEW ARRAYS SO THE CALLER'S DATAFRAME IS NEVER
    # WRITTEN TO THROUGH THE to_numpy() BUFFER
    train = scaler.fit_transform(rows[:limit])
    test = rows[limit:]
    if len(test):
        # THE SCALER REJECTS ZERO-SAMPLE INPUT, SO SKIP AN EMPTY TEST SET
        test = scaler.transform(test)

    # RETURN AS DICT - LAST COLUMN IS THE LABEL, THE REST ARE FEATURES
    return {
        'train': {
            'features': train[:, :-1],
            'labels': train[:, -1]
        },
        'test': {
            'features': test[:, :-1],
            'labels': test[:, -1]
        },
        'scaler': scaler
    }

In [10]:
def lstm_split(dataframe, settings):
    """Split a dataframe into normalized train, validation and test sets.

    Rows are divided 60/20/20 from the top. The last column is treated as
    the label and every other column as a feature. The scaler is fitted
    on the train rows only and then applied to the remaining rows.

    Args:
        dataframe: numeric frame whose last column holds the label.
        settings: currently unused; the split ratios are fixed.

    Returns:
        Dict with 'train', 'validation' and 'test' entries, each holding
        'features' and 'labels' arrays, plus the fitted 'scaler'.
    """
    # CONVERT DF TO A FLOAT ARRAY - SCALED VALUES ARE FRACTIONS, SO AN
    # INTEGER ARRAY WOULD TRUNCATE THEM
    rows = dataframe.to_numpy(dtype=float)

    # TRAIN & VALIDATION SET INDEX LIMITS - 60/20/20 SPLIT
    train_limit = math.ceil(len(rows) * 0.6)
    validation_limit = math.ceil(len(rows) * 0.8)

    # INSTANTIATE NORMALIZER
    scaler = MinMaxScaler(feature_range=(0, 1))

    # FIT NORMALIZER WITH TRAIN DATA, THEN TRANSFORM VALIDATION & TEST DATA.
    # RESULTS GO INTO NEW ARRAYS SO THE CALLER'S DATAFRAME IS NEVER
    # WRITTEN TO THROUGH THE to_numpy() BUFFER
    train = scaler.fit_transform(rows[:train_limit])
    remainder = rows[train_limit:]
    if len(remainder):
        # THE SCALER REJECTS ZERO-SAMPLE INPUT, SO SKIP AN EMPTY REMAINDER
        remainder = scaler.transform(remainder)

    # THE REMAINDER STARTS AT train_limit, SO SHIFT THE VALIDATION LIMIT
    boundary = validation_limit - train_limit
    validation = remainder[:boundary]
    test = remainder[boundary:]

    # RETURN AS DICT - LAST COLUMN IS THE LABEL, THE REST ARE FEATURES
    return {
        'train': {
            'features': train[:, :-1],
            'labels': train[:, -1]
        },
        'validation': {
            'features': validation[:, :-1],
            'labels': validation[:, -1]
        },
        'test': {
            'features': test[:, :-1],
            'labels': test[:, -1]
        },
        'scaler': scaler
    }

### RESHAPE FEATURES TO BE THREE DIMENSIONAL

In [26]:
def reshape_input(dataset):
    """Add a trailing axis of length 1 to every partition's feature array.

    Each of the 'train', 'validation' and 'test' feature arrays goes from
    shape (rows, columns) to (rows, columns, 1). The dict is updated in
    place and also returned.

    Args:
        dataset: dict with 'train', 'validation' and 'test' entries, each
            holding a two-dimensional 'features' array.

    Returns:
        The same ``dataset`` object with reshaped features.
    """
    # RESHAPE TRAIN, VALIDATION & TEST FEATURES - EACH PARTITION USES ITS
    # OWN ROW COUNT, SINCE VALIDATION AND TEST CAN DIFFER IN LENGTH
    for partition in ('train', 'validation', 'test'):
        features = dataset[partition]['features']
        dataset[partition]['features'] = np.reshape(
            features, (features.shape[0], features.shape[1], 1)
        )

    return dataset

### ADD FEATURES TO DATAFRAME

In [1]:
def add(dataframe, config):
    """Append the requested feature columns, then clean and filter the frame.

    Features are computed in the order listed and written onto
    ``dataframe`` itself, so a later feature can read a column produced
    by an earlier one (for example 'sd' reads 'sk').

    Args:
        dataframe: frame the feature columns are added to.
        config: dict with 'window' (window size handed to every feature
            function), 'add' (feature names to compute) and 'filter'
            (passed to ``DataFrame.filter`` on the result).

    Returns:
        A frame with rows containing NaN dropped, filtered by
        ``config['filter']``.
    """
    # PULL OUT THE RELEVANT PARAMS UP FRONT
    period, requested, keep = config['window'], config['add'], config['filter']

    # FEATURE NAME -> FUNCTION THAT COMPUTES IT
    calculators = {
        'sk': stochastic_k,
        'sd': stochastic_d,
        'momentum': momentum
    }

    # COMPUTE EACH REQUESTED FEATURE INTO ITS OWN COLUMN
    for column in requested:
        calculate = calculators[column]
        dataframe[column] = calculate(dataframe, period)

    # DROP INCOMPLETE ROWS, THEN FILTER & RETURN
    complete_rows = dataframe.dropna()
    return complete_rows.filter(keep)

### NORMALIZE & SPLIT DATASET

In [28]:
def split(dataset, name, settings):
    """Normalize and split ``dataset`` with the splitter selected by ``name``.

    Args:
        dataset: data handed to the chosen splitter.
        name: 'linreg' for the regression split, 'lstm' for the LSTM
            split followed by the three-dimensional reshape.
        settings: settings forwarded to the chosen splitter.

    Returns:
        The splitter's result, or False (after printing a message) when
        ``name`` is not recognised.
    """
    # REGRESSION
    if name == 'linreg':
        return reg_split(dataset, settings)

    # LSTM - SPLIT, THEN RESHAPE FEATURES
    if name == 'lstm':
        return reshape_input(lstm_split(dataset, settings))

    # ANYTHING ELSE IS AN ERROR
    print('BAD SPLITTER TYPE')
    return False