## IMPORTS

In [1]:
import math
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

## SPLIT & SCALE FUNCTIONS

In [2]:
def train_test_split(df, config):
    """Scale ``df`` and split it sequentially into train and test sets.

    The first column is treated as the label and the remaining columns as
    the features. Rows are split in order (no shuffling): the first
    ``int(len(df) * config['trainTestPercentage'])`` rows form the training
    set and the rest form the test set. The scaler is fitted on the training
    rows only (all columns, label included) and then applied to every row.

    Args:
        df: Object providing ``to_numpy()`` and ``len()`` -- presumably a
            pandas DataFrame with numeric columns (confirm against callers).
        config: Mapping with the keys ``'scalerName'`` (``'MM'`` for
            ``MinMaxScaler`` or ``'SS'`` for ``StandardScaler``) and
            ``'trainTestPercentage'`` (fraction of rows used for training).

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test, scaler)`` where the
        ``x_*`` arrays hold the scaled feature columns, the ``y_*`` arrays
        hold the scaled label column and ``scaler`` is the fitted scaler.

    Raises:
        KeyError: If a required config key is missing or ``'scalerName'``
            is not one of ``'MM'`` / ``'SS'``.
    """
    rows = df.to_numpy()

    scalers = {
        'MM': MinMaxScaler,
        'SS': StandardScaler,
    }
    scaler_cls = scalers[config['scalerName']]
    split_value = int(len(df) * config['trainTestPercentage'])

    # Fit on the training rows only so no test statistics leak into the scaler.
    scaler = scaler_cls().fit(rows[:split_value])

    # Transform into a NEW array instead of writing back into ``rows``:
    # writing scaled floats into an integer array would truncate them, and
    # ``rows`` may be a view onto the caller's DataFrame (relies on the
    # scalers' default ``copy=True``).
    scaled = scaler.transform(rows)

    # Column 0 is the label, everything else is a feature.
    features = np.ascontiguousarray(scaled[:, 1:])
    labels = np.ascontiguousarray(scaled[:, 0])

    x_train, x_test = features[:split_value], features[split_value:]
    y_train, y_test = labels[:split_value], labels[split_value:]

    return x_train, x_test, y_train, y_test, scaler

In [3]:
def train_test_validate_split(df, config):
    """Scale ``df`` and split it sequentially into train/test/validate sets.

    The first column is treated as the label and the remaining columns as
    the features. Rows are split in order (no shuffling):

    * train: the first ``int(len(df) * train)`` rows,
    * test: the rows up to ``int(len(df) * (train + test))``,
    * validate: every remaining row.

    The scaler is fitted on the training rows only (all columns, label
    included) and then applied to every row.

    Args:
        df: Object providing ``to_numpy()`` and ``len()`` -- presumably a
            pandas DataFrame with numeric columns (confirm against callers).
        config: Mapping with the keys ``'scalerName'`` (``'MM'`` for
            ``MinMaxScaler`` or ``'SS'`` for ``StandardScaler``) and
            ``'trainTestValidate'``, itself a mapping with the fractions
            ``'train'`` and ``'test'``. The validation set is whatever
            remains, so a ``'validate'`` entry is not needed to compute it.

    Returns:
        Tuple ``(x_train, x_test, x_validate, y_train, y_test, y_validate,
        scaler)`` where the ``x_*`` arrays hold the scaled feature columns,
        the ``y_*`` arrays hold the scaled label column and ``scaler`` is
        the fitted scaler.

    Raises:
        KeyError: If a required config key is missing or ``'scalerName'``
            is not one of ``'MM'`` / ``'SS'``.
    """
    rows = df.to_numpy()

    scalers = {
        'MM': MinMaxScaler,
        'SS': StandardScaler,
    }
    scaler_cls = scalers[config['scalerName']]

    fractions = config['trainTestValidate']
    train_perc = fractions['train']
    test_perc = fractions['test']

    # Row indices at which the train and test portions end.
    train_value = int(len(df) * train_perc)
    test_value = int(len(df) * (train_perc + test_perc))

    # Fit on the training rows only so no test/validation statistics leak
    # into the scaler.
    scaler = scaler_cls().fit(rows[:train_value])

    # Transform into a NEW array instead of writing back into ``rows``:
    # writing scaled floats into an integer array would truncate them, and
    # ``rows`` may be a view onto the caller's DataFrame (relies on the
    # scalers' default ``copy=True``).
    scaled = scaler.transform(rows)

    # Column 0 is the label, everything else is a feature.
    features = np.ascontiguousarray(scaled[:, 1:])
    labels = np.ascontiguousarray(scaled[:, 0])

    x_train = features[:train_value]
    x_test = features[train_value:test_value]
    x_validate = features[test_value:]

    y_train = labels[:train_value]
    y_test = labels[train_value:test_value]
    y_validate = labels[test_value:]

    return x_train, x_test, x_validate, y_train, y_test, y_validate, scaler