# Toolbox - Script with helper functions


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### helper functions

In [2]:
def create_classes(var, transaction_threshold):
    """
    Generate buy / sell / hold labels for classification models.

    Every observation is compared with the one immediately before it; the
    first observation is compared with itself and is therefore always 0.
    A move only counts when it exceeds 'transaction_threshold', which
    stands in for transaction costs.

    - Input
        - var: a series of values. np.exp is applied before comparing, so
          this presumably holds log prices (e.g. log Close) - confirm
          against the caller.
        - transaction_threshold: the move, measured as
          (price difference) * 100, that has to be exceeded.
    - Output: list of ints, one per observation
        - if price rise and profit > transaction cost: buy : 1
        - if price fall and profit > transaction cost: sell : 2
        - else 0 (stay put)

    Observations are paired by position, not by index label, so the series
    may carry any index (dates, a slice that does not start at 0, ...).
    NaN values compare False in both directions and are labelled 0.
    """
    prices = np.exp(np.asarray(var, dtype=float))
    if prices.size == 0:
        return []

    # t-1 values; the first element is paired with itself (no look-back)
    previous = np.concatenate((prices[:1], prices[:-1]))

    rise = prices > previous
    fall = prices < previous

    labels = np.zeros(prices.shape, dtype=int)  # default: not profitable
    labels[rise & ((prices - previous) * 100 > transaction_threshold)] = 1  # buy
    labels[fall & ((previous - prices) * 100 > transaction_threshold)] = 2  # sell
    return labels.tolist()


#### GAF images

In [9]:
"""
# CLASS GENERATES FEATURE WINDOWS AND LABELS FROM A TIME SERIES
# ONE WINDOW PER ROW, EACH n_steps ROWS LONG (ROLLING WINDOW)
# train_X are the features, train_Y are the price labels
"""
class pre_processing():
    """
    Splits a time-series DataFrame into a train and a test part and cuts
    each part into rolling windows of features, each labelled with the
    'Price' value of the row that follows the window.
    """

    def __init__(self, df, n_steps, split_threshold):
        self.df = df
        self.n_steps = n_steps  # the size of the sliding window (in rows)
        self.split_threshold = split_threshold  # fraction of rows that go to the train part
        self.df_train = None
        self.df_test = None

    def split_data(self):
        """
        Split self.df by row position: the first
        int(len(df) * split_threshold) rows become the train part, the
        remaining rows the test part. Both parts are stored on the
        instance and returned as (df_train, df_test).
        """
        split_ix = int(len(self.df) * self.split_threshold)
        self.df_train = self.df.iloc[:split_ix, :]
        self.df_test = self.df.iloc[split_ix:, :]
        return self.df_train, self.df_test

    def _split_time_series(self, dfx):
        """
        Cut one DataFrame into (windows, labels).

        Every column except 'Price' is a feature. Window i holds rows
        i .. i + n_steps - 1 and its label is 'Price' at row i + n_steps,
        so len(dfx) - n_steps windows are produced (none when dfx is too
        short, in which case both returned arrays are empty).
        """
        features = dfx.loc[:, dfx.columns != 'Price'].to_numpy()
        prices = dfx['Price'].to_numpy()
        X, y = [], []
        # stop early enough that the label row i + n_steps still exists
        for start in range(len(features) - self.n_steps):
            end_ix = start + self.n_steps
            X.append(features[start:end_ix, :])
            y.append(prices[end_ix])
        return np.array(X), np.array(y)

    def tensor_samples_labels(self, df_train, df_test, table):
        """
        Create windowed samples and labels for the train and test frames.

        - df_train / df_test: DataFrames that contain a 'Price' column
        - table: not used by this method; kept so existing calls keep working
        - returns train_X, train_Y, val_X, val_Y as numpy arrays
        """
        train_X, train_Y = self._split_time_series(df_train)
        val_X, val_Y = self._split_time_series(df_test)
        return train_X, train_Y, val_X, val_Y

In [8]:
def scale_and_polarise(train_data, test_data):
    """
    Scale data between [ 0 , 1 ] with MinMax and encode it as polar angles.

    Each data set gets its own MinMaxScaler, i.e. the test data is scaled
    with its own min / max and not with the ones found on the train data.
    The scaled values are then mapped through arccos.

    - train_data / test_data: 2-D array-likes accepted by MinMaxScaler
    - returns two DataFrames (train, test) holding the angles; they are
      built from plain arrays, so the original index and column labels
      are not carried over
    """
    from sklearn.preprocessing import MinMaxScaler

    def to_polar(data):
        scaled = MinMaxScaler().fit_transform(data)
        # The scaler computes X * scale + offset, which can round to a hair
        # above 1.0; arccos is NaN outside [-1, 1], so pin the values to
        # the intended range first. NaN inputs stay NaN.
        return np.arccos(np.clip(scaled, 0.0, 1.0))

    polar_train = to_polar(train_data)
    polar_test = to_polar(test_data)
    return pd.DataFrame(polar_train), pd.DataFrame(polar_test)