## IMPORTS

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

## FUNCTIONS

### Data Features
Data features are based on this study: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4873195/

### TYPE 1 FEATURES

In [2]:
# Stochastic %K
def stochastic_k(df, timeframe=14):
    """Append the Stochastic %K oscillator as a ``stocK`` column.

    %K is ``100 * (Close - lowest Low) / (highest High - lowest Low)``,
    where the extremes are taken over a rolling window of ``timeframe`` rows.

    Args:
        df: DataFrame with ``Close``, ``Low`` and ``High`` columns.
        timeframe: Rolling window length, in rows.

    Returns:
        A new DataFrame: ``df`` joined with the ``stocK`` column.
    """
    lowest_low = df['Low'].rolling(timeframe).min()
    highest_high = df['High'].rolling(timeframe).max()
    percent_k = 100 * ((df['Close'] - lowest_low) / (highest_high - lowest_low))
    return df.join(percent_k.rename('stocK'))

In [3]:
# Stochastic %D
def stochastic_d(df, timeframe=3):
    """Append Stochastic %D (rolling mean of ``stocK``) as a ``stocD`` column.

    Args:
        df: DataFrame that already contains a ``stocK`` column.
        timeframe: Rolling window length, in rows.

    Returns:
        A new DataFrame: ``df`` joined with the ``stocD`` column.
    """
    percent_d = df['stocK'].rolling(timeframe).mean()
    return df.join(percent_d.rename('stocD'))

In [4]:
# Stochastic slow %D
def stochastic_sd(df, timeframe=3):
    """Append slow %D (rolling mean of ``stocD``) as a ``stocSD`` column.

    Args:
        df: DataFrame that already contains a ``stocD`` column.
        timeframe: Rolling window length, in rows.

    Returns:
        A new DataFrame: ``df`` joined with the ``stocSD`` column.
    """
    slow_d = df['stocD'].rolling(timeframe).mean()
    return df.join(slow_d.rename('stocSD'))

In [5]:
# Momentum
def momentum(df, timeframe=14):
    """Append the ``timeframe``-row change in ``Close`` as a ``Momentum`` column.

    Args:
        df: DataFrame with a ``Close`` column.
        timeframe: Number of rows to look back when differencing.

    Returns:
        A new DataFrame: ``df`` joined with the ``Momentum`` column.
    """
    close_change = df['Close'].diff(timeframe)
    return df.join(close_change.rename('Momentum'))

In [6]:
# Rate of change
def rate_of_change(df, timeframe):
    """Append the percentage change of ``Close`` over ``timeframe`` rows as ``ROC``.

    Args:
        df: DataFrame with a ``Close`` column.
        timeframe: Number of rows between the compared closes.

    Returns:
        A new DataFrame: ``df`` joined with the ``ROC`` column.
    """
    close = df["Close"]
    earlier_close = close.shift(timeframe)
    roc = ((close / earlier_close) - 1) * 100
    return df.join(roc.rename('ROC'))

In [7]:
# Larry Williams' %R
def larry_williams(df, timeframe):
    """Append Larry Williams' %R as an ``LWR`` column.

    %R is ``(highest High - Close) / (highest High - lowest Low) * 100``,
    with the extremes taken over a rolling window of ``timeframe`` rows.

    The rolling extremes are kept in local variables, so the caller's
    DataFrame is not modified (no temporary ``Ln``/``Hn`` columns are
    written to it).

    Args:
        df: DataFrame with ``Close``, ``Low`` and ``High`` columns.
        timeframe: Rolling window length, in rows.

    Returns:
        A new DataFrame: ``df`` joined with the ``LWR`` column.
    """
    lowest_low = df['Low'].rolling(window=timeframe).min()
    highest_high = df['High'].rolling(window=timeframe).max()
    percent_r = (highest_high - df['Close']) / (highest_high - lowest_low) * 100
    return df.join(percent_r.rename('LWR'))

In [8]:
# A/O Oscillator (accumulation/distribution oscillator)
def ao_oscillator(df):
    """Append ``(High - previous Close) / (High - Low)`` as an ``AOosci`` column.

    Args:
        df: DataFrame with ``High``, ``Low`` and ``Close`` columns.

    Returns:
        A new DataFrame: ``df`` joined with the ``AOosci`` column.
    """
    previous_close = df['Close'].shift(1)
    bar_range = df['High'] - df['Low']
    oscillator = (df['High'] - previous_close) / bar_range
    return df.join(oscillator.rename('AOosci'))

In [9]:
# Disparity
def disparity(df, timeframe):
    """Append ``Close`` as a percentage of its moving average as ``Disp<timeframe>``.

    Args:
        df: DataFrame with a ``Close`` column.
        timeframe: Moving-average window length, in rows; also used as the
            suffix of the new column's name.

    Returns:
        A new DataFrame: ``df`` joined with the ``Disp<timeframe>`` column.
    """
    close = df['Close']
    moving_avg = close.rolling(timeframe).sum() / timeframe
    ratio_pct = close / moving_avg * 100
    return df.join(ratio_pct.rename('Disp' + str(timeframe)))

In [10]:
# Price oscillator
def price_oscillator(df, fast=5, slow=10):
    """Append the price oscillator ``(MA_fast - MA_slow) / MA_fast`` as ``OSCP``.

    The difference of the two moving averages is parenthesised before the
    division; without the parentheses the expression evaluates as
    ``MA_fast - (MA_slow / MA_fast)``, which is not the oscillator.

    The moving averages are kept in local variables, so the caller's
    DataFrame is not modified (no temporary ``MA5``/``MA10`` columns are
    written to it).

    Args:
        df: DataFrame with a ``Close`` column.
        fast: Window length of the short moving average (default 5).
        slow: Window length of the long moving average (default 10).

    Returns:
        A new DataFrame: ``df`` joined with the ``OSCP`` column.
    """
    close = df['Close']
    ma_fast = close.rolling(fast).sum() / fast
    ma_slow = close.rolling(slow).sum() / slow
    oscillator = (ma_fast - ma_slow) / ma_fast
    return df.join(oscillator.rename('OSCP'))

In [11]:
# Commodity channel index
def CCI(df, timeframe):
    """Append the Commodity Channel Index as a ``CCI`` column.

    ``CCI = (TP - SMA(TP)) / (0.015 * MD)``, where ``TP`` is the typical
    price ``(High + Low + Close) / 3``, ``SMA`` is its rolling mean over
    ``timeframe`` rows and ``MD`` is the rolling mean absolute deviation of
    ``TP`` around that mean.

    The denominator uses the mean absolute deviation rather than the sample
    standard deviation, because that is how the CCI is defined; the standard
    deviation yields a differently scaled indicator.

    Args:
        df: DataFrame with ``High``, ``Low`` and ``Close`` columns.
        timeframe: Rolling window length, in rows.

    Returns:
        A new DataFrame: ``df`` joined with the ``CCI`` column.
    """
    typical_price = (df['High'] + df['Low'] + df['Close']) / 3
    window = typical_price.rolling(timeframe)
    # Mean absolute deviation of each window around that window's own mean.
    mean_deviation = window.apply(lambda w: np.abs(w - w.mean()).mean(), raw=True)
    cci = (typical_price - window.mean()) / (0.015 * mean_deviation)
    return df.join(cci.rename('CCI'))

In [12]:
# Relative strength index
def RSI(df, timeframe):
    """Append the Relative Strength Index of ``Close`` as an ``RSI`` column.

    One-row changes in ``Close`` are split into gains (negative changes set
    to 0) and losses (positive changes set to 0). Each is smoothed with an
    exponentially weighted mean (``com = timeframe - 1``, requiring at least
    ``timeframe`` observations). ``RSI = 100 - 100 / (1 + RS)``, where
    ``RS`` is the absolute ratio of smoothed gain to smoothed loss.

    Args:
        df: DataFrame with a ``Close`` column.
        timeframe: Smoothing period, in rows.

    Returns:
        A new DataFrame: ``df`` joined with the ``RSI`` column.
    """
    delta = df['Close'].diff(1)
    ups = delta.mask(delta < 0, 0)
    downs = delta.mask(delta > 0, 0)  # kept non-positive; sign removed below

    smoothing = {'com': timeframe - 1, 'min_periods': timeframe}
    smoothed_up = ups.ewm(**smoothing).mean()
    smoothed_down = downs.ewm(**smoothing).mean()

    relative_strength = (smoothed_up / smoothed_down).abs()
    index_value = 100 - (100 / (1 + relative_strength))
    return df.join(index_value.rename('RSI'))

In [13]:
# Feature addition function
def add_type1_features(df, timeframe):
    """Apply every type-1 indicator to ``df`` in a fixed order.

    ``stochastic_d`` and ``stochastic_sd`` run with their default window,
    ``disparity`` is applied with windows 5 and 10, and ``ao_oscillator``
    and ``price_oscillator`` take no window. All other indicators receive
    ``timeframe``.

    Args:
        df: Input price DataFrame, passed to the first indicator.
        timeframe: Window length forwarded to the window-based indicators.

    Returns:
        The DataFrame returned by the last indicator in the chain.
    """
    # Order matters: stochastic_d reads stocK, stochastic_sd reads stocD.
    pipeline = (
        (stochastic_k, (timeframe,)),
        (stochastic_d, ()),
        (stochastic_sd, ()),
        (momentum, (timeframe,)),
        (rate_of_change, (timeframe,)),
        (larry_williams, (timeframe,)),
        (ao_oscillator, ()),
        (disparity, (5,)),
        (disparity, (10,)),
        (price_oscillator, ()),
        (CCI, (timeframe,)),
        (RSI, (timeframe,)),
    )
    for indicator, extra_args in pipeline:
        df = indicator(df, *extra_args)
    return df

### TYPE 2 FEATURES

In [14]:
# OBV
def obv(df):
    """Return ``df`` with an ``OBV`` (on-balance volume) column added.

    For each row the volume is added when ``Close`` is above the previous
    row's close, subtracted when it is below, and ignored otherwise (this
    includes the first row, which has no previous close). ``OBV`` is the
    running total of those signed volumes.

    The caller's DataFrame is left untouched: the column is added to a copy,
    matching the other indicator functions, which return a new frame.

    Args:
        df: DataFrame with ``Close`` and ``Volume`` columns.

    Returns:
        A new DataFrame containing every column of ``df`` plus ``OBV``.
    """
    previous_close = df['Close'].shift(1)
    signed_volume = np.where(
        df['Close'] > previous_close,
        df['Volume'],
        np.where(df['Close'] < previous_close, -df['Volume'], 0),
    )
    return df.assign(OBV=signed_volume.cumsum())

In [15]:
# Moving Average
def moving_average(df, timeframe):
    """Append the simple moving average of ``Close`` as an ``MA`` column.

    Args:
        df: DataFrame with a ``Close`` column.
        timeframe: Window length, in rows.

    Returns:
        A new DataFrame: ``df`` joined with the ``MA`` column.
    """
    window_total = df['Close'].rolling(timeframe).sum()
    average = window_total / timeframe
    return df.join(average.rename('MA'))

In [16]:
# BIAS 
def bias(df, timeframe):
    """Append the relative gap between ``Close`` and its moving average.

    The new column is ``(Close - MA) / MA`` with ``MA`` the simple moving
    average of ``Close`` over ``timeframe`` rows, named ``BIAS<timeframe>``.

    Args:
        df: DataFrame with a ``Close`` column.
        timeframe: Moving-average window length, in rows; also used as the
            suffix of the new column's name.

    Returns:
        A new DataFrame: ``df`` joined with the ``BIAS<timeframe>`` column.
    """
    close = df['Close']
    moving_avg = close.rolling(timeframe).sum() / timeframe
    deviation = (close - moving_avg) / moving_avg
    return df.join(deviation.rename('BIAS' + str(timeframe)))

In [17]:
# PSY - ratio of the number of rising periods over the n day period
def psy(df, timeframe):
    """Append the psychological line as a ``PSY<timeframe>`` column.

    PSY is the percentage of the last ``timeframe`` rows whose ``Close`` is
    higher than the previous row's close, rounded to one decimal place.

    A row counts as rising by comparing it with the *previous* close
    (``diff() > 0``). Comparing with the next row's close would put
    information from the future into the feature.

    The rising flags are kept in a local variable, so the caller's
    DataFrame is not modified (no temporary ``A`` column is written to it).

    Args:
        df: DataFrame with a ``Close`` column.
        timeframe: Window length, in rows; also used as the suffix of the
            new column's name.

    Returns:
        A new DataFrame: ``df`` joined with the ``PSY<timeframe>`` column.
    """
    # The first row has no previous close; NaN > 0 is False, so it counts as 0.
    rising = (df['Close'].diff() > 0).astype(int)
    rising_pct = rising.rolling(timeframe).sum() / timeframe * 100
    return df.join(rising_pct.round(1).rename('PSY' + str(timeframe)))

In [18]:
# SY
def sy(df):
    """Append the one-row log return of ``Close``, in percent, as ``SY``.

    Args:
        df: DataFrame with a ``Close`` column.

    Returns:
        A new DataFrame: ``df`` joined with the ``SY`` column.
    """
    log_close = np.log(df['Close'])
    log_return_pct = (log_close - log_close.shift(1)) * 100
    return df.join(log_return_pct.rename('SY'))

In [19]:
# ASY
def asy(df, timeframe):
    """Append the rolling average of ``SY`` as an ``ASY<timeframe>`` column.

    Args:
        df: DataFrame that already contains an ``SY`` column.
        timeframe: Window length, in rows; also used as the suffix of the
            new column's name.

    Returns:
        A new DataFrame: ``df`` joined with the ``ASY<timeframe>`` column.
    """
    window_total = df['SY'].rolling(timeframe).sum()
    average = window_total / timeframe
    return df.join(average.rename('ASY' + str(timeframe)))

In [20]:
# Feature addition function
def add_type2_features(df, timeframe):
    """Apply every type-2 indicator to ``df`` with fixed window lengths.

    Adds ``OBV``, ``MA`` (window 5), ``BIAS6``, ``PSY12`` and ``ASY5`` down
    to ``ASY1``. The intermediate ``SY`` column exists only to feed the
    ``ASY`` columns and is dropped before returning.

    Args:
        df: Input DataFrame, passed to the first indicator.
        timeframe: Accepted for signature parity with
            ``add_type1_features``; not used, since every window here is
            hard-coded.

    Returns:
        The DataFrame with the type-2 feature columns and without ``SY``.
    """
    df = obv(df)
    df = moving_average(df, 5)
    df = bias(df, 6)
    df = psy(df, 12)
    df = sy(df)
    for asy_window in (5, 4, 3, 2, 1):
        df = asy(df, asy_window)
    # SY is only an input to the ASY columns.
    return df.drop(columns=['SY'])

## MAIN FUNCTION

In [21]:
def add_features(df, config):
    """Add the configured feature set, drop rows with NaNs, and filter columns.

    Args:
        df: Input DataFrame.
        config: Mapping with the keys
            ``'window'`` (window length forwarded to the feature builders),
            ``'type'`` (0 = type-1 then type-2 features, 1 = type-1 only,
            2 = type-2 only; any other value adds no features) and
            ``'filter'`` (passed to ``DataFrame.filter`` on the result).

    Returns:
        The feature DataFrame after ``dropna()`` and ``filter(...)``.
    """
    window = config['window']
    feature_set = config['type']
    keep = config['filter']

    # Type 0 means both sets, with type-1 applied before type-2.
    if feature_set in (0, 1):
        df = add_type1_features(df, window)
    if feature_set in (0, 2):
        df = add_type2_features(df, window)

    complete_rows = df.dropna()
    return complete_rows.filter(keep)