In [1]:
# Import dependencies
import numpy as np
import pandas as pd
from dLoader import DataLoader, BuySell, generate_df

In [2]:
def softmax(x):
    """Return the softmax of ``x`` along axis 0.

    The maximum along axis 0 is subtracted before exponentiating. This does
    not change the mathematical result but keeps ``np.exp`` from overflowing
    to ``inf`` (and the quotient from becoming ``nan``) for large inputs.

    Parameters
    ----------
    x : array-like or pandas.Series
        Input scores. A Series input yields a Series result.

    Returns
    -------
    Same container kind as ``np.exp(x)`` would produce, with values that sum
    to 1 along axis 0.
    """
    if np.size(x) == 0:
        # An empty input has no maximum to shift by; exponentiating it just
        # yields an empty result of the matching shape.
        return np.exp(x)
    # np.subtract (rather than ``-``) so plain Python lists are accepted too.
    shifted = np.subtract(x, np.max(x, axis=0))
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0)

In [3]:
class MACross:
    """Moving-average fast/slow crossover strategy.

    Compares a fast rolling mean against a slow rolling mean, and the
    current volume against its own rolling mean.

    Parameters
    ----------
    fast : int
        Rolling window of the fast moving average (default 3).
    slow : int
        Rolling window of the slow price and volume moving averages
        (default 6).
    """

    def __init__(self, fast=3, slow=6):
        # Store the windows the caller asked for instead of fixed constants.
        self.fast = fast
        self.slow = slow

    def check_ma_above(self, df):
        """Return True when the latest row passes both the price and volume checks.

        Note that this calls ``get_ma`` and therefore adds the moving-average
        columns to ``df`` in place.
        """
        ndf = self.get_ma(df)
        # Only the most recent row decides the signal.
        return bool(ndf['MA-Above'].iloc[-1] and ndf['MA-Vol-Above'].iloc[-1])

    def get_ma(self, df):
        """Add the moving-average and comparison columns to ``df`` and return it.

        Columns written (``df`` is modified in place; pass a copy to keep the
        original untouched):

        * ``Fast``         - rolling mean of ``Close`` over ``self.fast`` rows
        * ``Slow``         - rolling mean of ``Low`` over ``self.slow`` rows
        * ``MA-Vol``       - rolling mean of ``Volume`` over ``self.slow`` rows
        * ``MA-Above``     - ``Fast > Slow``
        * ``MA-Vol-Above`` - ``Volume > MA-Vol``
        """
        df['Fast'] = df['Close'].rolling(self.fast).mean()
        # NOTE(review): the slow average is built from 'Low' while the fast
        # one uses 'Close' - confirm this asymmetry is intentional.
        df['Slow'] = df['Low'].rolling(self.slow).mean()
        df['MA-Vol'] = df['Volume'].rolling(self.slow).mean()
        df['MA-Above'] = df['Fast'] > df['Slow']
        df['MA-Vol-Above'] = df['Volume'] > df['MA-Vol']
        return df


In [27]:
class GapTable:
    """Build a gap-percentage probability table from tabular data.

    The table lists half-open bins ``[A, B)`` of the ratio ``a / b - 1``
    together with the number of observations per bin (``Counts``), each
    bin's share of all counted observations (``PCT``) and the softmax of
    those shares (``Softmax``).
    """

    def __init__(self):
        pass

    def create_table(self,
                     data=None,
                     step=None,
                     a_col=None, a_shift=None,
                     b_col=None, b_shift=None):
        """Create the probability table for ``data[a_col] / data[b_col] - 1``.

        Parameters
        ----------
        data : mapping of column name to pandas.Series (e.g. a DataFrame)
        step : float
            Bin width passed on to ``create_range``.
        a_col, b_col : column labels of the numerator and denominator.
        a_shift, b_shift : int or None
            Optional row shift applied to the respective column first.

        Returns
        -------
        pandas.DataFrame with columns ``A``, ``B``, ``Counts``, ``PCT`` and
        ``Softmax``.
        """
        a = self.get_col(data=data, col=a_col, s=a_shift)
        b = self.get_col(data=data, col=b_col, s=b_shift)
        pct = self.get_pct(a, b)
        pairs = self.create_range(pct, step)
        return self.create_probability_df(pct, pairs)

    def create_probability_df(self, pct, pairs):
        """Count how many values of ``pct`` fall in each ``[low, high)`` pair.

        Returns a DataFrame with one row per pair holding the bin edges
        (``A``, ``B``), the count, the count's share of the total (``PCT``)
        and the softmax of the shares.
        """
        rows = [{'A': pair[0],
                 'B': pair[1],
                 'Counts': self.check_isin(pct, pair)}
                for pair in pairs]
        # Explicit columns keep the frame well-formed even with no bins.
        df = pd.DataFrame(rows, columns=['A', 'B', 'Counts'])
        df['PCT'] = df['Counts'] / df['Counts'].sum()
        df['Softmax'] = softmax(df['PCT'])
        return df

    def check_isin(self, array, pair):
        """Return the number of elements of ``array`` in ``[pair[0], pair[1])``."""
        mask = (array >= pair[0]) & (array < pair[1])
        return mask.sum()

    def create_range(self, pct, step):
        """Return contiguous ``[low, high)`` bins of width ``step`` covering ``pct``.

        Bin edges lie on the grid ``round(min, 2) + k * step``. The grid is
        extended by whole steps in both directions until the first edge is
        at or below the minimum and the last edge is strictly above the
        maximum, so every finite observation lands in exactly one bin.

        Parameters
        ----------
        pct : pandas.Series or numpy.ndarray
        step : float or None
            Bin width; ``None`` means 1.0 (the default step of ``np.arange``).

        Returns
        -------
        numpy.ndarray of shape ``(n_bins, 2)``.

        Raises
        ------
        ValueError
            If ``step`` is not positive or ``pct`` has no finite min/max.
        """
        if step is None:
            step = 1.0
        if step <= 0:
            raise ValueError("step must be a positive number")
        lo, hi = pct.min(), pct.max()
        if not (np.isfinite(lo) and np.isfinite(hi)):
            raise ValueError("pct must contain at least one finite value")
        anchor = np.round(lo, 2)
        k_lo = int(np.floor((lo - anchor) / step))
        # +1 because bins are half-open: the maximum needs an edge above it.
        k_hi = int(np.floor((hi - anchor) / step)) + 1
        # Floating-point division can be off by one step; nudge if needed.
        while anchor + k_lo * step > lo:
            k_lo -= 1
        while anchor + k_hi * step <= hi:
            k_hi += 1
        edges = anchor + np.arange(k_lo, k_hi + 1) * step
        return np.stack([edges[:-1], edges[1:]]).T

    def get_col(self, col, data, s=None):
        """Return ``data[col]``, shifted by ``s`` rows when ``s`` is given."""
        if s is not None:
            return data[col].shift(s)
        return data[col]

    def get_pct(self, a, b):
        """Return the relative difference ``a / b - 1``."""
        return a / b - 1

In [28]:
# Fetch the AAPL history used to build the gap-probability table
stock_data = DataLoader('AAPL')
pdata = stock_data.get_data('2017-01-01', '2017-12-31')
# get_ma writes its columns into the frame it receives, so hand it a copy
mc = MACross()
pdf = mc.get_ma(pdata.copy())
# HLT: the following row's High relative to the current row's Low
pdf['HLT'] = pdf['High'].shift(-1) / pdf['Low'] - 1
# Discard rows that still contain NaN (incomplete rolling windows, last row)
pdf = pdf.dropna()

In [29]:
# Bin the HLT values into [low, high) ranges of width 0.005
gt = GapTable()
pairs = gt.create_range(pct=pdf['HLT'], step=0.005)

In [36]:
# Per-bin counts of HLT: all rows, rows where the fast MA is above the slow
# MA, and rows where volume is above its moving average.
hl = pdf['HLT']
maAbove = pdf['MA-Above']
volAbove = pdf['MA-Vol-Above']
# These selections do not depend on the bin, so build them once up front.
# Boolean-mask indexing already returns a new array; no extra copy needed.
arr = np.array(hl)
arrMaAbove = arr[maAbove.to_numpy(dtype=bool)]
arrVolAbove = arr[volAbove.to_numpy(dtype=bool)]
dic = {}
for i, pair in enumerate(pairs):
    dic[i] = {'A': pair[0],
              'B': pair[1],
              'Count': gt.check_isin(arr, pair),
              'MA-Count': gt.check_isin(arrMaAbove, pair),
              'V-Count': gt.check_isin(arrVolAbove, pair)}
df = pd.DataFrame.from_dict(dic, orient='index')

In [57]:
# Zero out every count below 10
count_cols = ['Count', 'MA-Count', 'V-Count']
counts = df.loc[:, count_cols]
df.loc[:, count_cols] = counts.where(counts >= 10, 0)

In [65]:
# Keep only the bins where at least one of the three counts is non-zero
mask = df.loc[:, ['Count', 'MA-Count', 'V-Count']].ne(0).any(axis=1)
df = df.loc[mask].reset_index(drop=True)
df

Unnamed: 0,A,B,Count,MA-Count,V-Count
0,0.0,0.005,21,18,0
1,0.005,0.01,63,57,22
2,0.01,0.015,58,53,12
3,0.015,0.02,42,38,21
4,0.02,0.025,27,25,13
5,0.025,0.03,12,11,10


In [69]:
# Softmax over each bin's share of the combined counts.
# `row_totals` replaces the former variable named `sum`, which shadowed the
# builtin sum() for every later cell.
row_totals = df.loc[:, ['Count', 'MA-Count', 'V-Count']].sum(axis=1)
pct = row_totals / row_totals.sum()
df['Softmax'] = softmax(pct)
df

Unnamed: 0,A,B,Count,MA-Count,V-Count,Softmax
0,0.0,0.005,21,18,0,0.151944
1,0.005,0.01,63,57,22,0.186473
2,0.01,0.015,58,53,12,0.17956
3,0.015,0.02,42,38,21,0.171876
4,0.02,0.025,27,25,13,0.160005
5,0.025,0.03,12,11,10,0.150142
