In [1]:
# Denpendancies
import os
import numpy as np
import pandas as pd
from dLoader import DataLoader

In [2]:
def softmax(x):
    # Method for calculate the softmax probability
    return np.exp(x) / np.sum(np.exp(x))

def get_tickers(database='Database'):
    # Loading tickers symbol from the Database directory
    db = os.path.join(os.getcwd(), database) # Database actual path
    if os.path.exists(db):
        # Append symbols to tickers array if the filename with extension of csv
        tickers = [ld.split('.')[0] for ld in os.listdir(db) if ld.split('.')[1] == 'csv']
        return sorted(tickers)
    print('Database could not be found.')

def batching(data, period):
    # Batching data with period
    batch = len(data) // period
    max_length = batch * period
    return np.array(data)[-max_length:].reshape(-1, period)

def regroup(data, period):
    # Regrouping data by period
    base = batching(data['Open'], period)[:, 0]
    high = batching(data['High'], period).max(1)
    low = batching(data['Low'], period).min(1)
    close = batching(data['Close'], period)[:, -1]
    return pd.DataFrame(np.stack([base, high, low, close], axis=1), 
                        columns=['Open', 'High', 'Low', 'Close'])

In [3]:
def add_dic_to_df(df, dic, axis=1):
    # Adding dictionary to Dataframe
    if df is None:
        return pd.DataFrame.from_dict(dic, orient='index')
    return pd.concat([df, pd.DataFrame.from_dict(dic, orient='index')], axis=axis)

def calculate_capital_gain(pct):
    # Calculate Capital Gain with percentage gain / loss
    return np.prod(pct) / 1 - 1

def replace_percentage(gains, losses, pct):
    # Replacing the gains above percentage with pct and below percentage 
    # with losses
    # Create a copy of gains
    cgains = gains.copy()
    # Masking gains over percentage
    mask = cgains > pct
    # Replacing copy of gains that is above percentage with percentage
    cgains[mask] = pct
    # Replacing copy of gains that is below percentage with losses
    cgains[~mask] = losses[~mask]
    # Returning copy of gains
    return cgains

def generate_gain_loss_from_period(df, period):
    # Regrouping DataFrame to match giving period
    ndf = regroup(df, period)
    # Gain is calculate by the next period High (sell price) / current Close
    # (buy price) - 1 to get the percentage difference and dropping NA values
    gains = (ndf['High'].shift(-1) / ndf['Close'] - 1).dropna()
    # Loss is calculate by the next period Close (sell price ) / current close
    # (buy price) - 1 to get the percentage difference and dropping NA values
    losses = (ndf['Close'].shift(-1) / ndf['Close'] - 1).dropna()
    return gains, losses

def calculate_means(gains, losses):
    # Calculate the Mean of gains and losses
    mask = gains > 0
    # Return a dictionary containing gain and loss percentage
    # gain mean and loss mean
    return {'Gain Pct': mask.mean(), 
            'Loss Pct': 1 - mask.mean(),
            'Gain Mean': gains[mask].mean(),
            'Loss Mean': losses[mask].mean()}


In [4]:
def create_gain_loss_table(start, end,
                           timeframe=[2, 3, 5, 10, 20],
                           reduction=.8,
                           database='database',
                           tickers=None):
    # Load tickers from database if tickers is None
    if tickers is None:
        tickers = get_tickers(database=database)
    # Minimum days in a year
    mleng = len(pd.date_range(start, end, freq='B')) * .8
    # Main Method
    dataframe = None
    for ticker in tickers:
        # Looping through the tickers and if the df length is less than
        # mleng skip
        data = DataLoader(ticker).get_data(start, end)
        if len(data) < mleng:
            continue
        # Generate the gain and loss datas by different timeframe
        gl_mean = None
        for tf in timeframe:
            # Gain and loss percentage
            gains, losses = generate_gain_loss_from_period(data, tf)
            # Dictionary for gain and loss value data
            means = calculate_means(gains, losses)
            # Reduce the mean values
            ps = means['Gain Mean'] * reduction
            cgains = replace_percentage(gains, losses, ps) + 1
            # Update dictionary
            means.update({'Test PCT': ps, 
                        'Cap Gain': calculate_capital_gain(cgains)})
            gl_mean = add_dic_to_df(gl_mean, {tf: means}, 0)
        # Create dataframe 
        gl_mean.index = pd.MultiIndex.from_product([[ticker], gl_mean.index])
        if dataframe is None:
            dataframe = gl_mean
        else:
            dataframe = pd.concat([dataframe, gl_mean])
    return dataframe

In [5]:
def get_values(table):
    sorted_table = table.sort_values(by='Cap Gain', ascending=False)
    return sorted_table.iloc[0]['Test PCT'], sorted_table.index[0]

In [27]:
def static_backtest(data, days, pct):
    gains, losses = generate_gain_loss_from_period(data, days)
    cgains = replace_percentage(gains, losses, pct) + 1
    return calculate_capital_gain(cgains)

def dynamic_backtest(data, 
                     sell_limit, 
                     max_hold_days, 
                     max_share = 1000, 
                     capital = 1000):
    holding = False
    sell_off = False
    #
    day_held = 0
    #
    buy_at = 0
    shares = 0
    base_capital = capital
    #
    for date, prices in data.iterrows():
        if not holding:
            # Buying
            buy_at = prices.Close
            shares = capital // buy_at
            shares = max_share if shares > max_share else shares
            #
            sell_limit_price = buy_at * (1 + sell_limit)
            #
            holding = True
        else:
            if prices.High > sell_limit_price:
                gain = sell_limit_price - buy_at
                sell_off = True
            elif day_held >= max_hold_days:
                gain = prices.Close - buy_at
                sell_off = True
            elif date == data.index[-1]:
                gain = prices.Close - buy_at
                sell_off = True
            #
            if sell_off:
                capital += gain * shares
                #
                buy_at = 0
                shares = 0
                # 
                holding = False
                sell_off = False
                #
                day_held = 0
            else:
                day_held += 1

    return capital / base_capital - 1

In [39]:
def create_backtest_gain_data(start, end, table):
    dic = {}
    for ticker in table.index.levels[0]:
        data = DataLoader(ticker).get_data(start, end)
        sell_limit, max_hold_days = get_values(table.loc[ticker])
        static_cap_gain = static_backtest(data, max_hold_days, sell_limit)
        dynamic_cap_gain = dynamic_backtest(data, sell_limit, max_hold_days)
        dic[ticker] = {'Sell Limit': sell_limit,
                       'Hold Days': max_hold_days,
                       'Static': static_cap_gain, 
                       'Dynamic': dynamic_cap_gain}
    df = pd.DataFrame.from_dict(dic, orient='index')
    df['Square Error'] = np.square(df['Dynamic'] - df['Static'])
    return df

In [38]:
tickers = get_tickers()
df_table = create_gain_loss_table('2016-01-01', '2020-12-31', 
                                  timeframe=np.arange(2, 50, step=2), 
                                  tickers=tickers)

In [40]:
gain_table = create_backtest_gain_data('2021-01-01', '2021-12-31', df_table)

  shares = capital // buy_at
  shares = capital // buy_at
  shares = capital // buy_at


In [42]:
gain_table.sort_values(by='Diff')

Unnamed: 0,Sell Limit,Hold Days,Static,Dynamic,Diff
SMBC,0.020051,2,0.412112,0.411661,2.026986e-07
GNUS,0.360833,22,-0.205693,-0.204975,5.149529e-07
MPAA,0.116042,28,-0.116790,-0.118006,1.478857e-06
RYAM,0.106336,12,-0.125756,-0.127082,1.756809e-06
USPH,0.114323,46,0.087632,0.086114,2.303429e-06
...,...,...,...,...,...
DRNA,0.198980,30,-0.110989,,
DSPG,0.065075,26,0.249601,,
KDMN,0.176451,30,0.098300,,
MCF,0.224864,28,0.033789,,


In [49]:
gain_table.loc['USPH']

Sell Limit     0.114323
Hold Days     46.000000
Static         0.087632
Dynamic        0.086114
Diff           0.000002
Name: USPH, dtype: float64