## Ipython Notebooks For Triple-Bar

Add the package path for testing

In [1]:
import sys
sys.path.append('E:\\git_folder\\quant_models')

In [2]:
import pandas as pd
from util.utils import get_daily_vol, sample_df
data_fn = "././data/xu1_1min.csv"
df_price = pd.read_csv(data_fn)
test_df = df_price.copy()
test_df.loc[:,'datetime'] = pd.to_datetime(test_df.datetime)
test_df.set_index('datetime', inplace=True)
df_testprice = test_df.loc[test_df.index > pd.to_datetime('2019-01-01')]
df_sampled = sample_df(df_testprice, '10T')

## Start Testing

In [3]:
df_sampled.head(2)

Unnamed: 0_level_0,open,close,low,high,volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-01-01 00:00:00,10400.0,10410.0,10395.0,10410.0,146.0
2019-01-01 00:10:00,10405.0,10402.5,10397.5,10405.0,94.0


In [4]:
timedelta = pd.Timedelta('1 hours')
nearest_index = df_sampled.index.searchsorted(df_sampled.index + timedelta)

In [5]:
nearest_index

array([    6,     7,     8, ..., 29607, 29607, 29607], dtype=int64)

In [6]:
def add_vertical_barrier(t_events, close, num_days=0, num_hours=0, num_minutes=0, num_seconds=0):
    """
    From AFML, Try to add a Vertical Barrier

    For each index in t_events, it finds the timestamp of the next price bar at or immediately after
    a number of days num_days. This vertical barrier can be passed as an optional argument t1 in get_events.

    This function creates a series that has all the timestamps of when the vertical barrier would be reached.

    :args
    1. t_events: DatetimeIndex
    2. df_price: the price dataframe which has the columns
        datetime as the index
        another column is can be price
    3. Time diff parameters:
        num_days: (int) number of days to add for vertical barrier
        num_hours: (int) number of hours to add for vertical barrier
        num_minutes: (int) number of minutes to add for vertical barrier
        num_seconds: (int) number of seconds to add for vertical barrier'

    :return:
    (series) timestamps of vertical barriers
    """
    timedelta = pd.Timedelta(
        '{} days, {} hours, {} minutes, {} seconds'.format(num_days, num_hours, num_minutes, num_seconds))
    # Find index to closest to vertical barrier
    nearest_index = close.index.searchsorted(t_events + timedelta)

    # Exclude indexes which are outside the range of close price index
    nearest_index = nearest_index[nearest_index < close.shape[0]]

    # Find price index closest to vertical barrier time stamp
    nearest_timestamp = close.index[nearest_index]
    filtered_events = t_events[:nearest_index.shape[0]]

    vertical_barriers = pd.Series(data=nearest_timestamp, index=filtered_events)
    return vertical_barriers

In [7]:
vertical_barrier_times = add_vertical_barrier(df_sampled.index, df_sampled, num_hours=1)

In [8]:
vertical_barrier_times.tail()

datetime
2019-12-18 12:20:00   2019-12-18 13:20:00
2019-12-18 12:30:00   2019-12-18 13:30:00
2019-12-18 12:40:00   2019-12-18 13:40:00
2019-12-18 12:50:00   2019-12-18 13:50:00
2019-12-18 13:00:00   2019-12-18 14:00:00
Name: datetime, dtype: datetime64[ns]

In [9]:
target_vert = get_daily_vol(df_sampled)

In [10]:
target = target_vert['vol']
t_events = df_sampled.index

In [11]:
target = target.loc[t_events]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  """Entry point for launching an IPython kernel.


In [12]:
target_vert.head()

Unnamed: 0_level_0,vol
datetime,Unnamed: 1_level_1
2019-01-02 09:00:00,0.00034
2019-01-02 09:10:00,0.000739
2019-01-02 09:20:00,0.000663
2019-01-02 09:30:00,0.001913
2019-01-02 09:40:00,0.00392


In [13]:
targetv1 = target.reindex(t_events)

In [14]:
targetv1.dropna(inplace=True)

In [15]:
target = targetv1.copy()

In [16]:
target.head()

datetime
2019-01-02 09:00:00    0.000340
2019-01-02 09:10:00    0.000739
2019-01-02 09:20:00    0.000663
2019-01-02 09:30:00    0.001913
2019-01-02 09:40:00    0.003920
Name: vol, dtype: float64

In [17]:
side_ = pd.Series(1.0, index=target.index)

In [18]:
pt_sl_ = [2, 2]

In [19]:
events = pd.concat({'endt': vertical_barrier_times, 'trgt': target, 'side': side_}, axis=1)

In [20]:
events = events.dropna(subset=['side'])

In [21]:
events = events.fillna(value={'trgt': 0.001})

In [22]:
events = events.dropna(subset=['trgt'])

In [23]:
events.head()


Unnamed: 0_level_0,endt,side,trgt
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-01-02 09:00:00,2019-01-02 10:00:00,1.0,0.00034
2019-01-02 09:10:00,2019-01-02 10:10:00,1.0,0.000739
2019-01-02 09:20:00,2019-01-02 10:20:00,1.0,0.000663
2019-01-02 09:30:00,2019-01-02 10:30:00,1.0,0.001913
2019-01-02 09:40:00,2019-01-02 10:40:00,1.0,0.00392


In [24]:
# Snippet 3.1, page 44, Daily Volatility Estimates
from util.multiprocess import mp_pandas_obj

In [25]:
# Snippet 3.2, page 45, Triple Barrier Labeling Method
def apply_pt_sl_on_ent(close, events, pt_sl, molecule):
    """
    Snippet 3.2, page 45, Triple Barrier Labeling Method

    This function applies the triple-barrier labeling method. It works on a set of
    datetime index values (molecule). This allows the program to parallelize the processing.

    Mainly it returns a DataFrame of timestamps regarding the time when the first barriers were reached.

    :param
    1. close: A pandas series of prices
    2. events: dataframe with two columns: ent: the timestamp of vertical barrier, when the value is np.nan, then no vertical bar
                                           trgt: the unit width of the horizontal barriers.
    3. pts1: pts1[0]*trgt is the
    4. molecule: a list with the subset of event indcies that will be processed by a single thread.
    :param pt_sl: (array) element 0, indicates the profit taking level; element 1 is stop loss level
    :param molecule: (an array) a set of datetime index values for processing
    :return: DataFrame of timestamps of when first barrier was touched
    """
    # Apply stop loss/profit taking, if it takes place before ent (end of event)
    events_ = events.loc[molecule]
    out     = events_[['ent']].copy(deep=True)

    profit_taking_multiple = pt_sl[0]
    stop_loss_multiple     = pt_sl[1]

    # Profit taking active
    if profit_taking_multiple > 0:
        profit_taking = profit_taking_multiple * events_['trgt']
    else:
        profit_taking = pd.Series(index=events.index)  # NaNs

    # Stop loss active
    if stop_loss_multiple > 0:
        stop_loss = -stop_loss_multiple * events_['trgt']
    else:
        stop_loss = pd.Series(index=events.index)  # NaNs

    # Get events
    for loc, vertical_barrier in events_['ent'].fillna(close.index[-1]).iteritems():
        closing_prices = close[loc: vertical_barrier]  # Path prices for a given trade
        cum_returns = (closing_prices / close[loc] - 1) * events_.at[loc, 'side']  # Path returns
        out.loc[loc, 'sl'] = cum_returns[cum_returns < stop_loss[loc]].index.min()  # Earliest stop loss date
        out.loc[loc, 'pt'] = cum_returns[cum_returns > profit_taking[loc]].index.min()  # Earliest profit taking date
    return out

In [26]:
num_threads = 2

In [27]:
first_touch_dates = mp_pandas_obj(func=apply_pt_sl_on_ent,
                                  pd_obj=('molecule', events.index),
                                  num_threads=num_threads,
                                  close=df_testprice,
                                  events=events,
                                  pt_sl=pt_sl_)

In [None]:
hh = apply_pt_sl_on_ent(df_testprice, events, pt_sl_, events.index)