In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Labeling Methods

Source: Advances in Financial Machine Learning, Marcos Lopez de Prado

## Daily Vol

An exponentially weighted estimate of daily return volatility. It can be used to set profit-taking and stop-loss limits.

In [None]:
def getDailyVol(close,span0=100):
    """
    Exponentially weighted standard deviation of daily returns.

    For every timestamp ``t`` in ``close.index`` the reference price is the
    last observation strictly earlier than ``t - 1 day``. Timestamps that have
    no such earlier observation are left out of the result.

    Parameters
    ----------
    close : pd.Series
        Price series. The index is shifted by a ``pd.Timedelta`` and searched
        with ``searchsorted``, so it is expected to be a sorted datetime index.
    span0 : int, default 100
        Span handed to ``ewm``.

    Returns
    -------
    pd.Series
        Volatility estimate indexed by the timestamps that have a reference
        price.
    """
    # position of the first bar at or after (t - 1 day), for every t
    one_day_back = close.index - pd.Timedelta(days=1)
    pos = close.index.searchsorted(one_day_back)
    pos = pos[pos > 0]  # position 0 means there is no earlier bar to compare with

    # map each remaining timestamp to the timestamp of the bar just before that position
    n_dropped = close.shape[0] - pos.shape[0]
    prev_stamp = pd.Series(close.index[pos - 1], index=close.index[n_dropped:])

    # daily returns
    current = close.loc[prev_stamp.index]
    reference = close.loc[prev_stamp.values].values
    rets = current / reference - 1

    return rets.ewm(span=span0).std()

## Triple Barrier

- Set two horizontal barriers and one vertical barrier
    - The two horizontal barriers are defined by profit-taking and stop-loss limits, which are a dynamic function of estimated volatility (whether realized or implied).
    - The third barrier is defined in terms of number of bars elapsed since the position was taken (an expiration limit).
- If the upper barrier is touched first, we label the observation as a 1.
- If the lower barrier is touched first, we label the observation as a −1.
- If the vertical barrier is touched first, we have two choices: the sign of the return, or a 0. I personally prefer the former as a matter of realizing a profit or loss within limits, but you should explore whether a 0 works better in your particular problems.

In [None]:
def applyPtSlOnT1(close, events, ptSl, molecule):
    """
    Find, for each event in ``molecule``, the first time the stop-loss and the
    profit-taking level are crossed before the event's end time ``t1``.

    Parameters
    ----------
    close : pd.Series
        Prices.
    events : pd.DataFrame
        Indexed by event start time, with columns ``t1`` (end time, may be
        missing), ``trgt`` (barrier width) and ``side`` (position side).
    ptSl : sequence of two numbers
        Multipliers of ``trgt`` for the profit-taking (``ptSl[0]``) and
        stop-loss (``ptSl[1]``) levels. A value that is not positive disables
        the corresponding barrier.
    molecule : index-like
        Subset of ``events.index`` to process.

    Returns
    -------
    pd.DataFrame
        Copy of the ``t1`` column for the processed events, plus ``sl`` and
        ``pt`` columns with the earliest crossing time (missing if never hit).
    """
    subset = events.loc[molecule]
    out = subset[['t1']].copy(deep=True)

    # profit-taking threshold; an all-NaN series never triggers
    if ptSl[0] > 0:
        upper = ptSl[0] * subset['trgt']
    else:
        upper = pd.Series(index=events.index, dtype=float)

    # stop-loss threshold; an all-NaN series never triggers
    if ptSl[1] > 0:
        lower = -ptSl[1] * subset['trgt']
    else:
        lower = pd.Series(index=events.index, dtype=float)

    # events without an end time are followed up to the last available price
    end_times = subset['t1'].fillna(close.index[-1])
    for start, end in end_times.items():
        path = close[start:end]  # path prices
        # path returns in the direction of the position
        # (author's note: this step needs to be omitted for interest rates)
        path_ret = (path / close[start] - 1) * subset.at[start, 'side']

        out.loc[start, 'sl'] = path_ret[path_ret < lower[start]].index.min()  # earliest stop loss
        out.loc[start, 'pt'] = path_ret[path_ret > upper[start]].index.min()  # earliest profit taking

    return out

In [None]:
# define vertical (time) barrier
def vertBar(close, tEvents, numDays):
    """
    Vertical (time) barrier: for each event, the first bar at or after
    ``event time + numDays`` days.

    Parameters
    ----------
    close : pd.Series
        Prices; only its index is used.
    tEvents : pd.DatetimeIndex
        Event start times (assumed sorted, since the result is aligned with
        the leading events).
    numDays : int or float
        Holding horizon in days.

    Returns
    -------
    pd.Series
        Barrier timestamps indexed by event start time. Events whose horizon
        lies beyond the last bar are not included.
    """
    horizon = tEvents + pd.Timedelta(days=numDays)
    pos = close.index.searchsorted(horizon)
    in_range = pos[pos < close.shape[0]]  # drop horizons past the end of the data
    return pd.Series(close.index[in_range], index=tEvents[:in_range.shape[0]])

In [None]:
from mpcode import mpPandasObj

def getEvents(close: pd.Series, tEvents: pd.Series, ptSl, trgt: pd.Series, minRet: float, numThreads: int, t1=False):
    """
    Build the events table and find the first barrier touch of each event.

    Parameters
    ----------
    close : pd.Series
        Prices.
    tEvents : index-like
        Timestamps that seed the events; used to select from ``trgt``.
    ptSl : sequence of two numbers
        Profit-taking and stop-loss multipliers, forwarded unchanged to
        ``applyPtSlOnT1``.
    trgt : pd.Series
        Target (barrier width) per timestamp.
    minRet : float
        Events whose target is not above this value are discarded.
    numThreads : int
        Forwarded to ``mpPandasObj``.
    t1 : pd.Series or False, default False
        Vertical-barrier timestamps. ``False`` means no vertical barrier.

    Returns
    -------
    pd.DataFrame
        Indexed by event start time, with ``t1`` (earliest barrier touch) and
        ``trgt``.
    """
    # 1) targets at the event timestamps, keeping those above minRet
    targets = trgt.loc[tEvents]
    targets = targets[targets > minRet]

    # 2) max holding period: without a vertical barrier the end time is left open
    if t1 is False:
        t1 = pd.Series(pd.NaT, index=tEvents)

    # 3) events object; side is fixed to 1 because it is not needed to learn the side
    unit_side = pd.Series(1., index=targets.index)
    columns = {'t1': t1, 'trgt': targets, 'side': unit_side}
    events = pd.concat(columns, axis=1).dropna(subset=['trgt'])

    # mpPandasObj comes from the local `mpcode` module; presumably it runs
    # applyPtSlOnT1 over chunks ("molecules") of events.index - confirm there
    touches = mpPandasObj(
        func=applyPtSlOnT1,
        pdObj=('molecule', events.index),
        numThreads=numThreads,
        close=close,
        events=events,
        ptSl=ptSl,
    )

    # earliest of t1 / stop loss / profit taking (row-wise min skips missing values)
    events['t1'] = touches.dropna(how='all').min(axis=1)
    return events.drop('side', axis=1)

In [None]:
def generate_test_data(obs, seed=1):
    """
    Simulate a daily price path for the examples.

    Draws ``obs`` normally distributed returns (mean 0.002, std 0.1) dated
    from 2015-01-02 onwards, compounds them, and prepends a starting price of
    1 on 2015-01-01.

    Note: seeds NumPy's global random generator with ``seed``.

    Returns
    -------
    pd.Series
        ``obs + 1`` prices sorted by date.
    """
    np.random.seed(seed)
    dates = pd.date_range('2015-01-02', periods=obs)
    daily = pd.Series(np.random.normal(0.002, 0.1, obs), index=dates)

    path = (daily + 1).cumprod()
    path.loc[pd.to_datetime('2015-01-01')] = 1  # starting price, appended then sorted to the front
    return path.sort_index()

In [None]:
# Simulated price path used by the examples below: 1000 random daily returns plus a starting price of 1
close = generate_test_data(1000)
# px.line(close)

In [None]:
# Daily volatility estimate; passed to getEvents below as the per-event target (trgt)
trgt = getDailyVol(close)
trgt

In [None]:
# One event per timestamp that has a volatility estimate.
# ptSl=[1,1]: profit-taking and stop-loss levels are each one target away from the entry price.
# t1=False: no vertical barrier is supplied, so every path is followed up to the last close.
e = getEvents(close, trgt.index, ptSl=[1,1], trgt=trgt, minRet=0.00, numThreads=16, t1=False)

In [None]:
# Preview: t1 is the earliest barrier touch of each event, trgt the target used to size its barriers
e.head()

In [None]:
# barriers.head()

In [None]:
# Draw the barriers of a single example event on top of the price path
date = '2015-12-20'
entry_price = close.loc[date]
barrier_width = e['trgt'].loc[date]
touch_time = e['t1'].loc[date]  # end of the event: earliest barrier touch
takeprofit = entry_price * (1 + barrier_width)
stoploss = entry_price * (1 - barrier_width)

fig = go.Figure()

# close price: one trace for the line, one for the markers
fig.add_trace(go.Scatter(
    x=close.index, y=close, name='close', mode='lines',
    line=dict(color='black', width=1),
))
fig.add_trace(go.Scatter(
    x=close.index, y=close, name='close', mode='markers',
    marker=dict(color='black', size=4),
))

# barriers, as (x0, y0, x1, y1, line style) segments
solid = dict(color='red', width=2)
dotted = dict(color='red', width=1, dash='dot')
segments = [
    (date, takeprofit, touch_time, takeprofit, solid),       # upper barrier
    (date, stoploss, touch_time, stoploss, solid),           # lower barrier
    (date, entry_price, touch_time, entry_price, dotted),    # entry price level
    (touch_time, stoploss, touch_time, takeprofit, dotted),  # end of the event
    (date, takeprofit, date, stoploss, solid),               # start of the event
]
for x0, y0, x1, y1, line_style in segments:
    fig.add_shape(type='line', x0=x0, y0=y0, x1=x1, y1=y1, line=dict(line_style))

fig.update_layout(
    title='Triple Barrier Labeling at one example date',
    xaxis_title='Date',
    yaxis_title='Price',
    showlegend=False,
)

fig.show()

Now we need to derive the labels from the events.

In [None]:
def getBins(events,close):
    """
    Label each event by the sign of the return between its start and end.

    Parameters
    ----------
    events : pd.DataFrame
        Indexed by event start time, with a ``t1`` column holding the event
        end time. Rows without ``t1`` are ignored.
    close : pd.Series
        Prices.

    Returns
    -------
    pd.DataFrame
        Indexed by event start time, with ``ret`` (return from start to
        ``t1``) and ``bin`` (sign of ``ret``).
    """
    # 1) prices aligned with events (missing timestamps take the next available price)
    valid = events.dropna(subset=['t1'])
    stamps = valid.index.union(valid['t1'].values).drop_duplicates()
    prices = close.reindex(stamps, method='bfill')

    # 2) create out object
    start_price = prices.loc[valid.index]
    end_price = prices.loc[valid['t1'].values].values
    out = pd.DataFrame(index=valid.index)
    out['ret'] = end_price / start_price - 1
    out['bin'] = np.sign(out['ret'])
    return out

In [None]:
# Label every event that has an end time: ret is the return from event start to t1, bin is its sign
bins = getBins(e, close)
bins.head()

In [None]:

# Mark every labeled event on the price path, colored by its label.
# Prices are looked up by the event timestamps themselves, so the plot does not
# depend on how many observations were dropped at either end of `close`.
up_dates = bins.index[bins['bin'] == 1]
down_dates = bins.index[bins['bin'] == -1]

fig = go.Figure()
fig.add_trace(go.Scatter(x=close.index, y=close, name='close', mode='lines', line=dict(color='black', width=1)))
fig.add_trace(go.Scatter(x=up_dates, y=close.loc[up_dates], name='bin = 1', mode='markers', marker=dict(color='green')))
fig.add_trace(go.Scatter(x=down_dates, y=close.loc[down_dates], name='bin = -1', mode='markers', marker=dict(color='red')))
fig.update_layout(
    title='Bins',
    yaxis_title='Price',  # the traces show close prices, not returns
    xaxis_rangeslider_visible=True,
    xaxis_title='Date',
    showlegend=False
)
fig.show()

# Meta Labeling (not tested)

Expand the `getEvents` function for meta-labeling: it now accepts an optional `side` series.

In [None]:
def getEvents(close,tEvents,ptSl,trgt,minRet,numThreads,t1=False,side=None):
    """
    Build the events table and find the first barrier touch of each event,
    optionally using a known position side (meta-labeling).

    Parameters
    ----------
    close : pd.Series
        Prices.
    tEvents : index-like
        Timestamps that seed the events; used to select from ``trgt``.
    ptSl : sequence of numbers
        Barrier multipliers. Without ``side`` only ``ptSl[0]`` is used, for
        both barriers (symmetric). With ``side``, ``ptSl[0]`` is the
        profit-taking and ``ptSl[1]`` the stop-loss multiplier.
    trgt : pd.Series
        Target (barrier width) per timestamp.
    minRet : float
        Events whose target is not above this value are discarded.
    numThreads : int
        Forwarded to ``mpPandasObj``.
    t1 : pd.Series or False, default False
        Vertical-barrier timestamps. ``False`` means no vertical barrier.
    side : pd.Series, optional
        Position side per timestamp. When omitted, a side of 1 is assumed.

    Returns
    -------
    pd.DataFrame
        Indexed by event start time, with ``t1`` (earliest barrier touch),
        ``trgt`` and, only when ``side`` was given, ``side``.
    """
    #1) get target
    trgt = trgt.loc[tEvents]
    trgt = trgt[trgt>minRet] # minRet

    #2) get t1 (max holding period)
    if t1 is False:
        t1 = pd.Series(pd.NaT, index=tEvents)

    #3) form events object, apply stop loss on t1
    if side is None:
        # side unknown: assume long and use symmetric barriers
        side_, ptSl_ = pd.Series(1., index=trgt.index), [ptSl[0], ptSl[0]]
    else:
        side_, ptSl_ = side.loc[trgt.index], ptSl[:2]
    events = pd.concat({'t1':t1,'trgt':trgt,'side':side_}, axis=1).dropna(subset=['trgt'])

    # Use the `close` argument: the previous `inst['Close']` referred to a name
    # that is not defined in this notebook and raised NameError.
    df0 = mpPandasObj(
        func=applyPtSlOnT1,
        pdObj=('molecule', events.index),
        numThreads=numThreads,
        close=close,
        events=events,
        ptSl=ptSl_,
    )

    # earliest of t1 / stop loss / profit taking (row-wise min skips missing values)
    events['t1'] = df0.dropna(how='all').min(axis=1)
    if side is None:
        events = events.drop('side', axis=1)

    return events

Also expand `getBins` so that it takes the position side into account.

In [None]:
def getBins(events,close):
    """
    Compute each event's outcome, including side information if provided.

    Parameters
    ----------
    events : pd.DataFrame
        - ``events.index``: event start time
        - ``events['t1']``: event end time (rows without it are ignored)
        - ``events['trgt']``: event target
        - ``events['side']`` (optional): the algorithm's position side
    close : pd.Series
        Prices.

    Returns
    -------
    pd.DataFrame
        Indexed by event start time, with ``ret`` and ``bin``.

        - Case 1 (no ``side`` column): ``bin`` is the sign of the return,
          i.e. the label comes from price action.
        - Case 2 (``side`` column present): the return is multiplied by the
          side and ``bin`` is 1 for a positive result, 0 otherwise
          (meta-labeling, label by PnL).
    """
    has_side = 'side' in events

    # 1) prices aligned with events (missing timestamps take the next available price)
    valid = events.dropna(subset=['t1'])
    stamps = valid.index.union(valid['t1'].values).drop_duplicates()
    prices = close.reindex(stamps, method='bfill')

    # 2) create out object
    start_price = prices.loc[valid.index]
    end_price = prices.loc[valid['t1'].values].values
    out = pd.DataFrame(index=valid.index)
    out['ret'] = end_price / start_price - 1
    if has_side:
        out['ret'] *= valid['side']  # meta-labeling: return in the direction of the position
    out['bin'] = np.sign(out['ret'])
    if has_side:
        out.loc[out['ret'] <= 0, 'bin'] = 0  # meta-labeling: losing or flat outcomes get 0
    return out

Meta Labeling deals with the tradeoff between true positives and false positives.

Visualize with Receiver Operating Characteristic (ROC) curve!