In [1]:
#daily volatility estimates
#computed at intraday estimation points, with exponentially weighted moving standard deviation

def getDailyVol(close, span0=100):
    """Exponentially weighted volatility of (roughly) one-day returns.

    For each timestamp ``t`` in ``close.index`` the reference bar is the last
    bar strictly before ``t - 1 day``. Timestamps for which the lookup lands
    on position 0 (no earlier bar) are discarded. The returns between each
    retained bar and its reference bar are smoothed with an exponentially
    weighted moving standard deviation.

    Args:
        close: price series. Positions are resolved with ``searchsorted``, so
            a sorted datetime-like index is assumed (not checked here).
        span0: span handed to ``Series.ewm``.

    Returns:
        Series of volatility estimates, indexed by the retained timestamps.
    """
    one_day = pd.Timedelta(days=1)
    # Insertion position of (t - 1 day) in the index, for every t.
    prev_pos = close.index.searchsorted(close.index - one_day)
    prev_pos = prev_pos[prev_pos > 0]
    n_kept = prev_pos.shape[0]
    # Map each retained timestamp (the trailing n_kept bars) to its reference bar.
    ref_ts = pd.Series(
        close.index[prev_pos - 1],
        index=close.index[close.shape[0] - n_kept:],
    )
    daily_ret = close.loc[ref_ts.index] / close.loc[ref_ts.values].values - 1
    return daily_ret.ewm(span=span0).std()

In [2]:
#triple barrier method 
#args = close: series of prices 
#events: df with columns: t1: timestamp of vertical barrier, trgt: unit width of horizontal barriers, side: multiplier applied to the path returns
#ptSl: a list of two non-negative float values: ptSl[0]: the factor that multiplies trgt to set the width of the upper barrier, ptSl[1]: the factor that multiplies trgt to set the width of the lower barrier
#molecule: a list with the subset of event indices that will be processed by a single thread.

def applyPtSlOnT1(close, events, ptSl, molecule):
    """Find the earliest profit-taking / stop-loss touch for each event.

    Args:
        close: price series.
        events: DataFrame indexed by event start time; the columns read here
            are 't1' (vertical barrier, may be NaT), 'trgt' (barrier unit
            width) and 'side' (multiplier applied to path returns).
        ptSl: two-element sequence; ptSl[0] scales 'trgt' for the upper
            barrier, ptSl[1] for the lower barrier. A value <= 0 disables
            that barrier.
        molecule: subset of ``events.index`` to process.

    Returns:
        DataFrame indexed like ``events.loc[molecule]`` with the original
        't1' plus, for every processed event, 'sl' and 'pt': the first
        timestamp at which the stop-loss / profit-taking barrier was crossed
        (NaT when it was never crossed or the barrier is disabled).
    """
    events_ = events.loc[molecule]
    out = events_[['t1']].copy(deep=True)
    # A disabled barrier is an all-NaN threshold: comparisons against NaN are
    # always False, so no touch is ever recorded for it.
    if ptSl[0] > 0:
        pt = ptSl[0] * events_['trgt']
    else:
        pt = pd.Series(index=events_.index, dtype=float)  # NaNs
    if ptSl[1] > 0:
        sl = -ptSl[1] * events_['trgt']
    else:
        sl = pd.Series(index=events_.index, dtype=float)  # NaNs
    # Events without a vertical barrier are evaluated up to the last bar.
    # Series.items() replaces iteritems(), which was removed in pandas 2.0.
    for loc, t1 in events_['t1'].fillna(close.index[-1]).items():
        path = close.loc[loc:t1]  # path prices
        path = (path / close.loc[loc] - 1) * events_.at[loc, 'side']  # path returns
        out.loc[loc, 'sl'] = path[path < sl[loc]].index.min()  # earliest stop loss
        out.loc[loc, 'pt'] = path[path > pt[loc]].index.min()  # earliest profit taking
    return out

    

In [3]:
#learning side and size of bet

#function get events takes args
#close: series of prices
#tEvents: timeindex containing the timestamps that will seed every triple barrier, selected by event sampling (potentially a CUSUM filter)
#ptSl: same as above
#t1: series with timestamps of vertical barriers
#trgt: series of targets, expressed in absolute returns
#minRet: the minimum target return required for running a triple barrier search
#numThreads: the number of threads concurrently used by the function

def getEvents(close, tEvents, ptSl, trgt, minRet, numthreads, t1=False):
    """Build triple-barrier events with symmetric horizontal barriers.

    NOTE(review): a later cell defines ``getEvents`` again with an extra
    ``side`` argument; once that cell runs it replaces this definition.

    Args:
        close: price series.
        tEvents: timestamps that seed the events.
        ptSl: barrier-width multiplier. Either a scalar or a sequence whose
            first element is used; the same multiplier is applied to both
            horizontal barriers.
        trgt: series of targets; only those above ``minRet`` are kept.
        minRet: minimum target required to keep an event.
        numthreads: forwarded to ``mpPandasObj`` as ``numThreads``.
        t1: series of vertical-barrier timestamps, or False for none.

    Returns:
        DataFrame indexed by event start with columns 't1' (row-wise minimum
        of the columns returned by ``applyPtSlOnT1``) and 'trgt'.
    """
    #1) get target
    trgt = trgt.loc[tEvents]
    trgt = trgt[trgt > minRet]
    #2) get t1 (max holding period); NaT placeholders when none is supplied
    if t1 is False:
        t1 = pd.Series(pd.NaT, index=tEvents)
    # Accept both a scalar and a [pt, sl]-style sequence: plain floats raise
    # TypeError on indexing, numpy scalars raise IndexError.
    try:
        width = ptSl[0]
    except (TypeError, IndexError):
        width = ptSl
    #form events object, apply stop loss on t1
    side_ = pd.Series(1., index=trgt.index)
    events = pd.concat({'t1': t1, 'trgt': trgt, 'side': side_}, axis=1).dropna(subset=['trgt'])
    df0 = mpPandasObj(func=applyPtSlOnT1, pdObj=('molecule', events.index),
                      numThreads=numthreads, close=close, events=events,
                      ptSl=[width, width])
    events['t1'] = df0.dropna(how='all').min(axis=1)  #pd.min ignores NaN
    events = events.drop('side', axis=1)
    return events

In [5]:
#define vertical barrier, to be (optionally) passed as t1 in getEvents
def defineT1(close, tEvents, numDays=1):
    """Vertical barrier: first bar at or after ``numDays`` past each event.

    Args:
        close: price series; its index is searched with ``searchsorted``, so
            a sorted datetime-like index is assumed (not checked here).
        tEvents: event start timestamps (datetime-like index).
        numDays: holding period in days added to each event timestamp.

    Returns:
        Series mapping event start -> barrier timestamp. Trailing events
        whose barrier would fall past the last bar of ``close`` are left out.
    """
    t1 = close.index.searchsorted(tEvents + pd.Timedelta(days=numDays))
    t1 = t1[t1 < close.shape[0]]  # drop barriers beyond the available data
    t1 = pd.Series(close.index[t1], index=tEvents[:t1.shape[0]])  #NaNs at end
    return t1

In [6]:
#labeling for side and size
def getBins(event, close):
    """Label each event by the sign of its return between start and 't1'.

    NOTE(review): a later cell defines ``getBins`` again with meta-labeling
    support; once that cell runs it replaces this definition.

    Args:
        event: DataFrame indexed by event start time with a 't1' column
            holding the event end time; rows with a missing 't1' are skipped.
        close: price series.

    Returns:
        DataFrame indexed by event start with 'ret' (end price / start
        price - 1) and 'bin' (``np.sign`` of 'ret').
    """
    #1) prices aligned with events
    events_ = event.dropna(subset=['t1'])
    px = events_.index.union(events_['t1'].values).drop_duplicates()
    # bfill: a timestamp missing from close takes the next available price
    px = close.reindex(px, method='bfill')
    #create out object
    out = pd.DataFrame(index=events_.index)
    out['ret'] = px.loc[events_['t1'].values].values / px.loc[events_.index] - 1
    out['bin'] = np.sign(out['ret'])
    return out

In [7]:
#meta labeling for bet sizing 
#want to build a secondary ml model that learns how to use a primary exogenous model 
#modify getEvents to include a side arg
#ptSl[0] : upper barrier, ptSl[1] : lower barrier
def getEvents(close, tEvents, ptSl, trgt, minRet, numThreads, t1=False, side=None):
    """Build triple-barrier events, optionally with a known position side.

    Args:
        close: price series handed to ``applyPtSlOnT1``.
        tEvents: timestamps that seed the events.
        ptSl: sequence of multipliers; ptSl[0] is the upper barrier and
            ptSl[1] the lower barrier. Without ``side`` only ptSl[0] is used,
            for both barriers.
        trgt: series of targets; only those above ``minRet`` are kept.
        minRet: minimum target required to keep an event.
        numThreads: forwarded to ``mpPandasObj``.
        t1: series of vertical-barrier timestamps, or False for none.
        side: optional series of position sides indexed like the targets.

    Returns:
        DataFrame indexed by event start with 't1' (row-wise minimum of the
        columns returned by ``applyPtSlOnT1``) and 'trgt', plus 'side' when a
        ``side`` series was supplied.
    """
    # 1) get target
    trgt = trgt.loc[tEvents]
    trgt = trgt[trgt > minRet]
    # 2) get max holding period; NaT placeholders when none is supplied
    if t1 is False:
        t1 = pd.Series(pd.NaT, index=tEvents)
    # 3) form event object, apply stop loss on t1
    if side is None:
        # side unknown: treat every event as long with symmetric barriers
        side_, ptSl_ = pd.Series(1., index=trgt.index), [ptSl[0], ptSl[0]]
    else:
        side_, ptSl_ = side.loc[trgt.index], ptSl[:2]
    events = pd.concat({'t1': t1, 'trgt': trgt, 'side': side_}, axis=1).dropna(subset=['trgt'])
    df0 = mpPandasObj(func=applyPtSlOnT1, pdObj=('molecule', events.index),
                      numThreads=numThreads, close=close, events=events, ptSl=ptSl_)
    events['t1'] = df0.dropna(how='all').min(axis=1)  # pd.min ignores NaN
    if side is None:
        events = events.drop('side', axis=1)
    return events

In [9]:
#expand get bins to incorporate meta labeling 
def getBins(events, close):
    '''
    Compute events outcome (including side information if applicable)
    -events.index is events starttime
    -events['t1'] is events endtime
    -events['trgt'] is events target
    -events['side'] (optional) implies the algos position side
    Case 1: ('side' not in events): bin is np.sign of the return <- label by price action
    Case 2: ('side' in events): bin in (0,1) <- label by PnL (meta labeling)

    Rows with a missing 't1' are skipped. Returns a DataFrame indexed by
    event start time with columns 'ret' and 'bin'.
    '''
    #1) prices aligned with events
    events_ = events.dropna(subset=['t1'])
    px = events_.index.union(events_['t1'].values).drop_duplicates()
    # bfill: a timestamp missing from close takes the next available price
    px = close.reindex(px, method='bfill')
    #2) create an out object
    out = pd.DataFrame(index=events_.index)
    out['ret'] = px.loc[events_['t1'].values].values / px.loc[events_.index] - 1
    has_side = 'side' in events_
    if has_side:
        out['ret'] *= events_['side']  # meta labeling: return of the position, not of the price
    out['bin'] = np.sign(out['ret'])
    if has_side:
        out.loc[out['ret'] <= 0, 'bin'] = 0  # meta labeling: non-profitable bets are labelled 0
    return out
        

In [13]:
#possible output values are now just 1,0... making this a binary classification problem, take the bet or pass. 
#with this in mind we can determine the size of the bet by the classification confidence of the model when output is 1

#drop underused labels to improve accuracy of model
def dropLabels(events, minPtc=.05):
    """Iteratively drop the rarest label until every label is frequent enough.

    Args:
        events: DataFrame with a 'bin' column of labels.
        minPtc: minimum share of observations a label must have to be kept
            (parameter spelling kept as-is so existing callers keep working).

    Returns:
        ``events`` without the rows of the dropped labels. Dropping stops as
        soon as every remaining label exceeds ``minPtc`` or fewer than three
        distinct labels remain.
    """
    #apply weights, drop labels with insufficient examples
    while True:
        df0 = events['bin'].value_counts(normalize=True)
        if df0.min() > minPtc or df0.shape[0] < 3:
            break
        # idxmin gives the label itself; argmin would give its position in
        # df0, which never matches a 'bin' value and would loop forever.
        rarest = df0.idxmin()
        print('dropped label', rarest, df0.min())
        events = events[events['bin'] != rarest]
    return events