<a href="https://colab.research.google.com/github/GyeeunJeong/Advances-in-Financial-Machine-Learning/blob/master/ch03.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Source code for Chapter 3 of Advances in Financial Machine Learning

In [0]:
import datetime as dt

import numpy as np
import pandas as pd
import pandas_datareader.data as web

In [0]:
# 3.1 Daily volatility estimate
def getDailyVol(close, span0=100):
  """Exponentially weighted standard deviation of returns against an earlier bar.

  close : price series indexed by timestamp
  span0 : span of the exponentially weighted window

  Returns a series indexed by the trailing bars of `close` for which a
  reference bar was found.
  """
  stamps = close.index
  # Insertion position, within the index, of every timestamp shifted back by one day.
  prior_pos = stamps.searchsorted(stamps - pd.Timedelta(days=1))
  # Keep only the entries whose insertion position is past the first bar.
  prior_pos = prior_pos[prior_pos > 0]
  n_kept = prior_pos.shape[0]
  # Pair each of the last n_kept bars with the bar just before its insertion position.
  reference = pd.Series(stamps[prior_pos - 1], index=stamps[close.shape[0] - n_kept:])
  returns = close.loc[reference.index] / close.loc[reference.values].values - 1
  return returns.ewm(span=span0).std()

# 3.2 Triple-barrier labeling
def applyPtSlonT1(close, events, ptsl, molecule):
  """Find, for each selected event, the first time each horizontal barrier is touched.

  close    : price series indexed by timestamp
  events   : DataFrame indexed by event start time with columns
             't1' (vertical barrier; missing means the last bar of `close`),
             'trgt' (unit width of the horizontal barriers) and 'side'
  ptsl     : [profit-taking multiple, stop-loss multiple]; a value <= 0
             disables the corresponding barrier
  molecule : index labels of the events to process

  Returns a DataFrame indexed by the selected events with columns 't1'
  (copied from `events`), 'sl' and 'pt' (first touch time on the path from
  the event start up to t1, or a missing value when the barrier is not touched).
  """
  events_ = events.loc[molecule]
  out = events_[['t1']].copy(deep=True)

  # Barrier levels expressed as returns. An all-NaN series never compares
  # True, which is how a disabled barrier is represented.
  if ptsl[0] > 0:
    pt = ptsl[0] * events_['trgt']
  else:
    pt = pd.Series(index=events_.index, dtype=float)
  if ptsl[1] > 0:
    sl = -ptsl[1] * events_['trgt']
  else:
    sl = pd.Series(index=events_.index, dtype=float)

  sl_touch, pt_touch = [], []
  # Series.items() is used because iteritems() was removed in pandas 2.0.
  for loc, t1 in events_['t1'].fillna(close.index[-1]).items():
    path = close[loc:t1]  # price path from event start to the vertical barrier
    path = (path / close[loc] - 1) * events_.at[loc, 'side']  # side-adjusted returns
    sl_touch.append(path[path < sl[loc]].index.min())
    pt_touch.append(path[path > pt[loc]].index.min())

  # Assign whole columns at once instead of enlarging the frame cell by cell.
  out['sl'] = sl_touch
  out['pt'] = pt_touch
  return out

# 3.3 Time of the first barrier touch & 3.6 meta-labeling extension
def getEvents(close, tEvents, ptsl, trgt, minRet, numThreads, t1=False, side=None):
  """Build the events table and find the first barrier touched by each seed event.

  close      : price series
  tEvents    : timestamps that seed each triple-barrier event
  ptsl       : barrier-width multiples; ptsl[0] is used for both horizontal
               barriers when `side` is None, otherwise ptsl[:2] is passed on
               as [profit-taking, stop-loss]
  trgt       : series of targets (absolute returns) that scale the barrier width
  minRet     : events whose target is not above this value are discarded
  numThreads : unused here; the multiprocessing dispatch is not wired in and
               all events are processed in a single serial call
  t1         : series of vertical-barrier timestamps, or False for none
  side       : optional series with the side of each event (meta-labeling)

  Returns a DataFrame indexed by event start time with columns 't1' (time of
  the earliest barrier touch) and 'trgt', plus 'side' when `side` was given.
  """
  # 1) Targets at the seed timestamps, filtered by the minimum return.
  trgt = trgt.loc[tEvents]
  trgt = trgt[trgt > minRet]

  # 2) Vertical barrier; all-NaT means no maximum holding period.
  if t1 is False:
    t1 = pd.Series(pd.NaT, index=tEvents)

  # 3) Events table. Without a side the barriers are symmetric and the side
  #    column is a placeholder of ones that is dropped again before returning.
  if side is None:
    side_, ptsl_ = pd.Series(1., index=trgt.index), [ptsl[0], ptsl[0]]
  else:
    side_, ptsl_ = side.loc[trgt.index], ptsl[:2]
  events = pd.concat({'t1': t1, 'trgt': trgt, 'side': side_}, axis=1).dropna(subset=['trgt'])

  # Serial stand-in for an mpPandasObj dispatch: every event is one "molecule".
  df0 = applyPtSlonT1(close, events, ptsl_, events.index)
  # Earliest of the vertical, stop-loss and profit-taking touch times.
  events['t1'] = df0.dropna(how='all').min(axis=1)
  if side is None:
    events = events.drop('side', axis=1)
  return events

# 3.4 add vertical barrier
# NOTE(review): close, tEvents and numDays are not defined in the visible part
# of this file; they must already be in scope when this snippet runs.
# Position in close.index of the first bar at or after each seed time + numDays days.
t1 = close.index.searchsorted(tEvents+pd.Timedelta(days=numDays))
# Discard positions that fall past the last bar.
t1 = t1[t1<close.shape[0]]
# Barrier timestamp per seed time; tEvents is truncated to the number of surviving positions.
t1 = pd.Series(close.index[t1], index=tEvents[:t1.shape[0]])

# 3.5 Labeling for side and size & 3.7 meta-labeling extension
def getBins(events, close, t1=None):
  """Label each event from the return realised between its start and its first touch.

  events : DataFrame indexed by event start time with a 't1' column (time of
           the first barrier touch) and, optionally, a 'side' column
  close  : price series
  t1     : optional series of vertical-barrier timestamps indexed by event
           start time. When given, events whose touch time equals their
           vertical barrier are labelled 0 (exercise 3.3). When omitted, this
           rule is skipped and no module-level variable is consulted.

  Returns a DataFrame indexed like the events that have a 't1', with columns
  'ret' (realised return, multiplied by 'side' if present) and 'bin'
  (sign of 'ret'; with 'side' present, 0 for non-positive returns).
  """
  # 1) Prices at the event start and touch times.
  events_ = events.dropna(subset=['t1'])
  px = events_.index.union(events_['t1'].values).drop_duplicates()
  px = close.reindex(px, method='bfill')

  # 2) Output frame.
  has_side = 'side' in events_
  out = pd.DataFrame(index=events_.index)
  out['ret'] = px.loc[events_['t1'].values].values / px.loc[events_.index] - 1
  if has_side:
    out['ret'] *= events_['side']  # meta-label
  out['bin'] = np.sign(out['ret'])
  if has_side:
    out.loc[out['ret'] <= 0, 'bin'] = 0  # meta-label

  # Exercise 3.3: the touch time coincides with the configured vertical barrier.
  if t1 is not None:
    vertical = t1.reindex(events_.index)
    out.loc[(events_['t1'] == vertical).values, 'bin'] = 0
  return out

# 3.8 Drop under-populated labels
def dropLabels(events, minPtc=.05):
  """Repeatedly drop the rarest label until every label is frequent enough.

  events : DataFrame with a 'bin' column of labels
  minPtc : minimum fraction of rows a label needs in order to be kept
           (the parameter's spelling is kept so keyword callers keep working)

  Dropping stops as soon as the rarest label exceeds `minPtc` or fewer than
  three distinct labels remain. Returns the filtered DataFrame.
  """
  while True:
    freq = events['bin'].value_counts(normalize=True)
    if freq.min() > minPtc or freq.shape[0] < 3:
      break
    # idxmin() gives the label itself; argmin() would give its position.
    rarest = freq.idxmin()
    print('dropped label', rarest, freq.min())
    events = events[events['bin'] != rarest]
  return events

In [0]:
# Sample window: from 1 January 2019 up to the moment this cell runs.
start, end = dt.datetime(2019, 1, 1), dt.datetime.now()

In [0]:
# Request ^GSPC data from the 'yahoo' source for the start..end window
# (presumably needs network access to the data provider).
price_df = web.DataReader("^GSPC", 'yahoo', start, end)
# Keep only the 'Close' column as the price series used below.
close_sr = price_df['Close']

In [13]:
# Preview the first rows of the close series (recorded output follows).
close_sr.head()

Date
2019-01-02    2510.030029
2019-01-03    2447.889893
2019-01-04    2531.939941
2019-01-07    2549.689941
2019-01-08    2574.409912
Name: Close, dtype: float64

In [0]:
# Hand-built inputs for a quick check of applyPtSlonT1.
molecule = [dt.datetime(2019, 1, 3), dt.datetime(2019, 1, 7)]  # events to process
ptsl = [0.01, 0.01]  # [profit-taking, stop-loss] multiples of trgt

# One row per price bar; only the two events above get a vertical barrier.
events_df = pd.DataFrame(index=price_df.index, columns=['t1', 'trgt', 'side'])
events_df.loc[molecule, 't1'] = [dt.datetime(2019, 1, 5), dt.datetime(2019, 1, 9)]
events_df['trgt'] = 1
events_df['side'] = 1  # buy / sell

In [56]:
# Run the barrier search on the two hand-picked events; in the recorded output
# below only the 'pt' column holds touch dates and 'sl' is NaT for both rows.
applyPtSlonT1(close_sr, events_df, ptsl, molecule)

Unnamed: 0_level_0,t1,sl,pt
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-01-03,2019-01-05 00:00:00,NaT,2019-01-04
2019-01-07,2019-01-09 00:00:00,NaT,2019-01-09
