## IPython Notebook for Triple-Barrier Labeling

Add the package path to `sys.path` so the local modules can be imported for testing.

In [1]:
import os
import sys

# Location of the local quant_models checkout. Set the QUANT_MODELS_PATH
# environment variable to use a different checkout; the original hard-coded
# path remains the default so existing setups keep working.
QUANT_MODELS_PATH = os.environ.get('QUANT_MODELS_PATH', 'E:\\git_folder\\quant_models')

# Guard the append so re-running this cell does not pile up duplicate entries.
if QUANT_MODELS_PATH not in sys.path:
    sys.path.append(QUANT_MODELS_PATH)

In [2]:
import pandas as pd
from util.utils import get_daily_vol, sample_df

#### Load the data

In [3]:
# Load the 1-minute bars (path kept exactly as in the original cell).
data_fn = "././data/xu1_1min.csv"
df_price = pd.read_csv(data_fn)

# Build a datetime-indexed working frame without mutating df_price.
# assign() replaces the 'datetime' column outright, so it really becomes
# datetime64. The previous `test_df.loc[:, 'datetime'] = ...` sets values in
# place on pandas >= 2.0 and can leave the column as object dtype.
test_df = (
    df_price
    .assign(datetime=pd.to_datetime(df_price['datetime']))
    .set_index('datetime')
)

# Test on a small sample: keep only bars strictly after 2019-11-01 00:00.
df_testprice = test_df.loc[test_df.index > pd.to_datetime('2019-11-01')]

# Event time is set to 30 minutes.
df_sampled = sample_df(df_testprice, '30T')
print(df_sampled.count())

# Resampling can generate timestamps that are not trading times; keep only
# the timestamps that also exist in the original 1-minute index.
df_sampled = df_sampled.reindex(df_sampled.index.intersection(df_testprice.index))
print(df_sampled.count())

open      1402
close     1402
low       1402
high      1402
volume    1402
dtype: int64
open      1219
close     1219
low       1219
high      1219
volume    1219
dtype: int64


In [8]:
# Show the first two rows of the 30-minute sampled frame.
df_sampled.head(2)

Unnamed: 0_level_0,open,close,low,high,volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-11-01 00:30:00,13865.0,13857.5,13850.0,13865.0,686.0
2019-11-01 01:00:00,13855.0,13855.0,13850.0,13860.0,438.0


In [7]:
# Show the first two rows of the filtered 1-minute price frame for comparison.
df_testprice.head(2)

Unnamed: 0_level_0,open,high,low,close,volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-11-01 00:01:00,13862.5,13862.5,13860.0,13860.0,24.0
2019-11-01 00:02:00,13860.0,13862.5,13860.0,13862.5,40.0


### Start Testing

1. add the vertical barriers on events
2. get the daily vol on events
3. get the final triple-barrier events
4. get the labels
5. filter the labels

In [36]:
from labeling import get_events, add_vertical_barrier, get_bins, drop_labels
import random
import numpy as np

In [11]:
# Attach a vertical (time-limit) barrier to every sampled event timestamp,
# based on the 1-minute open prices and num_hours=5.
vertical_barrier_times = add_vertical_barrier(
    df_testprice['open'],
    df_sampled.index,
    num_hours=5,
)

In [12]:
# Inspect the last five entries of the vertical-barrier result.
vertical_barrier_times.tail(5)

datetime
2019-12-18 03:00:00   2019-12-18 08:59:00
2019-12-18 03:30:00   2019-12-18 08:59:00
2019-12-18 04:00:00   2019-12-18 09:00:00
2019-12-18 05:00:00   2019-12-18 10:00:00
2019-12-18 09:00:00   2019-12-18 14:00:00
Name: datetime, dtype: datetime64[ns]

In [14]:
# Volatility estimate computed from the sampled frame by the project helper.
target_vert = get_daily_vol(df_sampled)
# get_events is given only the 'vol' entry of that result as its target
# (presumably a Series, per the original note; confirm against get_daily_vol)
target      = target_vert['vol']

#### Compute triple-barrier labels

1. if the upper barrier is touched first, the label is 1
2. if the lower barrier is touched first, the label is -1
3. if the vertical barrier is touched first, the label is 0

In [15]:
# Build the events from the open prices, the volatility target and the
# sampled event timestamps, bounded in time by the vertical barriers.
events = get_events(
    df_testprice['open'],
    target,
    df_sampled.index,
    min_ret=0.001,
    num_threads=3,
    vertical_barrier_times=vertical_barrier_times,
)

2019-12-23 17:57:45.897672 100.0% apply_pt_sl_on_ent done after 0.06 minutes. Remaining 0.0 minutes.


In [48]:
# Turn the events into labels (ret / trgt / bin) using the open prices,
# then preview the first five rows.
final_hh = get_bins(events, df_testprice['open'])
final_hh.head(5)

Unnamed: 0_level_0,ret,trgt,bin
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-11-02 02:00:00,-0.001229,0.00109,-1
2019-11-04 09:00:00,0.002634,0.00957,0
2019-11-04 09:30:00,0.002984,0.011794,0
2019-11-04 10:00:00,0.002455,0.013282,0
2019-11-04 10:30:00,0.004217,0.013554,0


In [49]:
# Count how many events fall into each value of the `bin` label column.
final_hh.bin.value_counts()

 0    976
 1     97
-1     90
Name: bin, dtype: int64

#### Compute meta-labels

1. if the entered trade makes money, the label is 1
2. otherwise, the label is 0

In [40]:
# Simulate a primary model by drawing a random side (-1 or 1) for every
# timestamp in the target index. A dedicated, seeded generator keeps the
# simulated sides identical across Restart & Run All.
SIDE_SEED = 42
side_rng = random.Random(SIDE_SEED)
side = [side_rng.choice([-1, 1]) for _ in range(len(target.index))]

# One 'side' column, indexed by the target timestamps (index named 'test').
simulated_side = pd.DataFrame({'test': target.index, 'side': side}).set_index('test')

In [41]:
# Preview the first rows of the simulated sides.
simulated_side.head()

Unnamed: 0_level_0,side
test,Unnamed: 1_level_1
2019-11-02 01:30:00,-1
2019-11-02 02:00:00,-1
2019-11-02 02:30:00,1
2019-11-02 03:00:00,-1
2019-11-02 03:30:00,1


In [42]:
# Same event construction as for the plain labels, but with the simulated
# side passed in as the side prediction.
meta_evens = get_events(
    df_testprice['open'],
    target,
    df_sampled.index,
    min_ret=0.001,
    num_threads=3,
    vertical_barrier_times=vertical_barrier_times,
    side_prediction=simulated_side['side'],
)

2019-12-23 18:10:38.862050 100.0% apply_pt_sl_on_ent done after 0.07 minutes. Remaining 0.0 minutes.


In [43]:
# Preview the first rows of the events built with a side prediction.
meta_evens.head()

Unnamed: 0_level_0,ent,side,trgt,pt,sl
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-11-02 02:00:00,2019-11-02 03:12:00,-1.0,0.00109,1,1
2019-11-04 09:00:00,2019-11-04 14:00:00,1.0,0.00957,1,1
2019-11-04 09:30:00,2019-11-04 14:30:00,1.0,0.011794,1,1
2019-11-04 10:00:00,2019-11-04 15:00:00,1.0,0.013282,1,1
2019-11-04 10:30:00,2019-11-04 15:30:00,-1.0,0.013554,1,1


In [50]:
# Label the side-aware events using the open prices, then preview the
# first five rows. This rebinds final_hh, replacing the earlier labels.
final_hh = get_bins(meta_evens, df_testprice['open'])
final_hh.head(5)

Unnamed: 0_level_0,ret,trgt,bin,side
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-11-02 02:00:00,0.001231,0.00109,1,-1.0
2019-11-04 09:00:00,0.002634,0.00957,0,1.0
2019-11-04 09:30:00,0.002984,0.011794,0,1.0
2019-11-04 10:00:00,0.002455,0.013282,0,1.0
2019-11-04 10:30:00,-0.004199,0.013554,0,-1.0


In [51]:
# Count how many events fall into each value of the `bin` label column.
final_hh.bin.value_counts()

0    1072
1      91
Name: bin, dtype: int64

In [24]:
# Pass the labels through drop_labels from the local labeling module
# (presumably the "filter the labels" step; confirm against labeling.drop_labels).
# NOTE(review): this overwrites final_hh, so re-running the cell feeds the
# already-processed frame back into drop_labels.
final_hh = drop_labels(final_hh)