# Generating open position signals

Input: ohlc dataset

Output: ohlc dataset + position direction info + additional TA indicator values

In [1]:
import mydatalabeling as mdl
import myplotlib as mpl
import mytalib as mtal

import pandas as pd
import numpy as np

In [2]:
# Load the (small) OHLC dataset and move its index into a regular column.
# The full dataset lives in 'final_ds_cusum.pkl'; swap the file name to use it.
# NOTE: read_pickle can execute arbitrary code -- only load files you trust.
df = pd.read_pickle('final_ds_cusum_small.pkl')
df = df.reset_index()
df.head(2)

Unnamed: 0,date,Nr,open,high,low,close,sum_vol,time
0,2017-01-03,0,0.0,0.0,-0.01,0.0,1232,09:30:08
1,2017-01-03,5,-0.0195,0.04,-0.0195,0.0,1062,09:30:15


### Adding some TA indicators to our dataset

In [3]:
fastma_w = 10  # window of the fast moving average
slowma_w = 30  # window of the slow exponential moving average

# Append the indicator columns one after another:
# 'fastma' (MA), 'slowema' (EMA) and a 30-period RSI, all computed on 'close'.
df = (
    df
    .pipe(mtal.MA, n=fastma_w, col='close', _name='fastma')
    .pipe(mtal.EMA, n=slowma_w, col='close', _name='slowema')
    .pipe(mtal.RSI, n=30, col='close')
)

df.head(2)

Unnamed: 0,date,Nr,open,high,low,close,sum_vol,time,fastma,slowema,RSI_close_30
0,2017-01-03,0,0.0,0.0,-0.01,0.0,1232,09:30:08,0.0,0.0,0.0
1,2017-01-03,5,-0.0195,0.04,-0.0195,0.0,1062,09:30:15,0.0,0.0,0.0


In [4]:
# Plot one example day with both moving averages drawn over the price.
example_date = '2017-06-07'
lc = ['fastma', 'slowema']
mpl.plotChartWithSignal(
    df,
    example_date,
    lines_col=lc,
    show_markers=False,
)

## Implement a strategy
Here we generate signals based on a moving-average crossover strategy.

In [6]:
def generate_signals(df, fast_ma=10, slow_ma=30, signals_c='dir'):
    """Add moving-average crossover signals to a copy of ``df``.

    A fast MA and a slow EMA are computed with ``mtal.MA`` / ``mtal.EMA``
    (stored in the columns 'fastma' and 'slowema'). The column ``signals_c``
    then holds the *change* of the "fast above slow" state:

    *  1.0 on the row where the fast MA moves above the slow EMA,
    * -1.0 on the row where it stops being above it,
    *  0.0 on every other row.

    Parameters
    ----------
    df : pandas.DataFrame
        Input bars; must contain the column ``mtal.MA`` / ``mtal.EMA``
        operate on by default.
    fast_ma : int
        Window of the fast MA. The first ``fast_ma`` rows are treated as
        warm-up and never count as "fast above slow".
    slow_ma : int
        Window of the slow EMA.
    signals_c : str
        Name of the output signal column.

    Returns
    -------
    pandas.DataFrame
        A new frame with 'fastma', 'slowema' and ``signals_c`` added; the
        frame passed in is left untouched.
    """
    # Work on a copy so repeated calls do not pile side effects on the caller's frame.
    df = df.copy()

    # Use the requested windows (they were previously hard-coded to 10 / 30,
    # silently ignoring the fast_ma / slow_ma arguments).
    df = mtal.MA(df, fast_ma, _name='fastma')
    df = mtal.EMA(df, slow_ma, _name='slowema')

    # 1.0 while the fast MA is above the slow EMA, 0.0 otherwise. Built as a
    # plain array and masked by position, which avoids the chained assignment
    # ``df.iloc[...][col] = ...`` that may write to a temporary copy.
    fast_above = (df['fastma'] > df['slowema']).to_numpy().astype(float)
    fast_above[:fast_ma] = 0.0  # warm-up rows never produce a state

    # Differencing the state turns it into entry (+1) / exit (-1) events.
    df[signals_c] = fast_above
    df[signals_c] = df[signals_c].diff().fillna(0.0)

    return df

In [7]:
# Run the crossover strategy; the returned frame carries the signals in
# the new 'dir' column.
df = generate_signals(
    df,
    fast_ma=fastma_w,
    slow_ma=slowma_w,
    signals_c='dir',
)

df.head(2)

Unnamed: 0,date,Nr,open,high,low,close,sum_vol,time,fastma,slowema,RSI_close_30,dir
0,2017-01-03,0,0.0,0.0,-0.01,0.0,1232,09:30:08,0.0,0.0,0.0,0.0
1,2017-01-03,5,-0.0195,0.04,-0.0195,0.0,1062,09:30:15,0.0,0.0,0.0,0.0


In [8]:
# Keep only the rows that actually carry a signal (non-zero 'dir').
f = df['dir'].ne(0)
df[f].head()

Unnamed: 0,date,Nr,open,high,low,close,sum_vol,time,fastma,slowema,RSI_close_30,dir
14,2017-01-03,80,-0.02,-0.01,-0.02,-0.02,1000,09:37:00,-0.017,-0.02,-0.02,1.0
15,2017-01-03,86,-0.02,0.01,-0.02,0.01,1000,09:37:18,-0.003,0.01,0.01,-1.0
17,2017-01-03,101,-0.01,-0.01,-0.05,-0.05,1028,09:38:20,0.003,-0.05,-0.05,1.0
28,2017-01-03,178,-0.12,-0.12,-0.15,-0.15,1150,09:48:21,-0.08843,-0.064388,-0.15,-1.0
41,2017-01-03,239,-0.07,-0.07,-0.08,-0.08,4529,10:01:53,-0.07819,-0.081532,48.023073,1.0


In [9]:
# Create a graph for the example day.
lc = ['fastma', 'slowema']   # indicators drawn on the same graph as the price
ac = [['RSI_close_30']]      # indicator drawn as an additional graph
mpl.plotChartWithSignal(
    df,
    example_date,
    lines_col=lc,
    show_cross_idx=False,
    additiona_ind=ac,
)

# Meta Labeling
We will set the labels according to the triple-barrier method.

In [10]:
# First try the labeling on a single day: group the bars by date and pull
# out the example day.
# NOTE(review): nothing in this cell sets the index to 'Nr'; the labeled
# output further down is indexed by 'Nr', so presumably the labeling helper
# does that itself -- confirm in mydatalabeling.
grp = df.groupby(df['date'])
grp1 = grp.get_group(example_date)
grp1.head(2)

Unnamed: 0,date,Nr,open,high,low,close,sum_vol,time,fastma,slowema,RSI_close_30,dir
44588,2017-06-07,0,0.0,0.03,-0.03,0.001,1100,09:30:01,0.10226,0.114967,33.350585,0.0
44589,2017-06-07,6,0.01,0.03,0.01,0.03,2500,09:32:44,0.09441,0.109485,37.632228,0.0


In [15]:
# Barrier settings handed to the mdl labeling helpers as TP / SL / verticalBarrier.
# NOTE(review): the helpers are called with isPercentUse=False, so these are
# presumably absolute price distances and a bar count -- confirm.
TakeProfit, StopLoss = 0.05, 0.05
Window = 40

In [16]:
# Generate labels for the single example day; the 'dir' column serves both
# as the event column and as the side column.
df_met = mdl.metaLabling_executer(
    grp1,
    colEvents='dir',
    colSide='dir',
    verticalBarrier=Window,
    SL=StopLoss,
    TP=TakeProfit,
    isPercentUse=False,
)

# Show only the rows that received a label (non-zero 'touch_tp').
f = df_met['touch_tp'].ne(0.0)

df_met[f].head(20)

Unnamed: 0_level_0,date,open,high,low,close,sum_vol,time,fastma,slowema,RSI_close_30,dir,touch_tp,cross_idx,raw_return
Nr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
97,2017-06-07,0.15,0.17,0.136,0.15,1092,09:55:42,0.09227,0.087152,53.906062,1.0,1.0,115.0,0.07
238,2017-06-07,-0.02,0.0066,-0.02,0.0066,2505,10:33:45,0.10297,0.109345,41.516781,-1.0,1.0,249.0,0.0532
500,2017-06-07,0.0,0.0352,0.0,0.0352,1186,12:18:40,-0.00478,-0.006529,51.834338,1.0,-1.0,523.0,-0.0552
566,2017-06-07,-0.11,-0.11,-0.12,-0.12,1200,12:47:49,-0.0278,-0.020771,39.373346,-1.0,1.0,593.0,0.33
698,2017-06-07,-0.09,-0.09,-0.09,-0.09,1197,13:20:10,-0.085,-0.089883,47.068692,1.0,1.0,731.0,0.12
833,2017-06-07,-0.04,-0.04,-0.053,-0.053,1313,13:50:12,-0.05667,-0.056282,50.076557,-1.0,-1.0,846.0,-0.073
846,2017-06-07,0.0,0.02,0.0,0.02,1000,13:56:10,-0.04626,-0.048816,57.760259,1.0,1.0,876.0,0.07
1090,2017-06-07,0.08,0.08,0.07,0.07,1171,15:10:27,0.10055,0.101302,49.807769,-1.0,-1.0,1148.0,-0.0633
1148,2017-06-07,0.1247,0.1333,0.1247,0.1333,1230,15:26:41,0.10498,0.104686,56.848161,1.0,-1.0,1202.0,-0.0533
1215,2017-06-07,0.1,0.11,0.1,0.11,1234,15:42:45,0.10663,0.111165,51.925158,-1.0,-1.0,1224.0,-0.08


In [17]:
# Rows labeled 255.0 whose raw return exceeds 0.05 are re-labeled as 1.0.
# NOTE(review): 255.0 looks like the code the labeling helper emits when
# neither TP nor SL was touched inside the window -- confirm.
# NOTE(review): 0.05 equals TakeProfit; presumably it should track it -- confirm.
f = df_met['touch_tp'].eq(255.0) & df_met['raw_return'].gt(0.05)
df_met.loc[f, 'touch_tp'] = 1.0

# Show the labeled rows again.
f = df_met['touch_tp'].ne(0.0)
df_met[f].head(10)


Unnamed: 0_level_0,date,open,high,low,close,sum_vol,time,fastma,slowema,RSI_close_30,dir,touch_tp,cross_idx,raw_return
Nr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
97,2017-06-07,0.15,0.17,0.136,0.15,1092,09:55:42,0.09227,0.087152,53.906062,1.0,1.0,115.0,0.07
238,2017-06-07,-0.02,0.0066,-0.02,0.0066,2505,10:33:45,0.10297,0.109345,41.516781,-1.0,1.0,249.0,0.0532
500,2017-06-07,0.0,0.0352,0.0,0.0352,1186,12:18:40,-0.00478,-0.006529,51.834338,1.0,-1.0,523.0,-0.0552
566,2017-06-07,-0.11,-0.11,-0.12,-0.12,1200,12:47:49,-0.0278,-0.020771,39.373346,-1.0,1.0,593.0,0.33
698,2017-06-07,-0.09,-0.09,-0.09,-0.09,1197,13:20:10,-0.085,-0.089883,47.068692,1.0,1.0,731.0,0.12
833,2017-06-07,-0.04,-0.04,-0.053,-0.053,1313,13:50:12,-0.05667,-0.056282,50.076557,-1.0,-1.0,846.0,-0.073
846,2017-06-07,0.0,0.02,0.0,0.02,1000,13:56:10,-0.04626,-0.048816,57.760259,1.0,1.0,876.0,0.07
1090,2017-06-07,0.08,0.08,0.07,0.07,1171,15:10:27,0.10055,0.101302,49.807769,-1.0,-1.0,1148.0,-0.0633
1148,2017-06-07,0.1247,0.1333,0.1247,0.1333,1230,15:26:41,0.10498,0.104686,56.848161,1.0,-1.0,1202.0,-0.0533
1215,2017-06-07,0.1,0.11,0.1,0.11,1234,15:42:45,0.10663,0.111165,51.925158,-1.0,-1.0,1224.0,-0.08


In [18]:
# Every remaining 255.0 label becomes -1.0 (existing -1.0 labels stay as they are).
df_met['touch_tp'] = df_met['touch_tp'].replace(255.0, -1.0)
# `f` comes from the previous cell; it still selects the same rows because
# both 255.0 and -1.0 are non-zero.
df_met[f].head(10)

Unnamed: 0_level_0,date,open,high,low,close,sum_vol,time,fastma,slowema,RSI_close_30,dir,touch_tp,cross_idx,raw_return
Nr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
97,2017-06-07,0.15,0.17,0.136,0.15,1092,09:55:42,0.09227,0.087152,53.906062,1.0,1.0,115.0,0.07
238,2017-06-07,-0.02,0.0066,-0.02,0.0066,2505,10:33:45,0.10297,0.109345,41.516781,-1.0,1.0,249.0,0.0532
500,2017-06-07,0.0,0.0352,0.0,0.0352,1186,12:18:40,-0.00478,-0.006529,51.834338,1.0,-1.0,523.0,-0.0552
566,2017-06-07,-0.11,-0.11,-0.12,-0.12,1200,12:47:49,-0.0278,-0.020771,39.373346,-1.0,1.0,593.0,0.33
698,2017-06-07,-0.09,-0.09,-0.09,-0.09,1197,13:20:10,-0.085,-0.089883,47.068692,1.0,1.0,731.0,0.12
833,2017-06-07,-0.04,-0.04,-0.053,-0.053,1313,13:50:12,-0.05667,-0.056282,50.076557,-1.0,-1.0,846.0,-0.073
846,2017-06-07,0.0,0.02,0.0,0.02,1000,13:56:10,-0.04626,-0.048816,57.760259,1.0,1.0,876.0,0.07
1090,2017-06-07,0.08,0.08,0.07,0.07,1171,15:10:27,0.10055,0.101302,49.807769,-1.0,-1.0,1148.0,-0.0633
1148,2017-06-07,0.1247,0.1333,0.1247,0.1333,1230,15:26:41,0.10498,0.104686,56.848161,1.0,-1.0,1202.0,-0.0533
1215,2017-06-07,0.1,0.11,0.1,0.11,1234,15:42:45,0.10663,0.111165,51.925158,-1.0,-1.0,1224.0,-0.08


In [19]:
%%time
#generate labels for all datas
df_met = mdl.metalabeling_labels_mp(df,colEvents='dir',colSide='dir', grpby='date', verticalBarrier=Window, SL=StopLoss, TP=TakeProfit, isPercentUse=False)

#select labels
f = df_met.touch_tp != 0.0
df_met[f].head(2)

CPU times: user 676 ms, sys: 124 ms, total: 800 ms
Wall time: 2.02 s


Unnamed: 0_level_0,date,open,high,low,close,sum_vol,time,fastma,slowema,RSI_close_30,dir,touch_tp,cross_idx,raw_return
Nr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
80,2017-01-03,-0.02,-0.01,-0.02,-0.02,1000,09:37:00,-0.017,-0.02,-0.02,1.0,-1.0,115.0,-0.09
86,2017-01-03,-0.02,0.01,-0.02,0.01,1000,09:37:18,-0.003,0.01,0.01,-1.0,1.0,101.0,0.06


In [20]:
# Rows labeled 255.0 whose raw return exceeds 0.05 are re-labeled as 1.0.
# NOTE(review): 255.0 looks like the code the labeling helper emits when
# neither TP nor SL was touched inside the window -- confirm.
# NOTE(review): 0.05 equals TakeProfit; presumably it should track it -- confirm.
f = df_met['touch_tp'].eq(255.0) & df_met['raw_return'].gt(0.05)
df_met.loc[f, 'touch_tp'] = 1.0

# Show the labeled rows again.
f = df_met['touch_tp'].ne(0.0)
df_met[f].head(2)

Unnamed: 0_level_0,date,open,high,low,close,sum_vol,time,fastma,slowema,RSI_close_30,dir,touch_tp,cross_idx,raw_return
Nr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
80,2017-01-03,-0.02,-0.01,-0.02,-0.02,1000,09:37:00,-0.017,-0.02,-0.02,1.0,-1.0,115.0,-0.09
86,2017-01-03,-0.02,0.01,-0.02,0.01,1000,09:37:18,-0.003,0.01,0.01,-1.0,1.0,101.0,0.06


In [21]:
# Every remaining 255.0 label becomes -1.0 (existing -1.0 labels stay as they are).
df_met['touch_tp'] = df_met['touch_tp'].replace(255.0, -1.0)
# `f` comes from the previous cell; it still selects the same rows because
# both 255.0 and -1.0 are non-zero.
df_met[f].head(2)

Unnamed: 0_level_0,date,open,high,low,close,sum_vol,time,fastma,slowema,RSI_close_30,dir,touch_tp,cross_idx,raw_return
Nr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
80,2017-01-03,-0.02,-0.01,-0.02,-0.02,1000,09:37:00,-0.017,-0.02,-0.02,1.0,-1.0,115.0,-0.09
86,2017-01-03,-0.02,0.01,-0.02,0.01,1000,09:37:18,-0.003,0.01,0.01,-1.0,1.0,101.0,0.06


In [22]:
# Persist the labeled dataset: reset_index() moves the current index back
# into a regular column before the frame is pickled to disk.
df_met.reset_index().to_pickle('final_ds_labeled_small.pkl')

In [23]:
# Distribution of the final labels across the whole dataset.
df_met['touch_tp'].value_counts()

 0.0    81106
-1.0     1616
 1.0     1406
Name: touch_tp, dtype: int64