In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
data_train = pd.read_csv('./data/train_2327.csv')
data_test = pd.read_csv('./data/test_2327.csv')

In [3]:
#技術指標
#Moving Average  
def MA(df, n):  
    MA = df.rolling(n).mean()
    return MA

#均價線
def AVL(df):#, tick_N):
    AVL =  pd.DataFrame(data = np.zeros((len(df), 1)), index = df.index, columns = ['AVL'])  
    AVL['AVL'] = np.cumsum(df['DealPrice']*df['DealQty']) / np.cumsum(df['DealQty'])
    AVL['%AVL'] = df['mid1'] / AVL['AVL'] - 1
    return AVL

#布林通道
def BBANDS(df, n, multiple, tick_N):  
    MA = df.rolling(n).mean()
    #%change
    MA_ = MA / MA.shift(tick_N) -1
    MSD = df.rolling(n).std()
    ub1 =  MA + multiple * MSD
    #%change
    ub1_ = ub1 / ub1.shift(tick_N) -1
    uB1 = pd.DataFrame(ub1_, index = df.index, columns = df.columns)  
    lb1 =  MA - multiple * MSD
    #%change
    lb1_ = lb1 / lb1.shift(tick_N) -1
    lB1 = pd.DataFrame(lb1_, index = df.index, columns = df.columns)  
    b2 = (df - MA + multiple * MSD) / (2 * multiple * MSD)  
    B2 = pd.DataFrame(b2, index = df.index, columns = df.columns)  
    BBAND = pd.concat([uB1, lB1, B2], keys = ['UpperBand', 'LowerBand', '%B'], axis = 1)
    BBAND.columns = ['UpperBand', 'LowerBand', '%B']
    return BBAND

#MACD, MACD Signal and MACD difference  
def MACD(df, n_fast, n_slow, C_price_name='mid1'):   
    EMAfast = df[C_price_name].ewm(span = n_fast, min_periods = n_slow - 1).mean() 
    EMAslow = df[C_price_name].ewm(span = n_slow, min_periods = n_slow - 1).mean() 
    #短EMA-長EMA
    MACD = pd.Series(EMAfast - EMAslow, name = 'MACD_' + str(n_fast) + '_' + str(n_slow))  
    MACDsign = pd.Series(MACD.ewm(span = 9, min_periods = 8).mean(), name = 'MACDsign_' + str(n_fast) + '_' + str(n_slow))  
    MACDdiff = pd.Series(MACD - MACDsign, name = 'MACDdiff_' + str(n_fast) + '_' + str(n_slow))  
    MACD = pd.concat([MACD, MACDsign, MACDdiff], axis = 1)
    return MACD

#generate features
def AlphaGenerate_Tick(original_data,bb=False,avl=False,spread=False,macd=False,price_diff=False,
                       price_mean=False,qty_mean=False,acc_diff=False,dt1p=False,dt5p=False,dt10p=False,
                      cross1=False,cross5=False,cross10=False,spread_pq=False,price_diff_pq=False,
                      spread_pct_pq=False,pq_mean=False,acc_diff_pq=False,dt1_pq=False,dt5_pq=False,dt10_pq=False,
                      ma=False,dt1q=False,dt5q=False,dt10q=False):
    ############################################################################################################################
    #需要預先處裡的東西放這邊
    #build Date column
    #original_data['Date'] = pd.to_datetime(original_data['TxTime'])
    #original_data['Date'] = [str(x)[0:10] for x in original_data['Date']]
    #df to save X
    X = pd.DataFrame()
    #copy
    data=original_data.copy()
    #mid
    data['mid1'] = (data['Ask1'] + data['Bid1']) / 2
    data['mid2'] = (data['Ask2'] + data['Bid2']) / 2
    data['mid3'] = (data['Ask3'] + data['Bid3']) / 2
    data['mid4'] = (data['Ask4'] + data['Bid4']) / 2
    data['mid5'] = (data['Ask5'] + data['Bid5']) / 2
    #some index's ask1 = 0, we change that mid1 to bid1
    idx = data[data['Ask1'] == 0].index
    for i in idx:
        data['mid1'].iloc[i] = data['Bid1'].iloc[i]
    #ask and askqty <-> bid and bidqty if one side = 0
    data['Ask1_'] = data['Ask1'].values
    data['Ask2_'] = data['Ask2'].values
    data['Ask3_'] = data['Ask3'].values
    data['Ask4_'] = data['Ask4'].values
    data['Ask5_'] = data['Ask5'].values
    data['Bid1_'] = data['Bid1'].values
    data['Bid2_'] = data['Bid2'].values
    data['Bid3_'] = data['Bid3'].values
    data['Bid4_'] = data['Bid4'].values
    data['Bid5_'] = data['Bid5'].values
    data['AskQty1_'] = data['AskQty1'].values
    data['AskQty2_'] = data['AskQty2'].values
    data['AskQty3_'] = data['AskQty3'].values
    data['AskQty4_'] = data['AskQty4'].values
    data['AskQty5_'] = data['AskQty5'].values
    data['BidQty1_'] = data['BidQty1'].values
    data['BidQty2_'] = data['BidQty2'].values
    data['BidQty3_'] = data['BidQty3'].values
    data['BidQty4_'] = data['BidQty4'].values
    data['BidQty5_'] = data['BidQty5'].values
    data['Ask1_'][data['Ask1_']==0]=data['Bid1_']
    data['Ask2_'][data['Ask2_']==0]=data['Bid2_']
    data['Ask3_'][data['Ask3_']==0]=data['Bid3_']
    data['Ask4_'][data['Ask4_']==0]=data['Bid4_']
    data['Ask5_'][data['Ask5_']==0]=data['Bid5_']
    data['AskQty1_'][data['AskQty1_']==0]=data['BidQty1_']
    data['AskQty2_'][data['AskQty2_']==0]=data['BidQty2_']
    data['AskQty3_'][data['AskQty3_']==0]=data['BidQty3_']
    data['AskQty4_'][data['AskQty4_']==0]=data['BidQty4_']
    data['AskQty5_'][data['AskQty5_']==0]=data['BidQty5_']
    data['Bid1_'][data['Bid1_']==0]=data['Ask1_']
    data['Bid2_'][data['Bid2_']==0]=data['Ask2_']
    data['Bid3_'][data['Bid3_']==0]=data['Ask3_']
    data['Bid4_'][data['Bid4_']==0]=data['Ask4_']
    data['Bid5_'][data['Bid5_']==0]=data['Ask5_']
    data['BidQty1_'][data['BidQty1_']==0]=data['AskQty1_']
    data['BidQty2_'][data['BidQty2_']==0]=data['AskQty2_']
    data['BidQty3_'][data['BidQty3_']==0]=data['AskQty3_']
    data['BidQty4_'][data['BidQty4_']==0]=data['AskQty4_']
    data['BidQty5_'][data['BidQty5_']==0]=data['AskQty5_']
    #df to save
    #df_date = pd.DataFrame()
    #date_idx = data['Date'].unique()
    #value(price*quantity)
    data['Ask1_value'] = data['Ask1_']*data['AskQty1_']
    data['Ask2_value'] = data['Ask2_']*data['AskQty2_']
    data['Ask3_value'] = data['Ask3_']*data['AskQty3_']
    data['Ask4_value'] = data['Ask4_']*data['AskQty4_']
    data['Ask5_value'] = data['Ask5_']*data['AskQty5_']
    data['Bid1_value'] = data['Bid1_']*data['BidQty1_']
    data['Bid2_value'] = data['Bid2_']*data['BidQty2_']
    data['Bid3_value'] = data['Bid3_']*data['BidQty3_']
    data['Bid4_value'] = data['Bid4_']*data['BidQty4_']
    data['Bid5_value'] = data['Bid5_']*data['BidQty5_']
    ############################################################################################################################
    #因子放這邊
    #不用考慮換日的因子 
    #MA
    if ma == True:
        X['MA50_ask1']=MA(data['Ask1_value'], 50)-data['Ask1_value']
        X['MA50_ask2']=MA(data['Ask2_value'], 50)-data['Ask2_value']
        X['MA50_ask3']=MA(data['Ask3_value'], 50)-data['Ask3_value']
        X['MA50_ask4']=MA(data['Ask4_value'], 50)-data['Ask4_value']
        X['MA50_ask5']=MA(data['Ask5_value'], 50)-data['Ask5_value']
        X['MA50_bid1']=MA(data['Bid1_value'], 50)-data['Bid1_value']
        X['MA50_bid2']=MA(data['Bid2_value'], 50)-data['Bid2_value']
        X['MA50_bid3']=MA(data['Bid3_value'], 50)-data['Bid3_value']
        X['MA50_bid4']=MA(data['Bid4_value'], 50)-data['Bid4_value']
        X['MA50_bid5']=MA(data['Bid5_value'], 50)-data['Bid5_value']
        X['MA100_ask1']=MA(data['Ask1_value'], 100)-data['Ask1_value']
        X['MA100_ask2']=MA(data['Ask2_value'], 100)-data['Ask2_value']
        X['MA100_ask3']=MA(data['Ask3_value'], 100)-data['Ask3_value']
        X['MA100_ask4']=MA(data['Ask4_value'], 100)-data['Ask4_value']
        X['MA100_ask5']=MA(data['Ask5_value'], 100)-data['Ask5_value']
        X['MA100_bid1']=MA(data['Bid1_value'], 100)-data['Bid1_value']
        X['MA100_bid2']=MA(data['Bid2_value'], 100)-data['Bid2_value']
        X['MA100_bid3']=MA(data['Bid3_value'], 100)-data['Bid3_value']
        X['MA100_bid4']=MA(data['Bid4_value'], 100)-data['Bid4_value']
        X['MA100_bid5']=MA(data['Bid5_value'], 100)-data['Bid5_value']
        X['MA10_ab1']=MA(data['Ask1_value'], 10) - MA(data['Bid1_value'], 10)
        X['MA10_ab2']=MA(data['Ask2_value'], 10) - MA(data['Bid2_value'], 10)
        X['MA10_ab3']=MA(data['Ask3_value'], 10) - MA(data['Bid3_value'], 10)
        X['MA10_ab4']=MA(data['Ask4_value'], 10) - MA(data['Bid4_value'], 10)
        X['MA10_ab5']=MA(data['Ask5_value'], 10) - MA(data['Bid5_value'], 10)
        X['MA20_ab1']=MA(data['Ask1_value'], 20) - MA(data['Bid1_value'], 20)
        X['MA20_ab2']=MA(data['Ask2_value'], 20) - MA(data['Bid2_value'], 20)
        X['MA20_ab3']=MA(data['Ask3_value'], 20) - MA(data['Bid3_value'], 20)
        X['MA20_ab4']=MA(data['Ask4_value'], 20) - MA(data['Bid4_value'], 20)
        X['MA20_ab5']=MA(data['Ask5_value'], 20) - MA(data['Bid5_value'], 20)
        X['MA30_ab1']=MA(data['Ask1_value'], 30) - MA(data['Bid1_value'], 30)
        X['MA30_ab2']=MA(data['Ask2_value'], 30) - MA(data['Bid2_value'], 30)
        X['MA30_ab3']=MA(data['Ask3_value'], 30) - MA(data['Bid3_value'], 30)
        X['MA30_ab4']=MA(data['Ask4_value'], 30) - MA(data['Bid4_value'], 30)
        X['MA30_ab5']=MA(data['Ask5_value'], 30) - MA(data['Bid5_value'], 30)
        X['MA50_ab1']=MA(data['Ask1_value'], 50) - MA(data['Bid1_value'], 50)
        X['MA50_ab2']=MA(data['Ask2_value'], 50) - MA(data['Bid2_value'], 50)
        X['MA50_ab3']=MA(data['Ask3_value'], 50) - MA(data['Bid3_value'], 50)
        X['MA50_ab4']=MA(data['Ask4_value'], 50) - MA(data['Bid4_value'], 50)
        X['MA50_ab5']=MA(data['Ask5_value'], 50) - MA(data['Bid5_value'], 50)
        X['MA100_ab1']=MA(data['Ask1_value'], 100) - MA(data['Bid1_value'], 100)
        X['MA100_ab2']=MA(data['Ask2_value'], 100) - MA(data['Bid2_value'], 100)
        X['MA100_ab3']=MA(data['Ask3_value'], 100) - MA(data['Bid3_value'], 100)
        X['MA100_ab4']=MA(data['Ask4_value'], 100) - MA(data['Bid4_value'], 100)
        X['MA100_ab5']=MA(data['Ask5_value'], 100) - MA(data['Bid5_value'], 100)
    #布林通道
    if bb == True:
        X = pd.concat([X, BBANDS(data[['mid1']], 100, 5, 10)], axis = 1)
    #均價線
    if avl == True:
        X = pd.concat([X, AVL(data)], axis = 1)
    #MACD
    if macd == True:
        X = pd.concat([X, MACD(data, 12, 26, C_price_name='mid1')], axis = 1)
    #spread of price
    if spread == True:
        X['spread1'] = data['Ask1_'] - data['Bid1_']
        X['spread2'] = data['Ask2_'] - data['Bid2_']
        X['spread3'] = data['Ask3_'] - data['Bid3_']
        X['spread4'] = data['Ask4_'] - data['Bid4_']
        X['spread5'] = data['Ask5_'] - data['Bid5_']
    #pct change of value
    #if spread_pct_pq == True:
    #    X['spread1_pct_pq'] = (data['Ask1_value'] / data['Bid1_value']) -1
    #    X['spread2_pct_pq'] = (data['Ask2_value'] / data['Bid2_value']) -1
    #    X['spread3_pct_pq'] = (data['Ask3_value'] / data['Bid3_value']) -1
    #    X['spread4_pct_pq'] = (data['Ask4_value'] / data['Bid4_value']) -1
    #    X['spread5_pct_pq'] = (data['Ask5_value'] / data['Bid5_value']) -1
    #spread of value
    if spread_pq == True:
        X['spread1_pq'] = data['Ask1_value'] - data['Bid1_value']
        X['spread2_pq'] = data['Ask2_value'] - data['Bid2_value'] 
        X['spread3_pq'] = data['Ask3_value'] - data['Bid3_value'] 
        X['spread4_pq'] = data['Ask4_value'] - data['Bid4_value'] 
        X['spread5_pq'] = data['Ask5_value'] - data['Bid5_value'] 
    #price diff
    if price_diff == True:
        X['a51_diff'] = data['Ask5_'] - data['Ask1_']
        X['b15_diff'] = data['Bid1_'] - data['Bid5_']
        X['a21_diff_abs'] = np.abs(data['Ask2_'] - data['Ask1_'])
        X['a32_diff_abs'] = np.abs(data['Ask3_'] - data['Ask2_'])
        X['a43_diff_abs'] = np.abs(data['Ask4_'] - data['Ask3_'])
        X['a54_diff_abs'] = np.abs(data['Ask5_'] - data['Ask4_'])
        X['b21_diff_abs'] = np.abs(data['Bid2_'] - data['Bid1_'])
        X['b32_diff_abs'] = np.abs(data['Bid3_'] - data['Bid2_'])
        X['b43_diff_abs'] = np.abs(data['Bid4_'] - data['Bid3_'])
        X['b54_diff_abs'] = np.abs(data['Bid5_'] - data['Bid4_'])
    #value diff
    if price_diff_pq == True:
        X['a51_diff_pq'] = data['Ask5_value'] - data['Ask1_value']
        X['b15_diff_pq'] = data['Bid1_value'] - data['Bid5_value']
        X['a21_diff_pq'] = data['Ask2_value'] - data['Ask1_value']
        X['a32_diff_pq'] = data['Ask3_value'] - data['Ask2_value']
        X['a43_diff_pq'] = data['Ask4_value'] - data['Ask3_value']
        X['a54_diff_pq'] = data['Ask5_value'] - data['Ask4_value']
        X['b21_diff_pq'] = data['Bid2_value'] - data['Bid1_value']
        X['b32_diff_pq'] = data['Bid3_value'] - data['Bid2_value']
        X['b43_diff_pq'] = data['Bid4_value'] - data['Bid3_value']
        X['b54_diff_pq'] = data['Bid5_value'] - data['Bid4_value']
    #price mean
    if price_mean == True:
        X['a_mean'] = (data['Ask1_'] +data['Ask2_'] +data['Ask3_'] +data['Ask4_'] +data['Ask5_']) / 5
        X['b_mean'] = (data['Bid1_'] +data['Bid2_'] +data['Bid3_'] +data['Bid4_'] +data['Bid5_']) / 5
    #quantity mean
    if qty_mean == True:
        X['aq_mean'] = (data['AskQty1_'] +data['AskQty2_'] +data['AskQty3_'] +data['AskQty4_'] +data['AskQty5_']) / 5
        X['bq_mean'] = (data['BidQty1_'] +data['BidQty2_'] +data['BidQty3_'] +data['BidQty4_'] +data['BidQty5_']) / 5
    #value mean
    if pq_mean == True:
        X['a_pq_mean'] = (data['Ask1_value']+data['Ask2_value']+data['Ask3_value']+data['Ask4_value']+data['Ask5_value'])/5
        X['b_pq_mean'] = (data['Bid1_value']+data['Bid2_value']+data['Bid3_value']+data['Bid4_value']+data['Bid5_value'])/5
    #accumulative difference of price and quantity
    if acc_diff == True:
        X['p_acc_diff'] = (data['Ask1_'] - data['Bid1_']) +(data['Ask2_'] - data['Bid2_']) + (data['Ask3_'] - data['Bid3_']) + (data['Ask4_'] - data['Bid4_']) + (data['Ask5_'] - data['Bid5_'])
        X['q_acc_diff'] = (data['AskQty1_'] - data['BidQty1_']) +(data['AskQty2_'] - data['BidQty2_']) + (data['AskQty3_'] - data['BidQty3_']) + (data['AskQty4_'] - data['BidQty4_']) + (data['AskQty5_'] - data['BidQty5_'])
    #value accumulative difference
    if acc_diff_pq == True:
        X['p_acc_diff_pq'] = (data['Ask1_value'] - data['Bid1_value']) +(data['Ask2_value'] - data['Bid2_value']) + (data['Ask3_value'] - data['Bid3_value']) + (data['Ask4_value'] - data['Bid4_value']) + (data['Ask5_value'] - data['Bid5_value'])
    ############################################################################################################################
    #要考慮換日的因子
    #shift 1 5 10 (price, quantity, value)
    if dt1p == True:
    #1 tick
        X['da1_dt_1'] = data['Ask1_']-data.groupby('Date')['Ask1_'].shift(1)
        X['da2_dt_1'] = data['Ask2_']-data.groupby('Date')['Ask2_'].shift(1)
        X['da3_dt_1'] = data['Ask3_']-data.groupby('Date')['Ask3_'].shift(1)
        X['da4_dt_1'] = data['Ask4_']-data.groupby('Date')['Ask4_'].shift(1)
        X['da5_dt_1'] = data['Ask5_']-data.groupby('Date')['Ask5_'].shift(1)
        X['db1_dt_1'] = data['Bid1_']-data.groupby('Date')['Bid1_'].shift(1)
        X['db2_dt_1'] = data['Bid2_']-data.groupby('Date')['Bid2_'].shift(1)
        X['db3_dt_1'] = data['Bid3_']-data.groupby('Date')['Bid3_'].shift(1)
        X['db4_dt_1'] = data['Bid4_']-data.groupby('Date')['Bid4_'].shift(1)
        X['db5_dt_1'] = data['Bid5_']-data.groupby('Date')['Bid5_'].shift(1)
        #fillna to 0
        X[['da1_dt_1','da2_dt_1','da3_dt_1','da4_dt_1','da5_dt_1','db1_dt_1','db2_dt_1','db3_dt_1','db4_dt_1','db5_dt_1']] = X[['da1_dt_1','da2_dt_1','da3_dt_1','da4_dt_1','da5_dt_1','db1_dt_1','db2_dt_1','db3_dt_1','db4_dt_1','db5_dt_1']].fillna(value = 0)
        
    if dt1_pq == True:
    #1 tick
        X['da1_dt_1_pq'] = data['Ask1_value']-data.groupby('Date')['Ask1_value'].shift(1)
        X['da2_dt_1_pq'] = data['Ask2_value']-data.groupby('Date')['Ask2_value'].shift(1)
        X['da3_dt_1_pq'] = data['Ask3_value']-data.groupby('Date')['Ask3_value'].shift(1)
        X['da4_dt_1_pq'] = data['Ask4_value']-data.groupby('Date')['Ask4_value'].shift(1)
        X['da5_dt_1_pq'] = data['Ask5_value']-data.groupby('Date')['Ask5_value'].shift(1)
        X['db1_dt_1_pq'] = data['Bid1_value']-data.groupby('Date')['Bid1_value'].shift(1)
        X['db2_dt_1_pq'] = data['Bid2_value']-data.groupby('Date')['Bid2_value'].shift(1)
        X['db3_dt_1_pq'] = data['Bid3_value']-data.groupby('Date')['Bid3_value'].shift(1)
        X['db4_dt_1_pq'] = data['Bid4_value']-data.groupby('Date')['Bid4_value'].shift(1)
        X['db5_dt_1_pq'] = data['Bid5_value']-data.groupby('Date')['Bid5_value'].shift(1)
    if dt5p == True:
    #5 ticks
        X['da1_dt_5'] = data['Ask1_']-data.groupby('Date')['Ask1_'].shift(5)
        X['da2_dt_5'] = data['Ask2_']-data.groupby('Date')['Ask2_'].shift(5)
        X['da3_dt_5'] = data['Ask3_']-data.groupby('Date')['Ask3_'].shift(5)
        X['da4_dt_5'] = data['Ask4_']-data.groupby('Date')['Ask4_'].shift(5)
        X['da5_dt_5'] = data['Ask5_']-data.groupby('Date')['Ask5_'].shift(5)
        X['db1_dt_5'] = data['Bid1_']-data.groupby('Date')['Bid1_'].shift(5)
        X['db2_dt_5'] = data['Bid2_']-data.groupby('Date')['Bid2_'].shift(5)
        X['db3_dt_5'] = data['Bid3_']-data.groupby('Date')['Bid3_'].shift(5)
        X['db4_dt_5'] = data['Bid4_']-data.groupby('Date')['Bid4_'].shift(5)
        X['db5_dt_5'] = data['Bid5_']-data.groupby('Date')['Bid5_'].shift(5)
        #fill na to 0
        X[['da1_dt_5','da2_dt_5','da3_dt_5','da4_dt_5','da5_dt_5','db1_dt_5','db2_dt_5','db3_dt_5','db4_dt_5','db5_dt_5']] = X[['da1_dt_5','da2_dt_5','da3_dt_5','da4_dt_5','da5_dt_5','db1_dt_5','db2_dt_5','db3_dt_5','db4_dt_5','db5_dt_5']].fillna(value = 0)
    if dt5_pq == True:
    #5 tick
        X['da1_dt_5_pq'] = data['Ask1_value']-data.groupby('Date')['Ask1_value'].shift(5)
        X['da2_dt_5_pq'] = data['Ask2_value']-data.groupby('Date')['Ask2_value'].shift(5)
        X['da3_dt_5_pq'] = data['Ask3_value']-data.groupby('Date')['Ask3_value'].shift(5)
        X['da4_dt_5_pq'] = data['Ask4_value']-data.groupby('Date')['Ask4_value'].shift(5)
        X['da5_dt_5_pq'] = data['Ask5_value']-data.groupby('Date')['Ask5_value'].shift(5)
        X['db1_dt_5_pq'] = data['Bid1_value']-data.groupby('Date')['Bid1_value'].shift(5)
        X['db2_dt_5_pq'] = data['Bid2_value']-data.groupby('Date')['Bid2_value'].shift(5)
        X['db3_dt_5_pq'] = data['Bid3_value']-data.groupby('Date')['Bid3_value'].shift(5)
        X['db4_dt_5_pq'] = data['Bid4_value']-data.groupby('Date')['Bid4_value'].shift(5)
        X['db5_dt_5_pq'] = data['Bid5_value']-data.groupby('Date')['Bid5_value'].shift(5)                                            
    if dt10p == True:
    #10 ticks
        X['da1_dt_10'] = data['Ask1_']-data.groupby('Date')['Ask1_'].shift(10)
        X['da2_dt_10'] = data['Ask2_']-data.groupby('Date')['Ask2_'].shift(10)
        X['da3_dt_10'] = data['Ask3_']-data.groupby('Date')['Ask3_'].shift(10)
        X['da4_dt_10'] = data['Ask4_']-data.groupby('Date')['Ask4_'].shift(10)
        X['da5_dt_10'] = data['Ask5_']-data.groupby('Date')['Ask5_'].shift(10)
        X['db1_dt_10'] = data['Bid1_']-data.groupby('Date')['Bid1_'].shift(10)
        X['db2_dt_10'] = data['Bid2_']-data.groupby('Date')['Bid2_'].shift(10)
        X['db3_dt_10'] = data['Bid3_']-data.groupby('Date')['Bid3_'].shift(10)
        X['db4_dt_10'] = data['Bid4_']-data.groupby('Date')['Bid4_'].shift(10)
        X['db5_dt_10'] = data['Bid5_']-data.groupby('Date')['Bid5_'].shift(10)
        #fill na to 0
        X[['da1_dt_10','da2_dt_10','da3_dt_10','da4_dt_10','da5_dt_10','db1_dt_10','db2_dt_10','db3_dt_10','db4_dt_10','db5_dt_10']] = X[['da1_dt_10','da2_dt_10','da3_dt_10','da4_dt_10','da5_dt_10','db1_dt_10','db2_dt_10','db3_dt_10','db4_dt_10','db5_dt_10']].fillna(value = 0)
    if dt10_pq == True:
    #10 tick
        X['da1_dt_10_pq'] = data['Ask1_value']-data.groupby('Date')['Ask1_value'].shift(10)
        X['da2_dt_10_pq'] = data['Ask2_value']-data.groupby('Date')['Ask2_value'].shift(10)
        X['da3_dt_10_pq'] = data['Ask3_value']-data.groupby('Date')['Ask3_value'].shift(10)
        X['da4_dt_10_pq'] = data['Ask4_value']-data.groupby('Date')['Ask4_value'].shift(10)
        X['da5_dt_10_pq'] = data['Ask5_value']-data.groupby('Date')['Ask5_value'].shift(10)
        X['db1_dt_10_pq'] = data['Bid1_value']-data.groupby('Date')['Bid1_value'].shift(10)
        X['db2_dt_10_pq'] = data['Bid2_value']-data.groupby('Date')['Bid2_value'].shift(10)
        X['db3_dt_10_pq'] = data['Bid3_value']-data.groupby('Date')['Bid3_value'].shift(10)
        X['db4_dt_10_pq'] = data['Bid4_value']-data.groupby('Date')['Bid4_value'].shift(10)
        X['db5_dt_10_pq'] = data['Bid5_value']-data.groupby('Date')['Bid5_value'].shift(10)                                           
    #quantity
    if dt1q == True:
    #1 tick
        X['daq1_dt_1'] = data['AskQty1_']-data.groupby('Date')['AskQty1_'].shift(1)
        X['daq2_dt_1'] = data['AskQty2_']-data.groupby('Date')['AskQty2_'].shift(1)
        X['daq3_dt_1'] = data['AskQty3_']-data.groupby('Date')['AskQty3_'].shift(1)
        X['daq4_dt_1'] = data['AskQty4_']-data.groupby('Date')['AskQty4_'].shift(1)
        X['daq5_dt_1'] = data['AskQty5_']-data.groupby('Date')['AskQty5_'].shift(1)
        X['dbq1_dt_1'] = data['BidQty1_']-data.groupby('Date')['BidQty1_'].shift(1)
        X['dbq2_dt_1'] = data['BidQty2_']-data.groupby('Date')['BidQty2_'].shift(1)
        X['dbq3_dt_1'] = data['BidQty3_']-data.groupby('Date')['BidQty3_'].shift(1)
        X['dbq4_dt_1'] = data['BidQty4_']-data.groupby('Date')['BidQty4_'].shift(1)
        X['dbq5_dt_1'] = data['BidQty5_']-data.groupby('Date')['BidQty5_'].shift(1)
        #fill na to 0
        X[['daq1_dt_1','daq2_dt_1','daq3_dt_1','daq4_dt_1','daq5_dt_1','dbq1_dt_1','dbq2_dt_1','dbq3_dt_1','dbq4_dt_1','dbq5_dt_1']] = X[['daq1_dt_1','daq2_dt_1','daq3_dt_1','daq4_dt_1','daq5_dt_1','dbq1_dt_1','dbq2_dt_1','dbq3_dt_1','dbq4_dt_1','dbq5_dt_1']].fillna(value = 0)
    if dt5q == True:
    #5 ticks
        X['daq1_dt_5'] = data['AskQty1_']-data.groupby('Date')['AskQty1_'].shift(5)
        X['daq2_dt_5'] = data['AskQty2_']-data.groupby('Date')['AskQty2_'].shift(5)
        X['daq3_dt_5'] = data['AskQty3_']-data.groupby('Date')['AskQty3_'].shift(5)
        X['daq4_dt_5'] = data['AskQty4_']-data.groupby('Date')['AskQty4_'].shift(5)
        X['daq5_dt_5'] = data['AskQty5_']-data.groupby('Date')['AskQty5_'].shift(5)
        X['dbq1_dt_5'] = data['BidQty1_']-data.groupby('Date')['BidQty1_'].shift(5)
        X['dbq2_dt_5'] = data['BidQty2_']-data.groupby('Date')['BidQty2_'].shift(5)
        X['dbq3_dt_5'] = data['BidQty3_']-data.groupby('Date')['BidQty3_'].shift(5)
        X['dbq4_dt_5'] = data['BidQty4_']-data.groupby('Date')['BidQty4_'].shift(5)
        X['dbq5_dt_5'] = data['BidQty5_']-data.groupby('Date')['BidQty5_'].shift(5)
        #fill na to 0
        X[['daq1_dt_5','daq2_dt_5','daq3_dt_5','daq4_dt_5','daq5_dt_5','dbq1_dt_5','dbq2_dt_5','dbq3_dt_5','dbq4_dt_5','dbq5_dt_5']] = X[['daq1_dt_5','daq2_dt_5','daq3_dt_5','daq4_dt_5','daq5_dt_5','dbq1_dt_5','dbq2_dt_5','dbq3_dt_5','dbq4_dt_5','dbq5_dt_5']].fillna(value = 0)
    if dt10q == True:
    #10 ticks
        X['daq1_dt_10'] = data['AskQty1_']-data.groupby('Date')['AskQty1_'].shift(10)
        X['daq2_dt_10'] = data['AskQty2_']-data.groupby('Date')['AskQty2_'].shift(10)
        X['daq3_dt_10'] = data['AskQty3_']-data.groupby('Date')['AskQty3_'].shift(10)
        X['daq4_dt_10'] = data['AskQty4_']-data.groupby('Date')['AskQty4_'].shift(10)
        X['daq5_dt_10'] = data['AskQty5_']-data.groupby('Date')['AskQty5_'].shift(10)
        X['dbq1_dt_10'] = data['BidQty1_']-data.groupby('Date')['BidQty1_'].shift(10)
        X['dbq2_dt_10'] = data['BidQty2_']-data.groupby('Date')['BidQty2_'].shift(10)
        X['dbq3_dt_10'] = data['BidQty3_']-data.groupby('Date')['BidQty3_'].shift(10)
        X['dbq4_dt_10'] = data['BidQty4_']-data.groupby('Date')['BidQty4_'].shift(10)
        X['dbq5_dt_10'] = data['BidQty5_']-data.groupby('Date')['BidQty5_'].shift(10)
        #fill na to 0
        X[['daq1_dt_10','daq2_dt_10','daq3_dt_10','daq4_dt_10','daq5_dt_10','dbq1_dt_10','dbq2_dt_10','dbq3_dt_10','dbq4_dt_10','dbq5_dt_10']] = X[['daq1_dt_10','daq2_dt_10','daq3_dt_10','daq4_dt_10','daq5_dt_10','dbq1_dt_10','dbq2_dt_10','dbq3_dt_10','dbq4_dt_10','dbq5_dt_10']].fillna(value = 0)    
    #features crosses
    #dt1
    if (cross1 == True) and (dt1p == True) and (dt1q == True):
        #ask
        X['aq1dt1_cross'] = X['da1_dt_1'] * X['daq1_dt_1']
        X['aq2dt1_cross'] = X['da2_dt_1'] * X['daq2_dt_1']
        X['aq3dt1_cross'] = X['da3_dt_1'] * X['daq3_dt_1']
        X['aq4dt1_cross'] = X['da4_dt_1'] * X['daq4_dt_1']
        X['aq5dt1_cross'] = X['da5_dt_1'] * X['daq5_dt_1']
        #bid
        X['bq1dt1_cross'] = X['db1_dt_1'] * X['dbq1_dt_1']
        X['bq2dt1_cross'] = X['db2_dt_1'] * X['dbq2_dt_1']
        X['bq3dt1_cross'] = X['db3_dt_1'] * X['dbq3_dt_1']
        X['bq4dt1_cross'] = X['db4_dt_1'] * X['dbq4_dt_1']
        X['bq5dt1_cross'] = X['db5_dt_1'] * X['dbq5_dt_1']
    #dt5
    if (cross5 == True) and (dt5p == True) and (dt5q == True):
        #ask
        X['aq1dt5_cross'] = X['da1_dt_5'] * X['daq1_dt_5']
        X['aq2dt5_cross'] = X['da2_dt_5'] * X['daq2_dt_5']
        X['aq3dt5_cross'] = X['da3_dt_5'] * X['daq3_dt_5']
        X['aq4dt5_cross'] = X['da4_dt_5'] * X['daq4_dt_5']
        X['aq5dt5_cross'] = X['da5_dt_5'] * X['daq5_dt_5']
        #bid
        X['bq1dt5_cross'] = X['db1_dt_5'] * X['dbq1_dt_5']
        X['bq2dt5_cross'] = X['db2_dt_5'] * X['dbq2_dt_5']
        X['bq3dt5_cross'] = X['db3_dt_5'] * X['dbq3_dt_5']
        X['bq4dt5_cross'] = X['db4_dt_5'] * X['dbq4_dt_5']
        X['bq5dt5_cross'] = X['db5_dt_5'] * X['dbq5_dt_5']
    #dt10
    if (cross10 == True) and (dt10p == True) and (dt10q == True):
        #ask
        X['aq1dt10_cross'] = X['da1_dt_10'] * X['daq1_dt_10']
        X['aq2dt10_cross'] = X['da2_dt_10'] * X['daq2_dt_10']
        X['aq3dt10_cross'] = X['da3_dt_10'] * X['daq3_dt_10']
        X['aq4dt10_cross'] = X['da4_dt_10'] * X['daq4_dt_10']
        X['aq5dt10_cross'] = X['da5_dt_10'] * X['daq5_dt_10']
        #bid
        X['bq1dt10_cross'] = X['db1_dt_10'] * X['dbq1_dt_10']
        X['bq2dt10_cross'] = X['db2_dt_10'] * X['dbq2_dt_10']
        X['bq3dt10_cross'] = X['db3_dt_10'] * X['dbq3_dt_10']
        X['bq4dt10_cross'] = X['db4_dt_10'] * X['dbq4_dt_10']
        X['bq5dt10_cross'] = X['db5_dt_10'] * X['dbq5_dt_10']
    df = pd.concat([data, X], axis = 1)
    df = df.dropna()
    df = df.reset_index(drop = True)
    X = X.dropna()
    X = X.reset_index(drop = True)
    return X, df

#normalize
def normalize(df):
    norm = df.apply(lambda x: x / (np.std(x)))
    return norm

#scale back to the true range
def true_range(df, x, response):
    X = x * np.std(df[response])
    return X

#rolling windows
def train_windows(df, response, ref_day=10, predict_day=1):
    X_train, Y_train = [], []
    for i in range(df.shape[0]-predict_day-ref_day+1):
        #扣掉response
        #記得最後一行要放response
        X_train.append(np.array(df.iloc[i:i+ref_day,:-1]))
        Y_train.append(np.array(df.iloc[i+ref_day-1:i+ref_day][response]))
    return np.array(X_train), np.array(Y_train)

#LSTM (when add the drop out layer in order to deal with overfitting)
def lstm_stock_model(shape):
    model = Sequential()
    #return sequence = True -> means the network will have long memory
    #first layer
    model.add(LSTM(256, input_shape=(shape[1], shape[2]), return_sequences=True))
    #first drop out layer(drop 50% of the previous one to avoid over-fitting)
    model.add(Dropout(0.5))
    #second layer
    model.add(LSTM(256, return_sequences=True))
    #second drop out layer
    model.add(Dropout(0.5))
    #add dense in every time step, dimension -> 1
    model.add(TimeDistributed(Dense(1)))
    #third drop out layer
    model.add(Dropout(0.5))
    #flatten: 多維 -> 一維
    model.add(Flatten())
    #forth drop out layer
    #model.add(Dropout(0.5))
    #dense layer
    model.add(Dense(10,activation='linear'))
    #1 -> output dimension
    model.add(Dense(1,activation='linear'))
    #change learning rate 0.001 -> 0.0005
    adam = keras.optimizers.Adam(lr=0.0005, beta_1=0.9, beta_2=0.999, decay=0.0, amsgrad=False)
    model.compile(loss="mean_squared_error", optimizer=adam,metrics=['mean_squared_error'])
    #using MSE
    #model.compile(loss="mean_squared_error", optimizer="adam",metrics=['mean_squared_error'])
    model.summary()
    return model

#record transaction
def record_transaction(para, date, tick, action_side, price, equity_value, transaction_history):
    info = {
                'Date': date,
                'tick': tick,
                'Action': action_side,
                'Price': price,
                'Equity_value': equity_value
            }
    if para in transaction_history:
        transaction_history[para].append(info)
    else:
        transaction_history.update({para: [info]})    
    
    return transaction_history

#performance
def performance(equity_value_, transaction):
    #return
    Return = equity_value_.iloc[-1][0]
    print('return:', Return)
    #vol
    Std = equity_value_.std()[0]
    print('std:', Std)
    #sharp ratio
    Sharp_ratio = (Return - 0.0052) / Std
    print('sharp ratio:', Sharp_ratio)
    #nums of trading
    Trades = len(transaction) / 2
    print('trades:', Trades)
    #odds ratio
    R = 0
    for i in range(len(transaction)):
        if transaction[i]['Equity_value'] > 0:
            R += 1
    Odd = R / (len(transaction) / 2)
    print('odds ratio:', Odd)
    #mdd
    Mdd = (equity_value_.cummax() - equity_value_).max()[0]
    print('mdd:', Mdd)
    #skew
    Skew = equity_value_.skew()[0]
    print('skewness:', Skew)
     
    return Return, Std, Sharp_ratio, Trades, Odd, Mdd, Skew

#model performance
def model_performance(y, f):
    tot = np.sum(np.square(y - np.mean(y)))
    res = np.sum(np.square(y - f))
    r_squared = 1 - res / tot
    mse = np.mean(np.square(y - f)) * (1 / len(y))
    print('MSE:', mse)
    print('r-squared:', r_squared)
    return r_squared

#strategy
def strategy(df_train, paras):
    #adding hard stop 
    #main strategy (train set)
    #record transaction
    transaction_history1 = {}
    #record equity values
    equity_value_1 = pd.DataFrame()
    #record each returns
    #each_return1 = pd.DataFrame()
    #record unrealized gain
    unrealized_1 = pd.DataFrame()
    for l in range(len(paras)):
        df_train['side'] = pd.DataFrame(data = np.zeros((len(df_train), 1)), index = df_train.index[:])
        for j in range(len(df_train)):
            if df_train['pred_return'].iloc[j] > paras[l][0]:
                #long side
                df_train['side'].iloc[j] = 1
            elif df_train['pred_return'].iloc[j] < paras[l][1]:
                #short side
                df_train['side'].iloc[j] = -1
        date_idx = df_train['Date'].unique()
        #紀錄已實現
        equity_value1 = pd.DataFrame()
        #紀錄未實現
        unrealized1 = pd.DataFrame()
        for k in date_idx:
            df = df_train[df_train.Date == k]
            #side = 0
            for i in range(len(df)):
                #first tick
                if i == 0:
                    #ask1 may be 0, which means 漲停 (買不到)
                    if (df['side'].iloc[i] == 1) and (df['Ask1'].iloc[i] != 0):
                        #side = 1
                        df['holding'].iloc[i] = 1
                        df['equity_value'].iloc[i] = 0
                        df['unrealized'].iloc[i] = 0
                        transaction_history1 = record_transaction(paras[l], k, i, 1, df['Ask1'].iloc[i], 0, transaction_history1)
                    elif (df['side'].iloc[i] == 1) and (df['Ask1'].iloc[i] == 0):
                        side = 0
                        df['equity_value'].iloc[i] = 0
                        df['unrealized'].iloc[i] = 0
                    elif df['side'].iloc[i] == -1:
                        #side = -1
                        df['holding'].iloc[i] = -1
                        df['equity_value'].iloc[i] = - 0.0015
                        df['unrealized'].iloc[i] = -0.0015
                        transaction_history1 = record_transaction(paras[l], k, i, -1, df['Bid1'].iloc[i], -0.0015, transaction_history1)
                    else:
                        #side = 0
                        df['holding'].iloc[i] = 0
                        df['equity_value'].iloc[i] = 0
                        df['unrealized'].iloc[i] = 0
                #last tick
                elif i == (len(df) - 1):
                    if df['holding'].iloc[i-1] == 1:
                        df['equity_value'].iloc[i] = (df['Bid1'].iloc[i] / transaction_history1[paras[l]][-1]['Price'] - 1) - 0.0015
                        df['unrealized'].iloc[i] = (df['Bid1'].iloc[i] / df['mid1'].iloc[i-1] - 1) - 0.0015 
                        transaction_history1 = record_transaction(paras[l], k, i, -1, df['Bid1'].iloc[i], df['equity_value'].iloc[i], transaction_history1)
                    #ask1 may be 0, which means 漲停 (買不到)
                    elif (df['holding'].iloc[i-1] == -1) and (df['Ask1'].iloc[i] != 0):
                        df['equity_value'].iloc[i] = (df['Ask1'].iloc[i] / transaction_history1[paras[l]][-1]['Price'] - 1) * (-1)
                        df['unrealized'].iloc[i] = (df['Ask1'].iloc[i] / df['mid1'].iloc[i-1] - 1) * (-1) 
                        transaction_history1 = record_transaction(paras[l], k, i, 1, df['Ask1'].iloc[i], df['equity_value'].iloc[i], transaction_history1)
                    #收盤如果漲停，空單結不掉，先用mid1結
                    elif (df['holding'].iloc[i-1] == -1) and (df['Ask1'].iloc[i] == 0):
                        df['equity_value'].iloc[i] = (df['mid1'].iloc[i] / transaction_history1[paras[l]][-1]['Price'] - 1) * (-1)
                        df['unrealized'].iloc[i] = (df['mid1'].iloc[i] / df['mid1'].iloc[i-1] - 1) * (-1) 
                        transaction_history1 = record_transaction(paras[l], k, i, 1, df['mid1'].iloc[i], df['equity_value'].iloc[i], transaction_history1)
                    else:
                        df['equity_value'].iloc[i] = 0
                        df['unrealized'].iloc[i] = 0
                else:
                    if df['holding'].iloc[i-1] == 1:
                        #hard stop 1 -1 = 0 -> 0
                        if (df_train['pred_return'].iloc[i]) < (paras[l][1] / 2): #or ((df['mid1'].iloc[i-1] / transaction_history1[paras[l]][-1]['Price'] - 1) > paras[l][2]):
                            df['holding'].iloc[i] = 0
                            df['equity_value'].iloc[i] = (df['Bid1'].iloc[i] / transaction_history1[paras[l]][-1]['Price'] - 1) - 0.0015
                            df['unrealized'].iloc[i] = (df['Bid1'].iloc[i] / df['mid1'].iloc[i-1] - 1) - 0.0015 
                            transaction_history1 = record_transaction(paras[l], k, i, -1, df['Bid1'].iloc[i], df['equity_value'].iloc[i], transaction_history1)
                        #change side 1 -2 -> -1
                        elif (df['side'].iloc[i] == -1): #and ((df['mid1'].iloc[i-1] / transaction_history1[paras[l]][-1]['Price'] - 1) > 0.01):
                            df['holding'].iloc[i] = -1
                            df['equity_value'].iloc[i] = (df['Bid1'].iloc[i] / transaction_history1[paras[l]][-1]['Price'] - 1) - (0.0015 * 2)
                            df['unrealized'].iloc[i] = (df['Bid1'].iloc[i] / df['mid1'].iloc[i-1] - 1) - (0.0015 * 2) 
                            transaction_history1 = record_transaction(paras[l], k, i, -1, df['Bid1'].iloc[i], df['equity_value'].iloc[i] + 0.0015, transaction_history1)
                            transaction_history1 = record_transaction(paras[l], k, i, -1, df['Bid1'].iloc[i], -0.0015, transaction_history1)
                        else:
                            df['holding'].iloc[i] = df['holding'].iloc[i-1]
                            df['equity_value'].iloc[i] = 0
                            df['unrealized'].iloc[i] = (df['mid1'].iloc[i] / df['mid1'].iloc[i-1] - 1) 
                    elif df['holding'].iloc[i-1] == -1:
                        #hard stop -1 +1 -> 0
                        if (df['Ask1'].iloc[i] != 0) and ((df_train['pred_return'].iloc[i]) > (paras[l][0] / 2)): #or ((df['mid1'].iloc[i-1] / transaction_history1[paras[l]][-1]['Price'] - 1) * (-1) > paras[l][2])):
                            df['holding'].iloc[i] = 0
                            df['equity_value'].iloc[i] = (df['Ask1'].iloc[i] / transaction_history1[paras[l]][-1]['Price'] - 1) * (-1)
                            df['unrealized'].iloc[i] = (df['Ask1'].iloc[i] / df['mid1'].iloc[i-1] - 1) * (-1) 
                            transaction_history1 = record_transaction(paras[l], k, i, 1, df['Ask1'].iloc[i], df['equity_value'].iloc[i], transaction_history1)
                        #change side -1 +2 -> 1
                        elif (df['Ask1'].iloc[i] != 0) and (df['side'].iloc[i] == 1): #and ((df['mid1'].iloc[i-1] / transaction_history1[paras[l]][-1]['Price'] - 1) * (-1) > 0.01):
                            df['holding'].iloc[i] = 1
                            df['equity_value'].iloc[i] = (df['Ask1'].iloc[i] / transaction_history1[paras[l]][-1]['Price'] - 1) * (-1)
                            df['unrealized'].iloc[i] = (df['Ask1'].iloc[i] / df['mid1'].iloc[i-1] - 1) * (-1) 
                            transaction_history1 = record_transaction(paras[l], k, i, 1, df['Ask1'].iloc[i], df['equity_value'].iloc[i], transaction_history1)
                            transaction_history1 = record_transaction(paras[l], k, i, 1, df['Ask1'].iloc[i], 0, transaction_history1)
                        else:
                            df['holding'].iloc[i] = df['holding'].iloc[i-1]
                            df['equity_value'].iloc[i] = 0
                            df['unrealized'].iloc[i] = (df['mid1'].iloc[i] / df['mid1'].iloc[i-1] - 1) * (-1) 
                    elif df['holding'].iloc[i-1] == 0:
                        if (df['side'].iloc[i] == 1) and (df['Ask1'].iloc[i] != 0):
                            df['holding'].iloc[i] = 1
                            df['equity_value'].iloc[i] = 0
                            df['unrealized'].iloc[i] = 0
                            transaction_history1 = record_transaction(paras[l], k, i, 1, df['Ask1'].iloc[i], 0, transaction_history1)
                        elif df['side'].iloc[i] == -1:
                            df['holding'].iloc[i] = -1
                            df['equity_value'].iloc[i] = - 0.0015
                            df['unrealized'].iloc[i] = -0.0015 
                            transaction_history1 = record_transaction(paras[l], k, i, -1, df['Bid1'].iloc[i], -0.0015, transaction_history1)
                        else:
                            df['holding'].iloc[i] = 0
                            df['equity_value'].iloc[i] = 0
                            df['unrealized'].iloc[i] = 0
            equity_value1 = pd.concat([equity_value1, df[['equity_value']]])
            #if k == date_idx[0]:
            unrealized1 = pd.concat([unrealized1, df[['unrealized']]])
            #else:
            #    unrealized_1 = pd.concat([unrealized_1, df['unrealized'] + unrealized_1.iloc[len(unrealized_1) - 1]])
        #concat
        equity_value_1 = pd.concat([equity_value_1, np.cumsum(equity_value1)], axis = 1)
        #each_return1 = pd.concat([each_return1, equity_value1], axis = 1)
    
        unrealized_1 = pd.concat([unrealized_1, unrealized1], axis = 1)
    return equity_value_1, unrealized_1, transaction_history1

#unrealized return
def unrealized(equity_value_1, unrealized_1):
    idx = []
    for i in range(len(equity_value_1) - 1):
        if (equity_value_1.iloc[i] != equity_value_1.iloc[i+1]) and (equity_value_1.iloc[i+1] != -0.0015):
            idx.append(i+1)
    unrealized_return = pd.DataFrame()
    for i in range(len(idx)):
        if i == 0:
            unrealized_1.iloc[[idx[i]]] = equity_value_1.iloc[[idx[i]]]
            unrealized_return = pd.concat([unrealized_return, np.cumsum(unrealized_1.iloc[:idx[i]])])
        else:
            unrealized_1.iloc[[idx[i]]] = equity_value_1.iloc[[idx[i]]]
            unrealized_return = pd.concat([unrealized_return, np.cumsum(unrealized_1.iloc[idx[i-1]:idx[i]])])
    return unrealized_return

In [4]:
data_train['Date'] = pd.to_datetime(data_train['TxTime'])
data_train['Date'] = [str(x)[0:10] for x in data_train['Date']]
data_test['Date'] = pd.to_datetime(data_test['TxTime'])
data_test['Date'] = [str(x)[0:10] for x in data_test['Date']]

In [5]:
X_in, data_train1 = AlphaGenerate_Tick(data_train,ma=True,macd=True,
                                      spread_pq=True,price_diff_pq=True,
                                      dt5_pq=True,dt10_pq=True,
                                      dt5q=True,dt10q=True,dt5p=True,dt10p=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/

In [6]:
#Y
tick_N = 10
y=(data_train1['Ask1']+data_train1['Bid1'])/2
y[data_train1['Bid1']==0]=data_train1['Ask1']
y[data_train1['Ask1']==0]=data_train1['Bid1']
y=y.shift(-tick_N)/y-1
y[data_train1['Date']!=data_train1['Date'].shift(-tick_N)]=0.
y.fillna(0,inplace=True)
y.replace([np.inf, -np.inf], 0,inplace=True)
#X_in_normal['return_10t'] = y
X_in['return_10t'] = y
X_in_normal = normalize(X_in)

In [7]:
#test set
X_out, data_test1 = AlphaGenerate_Tick(data_test,ma=True,macd=True,
                                      spread_pq=True,price_diff_pq=True,
                                      dt5_pq=True,dt10_pq=True,
                                      dt5q=True,dt10q=True,dt5p=True,dt10p=True)
#X_out_normal = normalize(X_out.iloc[:,34:])
#Y
y=(data_test1['Ask1']+data_test1['Bid1'])/2
y[data_test1['Bid1']==0]=data_test1['Ask1']
y[data_test1['Ask1']==0]=data_test1['Bid1']
y=y.shift(-tick_N)/y-1
y[data_test1['Date']!=data_test1['Date'].shift(-tick_N)]=0.
y.fillna(0,inplace=True)
y.replace([np.inf, -np.inf], 0,inplace=True)
#X_out_normal['return_10t'] = y
X_out['return_10t'] = y
X_out_normal = normalize(X_out)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#r

In [8]:
train_pred_return = pd.read_csv('./data/train_2327_mix1.csv')
test_pred_return  = pd.read_csv('./data/test_2327_mix1.csv')

In [9]:
train_pred_return = train_pred_return.drop(['Unnamed: 0'], axis = 1)
train_pred_return.columns = ['pred_return']
train_pred_return

Unnamed: 0,pred_return
0,0.000323
1,0.000084
2,-0.000058
3,-0.000039
4,-0.000085
...,...
736299,0.000070
736300,0.000051
736301,0.000033
736302,0.000027


In [10]:
test_pred_return = test_pred_return.drop(['Unnamed: 0'], axis = 1)
test_pred_return.columns = ['pred_return']
test_pred_return

Unnamed: 0,pred_return
0,-0.000162
1,-0.000256
2,-0.000278
3,-0.000264
4,-0.000201
...,...
337685,0.000030
337686,-0.000015
337687,-0.000178
337688,-0.000180


In [11]:
df_train = X_in.iloc[10:]
df_train = df_train.reset_index(drop = True)
df_train = pd.concat([df_train, train_pred_return], axis = 1)
df_train['Date'] = data_train1['Date'].iloc[10:].values
df_train['Ask1'] = data_train1['Ask1'].iloc[10:].values
df_train['Bid1'] = data_train1['Bid1'].iloc[10:].values
df_train['mid1'] = data_train1['mid1'].iloc[10:].values

In [12]:
df_test = X_out.iloc[10:]
df_test = df_test.reset_index(drop = True)
df_test = pd.concat([df_test, test_pred_return], axis = 1)
df_test['Date'] = data_test1['Date'].iloc[10:].values
df_test['Ask1'] = data_test1['Ask1'].iloc[10:].values
df_test['Bid1'] = data_test1['Bid1'].iloc[10:].values
df_test['mid1'] = data_test1['mid1'].iloc[10:].values

In [13]:
df_test

Unnamed: 0,MA50_ask1,MA50_ask2,MA50_ask3,MA50_ask4,MA50_ask5,MA50_bid1,MA50_bid2,MA50_bid3,MA50_bid4,MA50_bid5,...,dbq2_dt_10,dbq3_dt_10,dbq4_dt_10,dbq5_dt_10,return_10t,pred_return,Date,Ask1,Bid1,mid1
0,-878.08,-1978.56,0.00,0.00,0.00,3370.20,-1470.60,0.00,-68.20,-156.63,...,5.0,0.0,0.0,0.0,-0.000729,-0.000162,2020-04-24,343.0,342.5,342.75
1,-1015.28,-1930.47,0.00,0.00,0.00,3301.70,-1771.56,0.00,-61.38,190.68,...,6.0,0.0,0.0,-1.0,-0.000729,-0.000256,2020-04-24,343.0,342.5,342.75
2,-1152.48,-1882.38,0.00,0.00,0.00,3226.35,-1730.52,0.00,279.62,197.49,...,6.0,0.0,-1.0,-1.0,-0.000729,-0.000278,2020-04-24,343.0,342.5,342.75
3,-1310.26,-1834.29,0.00,0.00,0.00,-21015.80,-1689.48,0.00,279.62,204.30,...,6.0,0.0,-1.0,-1.0,-0.000729,-0.000264,2020-04-24,343.0,342.5,342.75
4,-1269.10,-1786.20,0.00,0.00,0.00,-7489.47,2723.34,-9008.16,11986.70,-33764.51,...,-12.0,27.0,-36.0,101.0,0.000000,-0.000201,2020-04-24,343.0,342.0,342.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
337685,-24879.72,19197.40,-2065.44,-2969.03,-8518.65,3493.20,0.00,-24.54,-261.44,0.00,...,0.0,0.0,0.0,0.0,0.000000,0.000030,2020-05-05,411.0,410.0,410.50
337686,-24476.69,18835.98,-2032.19,-2910.96,-8344.80,58088.31,-41402.06,19206.34,-23112.26,37572.71,...,103.0,-48.0,57.0,-94.0,0.000000,-0.000015,2020-05-05,411.0,410.5,410.75
337687,-23679.09,18474.56,-1998.94,-2852.89,-8170.95,2279.61,844.94,-400.48,221.41,-766.79,...,0.0,0.0,0.0,0.0,0.000000,-0.000178,2020-05-05,411.0,410.0,410.50
337688,5708.16,-418.44,873.37,-353.15,521.55,2230.41,844.94,-392.30,229.58,-766.79,...,0.0,0.0,0.0,0.0,0.000000,-0.000180,2020-05-05,410.5,410.0,410.25


In [14]:
df_train

Unnamed: 0,MA50_ask1,MA50_ask2,MA50_ask3,MA50_ask4,MA50_ask5,MA50_bid1,MA50_bid2,MA50_bid3,MA50_bid4,MA50_bid5,...,dbq2_dt_10,dbq3_dt_10,dbq4_dt_10,dbq5_dt_10,return_10t,pred_return,Date,Ask1,Bid1,mid1
0,-1529.15,-254.96,2677.03,-6359.55,2317.59,-963.80,329.17,-133.92,2865.60,-4483.86,...,0.0,0.0,0.0,1.0,0.002103,0.000323,2020-03-23,238.5,237.0,237.75
1,-1495.52,-230.67,2610.51,-6196.14,2241.20,-532.26,277.33,-138.47,2621.36,-4136.04,...,0.0,0.0,0.0,1.0,-0.001052,0.000084,2020-03-23,238.5,237.0,237.75
2,-1461.89,-440.60,2543.99,-6032.73,2164.81,-579.45,225.49,-143.02,2377.12,-3788.22,...,0.0,0.0,0.0,0.0,-0.001052,-0.000058,2020-03-23,238.5,237.0,237.75
3,1601.73,532.70,-1033.87,2835.04,-6847.71,-626.64,173.65,-147.57,2132.88,-3670.70,...,0.0,0.0,0.0,1.0,0.000000,-0.000039,2020-03-23,238.0,237.0,237.50
4,1802.32,3581.97,-84.50,-690.53,1962.63,-673.83,121.81,-152.12,1888.64,-3318.18,...,0.0,0.0,0.0,2.0,0.002107,-0.000085,2020-03-23,237.5,237.0,237.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
736299,34.50,2660.35,553.60,0.00,728.70,268.71,-976.96,-178.62,-603.68,0.00,...,1.0,0.0,1.0,0.0,0.000000,0.000070,2020-04-23,345.0,344.5,344.75
736300,34.50,2584.34,525.92,0.00,700.94,-179.14,-928.80,-171.75,-583.10,0.00,...,0.0,0.0,1.0,0.0,0.000000,0.000051,2020-04-23,345.0,344.5,344.75
736301,34.50,2508.33,166.08,0.00,673.18,-282.49,-880.64,-164.88,-562.52,0.00,...,0.0,0.0,1.0,0.0,0.000000,0.000033,2020-04-23,345.0,344.5,344.75
736302,34.50,2093.73,152.24,0.00,645.42,-289.38,-832.48,-158.01,-541.94,0.00,...,0.0,0.0,1.0,0.0,0.000000,0.000027,2020-04-23,345.0,344.5,344.75


In [15]:
#已實現損益
df_train['equity_value'] = pd.DataFrame(data = np.zeros((len(df_train), 1)), index = df_train.index[:])
#未實現損益
df_train['unrealized'] = pd.DataFrame(data = np.zeros((len(df_train), 1)), index = df_train.index[:])
df_train['holding'] = pd.DataFrame(data = np.zeros((len(df_train), 1)), index = df_train.index[:])
#已實現損益
df_test['equity_value'] = pd.DataFrame(data = np.zeros((len(df_test), 1)), index = df_test.index[:])
#未實現損益
df_test['unrealized'] = pd.DataFrame(data = np.zeros((len(df_test), 1)), index = df_test.index[:])
df_test['holding'] = pd.DataFrame(data = np.zeros((len(df_test), 1)), index = df_test.index[:])

In [16]:
#setting parameters
import itertools 
long_side, short_side= [0.001, 0.0013, 0.0015], [-0.001, -0.0013, -0.0015]
paras = list(itertools.product(long_side, short_side))
paras

[(0.001, -0.001),
 (0.001, -0.0013),
 (0.001, -0.0015),
 (0.0013, -0.001),
 (0.0013, -0.0013),
 (0.0013, -0.0015),
 (0.0015, -0.001),
 (0.0015, -0.0013),
 (0.0015, -0.0015)]

In [17]:
equity_value_1, unrealized_1, transaction_history1 = strategy(df_train, paras)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#r

In [18]:
equity_value_1.columns = paras

In [19]:
equity_value_1.iloc[-1]

(0.001, -0.001)     -0.438149
(0.001, -0.0013)    -0.131138
(0.001, -0.0015)    -0.012558
(0.0013, -0.001)    -0.282503
(0.0013, -0.0013)   -0.088115
(0.0013, -0.0015)    0.003774
(0.0015, -0.001)    -0.216227
(0.0015, -0.0013)   -0.079508
(0.0015, -0.0015)   -0.020943
Name: 736303, dtype: float64

In [20]:
performance(equity_value_1[[(0.0013, -0.0015)]], transaction_history1[(0.0013, -0.0015)])

return: 0.0037737286534609174
std: 0.008048206028795564
sharp ratio: -0.17721605801790438
trades: 31.0
odds ratio: 0.45161290322580644
mdd: 0.02889563878534064
skewness: 0.2360253838722485


(0.0037737286534609174,
 0.008048206028795564,
 -0.17721605801790438,
 31.0,
 0.45161290322580644,
 0.02889563878534064,
 0.2360253838722485)

In [21]:
np.sum(df_test['pred_return'] > 0.0013)

3

In [22]:
np.sum(df_test['pred_return'] < -0.0015)

0

In [23]:
profit_col = [(0.0013, -0.0015)]
equity_value_2, unrealized_2, transaction_history2 = strategy(df_test, profit_col)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#r

In [24]:
equity_value_2

Unnamed: 0,equity_value
0,0.000000
1,0.000000
2,0.000000
3,0.000000
4,0.000000
...,...
337685,-0.005336
337686,-0.005336
337687,-0.005336
337688,-0.005336
