# References

* http://www.sciencedirect.com/science/article/pii/S0925231203003722
* http://www.sciencedirect.com/science/article/pii/S0957417400000270
* Technical Analysis of Stock Trends, Robert D. Edwards and John Magee
* https://www.jbs.cam.ac.uk/fileadmin/user_upload/research/workingpapers/wp0030.pdf
* http://www.sciencedirect.com/science/article/pii/0261560692900483
* https://www.quantopian.com/posts/technical-analysis-indicators-without-talib-code

In [250]:
%matplotlib inline

%load_ext autoreload
%autoreload 2

%load_ext version_information
%version_information numpy, pandas

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The version_information extension is already loaded. To reload it, use:
  %reload_ext version_information


Software,Version
Python,3.5.1 64bit [GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
IPython,4.1.2
OS,Linux 3.13.0 86 generic x86_64 with debian jessie sid
numpy,1.10.4
pandas,0.18.0
Fri May 20 18:14:45 2016 BST,Fri May 20 18:14:45 2016 BST


In [48]:
%matplotlib inline

import ast
import sys

import numpy as np
import pandas as pd

# Always print arrays in full. A NaN threshold is rejected by current NumPy
# releases (ValueError); sys.maxsize is the documented "never summarise" value.
np.set_printoptions(threshold=sys.maxsize)

# Technical Indicator Functions

In [241]:
def stoch_K(close, window):
    '''Fast stochastic oscillator %K of a price series.

    The position of the current price inside the min/max range of the
    last `window` observations, scaled to 0-100.

    Input:
    close  -- pandas Series/DataFrame of prices
    window -- number of observations in the look-back window

    Output:
    %K -- same shape as close; NaN until a full window is available
          and wherever the window's range is zero
    '''

    roller = close.rolling(window, center = False)
    lowest = roller.min()
    price_range = roller.max() - lowest

    return 100 * (close - lowest) / price_range

def stoch_D(K, window):
    '''Stochastic oscillator %D: the simple moving average of %K.

    Input:
    K      -- pandas Series/DataFrame holding %K values
    window -- number of observations averaged

    Output:
    %D -- same shape as K; NaN until a full window is available
    '''

    smoother = K.rolling(window, center = False)

    return smoother.mean()

def slow_D(D, window):
    '''Slow stochastic oscillator %D: the simple moving average of %D.

    Input:
    D      -- pandas Series/DataFrame holding %D values
    window -- number of observations averaged

    Output:
    Slow %D -- same shape as D; NaN until a full window is available
    '''

    averaged = D.rolling(window, center = False).mean()

    return averaged

def momentum(close, window):
    '''Calculates the momentum: last minus first value of each rolling window.

    Input:
    close  -- pandas Series/DataFrame of prices
    window -- number of observations in the window

    Output:
    Momentum -- same shape as close; NaN until a full window is available
    '''

    def last_minus_first(values):
        return values[-1] - values[0]

    # raw = True hands each window over as a NumPy array, so [-1] and [0] are
    # positional. Without it, recent pandas passes a Series and the integer
    # keys are looked up as index labels (KeyError on an integer index).
    return close.rolling(window, center = False).apply(last_minus_first, raw = True)

def roc(close, window):
    '''Calculates the rate of change as a percentage ratio.

    The last value of each rolling window divided by its first value,
    times 100 (so 100 means "unchanged").

    Input:
    close  -- pandas Series/DataFrame of prices
    window -- number of observations in the window

    Output:
    ROC -- same shape as close; NaN until a full window is available
    '''

    def last_over_first(values):
        return values[-1] / values[0]

    # raw = True hands each window over as a NumPy array, so [-1] and [0] are
    # positional. Without it, recent pandas passes a Series and the integer
    # keys are looked up as index labels (KeyError on an integer index).
    return 100 * close.rolling(window, center = False).apply(last_over_first, raw = True)

def lw_R(close, window):
    '''Larry Williams %R computed from a single price series.

    Distance of the current price from the rolling maximum, relative to
    the rolling max-min range, scaled to 0-100.

    Input:
    close  -- pandas Series/DataFrame of prices
    window -- number of observations in the look-back window

    Output:
    %R -- same shape as close; NaN until a full window is available
          and wherever the window's range is zero
    '''

    roller = close.rolling(window, center = False)
    highest = roller.max()
    price_range = highest - roller.min()

    return 100 * (highest - close) / price_range
    
def ad_osc(close, window):
    '''Calculates the accumulation/distribution oscillator.

    (rolling high - previous price) / (rolling high - rolling low), where the
    previous price is the second-to-last value of each window.

    Input:
    close  -- pandas Series/DataFrame of prices
    window -- number of observations in the window; must be at least 2
              because the second-to-last element of each window is read

    Output:
    A/D oscillator -- same shape as close; NaN until a full window is
                      available and wherever the window's range is zero
    '''

    roller = close.rolling(window, center = False)
    low = roller.min()
    high = roller.max()
    # raw = True hands each window over as a NumPy array, so [-2] is
    # positional. Without it, recent pandas passes a Series and the integer
    # key is looked up as an index label (KeyError on an integer index).
    prev_close = roller.apply(lambda values: values[-2], raw = True)

    return (high - prev_close) / (high - low)

def disp(close, window):
    '''Disparity index: the price as a percentage of its moving average.

    Input:
    close  -- pandas Series/DataFrame of prices
    window -- number of observations in the moving average

    Output:
    Disparity -- same shape as close; NaN until a full window is available
    '''

    moving_average = close.rolling(window, center = False).mean()
    scaled_price = 100 * close

    return scaled_price / moving_average

def oscp(close, window1, window2):
    '''Price oscillator: relative gap between two moving averages.

    Computes (MA(window1) - MA(window2)) / MA(window1).

    Input:
    close   -- pandas Series/DataFrame of prices
    window1 -- size of the first moving-average window
    window2 -- size of the second moving-average window

    Output:
    OSCP -- same shape as close; NaN until both windows are full
    '''

    first_ma = close.rolling(window1, center = False).mean()
    second_ma = close.rolling(window2, center = False).mean()
    gap = first_ma - second_ma

    return gap / first_ma

def rsi(close, window):
    '''Relative strength index based on simple rolling means.

    Input:
    close  -- pandas Series/DataFrame of prices
    window -- number of price changes averaged

    Output:
    RSI -- same shape as close, 100 - 100 / (1 + RS) where RS is the mean
           gain divided by the absolute mean loss over the window
    '''

    delta = close.diff()
    # clip keeps NaN as NaN, exactly like masking the unwanted sign with 0.
    gains = delta.clip(lower = 0)
    losses = delta.clip(upper = 0)

    mean_gain = gains.rolling(window, center = False).mean()
    mean_loss = losses.rolling(window, center = False).mean().abs()
    RS = mean_gain / mean_loss

    return 100 - (100 / (1 + RS))

def cci(close, window):
    '''Commodity-channel-style index computed from a single price series.

    The "typical price" is the mean of the price and its rolling min and max.
    The result is its deviation from its own rolling mean, divided by its
    rolling standard deviation (no 0.015 constant is applied).

    Input:
    close  -- pandas Series/DataFrame of prices
    window -- number of observations used for every rolling statistic

    Output:
    CCI -- same shape as close; NaN until both nested windows are full
    '''

    price_roller = close.rolling(window, center = False)
    typical = (close + price_roller.min() + price_roller.max()) / 3

    typical_roller = typical.rolling(window, center = False)
    centre = typical_roller.mean()
    spread = typical_roller.std()

    return (typical - centre) / spread

# Feature Engineering of Feature Set 1 - simple price and volume features

In [158]:
# Raw limit-order-book snapshots; the first (unnamed) CSV column holds the timestamps.
lob_data = pd.read_csv(filepath_or_buffer = '../btc-data/BTC_LOB_collected.csv')

In [159]:
# Index the snapshots by timestamp. set_index returns a new frame, so the raw
# lob_data frame is left untouched and this cell can be re-run safely.
lob_features10 = lob_data.set_index('Unnamed: 0')
lob_features10.index = pd.to_datetime(lob_features10.index)

# Both sides of the book are stored as dict literals ({price: volume}); parse them into dicts.
lob_features10['asks'] = lob_features10['asks'].map(ast.literal_eval)
lob_features10['bids'] = lob_features10['bids'].map(ast.literal_eval)

In [160]:
# Depth totals: sum of the volumes resting on each side of the book.
lob_features10['total ask volume'] = lob_features10['asks'].map(lambda book: sum(book.values()))
lob_features10['total bid volume'] = lob_features10['bids'].map(lambda book: sum(book.values()))

# Best quotes: the dict keys are prices, so the lowest ask / highest bid key is the best quote.
lob_features10['ask price'] = lob_features10['asks'].map(lambda book: min(book.keys()))
lob_features10['bid price'] = lob_features10['bids'].map(lambda book: max(book.keys()))

In [161]:
# Top-of-book spread and mid price.
lob_features10['bid-ask spread'] = lob_features10['ask price'] - lob_features10['bid price']
lob_features10['mid price'] = (lob_features10['ask price'] + lob_features10['bid price'])/2

# Price range covered by the visible levels on each side of the book.
lob_features10['ask price spread'] = lob_features10['asks'].map(max) - lob_features10['ask price']
lob_features10['bid price spread'] = lob_features10['bid price'] - lob_features10['bids'].map(min)

# Per-level means: divide by the number of levels actually present in each
# snapshot instead of assuming every side always holds exactly 20 levels.
lob_features10['mean ask volume'] = lob_features10['total ask volume'] / lob_features10['asks'].map(len)
lob_features10['mean bid volume'] = lob_features10['total bid volume'] / lob_features10['bids'].map(len)
lob_features10['mean ask price'] = lob_features10['asks'].map(sum) / lob_features10['asks'].map(len)
lob_features10['mean bid price'] = lob_features10['bids'].map(sum) / lob_features10['bids'].map(len)

In [162]:
# Preview only the first rows instead of rendering the whole frame.
lob_features10.head(10)

Unnamed: 0_level_0,asks,bids,total ask volume,total bid volume,ask price,bid price,bid-ask spread,mid price,ask price spread,bid price spread,mean ask volume,mean bid volume,mean ask price,mean bid price
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2016-01-01 00:00:00,"{432.84: 39.75, 432.36: 6.9631, 432.83: 9.2466...","{429.31: 3.88, 429.33: 39.75, 429.47: 32.0333,...",318.632799,387.595528,430.89,429.84,1.05,430.365,2.11,1.52,15.931640,19.379776,432.1405,429.0090
2016-01-01 00:00:10,"{432.88: 41.73, 432.48: 13.8492, 432.99: 9.257...","{429.38: 16.114, 429.2: 48.9702, 429.33: 39.75...",318.629099,323.428038,430.89,429.84,1.05,430.365,2.11,1.19,15.931455,16.171402,432.1875,429.1115
2016-01-01 00:00:20,"{432.64: 6.9491, 432.52: 1.17, 432.84: 39.75, ...","{429.41: 9.2521, 429.33: 39.75, 429.01: 14.836...",290.919269,354.135003,430.89,429.86,1.03,430.375,2.44,1.54,14.545963,17.706750,432.2465,429.0100
2016-01-01 00:00:30,"{432.11: 4.635, 432.36: 6.9556000000000004, 43...","{428.65: 0.02799486, 429.33: 39.75, 429.35: 16...",232.153855,172.031328,430.89,429.89,1.00,430.390,2.80,1.56,11.607693,8.601566,432.3350,429.0750
2016-01-01 00:00:40,"{432.11: 4.635, 432.36: 6.9556000000000004, 43...","{429.49: 32.0333, 428.65: 0.02799486, 428.83: ...",290.952369,322.855038,430.89,429.90,0.99,430.395,2.44,1.25,14.547618,16.142752,432.2360,429.1115
2016-01-01 00:00:50,"{432.11: 4.635, 432.36: 6.9556000000000004, 43...","{428.65: 0.02799486, 429.45: 39.75, 429.28: 0....",290.952369,287.730638,430.89,429.90,0.99,430.395,2.44,1.25,14.547618,14.386532,432.2360,429.1010
2016-01-01 00:01:00,"{432.11: 4.635, 432.36: 6.9556000000000004, 43...","{428.65: 0.02799486, 429.2: 48.9764, 429.01: 1...",290.871738,374.632128,430.89,429.90,0.99,430.395,2.44,1.58,14.543587,18.731606,432.2360,428.9660
2016-01-01 00:01:10,"{432.11: 4.635, 432.35: 6.9594000000000005, 43...","{429.45: 39.75, 429.47: 14.916, 428.65: 0.0279...",290.875538,322.244428,430.89,429.90,0.99,430.395,2.44,1.55,14.543777,16.112221,432.2355,429.0825
2016-01-01 00:01:20,"{432.11: 4.635, 432.35: 6.8916, 433.0: 0.1, 43...","{429.02: 29.13, 429.91: 32.0333, 429.47: 2.35,...",279.887738,309.666428,430.89,429.91,0.98,430.400,2.32,1.56,13.994387,15.483321,432.2295,429.1055
2016-01-01 00:01:30,"{432.95: 13.8492, 432.37: 6.913, 433.17: 1.307...","{429.02: 29.13, 429.45: 39.75, 429.01: 14.836,...",285.901138,307.380028,430.89,429.92,0.97,430.405,2.44,1.57,14.295057,15.369001,432.3270,429.0700


In [163]:
# The parsed order-book dicts are not written to the CSV. errors = 'ignore'
# keeps this cell re-runnable once the columns have already been removed.
lob_features10 = lob_features10.drop(['asks', 'bids'], axis = 1, errors = 'ignore')
lob_features10.to_csv(path_or_buf='../btc-data/BTC_LOB_simple_10s.csv')

In [164]:
# Down-sample by keeping only the snapshots that fall exactly on a coarser
# time grid spanning the first to the last 10s timestamp. Grid points with no
# matching snapshot become all-NaN rows.
first_stamp = lob_features10.index[0]
last_stamp = lob_features10.index[-1]

lob_features30 = lob_features10.reindex(pd.date_range(start = first_stamp, end = last_stamp, freq = '30s'))

lob_features60 = lob_features10.reindex(pd.date_range(start = first_stamp, end = last_stamp, freq = '60s'))

lob_features300 = lob_features10.reindex(pd.date_range(start = first_stamp, end = last_stamp, freq = '300s'))

lob_features600 = lob_features10.reindex(pd.date_range(start = first_stamp, end = last_stamp, freq = '600s'))

In [171]:
# Preview only the first rows instead of rendering the whole frame.
lob_features30.head(10)

Unnamed: 0,total ask volume,total bid volume,ask price,bid price,bid-ask spread,mid price,ask price spread,bid price spread,mean ask volume,mean bid volume,mean ask price,mean bid price
2016-01-01 00:00:00,318.632799,387.595528,430.89,429.84,1.05,430.365,2.11,1.52,15.931640,19.379776,432.1405,429.0090
2016-01-01 00:00:30,232.153855,172.031328,430.89,429.89,1.00,430.390,2.80,1.56,11.607693,8.601566,432.3350,429.0750
2016-01-01 00:01:00,290.871738,374.632128,430.89,429.90,0.99,430.395,2.44,1.58,14.543587,18.731606,432.2360,428.9660
2016-01-01 00:01:30,285.901138,307.380028,430.89,429.92,0.97,430.405,2.44,1.57,14.295057,15.369001,432.3270,429.0700
2016-01-01 00:02:00,312.929338,327.741478,430.89,429.95,0.94,430.420,2.28,1.30,15.646467,16.387074,432.3600,429.1935
2016-01-01 00:02:30,312.832581,326.718378,430.89,430.05,0.84,430.470,2.28,1.40,15.641629,16.335919,432.3450,429.2110
2016-01-01 00:03:00,279.716681,336.122423,430.89,430.09,0.80,430.490,2.28,1.41,13.985834,16.806121,432.3365,429.2315
2016-01-01 00:03:30,279.744681,306.098773,430.89,430.14,0.75,430.515,2.28,1.44,13.987234,15.304939,432.3665,429.3355
2016-01-01 00:04:00,279.871681,295.116008,430.89,430.17,0.72,430.530,2.33,1.52,13.993584,14.755800,432.3890,429.2450
2016-01-01 00:04:30,231.934879,179.662946,430.89,430.19,0.70,430.540,2.61,2.07,11.596744,8.983147,432.4435,428.9195


In [172]:
# Preview only the first rows instead of rendering the whole frame.
lob_features60.head(10)

Unnamed: 0,total ask volume,total bid volume,ask price,bid price,bid-ask spread,mid price,ask price spread,bid price spread,mean ask volume,mean bid volume,mean ask price,mean bid price
2016-01-01 00:00:00,318.632799,387.595528,430.89,429.84,1.05,430.365,2.11,1.52,15.931640,19.379776,432.1405,429.0090
2016-01-01 00:01:00,290.871738,374.632128,430.89,429.90,0.99,430.395,2.44,1.58,14.543587,18.731606,432.2360,428.9660
2016-01-01 00:02:00,312.929338,327.741478,430.89,429.95,0.94,430.420,2.28,1.30,15.646467,16.387074,432.3600,429.1935
2016-01-01 00:03:00,279.716681,336.122423,430.89,430.09,0.80,430.490,2.28,1.41,13.985834,16.806121,432.3365,429.2315
2016-01-01 00:04:00,279.871681,295.116008,430.89,430.17,0.72,430.530,2.33,1.52,13.993584,14.755800,432.3890,429.2450
2016-01-01 00:05:00,217.439947,342.194493,431.06,430.19,0.87,430.625,2.74,1.50,10.871997,17.109725,432.6220,429.2145
2016-01-01 00:06:00,361.630784,294.113648,432.40,430.54,1.86,431.470,1.56,1.71,18.081539,14.705682,433.3085,429.6675
2016-01-01 00:07:00,347.402014,395.102693,432.52,431.46,1.06,431.990,1.45,2.11,17.370101,19.755135,433.3250,430.2590
2016-01-01 00:08:00,347.029411,338.648408,432.53,431.46,1.07,431.995,1.44,2.10,17.351471,16.932420,433.3880,430.4625
2016-01-01 00:09:00,414.224478,267.615308,432.53,431.46,1.07,431.995,1.43,2.45,20.711224,13.380765,433.3280,430.1980


In [173]:
# Preview only the first rows instead of rendering the whole frame.
lob_features300.head(10)

Unnamed: 0,total ask volume,total bid volume,ask price,bid price,bid-ask spread,mid price,ask price spread,bid price spread,mean ask volume,mean bid volume,mean ask price,mean bid price
2016-01-01 00:00:00,318.632799,387.595528,430.89,429.84,1.05,430.365,2.11,1.52,15.931640,19.379776,432.1405,429.0090
2016-01-01 00:05:00,217.439947,342.194493,431.06,430.19,0.87,430.625,2.74,1.50,10.871997,17.109725,432.6220,429.2145
2016-01-01 00:10:00,414.543886,305.888470,432.41,431.46,0.95,431.935,1.49,2.53,20.727194,15.294424,433.1320,430.1110
2016-01-01 00:15:00,183.180919,222.309648,432.38,431.46,0.92,431.920,1.49,2.67,9.159046,11.115482,433.0830,430.0345
2016-01-01 00:20:00,311.297511,398.569889,432.42,431.46,0.96,431.940,1.47,2.45,15.564876,19.928494,433.2915,430.2975
2016-01-01 00:25:00,419.611941,434.477720,432.40,431.46,0.94,431.930,1.52,2.44,20.980597,21.723886,433.2520,430.1780
2016-01-01 00:30:00,234.429204,264.975279,432.39,431.46,0.93,431.925,1.41,2.45,11.721460,13.248764,433.0735,430.3140
2016-01-01 00:35:00,427.122203,306.567417,432.40,430.45,1.95,431.425,1.50,1.62,21.356110,15.328371,433.1205,429.7195
2016-01-01 00:40:00,248.873168,253.628860,431.11,430.09,1.02,430.600,2.35,1.41,12.443658,12.681443,432.6055,429.3755
2016-01-01 00:45:00,216.691350,225.566000,431.02,430.46,0.56,430.740,2.77,1.67,10.834567,11.278300,432.7370,429.4195


In [174]:
# Preview only the first rows instead of rendering the whole frame.
lob_features600.head(10)

Unnamed: 0,total ask volume,total bid volume,ask price,bid price,bid-ask spread,mid price,ask price spread,bid price spread,mean ask volume,mean bid volume,mean ask price,mean bid price
2016-01-01 00:00:00,318.632799,387.595528,430.89,429.84,1.05,430.365,2.11,1.52,15.931640,19.379776,432.1405,429.0090
2016-01-01 00:10:00,414.543886,305.888470,432.41,431.46,0.95,431.935,1.49,2.53,20.727194,15.294424,433.1320,430.1110
2016-01-01 00:20:00,311.297511,398.569889,432.42,431.46,0.96,431.940,1.47,2.45,15.564876,19.928494,433.2915,430.2975
2016-01-01 00:30:00,234.429204,264.975279,432.39,431.46,0.93,431.925,1.41,2.45,11.721460,13.248764,433.0735,430.3140
2016-01-01 00:40:00,248.873168,253.628860,431.11,430.09,1.02,430.600,2.35,1.41,12.443658,12.681443,432.6055,429.3755
2016-01-01 00:50:00,254.093468,239.650073,431.05,430.16,0.89,430.605,2.64,1.81,12.704673,11.982504,432.7790,429.2280
2016-01-01 01:00:00,280.695908,189.337964,431.05,429.80,1.25,430.425,2.12,1.46,14.034795,9.466898,432.2610,429.0650
2016-01-01 01:10:00,254.614361,360.892833,431.35,429.43,1.92,430.390,2.45,1.11,12.730718,18.044642,432.5795,428.8460
2016-01-01 01:20:00,248.382690,304.781962,431.38,430.82,0.56,431.100,2.31,2.14,12.419135,15.239098,432.5520,429.2240
2016-01-01 01:30:00,257.805188,214.429454,431.23,429.80,1.43,430.515,2.57,1.50,12.890259,10.721473,432.4005,428.8950


In [175]:
# Persist every down-sampled simple-feature table.
for simple_features, csv_path in (
    (lob_features30, '../btc-data/BTC_LOB_simple_30s.csv'),
    (lob_features60, '../btc-data/BTC_LOB_simple_60s.csv'),
    (lob_features300, '../btc-data/BTC_LOB_simple_300s.csv'),
    (lob_features600, '../btc-data/BTC_LOB_simple_600s.csv'),
):
    simple_features.to_csv(path_or_buf = csv_path)

# Feature Engineering of Feature Set 2 - better technical indicators

In [274]:
# Every technical-indicator table starts from an independent copy of the mid
# price and the bid-ask spread (stored as 'B-ASPREAD') of the matching table.
base_columns = ['mid price', 'bid-ask spread']
spread_rename = {'bid-ask spread': 'B-ASPREAD'}

lob_techind10 = lob_features10[base_columns].rename(columns = spread_rename)

lob_techind30 = lob_features30[base_columns].rename(columns = spread_rename)

lob_techind60 = lob_features60[base_columns].rename(columns = spread_rename)

lob_techind300 = lob_features300[base_columns].rename(columns = spread_rename)

lob_techind600 = lob_features600[base_columns].rename(columns = spread_rename)

In [275]:
def generate_features(frame, freq):
    '''Builds the technical-indicator feature table for one sampling frequency.

    Every indicator is computed on the 'mid price' column for the windows
    360, 180 and 60. 'mid price', 'B-ASPREAD' and 'DELTAP' are then moved one
    row up and the index one `freq` step forward, so that (on a regularly
    spaced index) the row labelled T holds the price, spread and price change
    observed at T next to indicators computed from data up to T - freq.

    The input frame is not modified; an independent copy is returned.

    Input:
    frame -- DataFrame with a DatetimeIndex and the columns 'mid price'
             and 'B-ASPREAD'
    freq  -- offset string (e.g. '10s') by which the index is shifted

    Output:
    DataFrame -- input columns plus one column per indicator/window and
                 'DELTAP', without the warm-up rows and without the last row
    '''

    windows = (360, 180, 60)

    # Work on a copy so the caller's frame keeps its columns and its index.
    features = frame.copy()
    close = features['mid price']

    # Stochastic family: %D smooths %K, slow %D smooths %D, each with its own window.
    for window in windows:
        features['K{}'.format(window)] = stoch_K(close, window)
    for window in windows:
        features['D{}'.format(window)] = stoch_D(features['K{}'.format(window)], window)
    for window in windows:
        features['sD{}'.format(window)] = slow_D(features['D{}'.format(window)], window)

    # Indicators that only need the price series and one window.
    for prefix, indicator in (('MOM', momentum), ('ROC', roc), ('LWR', lw_R),
                              ('ADOSC', ad_osc), ('DISP', disp)):
        for window in windows:
            features[prefix + str(window)] = indicator(close, window)

    # Price oscillator between neighbouring windows: 180 vs 360, then 60 vs 180.
    for short, long_ in zip(windows[1:], windows[:-1]):
        features['OSCP{}-{}'.format(short, long_)] = oscp(close, short, long_)

    for prefix, indicator in (('RSI', rsi), ('CCI', cci)):
        for window in windows:
            features[prefix + str(window)] = indicator(close, window)

    features['DELTAP'] = close.diff()

    # Pair each indicator row with the next observation of the raw quantities.
    for column in ('mid price', 'B-ASPREAD', 'DELTAP'):
        features[column] = features[column].shift(-1)
    features.index = features.index.shift(1, freq = freq)

    # Slow %D stacks three rolling windows, so the first 3 * (largest window - 1)
    # rows are incomplete. The last row has no next observation after the shift.
    warm_up = 3 * (max(windows) - 1)

    # Return a copy, not a slice, so callers can modify the result in place.
    return features.iloc[warm_up:-1].copy()

In [276]:
# Build the 10s indicator table, turn +/-inf (e.g. from divisions by zero)
# into NaN and carry the last valid value forward. Chaining avoids in-place
# edits on the slice returned by generate_features and the deprecated
# fillna(method=...) spelling.
lob_techind10 = (
    generate_features(lob_techind10, '10s')
    .replace([np.inf, -np.inf], np.nan)
    .ffill()
)

In [277]:
# Build the 30s indicator table, turn +/-inf (e.g. from divisions by zero)
# into NaN and carry the last valid value forward. Chaining avoids in-place
# edits on the slice returned by generate_features and the deprecated
# fillna(method=...) spelling.
lob_techind30 = (
    generate_features(lob_techind30, '30s')
    .replace([np.inf, -np.inf], np.nan)
    .ffill()
)

In [278]:
# Build the 60s indicator table, turn +/-inf (e.g. from divisions by zero)
# into NaN and carry the last valid value forward. Chaining avoids in-place
# edits on the slice returned by generate_features and the deprecated
# fillna(method=...) spelling.
lob_techind60 = (
    generate_features(lob_techind60, '60s')
    .replace([np.inf, -np.inf], np.nan)
    .ffill()
)

In [279]:
# Build the 300s indicator table, turn +/-inf (e.g. from divisions by zero)
# into NaN and carry the last valid value forward. Chaining avoids in-place
# edits on the slice returned by generate_features and the deprecated
# fillna(method=...) spelling.
lob_techind300 = (
    generate_features(lob_techind300, '300s')
    .replace([np.inf, -np.inf], np.nan)
    .ffill()
)

In [280]:
# Build the 600s indicator table, turn +/-inf (e.g. from divisions by zero)
# into NaN and carry the last valid value forward. Chaining avoids in-place
# edits on the slice returned by generate_features and the deprecated
# fillna(method=...) spelling.
lob_techind600 = (
    generate_features(lob_techind600, '600s')
    .replace([np.inf, -np.inf], np.nan)
    .ffill()
)

In [281]:
# Persist the technical-indicator table of every sampling frequency.
for indicator_table, csv_path in (
    (lob_techind10, '../btc-data/BTC_LOB_techind_10s.csv'),
    (lob_techind30, '../btc-data/BTC_LOB_techind_30s.csv'),
    (lob_techind60, '../btc-data/BTC_LOB_techind_60s.csv'),
    (lob_techind300, '../btc-data/BTC_LOB_techind_300s.csv'),
    (lob_techind600, '../btc-data/BTC_LOB_techind_600s.csv'),
):
    indicator_table.to_csv(path_or_buf = csv_path)