# Download Data

In [1]:
import pandas as pd
import twx
import pandas as pd
import numpy as np
import keras 
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.ar_model import AR
import matplotlib.pyplot as plt
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

# download data
tickers = ['FB', 'GOOG']
days = ['20170412']

data_for_corelation = pd.DataFrame()
for ticker in tickers:
    data_final = pd.DataFrame()
    for day in days:
        try:
            print('{} - {}'.format(ticker, day))

            data = twx.bookquery(
                 day, 
                 ticker, 
                'time: 9:30am 4pm 1s', 
                'direct.mid')
            data_final = data_final.append(data)
        except Exception as e:
            print(str(e))
    data_for_corelation[ticker] = data_final['direct.mid']
    data_for_corelation.to_csv('tech_stock_data_1day.csv')

Using TensorFlow backend.


FB - 20170412
GOOG - 20170412


# Train LSTM RNN

In [3]:
stock1 = 'FB'
stock2 = 'GOOG'
data = pd.read_csv('tech_stock_data_1day.csv')

def regress(returns1,returns2):
    x = np.asarray(returns1).reshape(-1,1)
    y = np.asarray(returns2).reshape(-1,1)
    model = LinearRegression()
    model.fit(x,y)
    a = model.intercept_[0]
    b = model.coef_[0,0]
    residuals = y-model.predict(x)
    return residuals, a,b

def returns(midprices):
    return np.diff(midprices, axis=-1)/midprices[:-1]

l1 = list(data[stock1])
l2 = list(data[stock2])
dataset = regress(l1,l2)[0]

# split into train and test sets
train_size = int(len(dataset) * 0.67)
val_size = int(len(dataset) * 0.10)
test_size = len(dataset) - train_size - val_size

train, val, test = dataset[0:train_size,:],dataset[train_size:train_size+val_size,:],  dataset[train_size+val_size:len(dataset),:]
print(len(train), len(val), len(test))

# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=10):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

# reshape into X=t and Y=t+1
look_back = 10
trainX, trainY = create_dataset(train, look_back)
valX, valY = create_dataset(val, look_back)
testX, testY = create_dataset(test, look_back)

# reshape input to be [samples, time steps, features]
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
valX = np.reshape(valX, (valX.shape[0], 1, valX.shape[1]))


# create and fit the LSTM network
model = Sequential()
model.add(LSTM(25, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, validation_data=([valX, valY]), epochs=25, batch_size=128, verbose=2)


# make predictions
import sklearn
testPredict = model.predict(testX)
test_mse = sklearn.metrics.mean_squared_error(testY, testPredict)
train_mse = sklearn.metrics.mean_squared_error(trainY, model.predict(trainX))
val_mse = sklearn.metrics.mean_squared_error(valY, model.predict(valX))

print('Train MSE: {}'.format(train_mse))
print('Val MSE: {}'.format(val_mse))
print('Test MSE: {}'.format(test_mse))

(15678, 2340, 5382)
Train on 15667 samples, validate on 2329 samples
Epoch 1/25
 - 1s - loss: 0.1704 - val_loss: 1.1661
Epoch 2/25
 - 0s - loss: 0.0152 - val_loss: 0.3560
Epoch 3/25
 - 0s - loss: 0.0082 - val_loss: 0.2458
Epoch 4/25
 - 0s - loss: 0.0066 - val_loss: 0.2037
Epoch 5/25
 - 0s - loss: 0.0057 - val_loss: 0.1776
Epoch 6/25
 - 0s - loss: 0.0051 - val_loss: 0.1425
Epoch 7/25
 - 0s - loss: 0.0045 - val_loss: 0.1250
Epoch 8/25
 - 0s - loss: 0.0040 - val_loss: 0.1114
Epoch 9/25
 - 0s - loss: 0.0037 - val_loss: 0.1037
Epoch 10/25
 - 0s - loss: 0.0034 - val_loss: 0.0905
Epoch 11/25
 - 0s - loss: 0.0032 - val_loss: 0.0837
Epoch 12/25
 - 0s - loss: 0.0030 - val_loss: 0.0840
Epoch 13/25
 - 0s - loss: 0.0029 - val_loss: 0.0742
Epoch 14/25
 - 0s - loss: 0.0028 - val_loss: 0.0759
Epoch 15/25
 - 0s - loss: 0.0027 - val_loss: 0.0736
Epoch 16/25
 - 0s - loss: 0.0026 - val_loss: 0.0650
Epoch 17/25
 - 0s - loss: 0.0025 - val_loss: 0.0686
Epoch 18/25
 - 0s - loss: 0.0024 - val_loss: 0.0631
Epoc

In [6]:
# Show that simple prediction works!
arr = np.array([[[2.22424243, 2.2394794 , 2.2394794 , 2.22424243, 2.22424243,
        2.22424243, 2.22424243, 2.28924243, 2.28924243, 2.28924243]]])
model.predict(arr)[0][0]

2.1779106

In [None]:
%%time
import sys
from simulator import (
    Simulator, string_to_micro, micro_to_time,
    BUY, SELL, SHORT, EXCH_INET,
    BOOK_DEPTH1_PRICE, ORDER_EVENTS,
    )
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.ar_model import AR


class Ave_Lee(object):
    def __init__(self, session, date, tickers, start_time, end_time, model):
        self.session = session
        self.date = date
        self.tickers = tickers
        self.ticker1 = self.tickers[0]
        self.ticker2 = self.tickers[1]
        self.start_time = start_time
        self.end_time = end_time
        self.interval = string_to_micro("1s")
        self.state = 'NULL'
        
        # load RNN Model
        self.model = model # RNN
        print('test')
        arr = np.array([[[2.22424243, 2.2394794 , 2.2394794 , 2.22424243, 2.22424243,
        2.22424243, 2.22424243, 2.28924243, 2.28924243, 2.28924243]]])
        print(arr)
        
        # try SAME prediction as outside of loop
        #model.predict(arr)[0][0]
        print(self.model.predict(arr)[0][0])
        
        
        # variables for BUY or SELL
        self.side1 = 0
        self.side2 = 0
        # variables for order size
        self.order_size1 = 100
        self.order_size2 = 1
        # variables to keep track of total shares bought/sold and the corresponding amount of money
        self.buy_shares1 = 0
        self.buy_dollars = 0
        self.sell_shares1 = 0
        self.sell_dollars = 0
        self.buy_shares2 = 0
        self.sell_shares2 = 0
        # minimum increment in a bid
        self.tick_size = 10000
        
        # variables to keep track of how many positions we have opened and closed respectively
        self.open_longs = 0
        self.open_shorts = 0
        self.close_longs = 0
        self.close_shorts = 0 
        self.runs = 0
        
        # variables used for the fitOU, when to open/close a position and how far we look back
        self.dt = 1
        self.long_open = 1.25 #1.50 #1.25
        self.long_close = 0.50 #0.25 #0.50
        self.short_open = 1.25 #1.50 #1.25;
        self.short_close = 0.75 #0.50 #0.75
        self.training_size = 10
        
        # start timer/ call the start_callback function
        self.session.add_timer(self.start_time, self.start_callback)
        
        # list to store pnl every time we update it
        self.pnl = []
        # dictionary to store time, midprices and the returns each timestep
        self.results = {'time': []}
        for ticker in self.tickers:
            self.results[ticker] = []
            self.results['return {}'.format(ticker)] = []
        
        # subscribe to the tickers of interest, and set the first timer
        for ticker in self.tickers:
            self.session.subscribe_ticker_all_feeds(ticker)
    
    def start_callback(self, time):
        for ticker in self.tickers:
            self.session.subscribe_event(ticker, ORDER_EVENTS, self.event_callback)
        self.session.add_timer(time, self.timer_callback)
        
    
    def event_callback(self, ticker, event_params):
        # call the execution manager whenever we have an execution
        self.process_executions(event_params)
        
    def timer_callback(self, time):
        self.runs += 1
        self.results['time'].append(micro_to_time(time))
        # get the best bid and offer, compute the midmarket
        bid1, ask1 = self.session.get_inside_market(self.ticker1)
        bid2, ask2 = self.session.get_inside_market(self.ticker2)
        # append the midprices
        self.results[self.ticker1].append(self.get_midmarket(self.ticker1) / 1000000.0)
        self.results[self.ticker2].append(self.get_midmarket(self.ticker2) / 1000000.0)
        
        # start calculating returns after 1 second
        if time > self.start_time + 10**6:
            self.results['return {}'.format(self.ticker1)].append(np.float(self.returns(self.results[self.ticker1][-2:])))
            self.results['return {}'.format(self.ticker2)].append(np.float(self.returns(self.results[self.ticker2][-2:])))
        
        # start collecting signals after training_size * 1 second
        if time > self.start_time + self.training_size * 10**6:
            # collect the last training_size of returns
            returns1 = self.results['return {}'.format(self.ticker1)][-self.training_size:]
            returns2 = self.results['return {}'.format(self.ticker2)][-self.training_size:]
            # regress the returns and fit the residuals, calculate the s-score
            residuals, a,b = self.regress(returns1,returns2)
            kappa, m, sigma, sigmaeq = self.fitOU(residuals)
            
            print(residuals)
            #arr = np.array([[[1.47795598e-04,  1.77768732e-04,  2.41230408e-05, -6.10925309e-06,
            #                1.16484998e-04, -4.54248280e-04,  1.35327673e-04, -7.98121981e-05,
            #                2.77442353e-05, -8.90745451e-05]]])
            #print(self.model.predict(arr))
            #s = self.model.predict(np.asarray(residuals).reshape(1,1,10))[0][0]
            #print(s)
            
            s = self.sscore(m, sigmaeq)
            # find current net position (=0: neutral, <0: we are short asset 1, >0: we are long asset 1)
            pos = self.buy_shares1 - self.sell_shares1            
            # feature to check if we have orders at the market before we open a position
            orders = self.session.get_all_orders()
        
            if not orders:
                if pos == 0:
                    if s < -self.long_open:
                        self.side1 = BUY
                        self.side2 = SELL
                        price1 = ask1['price']# - self.tick_size
                        price2 = bid2['price']# + self.tick_size
                        # make the portfolio self financing by making sure we sell for as much as we buy
                        self.order_size2 = int(price1 * self.order_size1 / price2)
                        self.session.add_order(self.ticker1, self.side1, self.order_size1, price1, exchange=EXCH_INET)
                        self.session.add_order(self.ticker2, self.side2, self.order_size2, price2, exchange=EXCH_INET)
                        self.open_longs += 1
                        #print("open long")
                    elif s > self.short_open:
                        self.side1 = SELL
                        self.side2 = BUY
                        price1 = bid1['price']# + self.tick_size
                        price2 = ask2['price']# - self.tick_size
                        # make the portfolio self financing by making sure we buy for as much as we sell
                        self.order_size2 = int(price1 * self.order_size1 / price2)
                        self.session.add_order(self.ticker1, self.side1, self.order_size1, price1, exchange=EXCH_INET)
                        self.session.add_order(self.ticker2, self.side2, self.order_size2, price2, exchange=EXCH_INET)
                        self.open_shorts += 1
                        #print("open short")
                elif pos < 0 and s < self.short_close:
                    self.side1 = BUY
                    self.side2 = SELL
                    price1 = ask1['price']# - self.tick_size
                    price2 = bid2['price']# + self.tick_size
                    self.session.add_order(self.ticker1, self.side1, self.order_size1, price1, exchange=EXCH_INET)
                    self.session.add_order(self.ticker2, self.side2, self.order_size2, price2, exchange=EXCH_INET)
                    self.close_shorts += 1
                    #print("short close")
                elif pos > 0 and s > -self.long_close:
                    self.side1 = SELL
                    self.side2 = BUY
                    price1 = bid1['price']# + self.tick_size
                    price2 = ask2['price']# - self.tick_size
                    self.session.add_order(self.ticker1, self.side1, self.order_size1, price1, exchange=EXCH_INET)
                    self.session.add_order(self.ticker2, self.side2, self.order_size2, price2, exchange=EXCH_INET)
                    self.close_longs += 1
                    #print("long close")
        # update pnl every second to see how it evolves over the day            
        pnl = self.get_pnl()
        self.pnl.append(pnl / 1000000.0)
            
        # reset the timer unless we are done 
        if time < self.end_time:
            self.session.add_timer(time + self.interval, self.timer_callback) 
                
            
    def process_executions(self, evp):
        # make sure that we only update if we have executed any orders
        # when we want to add transaction costs we do it in this function
        if 'executed_orders' in evp:
            time = self.session.current_time()
            for ex in evp['executed_orders']:
                order = ex['order']
                side = order['side']
                ticker = order['ticker']
                if ticker == self.ticker1:
                    if side == 'B':
                        self.buy_shares1 += ex['quantity_executed']
                        #self.buy_dollars += ex['quantity_executed'] * ex['price_executed']
                        # buy in midmarker to check if spread is "eating" profits
                        self.buy_dollars += ex['quantity_executed'] * self.get_midmarket(ticker)
                    else:
                        self.sell_shares1 += ex['quantity_executed']
                        #self.sell_dollars += ex['quantity_executed'] * ex['price_executed']
                        # sell in midmarker to check if spread is "eating" profits
                        self.sell_dollars += ex['quantity_executed'] * self.get_midmarket(ticker)
                    pos = self.buy_shares1 - self.sell_shares1
                elif ticker == self.ticker2:
                    if side == 'B':
                        self.buy_shares2 += ex['quantity_executed']
                        #self.buy_dollars += ex['quantity_executed'] * ex['price_executed']
                        # buy in midmarker to check if spread is "eating" profits
                        self.buy_dollars += ex['quantity_executed'] * self.get_midmarket(ticker)
                    else:
                        self.sell_shares2 += ex['quantity_executed']
                        #self.sell_dollars += ex['quantity_executed'] * ex['price_executed']
                        # sell in midmarker to check if spread is "eating" profits
                        self.sell_dollars += ex['quantity_executed'] * self.get_midmarket(ticker)
                    pos = self.buy_shares2 - self.sell_shares2        
                pnl = self.get_pnl()
                #print "{0} {1} {quantity_executed} {price_executed} {liquidity} {2} {3}".format(time, side, pos, pnl, **ex)
                
    def create_dataset(dataset, look_back=10):
        dataX, dataY = [], []
        for i in range(len(dataset)-look_back-1):
            a = dataset[i:(i+look_back), 0]
            dataX.append(a)
            dataY.append(dataset[i + look_back, 0])
        return np.array(dataX), np.array(dataY)

    def get_midmarket(self, ticker):
        bid, ask = self.session.get_inside_market(ticker)
        return (bid['price'] + ask['price']) / 2.0

    def get_pnl(self):
        # mark to the midmarket
        mid1 = self.get_midmarket(self.ticker1)
        mid2 = self.get_midmarket(self.ticker2)
        pnl = self.sell_dollars - self.buy_dollars + (self.buy_shares1 - self.sell_shares1) * mid1 + (self.buy_shares2 - self.sell_shares2) * mid2
        return pnl
    
    def regress(self, returns1,returns2):
        x = np.asarray(returns1).reshape(-1,1)
        y = np.asarray(returns2).reshape(-1,1)
        model = LinearRegression()
        model.fit(x,y)
        a = model.intercept_[0]
        b = model.coef_[0,0]
        residuals = y-model.predict(x)
        return residuals, a,b
    
    def returns(self, midprices):
        return np.diff(midprices, axis=-1)/midprices[:-1]
    
    def fitOU(self, residual):
        ou = np.cumsum(residual)
        model = AR(ou)
        fittedmodel = model.fit(maxlag=1, disp=-1)  
        a = fittedmodel.params[0]
        b = fittedmodel.params[1]
        var =  fittedmodel.sigma2
        if b > 0.0 and b < np.exp(-2.0/self.training_size):
            kappa = -np.log(b) / self.dt    
            m = a / (1.0 - np.exp(-kappa * self.dt))
            sigma = np.sqrt(var * 2.0 * kappa / (1.0 - np.exp(-2.0 * kappa * self.dt)))
            sigmaeq = np.sqrt(var / (1.0 - np.exp(-2.0 * kappa * self.dt)));
            return kappa, m, sigma, sigmaeq
        else:
            return -1.0,0,0,0
    
    def sscore(self, m, sigmaeq):
        if sigmaeq != 0:
            return -m/sigmaeq
        elif m>0:
            return 10000000
        else:
            return -10000000
    
    def end(self):
        print("Total long opens: " + str(self.open_longs))
        print("Total short opens: " + str(self.open_shorts))
        print("Total long closes: " + str(self.close_longs))
        print("Total short closes: " + str(self.close_shorts))
        print('Runs: ' + str(self.runs))
        # plot the pnl
        plt.plot(np.asarray(self.pnl))
        plt.show()
        return self.get_pnl()

'''
tickers = ['GOOG', 'MSFT', 'AAPL', 'AMZN', 'NFLX', 'CSCO', 'FB']
for t1 in tickers:
    for t2 in tickers:
        if t1 == t2:
            continue
        print('####')
        print('{} and {}'.format(t1, t2))
        date = "20170413"
        tickers = [t1, t2]
        start_time = string_to_micro("9:30")
        end_time = string_to_micro("10:30")
        sim = Simulator(Ave_Lee)
        sim.run(date, tickers, use_om=True, start_time=start_time, end_time=end_time)
        print('####')
'''


date = "20170413"
tickers = ['GOOGL', 'FB']
start_time = string_to_micro("9:30")
end_time = string_to_micro("10:30")
sim = Simulator(Ave_Lee)
sim.run(date, tickers, use_om=True, start_time=start_time, end_time=end_time, model=model)
print('####')

test
[[[2.22424243 2.2394794  2.2394794  2.22424243 2.22424243 2.22424243
   2.22424243 2.28924243 2.28924243 2.28924243]]]
