In [None]:
%%time
import sys
from simulator import (
    Simulator, string_to_micro, micro_to_time,
    BUY, SELL, SHORT, EXCH_INET,
    BOOK_DEPTH1_PRICE, ORDER_EVENTS,
    )
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.ar_model import AR
from scipy import integrate 
from scipy import optimize


class Ave_Lee(object):
    """Pairs-trading strategy driven by an Ornstein-Uhlenbeck s-score.

    On every algo timer tick the strategy samples the mid prices of two
    tickers, regresses their recent log returns against each other, fits an
    OU process to the cumulative residuals and opens/closes a two-legged
    position depending on the resulting s-score.  A second timer samples the
    mark-to-market PnL once per second.

    The share/dollar counters, the PnL list and the trade counter are passed
    in by the caller and handed back by :meth:`end`, which lets the caller
    chain several simulation days together.
    """

    def __init__(self, session, date, tickers, start_time, end_time, pnl, buy_shares1, buy_dollars,
                 sell_shares1, sell_dollars, buy_shares2, sell_shares2, threshold, interval, training_size, trades):
        """Store the configuration, register the start timer and subscribe to both tickers.

        Args:
            session: simulator session used for timers, market data and orders.
            date: simulation date (stored, not otherwise used by this class).
            tickers: sequence whose first two entries are the traded pair.
            start_time: time at which ``start_callback`` is scheduled.
            end_time: time after which the timers stop rescheduling themselves.
            pnl: list that receives one PnL sample per PnL timer tick.
            buy_shares1, sell_shares1: running share counts for ticker 1.
            buy_shares2, sell_shares2: running share counts for ticker 2.
            buy_dollars, sell_dollars: running notional bought / sold.
            threshold: ``[open, long_close, short_close]`` s-score levels; the
                same ``open`` level is used for long and short entries.
            interval: algo timer period, as a string understood by
                ``string_to_micro``.
            training_size: number of most recent returns fed to the regression.
            trades: running count of opened positions.
        """
        self.session = session
        self.date = date
        self.tickers = tickers
        self.ticker1 = self.tickers[0]
        self.ticker2 = self.tickers[1]
        self.start_time = start_time
        self.end_time = end_time
        self.halt_trading = string_to_micro('5m')  # close position 5 minutes before trading ends
        self.interval = string_to_micro(interval)
        self.interval_pnl = string_to_micro('1s')
        self.state = 'NULL'

        # Sides of the most recently submitted orders (leg 1 / leg 2)
        self.side1 = 0
        self.side2 = 0

        # Order sizes of the most recently opened position; reused when closing
        self.order_size1 = 100
        self.order_size2 = 1
        # How much we can buy for in each trade.
        # presumably expressed in the same micro-units as prices -- TODO confirm
        self.buy_size = 10000 * 10**6

        # Running totals of shares bought/sold and the corresponding notional
        self.buy_shares1 = buy_shares1
        self.buy_dollars = buy_dollars
        self.sell_shares1 = sell_shares1
        self.sell_dollars = sell_dollars
        self.buy_shares2 = buy_shares2
        self.sell_shares2 = sell_shares2

        # Number of positions opened so far
        self.trades = trades

        # s-score thresholds for opening/closing and the look-back length
        self.dt = 1
        self.long_open = threshold[0]
        self.long_close = threshold[1]
        self.short_open = threshold[0]
        self.short_close = threshold[2]
        self.training_size = training_size

        # Schedule start_callback at the start time
        self.session.add_timer(self.start_time, self.start_callback)

        # List to store pnl every time we update it
        self.pnl = pnl

        # Time, mid prices and returns collected on each algo tick
        self.results = {'time': []}
        for ticker in self.tickers:
            self.results[ticker] = []
            self.results['return {}'.format(ticker)] = []

        # Subscribe to the tickers of interest
        for ticker in self.tickers:
            self.session.subscribe_ticker_all_feeds(ticker)

    def start_callback(self, time):
        """Subscribe to order events and start the PnL and algo timers at ``time``."""
        for ticker in self.tickers:
            self.session.subscribe_event(ticker, ORDER_EVENTS, self.event_callback)
        self.session.add_timer(time, self.timer_callback_pnl)
        self.session.add_timer(time, self.timer_callback_algo)

    def event_callback(self, ticker, event_params):
        """Forward an order event to the execution bookkeeping."""
        self.process_executions(event_params)

    def timer_callback_pnl(self, time):
        """Append the current PnL (divided by 1e6) and reschedule until ``end_time``."""
        self.pnl.append(self.get_pnl() / 1000000.0)

        if time < self.end_time:
            self.session.add_timer(time + self.interval_pnl, self.timer_callback_pnl)

    def _send_pair_orders(self, buy_leg1, bid1, ask1, bid2, ask2, hedge_ratio=None):
        """Submit one order per leg, crossing the spread on both.

        Leg 1 is bought at its ask when ``buy_leg1`` is true (leg 2 is then
        sold at its bid) and sold at its bid otherwise (leg 2 bought at its
        ask).  When ``hedge_ratio`` is given the order sizes are recomputed
        first; otherwise the sizes of the last opened position are reused.
        """
        if buy_leg1:
            self.side1, self.side2 = BUY, SELL
            price1, price2 = ask1['price'], bid2['price']
        else:
            self.side1, self.side2 = SELL, BUY
            price1, price2 = bid1['price'], ask2['price']

        if hedge_ratio is not None:
            # The regression coefficient determines the size for ticker 2
            self.order_size1 = self.buy_size // price1
            self.order_size2 = int(hedge_ratio * self.order_size1)

        self.session.add_order(self.ticker1, self.side1, self.order_size1, price1, exchange=EXCH_INET)
        self.session.add_order(self.ticker2, self.side2, self.order_size2, price2, exchange=EXCH_INET)

    def timer_callback_algo(self, time):
        """Record prices/returns, evaluate the s-score and trade on it.

        Called once per ``self.interval``; reschedules itself while
        ``time < self.end_time``.
        """
        # Append the time and the mid prices
        self.results['time'].append(micro_to_time(time))
        self.results[self.ticker1].append(self.get_midmarket(self.ticker1) / 1000000.0)
        self.results[self.ticker2].append(self.get_midmarket(self.ticker2) / 1000000.0)

        # Start calculating returns once more than one second has elapsed.
        # The builtin float() replaces the np.float alias, which no longer
        # exists in current NumPy releases.
        if time > self.start_time + 10**6:
            for ticker in (self.ticker1, self.ticker2):
                latest_return = returns(self.results[ticker][-2:])[-1]
                self.results['return {}'.format(ticker)].append(float(latest_return))

        # Best bid and offer for both legs
        bid1, ask1 = self.session.get_inside_market(self.ticker1)
        bid2, ask2 = self.session.get_inside_market(self.ticker2)

        # Net position in ticker 1 (=0: neutral, <0: short, >0: long).  Computed
        # unconditionally so the end-of-day block below can always read it,
        # even when the warm-up branch has not run.
        pos = self.buy_shares1 - self.sell_shares1
        trading_open = time < self.end_time - self.halt_trading

        # Start collecting signals after the warm-up period.
        # NOTE(review): the warm-up is measured as training_size seconds
        # regardless of self.interval, so fewer than training_size returns may
        # be available here -- confirm this is intended.
        if time > self.start_time + self.halt_trading + self.training_size * 10**6:
            returns1 = self.results['return {}'.format(self.ticker1)][-self.training_size:]
            returns2 = self.results['return {}'.format(self.ticker2)][-self.training_size:]

            # Regress the returns, fit the residuals, calculate the s-score
            residuals, _, b = regress(returns1, returns2)
            kappa, m, sigma, sigmaeq = fitOU(residuals, self.training_size)
            s = sscore(m, sigmaeq)

            # Only act when nothing is resting in the market and the fit is usable
            orders = self.session.get_all_orders()

            if not orders and b > 0 and kappa > 0 and sigma > 0 and trading_open:
                if pos == 0:
                    if s < -self.long_open:
                        self._send_pair_orders(True, bid1, ask1, bid2, ask2, hedge_ratio=b)
                        self.trades += 1
                    elif s > self.short_open:
                        self._send_pair_orders(False, bid1, ask1, bid2, ask2, hedge_ratio=b)
                        self.trades += 1
                elif pos < 0 and s < self.short_close:
                    self._send_pair_orders(True, bid1, ask1, bid2, ask2)
                elif pos > 0 and s > -self.long_close:
                    self._send_pair_orders(False, bid1, ask1, bid2, ask2)

        # Close any open position once we are within halt_trading of the end.
        # NOTE(review): this branch does not check for outstanding orders, so
        # it fires on every tick while pos is non-zero -- confirm that repeated
        # close orders cannot overshoot.
        if not trading_open and pos != 0:
            self._send_pair_orders(pos < 0, bid1, ask1, bid2, ask2)

        # Reset the timer unless we are done
        if time < self.end_time:
            self.session.add_timer(time + self.interval, self.timer_callback_algo)

    def process_executions(self, evp):
        """Update share and notional counters from the executions in ``evp``.

        Does nothing when ``evp`` has no ``'executed_orders'`` key.  Fills are
        valued at the current mid price rather than the executed price, to
        check whether the spread is "eating" profits.
        """
        if 'executed_orders' not in evp:
            return

        for ex in evp['executed_orders']:
            order = ex['order']
            bought = order['side'] == 'B'
            ticker = order['ticker']

            if ticker == self.ticker1:
                first_leg = True
            elif ticker == self.ticker2:
                first_leg = False
            else:
                continue

            quantity = ex['quantity_executed']
            notional = quantity * self.get_midmarket(ticker)

            if bought:
                self.buy_dollars += notional
                if first_leg:
                    self.buy_shares1 += quantity
                else:
                    self.buy_shares2 += quantity
            else:
                self.sell_dollars += notional
                if first_leg:
                    self.sell_shares1 += quantity
                else:
                    self.sell_shares2 += quantity

    def get_midmarket(self, ticker):
        """Return the average of the inside bid and ask prices of ``ticker``."""
        bid, ask = self.session.get_inside_market(ticker)
        return (bid['price'] + ask['price']) / 2.0

    def get_pnl(self):
        """Return realised cash flow plus both net positions marked at mid."""
        mid1 = self.get_midmarket(self.ticker1)
        mid2 = self.get_midmarket(self.ticker2)
        cash = self.sell_dollars - self.buy_dollars
        inventory1 = (self.buy_shares1 - self.sell_shares1) * mid1
        inventory2 = (self.buy_shares2 - self.sell_shares2) * mid2
        return cash + inventory1 + inventory2

    def end(self):
        """Return the running state so the caller can feed it into the next run."""
        return (self.pnl, self.buy_shares1, self.buy_dollars, self.sell_shares1, self.sell_dollars, self.buy_shares2, self.sell_shares2, self.trades)


def regress(returns1, returns2):
    """Fit ``returns2`` on ``returns1`` with ordinary least squares.

    Both inputs are reshaped into single-column arrays before fitting.

    Returns:
        A tuple ``(residuals, a, b)``: the column of residuals of the fit,
        the intercept ``a`` and the slope ``b``.
    """
    explanatory = np.asarray(returns1).reshape(-1, 1)
    response = np.asarray(returns2).reshape(-1, 1)

    fit = LinearRegression()
    fit.fit(explanatory, response)

    intercept = fit.intercept_[0]
    slope = fit.coef_[0, 0]
    fitted = fit.predict(explanatory)
    return response - fitted, intercept, slope


def returns(midprices):
    """Return the log returns of ``midprices``.

    These are the first differences of the natural log of the prices, taken
    along the last axis, so the result is one element shorter on that axis.
    """
    log_prices = np.log(midprices)
    return np.diff(log_prices, axis=-1)


def fitOU(residual, training_size):
    """Estimate Ornstein-Uhlenbeck parameters from regression residuals.

    The residuals are cumulatively summed and an AR(1) model is fitted to the
    resulting series.  The fit is accepted only when the lag coefficient lies
    strictly between 0 and ``exp(-2 / training_size)``.

    Returns:
        ``(kappa, m, sigma, sigmaeq)`` for an accepted fit, otherwise the
        sentinel ``(-1.0, 0, 0, 0)``.
    """
    # NOTE(review): `AR` is deprecated in newer statsmodels releases in favour
    # of `AutoReg` -- confirm against the installed version.
    dt = 1
    process = np.cumsum(residual)
    fitted = AR(process).fit(maxlag=1, disp=-1)

    intercept = fitted.params[0]
    lag_coef = fitted.params[1]
    noise_var = fitted.sigma2

    # Reject fits whose lag coefficient is outside the accepted range
    if not (0.0 < lag_coef < np.exp(-2.0 / training_size)):
        return -1.0, 0, 0, 0

    kappa = -np.log(lag_coef) / dt
    m = intercept / (1.0 - np.exp(-kappa * dt))
    variance_decay = 1.0 - np.exp(-2.0 * kappa * dt)
    sigma = np.sqrt(noise_var * 2.0 * kappa / variance_decay)
    sigmaeq = np.sqrt(noise_var / variance_decay)
    return kappa, m, sigma, sigmaeq


def sscore(m, sigmaeq):
    """Return the s-score ``-m / sigmaeq``.

    When ``sigmaeq`` is zero a large sentinel is returned instead:
    ``10000000`` for positive ``m`` and ``-10000000`` otherwise.
    """
    # NOTE(review): for a non-zero sigmaeq a positive m gives a negative
    # score, while the sentinel for positive m is positive -- confirm the
    # sign of the degenerate branch is intended.
    if sigmaeq == 0:
        return 10000000 if m > 0 else -10000000
    return -m / sigmaeq


def plot_graph(pnl, ticker1, ticker2):
    """Plot the PnL series for one ticker pair and show the figure.

    Args:
        pnl: sequence of PnL samples, plotted against their index.
        ticker1, ticker2: names used in the plot title.
    """
    heading = 'PnL for {} vs {}'.format(ticker1, ticker2)
    series = np.asarray(pnl)
    plt.plot(series)
    plt.title(heading)
    plt.show()
    

# Simulate the algorithm on several pairs and for multiple days
start_time = string_to_micro("9:30")
end_time = string_to_micro("16:00")

# dates to be tested on
dates = ['20170508', '20170509', '20170510', '20170511', '20170512', '20170515', '20170516', '20170517',
         '20170518', '20170519']

# Pairs are formed position-wise: tickers1[k] is traded against tickers2[k]
tickers1 = ['ORCL', 'AAPL', 'NVDA', 'TXN', 'AAPL']
tickers2 = ['NVDA', 'ORCL', 'MSFT', 'AAPL', 'CSCO']

# hyperparameters
threshold = [1.4, 0.5, 0.5]
# Written out explicitly; the adjacent literals '1m' '15s' concatenated to this same value
interval = '1m15s'
training_size = 250

for ticker1, ticker2 in zip(tickers1, tickers2):
    pair = '{} vs {}'.format(ticker1, ticker2)
    print(pair)

    # Reset the pnl, and other variables when we have a new pair of tickers
    pnl = []
    buy_shares1 = 0
    buy_dollars = 0
    sell_shares1 = 0
    sell_dollars = 0
    buy_shares2 = 0
    sell_shares2 = 0
    trades = 0

    # Run simulator for all dates and aggregate the pnl: the state returned by
    # one day is fed back in as the starting state of the next day
    for date in dates:
        sim = Simulator(Ave_Lee)
        (pnl, buy_shares1, buy_dollars, sell_shares1, sell_dollars, buy_shares2, sell_shares2, trades) = sim.run(
            date, [ticker1, ticker2], use_om=True, start_time=start_time, end_time=end_time,
            pnl=pnl, buy_shares1=buy_shares1, buy_dollars=buy_dollars, sell_shares1=sell_shares1,
            sell_dollars=sell_dollars, buy_shares2=buy_shares2, sell_shares2=sell_shares2, threshold=threshold,
            interval=interval, training_size=training_size, trades=trades)

    # Nothing to report or plot if no PnL sample was recorded
    if not pnl:
        print('No PnL recorded for {}'.format(pair))
        continue

    pnl_array = np.asarray(pnl)

    # The summary lines are labelled with the pair; the previous label
    # referred to a name that was never defined
    print('Last PnL of {}: {}'.format(pair, pnl[-1]))
    if trades:
        print('Dollar per trade for {}: {}'.format(pair, pnl[-1] / (10000 * trades)))
    else:
        # Avoid dividing by zero when the strategy never opened a position
        print('Dollar per trade for {}: n/a (no trades)'.format(pair))

    plt.plot(pnl_array)
    plt.title('PnL for {} vs {}'.format(ticker1, ticker2))
    plt.show()