In [22]:
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

from sklearn.linear_model import LinearRegression

import backtrader as bt
import pandas as pd 
import backtrader.feeds as btfeed
import statsmodels.tsa.stattools as ts
import numpy as np

import importlib
import PandasDataWrapper

import os.path
import sys

import datetime

import matplotlib.pyplot as plt

# Pair Search

In [23]:
'''
At the moment there is no reason to select the pairs to trade automatically,
so they are chosen via fundamental analysis.
'''

# Cointegration testing
class coint_df_test:
    """Cointegration check for a pair of price series.

    Each series is regressed on the other with ordinary least squares and an
    augmented Dickey-Fuller (ADF) test is run on the residuals of both
    regressions. The residual series with the lower ADF statistic is kept as
    the spread of the pair.

    Attributes:
        close_1, close_2: the two input price series (pandas.Series).
        spread: residual series selected by ``df_tests`` (``None`` until run).
        which_: 1 when the spread is ``close_2`` minus its fit on ``close_1``,
            2 when it is ``close_1`` minus its fit on ``close_2``
            (``None`` until run).
    """

    def __init__(self, close_1, close_2): # close_1, close_2 - must be pandas.Series
        self.close_1 = close_1
        self.close_2 = close_2
        # Filled in by df_tests(); defined here so get_spread() can tell
        # whether the tests have been run yet.
        self.spread = None
        self.which_ = None

    @staticmethod
    def _ols_residuals(x, y):
        """Return the residuals ``y - (slope * x + intercept)`` of the OLS fit of y on x."""
        # ``normalize`` is intentionally not passed: it was removed from
        # LinearRegression in scikit-learn 1.2; the other arguments the
        # original passed (copy_X=True, fit_intercept=True) are the defaults.
        lr = LinearRegression()
        lr.fit(x.values.reshape(-1, 1), y.values)
        # coef_ holds one slope for the single regressor
        return y - (lr.coef_[0] * x + lr.intercept_)

    def df_tests(self):
        """Run both regressions and keep the residual series with the lower ADF statistic.

        Sets ``self.spread`` and ``self.which_``; returns nothing.
        """
        res1 = self._ols_residuals(self.close_1, self.close_2)
        res2 = self._ols_residuals(self.close_2, self.close_1)

        # adfuller returns a tuple whose first element is the test statistic;
        # compare the statistics explicitly rather than the whole tuples.
        adf_stat1 = ts.adfuller(res1, maxlag=1)[0]
        adf_stat2 = ts.adfuller(res2, maxlag=1)[0]

        if adf_stat1 < adf_stat2:
            self.spread = res1
            self.which_ = 1
        else:
            self.spread = res2
            self.which_ = 2

    def get_spread(self):
        """Return ``(spread, which_)``, running ``df_tests`` first if it has not been run."""
        if self.spread is None:
            self.df_tests()
        return (self.spread, self.which_)

# Pair trading strategy

In [31]:
# Re-import the local PandasDataWrapper module so that edits made to it are
# picked up when this cell is re-run, without restarting the kernel.
importlib.reload(PandasDataWrapper)

class pair_trading(PandasDataWrapper.btPandasStrategy):
    """Mean-reversion pair-trading strategy over two data feeds.

    On every bar the closes of ``datas[1]`` are regressed on the closes of
    ``datas[0]`` over a window of ``lookback_window`` values obtained through
    ``get_df_slice``. The spread is the predicted minus the actual close of
    ``datas[1]``. A band of ``std_pivot`` standard deviations around the
    window mean of the spread triggers entries; a move back across the mean
    triggers exits.

    Params:
        lookback_window: size of the window used for the regression and band.
        std_pivot: half-width of the band in spread standard deviations.
        order_fraction: fraction of the available cash committed to each leg.
        stop_loss_fraction: fraction of a leg's cash amount that is passed to
            the orders as ``trailamount``.
    """
    params = (
                ('lookback_window', 100),
                ('std_pivot', 2),
                ('order_fraction', 0.02),
                ('stop_loss_fraction', 0.5),
             )

    def log(self, txt, dt=None):
        ''' Logging function for this strategy'''
        dt = dt or self.datas[0].datetime.date(0)

        print('%s, %s' % (dt.isoformat(), txt))

    def __init__(self):
        """Keep references to both close lines and set up the regression model."""
        self.dataclose_1 = self.datas[0].close
        self.dataclose_2 = self.datas[1].close

        # ``normalize`` is intentionally not passed: it was removed from
        # LinearRegression in scikit-learn 1.2; copy_X=True and
        # fit_intercept=True are the defaults.
        self.lr = LinearRegression()

        # Signed sizes of the legs most recently requested for datas[0] / datas[1]
        self.close1_pos = 0
        self.close2_pos = 0
        # Value of len(self) when the latest order completed
        self.bar_executed = None

    def notify_order(self, order):
        """Log completed and failed orders; ignore submitted/accepted ones."""
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return

        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status in [order.Completed]:
            if order.isbuy():
                self.log('BUY EXECUTED, %.2f' % order.executed.price)
            elif order.issell():
                self.log('SELL EXECUTED, %.2f' % order.executed.price)

            self.bar_executed = len(self)

        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log('Order Canceled/Margin/Rejected')

    def next(self):
        """Refit the regression on the current window and act on the latest spread value."""
        window = self.params.lookback_window

        # The band cannot be evaluated until enough bars have been seen
        if len(self) < window:
            return

        pd_close_1 = self.get_df_slice(self.dataclose_1, size=window)
        pd_close_2 = self.get_df_slice(self.dataclose_2, size=window)

        self.lr.fit(pd_close_1.values, pd_close_2.values)

        # Spread = predicted close of datas[1] minus its actual close, flattened to 1-D
        spread = self.lr.predict(pd_close_1.values) - pd_close_2.values
        spread = spread.reshape(-1,)

        rolling_mean = np.average(spread)
        std = np.std(spread)

        upper_bound = rolling_mean + self.params.std_pivot * std
        lower_bound = rolling_mean - self.params.std_pivot * std
        last_spread = spread[-1]

        ### Decision making ###
        # Commit a fixed fraction of the available cash to each leg
        order_size = self.broker.getcash() * self.params.order_fraction
        # For the sake of risk management the stop amount is a fraction of the leg's cash amount
        stop_loss_amount = order_size * self.params.stop_loss_fraction

        # In backtrader index 0 is the current bar and -1 the previous one;
        # size and log with the current closes.
        price_1 = self.dataclose_1[0]
        price_2 = self.dataclose_2[0]

        # NOTE(review): the entries below are submitted as StopTrail orders and
        # ``trailamount`` receives a cash amount, while backtrader documents
        # trailamount as an absolute price distance - confirm these orders can
        # actually trigger; market entries plus separate protective stops may
        # be what was intended.
        # NOTE(review): close1_pos / close2_pos are updated when the orders are
        # submitted, not when they are filled - confirm this is acceptable.

        if (last_spread >= upper_bound and self.close1_pos >= 0):
            # Spread above the band: sell datas[0], buy datas[1]
            buy_pos = int(order_size / price_2)
            sell_pos = int(order_size / price_1)

            self.close1_pos = -sell_pos
            self.close2_pos = buy_pos

            self.buy(data=self.datas[1], size=buy_pos, exectype=bt.Order.StopTrail, trailamount=stop_loss_amount)
            self.sell(data=self.datas[0], size=sell_pos, exectype=bt.Order.StopTrail, trailamount=stop_loss_amount)

            self.log("SHORT SPREAD, sell close1: %.2f buy close2: %.2f" % (price_1, price_2))

        if (last_spread <= lower_bound and self.close1_pos <= 0):
            # Spread below the band: buy datas[0], sell datas[1]
            buy_pos = int(order_size / price_1)
            sell_pos = int(order_size / price_2)

            self.close1_pos = buy_pos
            self.close2_pos = -sell_pos

            self.buy(data=self.datas[0], size=buy_pos, exectype=bt.Order.StopTrail, trailamount=stop_loss_amount)
            self.sell(data=self.datas[1], size=sell_pos, exectype=bt.Order.StopTrail, trailamount=stop_loss_amount)

            self.log("LONG SPREAD, buy close1: %.2f sell close2: %.2f" % (price_1, price_2))

        if (last_spread < upper_bound and last_spread > rolling_mean and self.close1_pos >= 0):
            # Spread is back between the mean and the upper bound: flatten both legs
            self.close(data=self.datas[0])
            self.close(data=self.datas[1])

            self.close1_pos = 0
            self.close2_pos = 0

        if (last_spread > lower_bound and last_spread < rolling_mean and self.close1_pos <= 0):
            # Spread is back between the lower bound and the mean: flatten both legs
            self.close(data=self.datas[0])
            self.close(data=self.datas[1])

            self.close1_pos = 0
            self.close2_pos = 0
        ###    
        

## Run algorithm 

In [32]:
def _load_price_csv(path):
    """Read a price CSV with '<DATE>' and '<TIME>' columns and index it by their combined timestamp."""
    # Read the stamp columns as text so leading zeros in <TIME> are not lost
    frame = pd.read_csv(path, dtype={'<DATE>': str, '<TIME>': str})
    stamps = pd.to_datetime(frame['<DATE>'] + frame['<TIME>'].str.zfill(6),
                            format="%Y%m%d%H%M%S")
    frame = frame.drop(columns=['<DATE>', '<TIME>'])
    frame.index = pd.DatetimeIndex(stamps, name='DATE')
    return frame


def runstrat(common_csv='SBER_180801_190119_h.csv',
             pref_csv='SBERP_180801_190119_h.csv',
             cash=100000.0):
    """Prepare both price series and run the pair_trading backtest.

    Args:
        common_csv: path of the CSV with the common-share prices.
        pref_csv: path of the CSV with the preferred-share prices.
        cash: starting cash of the broker.

    Prints the starting and the final portfolio value; returns nothing.
    """
    df_common = _load_price_csv(common_csv)
    df_pref = _load_price_csv(pref_csv)

    # Single-column 'close' frames for the two feeds
    s1 = df_pref[['<CLOSE>']].rename(columns={'<CLOSE>': 'close'})
    s2 = df_common[['<CLOSE>']].rename(columns={'<CLOSE>': 'close'})

    # The two files do not cover exactly the same timestamps: keep only the
    # ones present in both, so the feeds stay aligned in either direction.
    shared_index = s1.index.intersection(s2.index)
    s1 = s1.loc[shared_index]
    s2 = s2.loc[shared_index]

    cerebro = bt.Cerebro()

    # Add strategy
    cerebro.addstrategy(pair_trading)

    # NOTE(review): the feeds carry only a 'close' column - confirm the order
    # types used by the strategy do not need open/high/low prices.
    data1 = bt.feeds.PandasData(dataname=s1)
    data2 = bt.feeds.PandasData(dataname=s2)

    # Feed order is unchanged (preferred first, common second); the names now
    # match the data each feed actually carries.
    cerebro.adddata(data1, name="pref")
    cerebro.adddata(data2, name="comm")

    # Broker
    cerebro.broker.setcash(cash)
    # NOTE(review): commission=2 together with margin=True looks like a
    # fixed-per-unit, margin-based scheme - confirm this is intended for shares.
    cerebro.broker.setcommission(commission=2, margin=True)

    print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())

    # Run over everything
    cerebro.run()

    # Print out the final result
    print('Final Portfolio Value: %.2f' % cerebro.broker.getvalue())

In [33]:
runstrat()

                      close
DATE                       
2018-08-01 10:05:00  182.28
2018-08-01 10:10:00  182.79
2018-08-01 10:15:00  182.71
2018-08-01 10:20:00  182.70
2018-08-01 10:25:00  182.50
2018-08-01 10:30:00  182.59
2018-08-01 10:35:00  182.38
2018-08-01 10:40:00  182.52
2018-08-01 10:45:00  182.20
2018-08-01 10:50:00  182.25
2018-08-01 10:55:00  182.15
2018-08-01 11:00:00  182.30
2018-08-01 11:05:00  182.39
2018-08-01 11:10:00  182.38
2018-08-01 11:15:00  182.48
2018-08-01 11:20:00  182.49
2018-08-01 11:25:00  182.55
2018-08-01 11:30:00  182.70
2018-08-01 11:35:00  182.41
2018-08-01 11:40:00  182.56
2018-08-01 11:45:00  182.64
2018-08-01 11:50:00  182.58
2018-08-01 11:55:00  182.64
2018-08-01 12:00:00  182.26
2018-08-01 12:05:00  182.47
2018-08-01 12:10:00  182.11
2018-08-01 12:15:00  182.10
2018-08-01 12:20:00  182.08
2018-08-01 12:25:00  181.47
2018-08-01 12:30:00  181.08
...                     ...
2019-01-18 16:20:00  176.54
2019-01-18 16:25:00  176.45
2019-01-18 16:30:00 

KeyboardInterrupt: 

### Some Rubbish

In [None]:
# Scratch cell: drop from s2 the timestamps that exist in the common-share
# index but not in the preferred-share index, then print those timestamps.
# NOTE(review): in the visible notebook df_pref, df_common and s2 are only
# assigned inside runstrat(), so this cell presumably fails with NameError on
# a fresh kernel unless they are also defined at notebook level - confirm.
pref_ind = df_pref.index
com_ind = df_common.index
s2.drop(com_ind.difference(pref_ind), inplace=True)
print(com_ind.difference(pref_ind))

In [None]:
# Draw both close series on one explicit Axes. The original collected legend
# handles from two plot() results that refer to the same Axes, so every entry
# appeared twice in the legend.
# NOTE(review): df_close is not defined in the visible notebook - confirm
# where it comes from.
fig, ax = plt.subplots(figsize=(15, 9))
df_close.close_pref.plot(ax=ax, color='blue', grid=True, label='pref close')
df_close.close_common.plot(ax=ax, color='red', grid=True, label='common close')
ax.legend(loc=2);

In [None]:
# Raw price spread between the two close series
spread = s2-s1

# Use one explicit Axes: DataFrame.plot() without ``ax`` opens its own figure,
# which left the plt.figure() canvas empty and split the lines across figures.
# NOTE(review): s1, s2 and rolling must already exist at notebook level when
# this cell runs (rolling is assigned in a later cell) - confirm cell order.
fig, ax = plt.subplots(figsize=(13, 5))

pd.DataFrame(spread).plot(ax=ax, color='blue', grid=True, label='Spread')
rolling.plot(ax=ax, color='red')

# spread.mean() is a one-element Series when spread is a single-column frame;
# reduce it to a scalar for the horizontal mean line.
ax.axhline(y=float(np.ravel(spread.mean())[0]), color='r', linestyle='-')

ax.legend(loc=2);

In [None]:
# Spread next to its expanding (cumulative) mean. expanding().mean() computes
# the same running mean in a single pass instead of re-averaging a growing
# slice for every row.
rolling = pd.concat([spread, spread.expanding().mean()], axis=1)

In [None]:
# Run the cointegration tests on the two close series.
# NOTE(review): coint_df_test documents its inputs as pandas.Series, while the
# s1/s2 built in runstrat() are single-column DataFrames and local to that
# function - confirm what s1 and s2 refer to when this cell runs.
cft = coint_df_test(s1, s2)
cft.df_tests()

In [None]:
# get_spread() returns a (spread, which_) tuple: unpack it before plotting,
# since a tuple has no .plot attribute.
spread, which_ = cft.get_spread()
spread.plot(grid=True, figsize=(15,8));