In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import itertools
from statistical_functions import preprocess, test_is_I1, test_is_tradable
import matplotlib.pyplot as plt

  from pandas.core import datetools


In [2]:
from arch.unitroot import ADF
from scipy import odr
def TLS_regresssion(stock_1, stock_2, beta0=(1., 2.)):
    """Fit ``stock_2 = slope * stock_1 + intercept`` by orthogonal distance regression.

    Both series are handed to ``scipy.odr``: ``stock_1`` as the x data and
    ``stock_2`` as the y data of a straight-line model.

    Parameters
    ----------
    stock_1 : array-like
        Observations used as the x variable.
    stock_2 : array-like
        Observations used as the y variable.
    beta0 : sequence of two numbers, optional
        Starting guess ``(slope, intercept)`` for the solver. The default,
        ``(1., 2.)``, is the guess that used to be hard-coded here, so
        existing two-argument calls behave as before.

    Returns
    -------
    scipy.odr.Output
        Result of ``ODR.run()``; ``beta[0]`` is the fitted slope and
        ``beta[1]`` the fitted intercept.
    """
    def line(params, x):
        """Straight line ``params[0] * x + params[1]`` in the form scipy.odr expects."""
        return params[0] * x + params[1]

    model = odr.Model(line)
    observations = odr.Data(stock_1, stock_2)
    # Cast to float so an integer guess such as (1, 2) is accepted as well.
    start = [float(value) for value in beta0]
    solver = odr.ODR(observations, model, beta0=start)
    return solver.run()

In [3]:
def _with_calendar_columns(frame):
    """Return a copy of ``frame`` with year/month/week/dayofweek derived from its ``date`` column."""
    out = frame.copy()
    out['year'] = out['date'].dt.year
    out['month'] = out['date'].dt.month
    # Read the week number from each Timestamp rather than through the ``.dt``
    # accessor: ``Series.dt.week`` no longer exists in recent pandas releases,
    # whereas ``Timestamp.week`` is available in old and new versions alike.
    out['week'] = out['date'].apply(lambda ts: ts.week)
    out['dayofweek'] = out['date'].dt.dayofweek
    return out


data = pd.read_csv("finance_data.csv")
data.columns = [
    'code', 'name', 'date', 'price'
]
# Dates are stored as YYYYMMDD; parse the whole column at once instead of row by row.
data['date'] = pd.to_datetime(data['date'].astype(str), format="%Y%m%d")
# The original row index is kept on purpose (no reset after sorting).
data = _with_calendar_columns(data).sort_values('date')

# One row per distinct (year, month, week) combination, in chronological order;
# the positional index of this frame is what later cells use as the "step".
timeframe = (
    data[['year', 'month', 'week']]
    .drop_duplicates()
    .reset_index(drop=True)
)

# Frame of every date for which data is available.
# It is used to check whether a stock has enough observations
# before running the cointegration test
# (a trade-off between speed and the number of candidate pairs).
date = _with_calendar_columns(pd.DataFrame(data['date'].unique(), columns=['date']))

# Parameters shared by the cells below.
trailing_windows = 52          # number of timeframe rows used to fit the model
ASR_threshold = 1.5            # multiplier on the residual std for the outer (orange) band
take_profit_sigma_num = 0.5    # multiplier on the residual std for the inner (red) band

data.head()

Unnamed: 0,code,name,date,price,year,month,week,dayofweek
0,2801,彰銀,2000-01-04,10.89,2000,1,1,1
11,2849,安泰銀,2000-01-04,11.12,2000,1,1,1
10,2845,遠東銀,2000-01-04,4.38,2000,1,1,1
9,2838,聯邦銀,2000-01-04,5.57,2000,1,1,1
7,2834,臺企銀,2000-01-04,11.09,2000,1,1,1


In [None]:
# Load the recorded trades and list the distinct exit reasons they contain.
trade_result = pd.read_csv('trade_result_finance.csv')
trade_result['reason'].unique()

array(['cointegration_invalid', 'reversion', 'take profit', 'stop_loss'], dtype=object)

In [None]:
# Keep only the trades whose exit reason is 'cointegration_invalid'.
invalid_mask = trade_result['reason'] == 'cointegration_invalid'
temp_result = trade_result.loc[invalid_mask]
temp_result.head()

Unnamed: 0.1,Unnamed: 0,ADF_statistic_in,ADF_statistic_out,ASR_in,ASR_out,PS_in,PS_out,date_in,date_out,hedge_ratio_in,...,spread_in,spread_out,stock_1_in,stock_1_log_in,stock_1_log_out,stock_1_out,stock_2_in,stock_2_log_in,stock_2_log_out,stock_2_out
0,"(2809, 2887)",-2.537345,,2.271674,,1.554106,,2009-12-07,2009-12-14,2.370609,...,-0.315256,,6.8,1.916923,,6.72,6.01,1.793425,,5.75
1,"(2816, 2883)",-2.124908,,2.169104,,1.10155,,2009-12-07,2009-12-14,0.605361,...,0.15955,,32.31,3.475377,,31.98,6.01,1.793425,,6.0
2,"(2801, 2891)",-2.97627,,1.744411,,1.72153,,2009-12-07,2009-12-21,1.815708,...,-0.107922,,8.26,2.111425,,7.96,8.61,2.152924,,8.22
3,"(2809, 2888)",-2.123514,,1.71506,,1.068899,,2009-12-07,2009-12-21,1.40797,...,-0.186886,,6.8,1.916923,,6.91,10.84,2.383243,,9.91
4,"(2809, 2889)",-2.507929,,1.737984,,1.324117,,2009-12-07,2009-12-28,1.484936,...,-0.200017,,6.8,1.916923,,6.87,5.55,1.713798,,5.66


In [None]:
def _parse_pair(label):
    """Turn a pair label such as "(2809, 2887)" into a tuple of int stock codes.

    Splitting on the comma (instead of slicing a fixed number of characters)
    keeps this correct for codes of any length.
    """
    return tuple(int(code) for code in str(label).strip().strip('()').split(','))


# For every 'cointegration_invalid' trade, rebuild the trailing model window that
# ends just before the exit week and run the tradability test on it again.
no_cointergration_counter = 0
ok_counter = 0
len_not_match_counter = 0
for pair_label, date_out in zip(temp_result['Unnamed: 0'], temp_result['date_out']):
    pair = _parse_pair(pair_label)
    target = date[date.date == date_out]

    # Locate the timeframe row (year, month, week) that contains the exit date.
    index = timeframe[
        (timeframe.year == target.year.values[0]) &
        (timeframe.month == target.month.values[0]) &
        (timeframe.week == target.week.values[0])
    ]

    # The model window is the `trailing_windows` timeframe rows before that row.
    current_step = index.index[0]
    timeframe_for_model = timeframe.iloc[current_step-trailing_windows: current_step]
    first_frame = timeframe_for_model.iloc[0]
    last_frame = timeframe_for_model.iloc[-1]

    # First available date of the first window row ...
    start_date = date[
        (date.year == first_frame['year']) &
        (date.month == first_frame['month']) &
        (date.week == first_frame['week'])
    ].iloc[0]['date']

    # ... and last available date of the last window row.
    end_date = date[
        (date.year == last_frame['year']) &
        (date.month == last_frame['month']) &
        (date.week == last_frame['week'])
    ].iloc[-1]['date']

    date_for_model = date[(date.date >= start_date) & (date.date <= end_date)]
    data_for_model = data[data.date.isin(date_for_model.date)]

    stock_1 = data_for_model[data_for_model.code == pair[0]]
    stock_2 = data_for_model[data_for_model.code == pair[1]]

    # Only test pairs for which both stocks have a price on every date of the window.
    if len(stock_1) == len(stock_2) == len(date_for_model):
        temp_pair = test_is_tradable(stock_1.price, stock_2.price)
        if temp_pair is None:
            print('   no cointergration! ', pair, target.date.values[0])
            no_cointergration_counter += 1
        else:
            print('   ok', pair, target.date.values[0])
            ok_counter += 1
    else:
        print('   len not match! ', pair, target.date.values[0])
        len_not_match_counter += 1

   ok (2809, 2887) 2009-12-14T00:00:00.000000000
   no cointergration!  (2816, 2883) 2009-12-14T00:00:00.000000000
   no cointergration!  (2801, 2891) 2009-12-21T00:00:00.000000000
   no cointergration!  (2809, 2888) 2009-12-21T00:00:00.000000000
   no cointergration!  (2809, 2889) 2009-12-28T00:00:00.000000000
   no cointergration!  (2801, 2816) 2009-12-28T00:00:00.000000000
   ok (2851, 2852) 2010-01-04T00:00:00.000000000
   ok (2851, 2886) 2010-01-04T00:00:00.000000000
   ok (2809, 2891) 2010-01-11T00:00:00.000000000
   no cointergration!  (2883, 2885) 2010-01-11T00:00:00.000000000
   no cointergration!  (2801, 2892) 2010-01-18T00:00:00.000000000
   ok (2809, 2852) 2010-01-18T00:00:00.000000000
   no cointergration!  (2809, 2892) 2010-01-18T00:00:00.000000000
   no cointergration!  (2809, 2812) 2010-01-18T00:00:00.000000000
   ok (2809, 2890) 2010-01-18T00:00:00.000000000
   no cointergration!  (2801, 2885) 2010-01-18T00:00:00.000000000
   no cointergration!  (2801, 2834) 2010-01-18

   no cointergration!  (2812, 2852) 2010-07-19T00:00:00.000000000
   no cointergration!  (2801, 2882) 2010-07-19T00:00:00.000000000
   no cointergration!  (2836, 2881) 2010-07-19T00:00:00.000000000
   no cointergration!  (2832, 2849) 2010-07-19T00:00:00.000000000
   no cointergration!  (2885, 2887) 2010-07-26T00:00:00.000000000
   no cointergration!  (2849, 2884) 2010-07-26T00:00:00.000000000
   no cointergration!  (2838, 2889) 2010-07-26T00:00:00.000000000
   no cointergration!  (2834, 2849) 2010-07-26T00:00:00.000000000
   no cointergration!  (2838, 2885) 2010-07-26T00:00:00.000000000
   no cointergration!  (2838, 2890) 2010-07-26T00:00:00.000000000
   no cointergration!  (2816, 2838) 2010-07-26T00:00:00.000000000
   no cointergration!  (2885, 2891) 2010-08-02T00:00:00.000000000
   no cointergration!  (2816, 2836) 2010-08-09T00:00:00.000000000
   no cointergration!  (2809, 2834) 2010-08-09T00:00:00.000000000
   no cointergration!  (2816, 2892) 2010-08-16T00:00:00.000000000
   no coin

   no cointergration!  (2809, 2891) 2011-09-19T00:00:00.000000000
   no cointergration!  (2880, 2892) 2011-09-19T00:00:00.000000000
   no cointergration!  (2820, 2887) 2011-09-19T00:00:00.000000000
   no cointergration!  (2832, 2880) 2011-09-19T00:00:00.000000000
   no cointergration!  (2885, 2889) 2011-09-26T00:00:00.000000000
   no cointergration!  (2838, 2892) 2011-09-26T00:00:00.000000000
   no cointergration!  (2823, 2882) 2011-09-26T00:00:00.000000000
   no cointergration!  (2850, 2881) 2011-09-26T00:00:00.000000000
   no cointergration!  (2882, 2886) 2011-09-26T00:00:00.000000000
   no cointergration!  (2881, 2885) 2011-09-26T00:00:00.000000000
   no cointergration!  (2823, 2885) 2011-09-26T00:00:00.000000000
   no cointergration!  (2809, 2885) 2011-09-26T00:00:00.000000000
   no cointergration!  (2881, 2886) 2011-10-03T00:00:00.000000000
   no cointergration!  (2809, 2845) 2011-10-11T00:00:00.000000000
   ok (2809, 2820) 2011-10-11T00:00:00.000000000
   no cointergration!  (283

   no cointergration!  (2849, 2888) 2012-10-08T00:00:00.000000000
   ok (2849, 2883) 2012-10-15T00:00:00.000000000
   no cointergration!  (2890, 2892) 2012-10-15T00:00:00.000000000
   ok (2836, 2849) 2012-10-15T00:00:00.000000000
   no cointergration!  (2832, 2836) 2012-10-22T00:00:00.000000000
   no cointergration!  (2836, 2852) 2012-10-22T00:00:00.000000000
   no cointergration!  (2809, 2823) 2012-10-29T00:00:00.000000000
   no cointergration!  (2823, 2838) 2012-10-29T00:00:00.000000000
   no cointergration!  (2801, 2852) 2012-11-01T00:00:00.000000000
   no cointergration!  (2812, 2887) 2012-11-12T00:00:00.000000000
   no cointergration!  (2801, 2881) 2012-11-12T00:00:00.000000000
   no cointergration!  (2812, 2845) 2012-11-12T00:00:00.000000000
   no cointergration!  (2881, 2891) 2012-11-12T00:00:00.000000000
   no cointergration!  (2852, 2880) 2012-11-12T00:00:00.000000000
   no cointergration!  (2845, 2852) 2012-11-12T00:00:00.000000000
   no cointergration!  (2809, 2845) 2012-11-

   no cointergration!  (2823, 2883) 2013-11-18T00:00:00.000000000
   no cointergration!  (2832, 2886) 2013-11-18T00:00:00.000000000
   no cointergration!  (2881, 2890) 2013-11-18T00:00:00.000000000
   no cointergration!  (2812, 2888) 2013-11-18T00:00:00.000000000
   no cointergration!  (2832, 2885) 2013-11-18T00:00:00.000000000
   no cointergration!  (2832, 2887) 2013-11-25T00:00:00.000000000
   no cointergration!  (2832, 5880) 2013-11-25T00:00:00.000000000
   no cointergration!  (2801, 2832) 2013-11-25T00:00:00.000000000
   no cointergration!  (2832, 2880) 2013-11-25T00:00:00.000000000
   no cointergration!  (2816, 2832) 2013-11-25T00:00:00.000000000
   no cointergration!  (2834, 2851) 2013-12-02T00:00:00.000000000
   no cointergration!  (2888, 2890) 2013-12-16T00:00:00.000000000
   no cointergration!  (2832, 2849) 2014-01-02T00:00:00.000000000
   no cointergration!  (2851, 2880) 2014-01-06T00:00:00.000000000
   no cointergration!  (2851, 2867) 2014-01-20T00:00:00.000000000
   no coin

In [None]:
# True only when every 'cointegration_invalid' trade was counted as 'ok' by the loop.
ok_counter == len(temp_result)

In [None]:
# Number of trades skipped by the loop because the two price series did not
# cover every date of the model window.
len_not_match_counter

# Check: recompute the spread of a single pair and compare it with the recorded trade

In [None]:


# Rebuild the model window for one hand-picked pair/date, re-run the tradability
# test, fit the TLS line on log prices and plot the resulting spread.
pair = (2881, 2891)
pair_date = '2016-08-01'
target = date[date.date == pair_date]

# Timeframe row (year, month, week) that contains `pair_date`.
index = timeframe[
    (timeframe.year == target.year.values[0]) &
    (timeframe.month == target.month.values[0]) &
    (timeframe.week == target.week.values[0])
]

# The model window is the `trailing_windows` timeframe rows before that row.
current_step = index.index[0]
timeframe_for_model = timeframe.iloc[current_step-trailing_windows: current_step]
first_frame = timeframe_for_model.iloc[0]
last_frame = timeframe_for_model.iloc[-1]

start_date = date[
    (date.year == first_frame['year']) &
    (date.month == first_frame['month']) &
    (date.week == first_frame['week'])
].iloc[0]['date']

end_date = date[
    (date.year == last_frame['year']) &
    (date.month == last_frame['month']) &
    (date.week == last_frame['week'])
].iloc[-1]['date']

date_for_model = date[(date.date >= start_date) & (date.date <= end_date)]
data_for_model = data[data.date.isin(date_for_model.date)]

# Prices used for the new spread value: the first available date of the current step.
timeframe_for_spread = timeframe.iloc[current_step]
date_for_spread = date[
    (date.year == timeframe_for_spread.year) &
    (date.month == timeframe_for_spread.month) &
    (date.week == timeframe_for_spread.week)
]
# Filtering on the exact date is sufficient; a year/week pre-filter would select
# a superset of these same rows.
data_for_spread = data[data.date == date_for_spread.date.iloc[0]]

stock_1 = data_for_model[data_for_model.code == pair[0]]
stock_2 = data_for_model[data_for_model.code == pair[1]]
if len(stock_1) == len(stock_2) == len(date_for_model):
    temp_pair = test_is_tradable(stock_1.price, stock_2.price)
    if temp_pair is None:
        print('   no cointergration!')
    else:
        print('   ok')
else:
    print('   len not match!')

# Fit log(stock_2) = beta[0] * log(stock_1) + beta[1]; the residual is the in-window spread.
log_price_1 = np.log(stock_1.price.values)
log_price_2 = np.log(stock_2.price.values)
TLS_result = TLS_regresssion(log_price_1, log_price_2)
slope, intercept = TLS_result.beta[0], TLS_result.beta[1]
residual = log_price_2 - intercept - slope * log_price_1

# Spread of the current step, computed with the coefficients fitted on the window.
stock_1 = data_for_spread[data_for_spread.code == pair[0]]
stock_2 = data_for_spread[data_for_spread.code == pair[1]]
new_spread = np.log(stock_2.price.values) - intercept - slope * np.log(stock_1.price.values)
residual = np.append(residual, new_spread)

# Bands are multiples of the std of the residual (new spread value included);
# compute that std once instead of once per band.
sigma = np.std(residual)
for level, colour in ((ASR_threshold, 'orange'), (take_profit_sigma_num, 'red')):
    plt.axhline(sigma * level, color=colour, alpha=0.5)
    plt.axhline(-sigma * level, color=colour, alpha=0.5)
plt.axhline(0, color='grey')

# Draw the spread once, on top of the reference lines.
plt.plot(residual, c='blue')
plt.title('Spread of pair {} up to {}'.format(pair, pair_date))
plt.xlabel('observation')
plt.ylabel('spread');

In [None]:
# Log prices of both legs of the pair over the model window.
for leg_label, leg_code in zip(('stock_1', 'stock_2'), pair):
    leg_prices = data_for_model.loc[data_for_model.code == leg_code, 'price']
    plt.plot(np.log(leg_prices), label=leg_label)
plt.legend();

In [None]:
# Print the spread recorded for this pair/date next to the one recomputed above.
result = trade_result.loc[trade_result['Unnamed: 0'] == str(pair)]
recorded_spread = result.loc[result.date_out == pair_date, 'spread_out'].values[0]
print("   ", recorded_spread)
print("   ", new_spread[0])