In [12]:
# Import the required packages
import numpy as np
import pandas as pd
import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint, adfuller

import matplotlib.pyplot as plt
import seaborn as sns
# sns.set(style="whitegrid")

pd.core.common.is_list_like = pd.api.types.is_list_like
from pandas_datareader import data as pdr
import datetime
from dateutil import rrule
from dateutil.relativedelta import relativedelta
import yfinance as yf
import warnings
warnings.filterwarnings("ignore",category=DeprecationWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)
# yf.pdr_override()

In [13]:
tickers = ['A2201', 'C2201', 'I2201', 'JM2201', 'OI2201', 'P2201', 'Y2201']
df = pd.read_csv("data.csv")
df.drop('Unnamed: 0', axis=1, inplace=True)
minutes = range(len(df))
df['Minutes'] = minutes
df

Unnamed: 0,A2201,C2201,I2201,JM2201,OI2201,P2201,Y2201,Minutes
0,5413,2762,927.5,1600.0,9142,6128,7150,0
1,5413,2776,920.5,1600.0,9017,6112,7132,1
2,5413,2766,920.5,1600.0,8989,6112,7120,2
3,5413,2765,920.5,1600.0,8957,6112,7140,3
4,5413,2765,920.5,1600.0,8966,6112,7126,4
...,...,...,...,...,...,...,...,...
82645,5860,2625,721.5,2366.0,12143,10162,9368,82645
82646,5860,2625,721.5,2366.0,12143,10162,9368,82646
82647,5860,2625,721.5,2366.0,12143,10162,9368,82647
82648,5830,2625,721.5,2366.0,12143,10162,9368,82648


In [14]:
# Find two co-integrated stocks and return the second variable, which is the pValue of the co-integration pair
def find_cointegrated_pairs(data):
    n = data.shape[1]
    score_matrix = np.zeros((n, n))
    pvalue_matrix = np.ones((n, n))
    keys = data.keys()
    pairs = []
    min_value = 1
    min_pair = []
    for i in range(n):
        for j in range(i+1, n):
            S1 = data[keys[i]]
            S2 = data[keys[j]]
            result = coint(S1, S2)
            score = result[0]
            pvalue = result[1]
            score_matrix[i, j] = score
            pvalue_matrix[i, j] = pvalue
            if pvalue < 0.05:
                pairs.append((keys[i], keys[j]))
                if pvalue < min_value:
                    min_value = pvalue
                    min_pair=[keys[i], keys[j]]
                
    return score_matrix, pvalue_matrix, pairs, min_pair

In [15]:
# Calculate returns
def trade(split, b, S1, S2, date, window1, window2, principal, trade_times, show_pic):

    if (window1 == 0) or (window2 == 0):
        return 0
    
    target = b * S1 - S2
    
    if show_pic:
        target[:split].plot(figsize=(12,6))
        plt.axhline(target[:split].mean())
        plt.title(u'train dataset: b * S1 - S2')
        plt.show()
        
    
    ma1 = target.rolling(window=window1,
                               center=False).mean()
    ma2 = target.rolling(window=window2,
                               center=False).mean()
    std = target.rolling(window=window1,
                                center=False).std()
    zscore = (ma2 - ma1)/std
    
    if show_pic:
        zscore[:split].plot(figsize=(12,6))
        plt.axhline(zscore[:split].mean())
        plt.title(u'train dataset: b * S1 - S2 zscore')
        plt.show()
    
    open_signal = pd.Series([] * len(S1))
    open_signal = pd.Series([] * len(S1))
    close_signal = pd.Series([] * len(S1))
    
    
    open_signal2 = pd.Series([] * len(S1))
    open_signal2 = pd.Series([] * len(S1))
    close_signal2 = pd.Series([] * len(S1))

    countS1 = 0
    countS2 = 0
    S1_o = 0
    S2_o = 0
    has_socket = 0
    rets = 0
    k = 0
    next_t = date.iloc[len(target)-1]
    # target = b * S1 - S2
    #for i in range(len(target)):
    for i in range(split+date.iloc[0], len(target)+date.iloc[0]):
        #print(zscore[i])
        if trade_times is not None and k>=trade_times:
            next_t = date[i]
            break
        # short S2
        if zscore[i] < -1 and has_socket==0:

            countS1 = principal*b/(S1[i]*b+S2[i])
            countS2 = -countS1 / b
                
            has_socket = 1
            S1_o = S1[i]
            S2_o = S2[i]
            
            open_signal[i] = zscore[i]
            open_signal2[i] = target[i]
        
        # short S1
        elif zscore[i] > 1 and has_socket==0:

            countS1 = - principal*b/(S1[i]*b+S2[i])
            countS2 = - countS1 / b
                
            has_socket = 2
            S1_o = S1[i]
            S2_o = S2[i]
            
            open_signal[i] = zscore[i]
            open_signal2[i] = target[i]
             
        elif (has_socket==1 and zscore[i]>-0.1) or (has_socket==2 and zscore[i]<0.1): 
            k+=1
            has_socket = 0
            rets += countS1 * (S1[i] - S1_o) + countS2 * (S2[i] - S2_o)
            countS1 = 0
            countS2 = 0
            S1_o = 0
            S2_o = 0
            close_signal[i] = zscore[i]
            close_signal2[i] = target[i]
            
            
    rets += countS1 * (S1.iloc[-1] - S1_o) + countS2 * (S2.iloc[-1] - S2_o)
    
    if countS1!=0:
        close_signal[i] = zscore[date.iloc[0]+len(target)-1]
        close_signal2[i] = target[date.iloc[0]+len(target)-1]

    if show_pic:
        zscore.plot(figsize=(12,6))
        open_signal.plot(color='r', linestyle='None', marker='^')
        open_signal.plot(color='r', linestyle='None', marker='^')
        close_signal.plot(color='g', linestyle='None', marker='^')
        plt.title(u'all dataset: b * S1 - S2 and zscore')
        plt.show()

        target.plot(figsize=(12,6))
        open_signal2.plot(color='r', linestyle='None', marker='^')
        open_signal2.plot(color='r', linestyle='None', marker='^')
        close_signal2.plot(color='g', linestyle='None', marker='^')
        plt.show()
    
    return principal, rets, next_t

In [16]:
def main(start, end, train_len, test_len=None, trade_times=None, long_win=30, short_win=5, principal=None, select=None):
    rets_list = []
    pairs_list = []
    
    temp = start + train_len
    i = 0
    
    while temp + test_len+10 < end:
       
        df_cur = df[start: temp]

        # Select test data set
        df_next = df[temp:]
        if test_len is not None:
            next_t = temp + test_len
            df_next = df_next[:test_len]
        
        split = len(df_cur)
        df_all = pd.concat([df_cur, df_next], axis=0)
        df_all.sort_values(by="Minutes", inplace=True)
        
        # The smaller the pvalue, the higher the integration degree
        scores, pvalues, pairs, min_pair = find_cointegrated_pairs(df_cur)

        if len(min_pair)==0:
            #rets_list.append(0)
            #pairs_list.append([])
            # If there are no integration pairs, use data one day later
            start = start + 1
            temp = temp + 1
            continue

        ## Select an integration pair
        name1 = min_pair[0]
        name2 = min_pair[1]
        
        # Calculate b
        # OLS for polynomial fitting
        S1= df_cur[name1]
        S2= df_cur[name2]
        S1 = sm.add_constant(S1)
        results = sm.OLS(S2, S1).fit() 
        S1 = S1[name1]
        b = results.params[name1]
        
        principal, rets, next_t = trade(split, b, df_all[name1], df_all[name2], df_all['Minutes'], long_win, short_win, principal, trade_times, select==i)
        
        #print("【{}】The first stock:{}, The second stock{}，b = {}".format(i, name1, name2, b))
        #print("Train date:{}     {}".format(start, temp))
        #print("Trade date:{}     {}".format(temp, next_t))
        
        t1 = df_all[name1]*b
        t2 = df_all[name1]
        if select==i:
            t1.plot(figsize=(12,6))
            t2.plot(figsize=(12,6))
            plt.title(u'two sockets')
            plt.show()
        
        #print("Principal{}, Return:{}, Rate:{}".format(principal, rets, rets/principal))
        #print()
        #print()
        if rets != 0:
            rets_list.append(rets)
        pairs_list.append([name1, name2])
        
        i+=1
        start = next_t - train_len
        temp = next_t
            
    return rets_list

In [17]:
rets_list = main(1, 2000, train_len = 100, test_len = 20, trade_times = 2, long_win = 40, short_win = 4, principal = 1000000, select = None)
print("=============================================================================================")
total = 0
total_rets = []
for i in rets_list:
    total += i
    total_rets.append(total)
print("Total:{}".format(total))

ValueError: Invalid input, x is constant

In [11]:
x= range(len(total_rets))
plt.figure(figsize=(12, 6))
plt.plot(x, total_rets, marker='o')
plt.title('z-score pair trading')
plt.xlabel('trading time')
plt.ylabel('rets')
plt.grid(True)
plt.show()

NameError: name 'total_rets' is not defined