In [4]:
import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm

import matplotlib.pyplot as plt

import statsmodels.tsa.stattools as ts

# 引入Kalman函数库
from pykalman import KalmanFilter

In [5]:
# Load the pair configuration; the first CSV column is used as the row index.
pair_info = pd.read_csv("pairs_beta_config.csv", index_col=0)

In [8]:
# Show the loaded pair table as this cell's output.
pair_info

Unnamed: 0_level_0,p1,p2,pearson,adf,beta,alpha,hurst,half-life,category
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,000798.XSHE,600257.XSHG,0.883462,True,0.683189,0.682904,0.249367,4342.118783,A04
2,002069.XSHE,600257.XSHG,0.848794,True,1.014951,1.01933,0.254728,4607.122667,A04
3,600121.XSHG,601666.XSHG,0.95934,True,0.895073,0.894126,0.050772,9397.328081,B06
4,000983.XSHE,601699.XSHG,0.977082,True,1.082275,1.085812,0.231515,6606.635213,B06
5,603727.XSHG,603979.XSHG,0.942469,True,0.564421,0.558902,0.11651,531.045123,B11
6,002629.XSHE,300191.XSHE,0.929276,True,3.039864,3.013906,0.090525,23.095977,B11
7,002695.XSHE,002852.XSHE,0.941431,True,1.087774,1.08346,0.289372,624.301809,C13
8,002515.XSHE,603536.XSHG,0.924499,True,1.54379,1.541325,0.290671,281.836863,C13
9,000752.XSHE,600365.XSHG,0.963845,True,0.487351,0.494515,0.241357,438.752494,C15
10,000848.XSHE,600199.XSHG,0.951112,True,0.559222,0.57437,0.294207,2512.142166,C15


In [17]:
def kalman_beta(sec1 = '000858.XSHE' ,sec2 = '000300.XSHG',count = 400, end_date = '2015-3-1'):
    """Estimate the latest regression coefficients between two securities with a Kalman filter.

    The daily closes are modelled as ``close(sec2) = beta * close(sec1) + alpha``,
    where ``(beta, alpha)`` is a hidden state that follows a random walk
    (identity transition matrix). The filter is run over the whole price window
    and the state estimate after the last observation is returned.

    Parameters
    ----------
    sec1 : str
        Security code used as the regressor (x).
    sec2 : str
        Security code used as the observed response (y).
    count : int
        Number of daily bars requested from ``get_price``.
    end_date : str
        End date of the requested price window.

    Returns
    -------
    numpy.ndarray
        Length-2 array ``[beta, alpha]``. Index 0 is the slope because the
        observation row is ``[x, 1]``. Both entries are NaN when no date has a
        close for both securities.
    """
    secs = [sec1, sec2]

    # NOTE(review): `get_price` is not defined or imported in this notebook;
    # presumably it is provided by the research platform's runtime.
    data = get_price(secs, count =  count, end_date= end_date, frequency='1d', fields='close',fq = "pre")['close']
    data.index.name = 'Date'

    # Missing closes must not be replaced by 0: the filter would treat a zero as
    # a real price and drag beta/alpha towards it. Carry the last known close
    # across gaps, then drop leading rows where either series has no value yet.
    data = data.ffill().dropna()
    if data.empty:
        return np.full(2, np.nan)

    # Work on plain float arrays; indexing a pandas Series with
    # `[:, np.newaxis]` is not supported by current pandas releases.
    x = data[sec1].values.astype(float)
    y = data[sec2].values.astype(float)

    # Time-varying observation matrix of shape (n, 1, 2): one row [x_t, 1] per
    # bar. The column of ones is built explicitly so the intercept term is
    # always present, even when the price column happens to be constant.
    obs_mat = np.column_stack([x, np.ones_like(x)])[:, np.newaxis]

    kf = KalmanFilter(n_dim_obs=1, n_dim_state=2,  # y is 1-dimensional, state is (beta, alpha)
                  initial_state_mean=np.ones(2),
                  initial_state_covariance=np.ones((2, 2)),
                  transition_matrices=np.eye(2),  # random-walk state: identity transition
                  observation_matrices=obs_mat,
                  observation_covariance=10**2,
                  transition_covariance=0.01**2 * np.eye(2))

    # Filter the sec2 closes (shape (n, 1)) to obtain the state estimate per bar.
    state_means, _ = kf.filter(y[:, np.newaxis])
    return state_means[-1]

In [21]:
# Settings passed to kalman_beta for every pair.
end_date = "2018-3-15"
ncount = 200

# One output row per pair, in the same order as pair_info.
result_total = []

for i_row in range(pair_info.shape[0]):
    row = pair_info.iloc[i_row]

    # Re-estimate the coefficients; the result is read as [beta, alpha].
    beta_kf = kalman_beta(row['p1'], row['p2'], ncount, end_date)
    beta, alpha = beta_kf[0], beta_kf[1]

    # Keep the stored statistics and replace only beta/alpha with the new estimates.
    result_list = [
        row['p1'],
        row['p2'],
        row['pearson'],
        row['adf'],
        beta,
        alpha,
        row['hurst'],
        row['half-life'],
        row['category'],
    ]
    result_total.append(result_list)

In [25]:
# Write the refreshed pair table to disk; the column labels follow the order
# in which each row of result_total was assembled.
pd.DataFrame(
    result_total,
    columns=['p1', 'p2', 'pearson', 'adf', 'beta', 'alpha', 'hurst', 'half-life', 'category']
).to_csv('pairs_beta_config_update.csv')