In [None]:
import pandas as pd
import numpy as np
import tushare as ts
import seaborn
%matplotlib inline
from matplotlib import pyplot as plt
stocks_pair = ['600199', '600702']
START_DATE, END_DATE = '2013-06-01', '2014-12-31'

# Fetch the daily close of each leg and index it by trading date *before*
# combining. pd.concat(axis=1) aligns rows on the index, so concatenating the
# raw frames would pair rows by position rather than by date and silently
# mismatch prices whenever the two stocks do not share the same trading days
# (e.g. one of them is suspended).
closes = [
    ts.get_k_data(code, start=START_DATE, end=END_DATE)
    .set_index('date')['close']
    .rename(code)
    for code in stocks_pair
]

# Keep only the dates on which both stocks have a close; columns are named
# after the tickers in stocks_pair and the index is the 'date' column.
data = pd.concat(closes, axis=1, join='inner').sort_index()
data.plot(figsize=(8, 6))

# Pair trading based on the price ratio of the two stocks.
leg_a, leg_b = stocks_pair
data['pair_ratio'] = data[leg_a].div(data[leg_b])

# Sample mean and standard deviation of the ratio over the whole window.
mean, std = data['pair_ratio'].agg(['mean', 'std'])

# Constant reference columns: the mean and a one-standard-deviation band.
for column, level in (
    ('ratio_mean', mean),
    ('ratio_ceiling', mean + std),
    ('ratio_floor', mean - std),
):
    data[column] = level

# Pair trading based on the cointegration relationship of the price spread.
leg_x, leg_y = stocks_pair

# Correlation matrix of the two price series. It is printed explicitly because
# a bare expression in the middle of a cell is evaluated and then discarded.
print(data[[leg_x, leg_y]].corr())

# Scatter plot of the two stocks, drawn on its own figure.
fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(data[leg_x], data[leg_y], '.')
ax.set_title('Stock Correlation')
ax.set_xlabel(leg_x)
ax.set_ylabel(leg_y)

# Rows with a missing value would make the least-squares fit below fail.
data.dropna(inplace=True)

# Regress leg_y on leg_x and extract slope and intercept. The coefficients are
# kept at full precision: rounding them would make `spread` differ from the
# actual regression residual.
slope, intercept = np.polyfit(data[leg_x], data[leg_y], 1)

# Residual (epsilon) series of the regression.
data['spread'] = data[leg_y] - (data[leg_x] * slope + intercept)

# Series.plot() without `ax` draws on the current axes, which would overlay
# this line on the scatter plot above; give every plot its own figure.
fig, ax = plt.subplots(figsize=(10, 8))
data['spread'].plot(ax=ax, title='Price Spread')

# Standardise the residual.
data['zscore'] = (data['spread'] - data['spread'].mean()) / data['spread'].std()

fig, ax = plt.subplots(figsize=(10, 8))
data['zscore'].plot(ax=ax, title='Z-score')
for level in (1.5, 0, -1.5):
    ax.axhline(level)

# Observations whose z-score lies below the lower band: a preview and a count.
below_band = data[data['zscore'] < -1.5]
print(below_band.head())
print(len(below_band))
# Z-score thresholds: a position is opened beyond +/-ENTRY_Z and closed once
# the z-score is back inside +/-EXIT_Z.
ENTRY_Z = 1.5
EXIT_Z = 0.5

zscore = data['zscore']

# Raw signal for the first stock: +1 above the upper band, -1 below the lower
# band, 0 inside the exit band, NaN (no new information) everywhere else.
data['position_1'] = np.where(
    zscore.abs() < EXIT_Z, 0,
    np.where(zscore < -ENTRY_Z, -1,
             np.where(zscore > ENTRY_Z, 1, np.nan)))

# Carry the last signal forward until the next one. Series.ffill() replaces
# fillna(method='ffill'), whose `method` argument is deprecated in recent
# pandas. Rows before the first signal stay NaN.
data['position_1'] = data['position_1'].ffill()

# Series.plot() without `ax` draws on the current axes, so each signal gets
# its own figure instead of being overlaid on whatever was plotted last.
fig, ax = plt.subplots(figsize=(10, 6))
data['position_1'].plot(ax=ax, ylim=[-1.1, 1.1], title='Trading Signal_Uptrade')

# The second stock always takes the opposite side of the first.
data['position_2'] = -np.sign(data['position_1'])

fig, ax = plt.subplots(figsize=(10, 6))
data['position_2'].plot(ax=ax, ylim=[-1.1, 1.1], title='Trading Signal_Downtrade')

# Daily log return of each stock.
for ticker, column in (('600199', 'returns_1'), ('600702', 'returns_2')):
    price = data[ticker]
    data[column] = np.log(price / price.shift(1))

# The position decided on one day earns the return of the following day, hence
# the one-row shift. Assumes equal-weight long and short legs; in practice the
# legs should be sized by the hedge ratio.
held_1 = data['position_1'].shift(1)
held_2 = data['position_2'].shift(1)
data['strategy'] = 0.5 * (held_1 * data['returns_1']) + 0.5 * (held_2 * data['returns_2'])

# Cumulative gross return of both stocks and of the strategy, restricted to
# the rows where all three series are available.
backtest = data[['returns_1', 'returns_2', 'strategy']].dropna()
cumulative = backtest.cumsum().apply(np.exp)
cumulative.plot(figsize=(10, 6), title='Strategy_Backtesting')