In [72]:
import pandas as pd
import numpy as np
import pickle
from pair_trading_test import CointTest
import os
import itertools
import warnings
warnings.filterwarnings('ignore')

In [6]:
# Root directory of the pickled data files read by get_data.
# Set the UBIQ_DATA_BASE environment variable to use a different copy of the
# data without editing the notebook; the original local path is the fallback.
data_base = os.environ.get('UBIQ_DATA_BASE') or '/Users/lihaohan/Desktop/ubiq-data-master/1724317843'

In [34]:
def get_data(code: str, direction='bid', ind='price', date='Day7', fields=None, base_dir=None):
    '''
    Load one pickled data file and optionally select part of it.

    The file is read from <base_dir>/<direction>/<ind>/<date>/<code>.pkl.

    Args:
    code: file stem (the notebook passes codes such as 'UBIQ000')
    direction: first sub-directory level, 'bid' by default
    ind: second sub-directory level, 'price' by default
    date: third sub-directory level, 'Day7' by default
    fields: if not None, the loaded object is indexed as data[fields]
    base_dir: root directory; when None the module-level data_base is used

    Return:
    The unpickled object, or data[fields] when fields is given.
    '''
    # Resolve the root lazily so the global is only needed when no override is passed.
    root = data_base if base_dir is None else base_dir
    path = os.path.join(root, direction, ind, date, code + '.pkl')
    # NOTE: pickle.load can execute arbitrary code; only read trusted files.
    with open(path, 'rb') as f:
        data = pickle.load(f)
    if fields is not None:
        data = data[fields]
    return data

def calc_date_mid_price(codes: list, date: str):
    '''
    Compute the mid price of every given stock for one day.

    For each code the 'bid1' field of the bid file and the 'ask1' field of
    the ask file are loaded through get_data and averaged.

    Args:
    codes: stock codes to load
    date: day directory name passed to get_data

    Return:
    DataFrame: one column per code, named '<code>mid_price', values - mid_price
    '''
    result = pd.DataFrame()
    for code in codes:
        best_bid = get_data(code=code, date=date, fields=['bid1']).squeeze()
        best_ask = get_data(code=code, direction='ask', date=date, fields=['ask1']).squeeze()
        # Columns are assigned one at a time, so every column after the first
        # is aligned to the index established by the first one.
        result[f'{code}mid_price'] = pd.Series((best_bid + best_ask) / 2)
    return result

In [37]:
# Stock codes UBIQ000 .. UBIQ049.
codes = [f'UBIQ{idx:03d}' for idx in range(50)]
# Mid prices for Day7, one column per code.
mid_prices = calc_date_mid_price(codes=codes, date='Day7')

In [102]:
from tqdm import tqdm

# Every unordered pair of mid-price columns is tested once.
cols_combinations = list(itertools.combinations(mid_prices.columns, 2))

# Square tables keyed by column name. Cell [a, b] holds the p-value that
# CointTest reported for series a while it was paired with series b.
# Pre-allocated so the loop does not enlarge the frames row by row.
adf_p_values = pd.DataFrame(index=mid_prices.columns, columns=mid_prices.columns)
adf_diff_p_values = pd.DataFrame(index=mid_prices.columns, columns=mid_prices.columns)

# Per-pair results are collected as plain records and turned into DataFrames
# once after the loop; concatenating inside the loop copies the whole frame
# on every iteration.
param_records = []
resid_stat_records = []

for col1, col2 in tqdm(cols_combinations):
    X1 = mid_prices[col1].to_numpy()
    X2 = mid_prices[col2].to_numpy()

    coint_test = CointTest(X1, X2)

    # NOTE(review): presumably ADF p-values of the two level series - confirm in pair_trading_test.
    p_x1, p_x2 = coint_test.adf_test()
    adf_p_values.loc[col1, col2] = p_x1
    adf_p_values.loc[col2, col1] = p_x2

    # NOTE(review): presumably ADF p-values of the differenced series - confirm in pair_trading_test.
    diff_p_x1, diff_p_x2 = coint_test.adf_test_diff()
    adf_diff_p_values.loc[col1, col2] = diff_p_x1
    adf_diff_p_values.loc[col2, col1] = diff_p_x2

    # fit() is called before resid_con_test(), as in the original cell order.
    b, beta, sample_e = coint_test.fit()
    param_records.append({'asset_x': col1, 'asset_y': col2, 'b': b, 'beta': beta, 'e': sample_e})

    ADF_stat, p_value, lags, critical_values = coint_test.resid_con_test()
    resid_stat_records.append({'asset_x': col1, 'asset_y': col2, 'ADF_stat': ADF_stat, 'p_value': p_value, 'lags': lags, 'critical_values': critical_values})

# Explicit column lists keep the expected schema even when there are no pairs.
params = pd.DataFrame(param_records, columns=['asset_x', 'asset_y', 'b', 'beta', 'e'])
resid_stats = pd.DataFrame(resid_stat_records, columns=['asset_x', 'asset_y', 'ADF_stat', 'p_value', 'lags', 'critical_values'])

100%|██████████| 1225/1225 [50:52<00:00,  2.49s/it] 


In [107]:
# Display the residual test results, one row per asset pair (pandas truncates the middle rows).
resid_stats

Unnamed: 0,asset_x,asset_y,ADF_stat,p_value,lags,critical_values
0,UBIQ000mid_price,UBIQ001mid_price,-1.247807,0.652700,28,"{'1%': -3.432644949319746, '5%': -2.8625539514..."
1,UBIQ000mid_price,UBIQ002mid_price,-2.286616,0.176321,25,"{'1%': -3.432642535640906, '5%': -2.8625528854..."
2,UBIQ000mid_price,UBIQ003mid_price,-3.046931,0.030738,26,"{'1%': -3.432643339636199, '5%': -2.8625532405..."
3,UBIQ000mid_price,UBIQ004mid_price,-1.290557,0.633481,27,"{'1%': -3.4326441441956144, '5%': -2.862553595..."
4,UBIQ000mid_price,UBIQ005mid_price,-4.218114,0.000614,22,"{'1%': -3.4326401270338365, '5%': -2.862551821..."
...,...,...,...,...,...,...
1220,UBIQ046mid_price,UBIQ048mid_price,-1.698578,0.431845,5,"{'1%': -3.4326265732934735, '5%': -2.862545835..."
1221,UBIQ046mid_price,UBIQ049mid_price,-1.179251,0.682457,11,"{'1%': -3.4326313386090552, '5%': -2.862547940..."
1222,UBIQ047mid_price,UBIQ048mid_price,-1.136515,0.700293,4,"{'1%': -3.4326257810090195, '5%': -2.862545485..."
1223,UBIQ047mid_price,UBIQ049mid_price,-3.117882,0.025258,4,"{'1%': -3.4326257810090195, '5%': -2.862545485..."


In [110]:
# Cut-off applied to the residual ADF statistic.
# NOTE(review): the value looks like a 10% ADF critical value copied from one
# `critical_values` entry - confirm its origin, and whether a single fixed
# cut-off is intended given that critical values differ slightly per pair.
ADF_STAT_THRESHOLD = -2.5673093777970064

# Keep the pairs whose residual ADF statistic is below the cut-off and count them.
resid_stats_ = resid_stats[resid_stats['ADF_stat'] < ADF_STAT_THRESHOLD]
len(resid_stats_)

413