In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

data_dir = 'data'
exchange = 'test'

# Load the 1-minute frame and drop everything before the first valid EOS/USDT
# close, so the analysis starts where that column has data.
df_1m_raw = pd.read_hdf(os.path.join(data_dir, exchange, 'all-1min.h5'))
first_idx = df_1m_raw['close_test_EOS_USDT'].first_valid_index()

# Keep only close/volume columns and put them on a regular 1-minute grid.
# `.ffill()` replaces `Resampler.fillna(method='ffill')`, which is deprecated in
# pandas 2.x; both forward-fill the rows introduced by the upsampling.
# Built as one chain from `df_1m_raw` so re-running the cell is idempotent.
df_1m = (
    df_1m_raw.loc[first_idx:]
    .filter(regex=r'(close|volume).*')
    .resample("1Min")
    .ffill()
)

# Overview plot: one subplot per column, sampled every 15 minutes.
df_1m.resample("15Min").first().plot(subplots=True, figsize=(10, 10))

In [None]:
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import pickle

def orthogonal_projection(a, b):
    """Project ``a`` onto the line spanned by the vector ``b``.

    Parameters
    ----------
    a : array-like, pandas.Series or pandas.DataFrame
        A single vector (1-D) or a stack of row vectors (2-D). Its last axis
        must have the same length as ``b``.
    b : array-like
        1-D direction vector to project onto.

    Returns
    -------
    numpy.ndarray, pandas.Series or pandas.DataFrame
        The projection, with the same shape as ``a``. A DataFrame input (2-D)
        or Series input (1-D) is returned as the same pandas type carrying
        ``a``'s index (and columns); anything else is returned as an ndarray.

    Raises
    ------
    AssertionError
        If ``a`` is neither 1-D nor 2-D.
    """
    b = np.array(b)
    dims = len(np.shape(a))
    assert dims == 1 or dims == 2, "a must be 1-D or 2-D"
    # Compute on the raw ndarray. For a DataFrame, `a @ b` would be a Series,
    # and indexing a Series with `[:, np.newaxis]` is no longer supported by
    # pandas 2.x; the pandas wrapper is re-applied below instead.
    values = np.asarray(a)
    s = values @ b / np.sum(b * b)
    if dims == 2:
        # One scalar coefficient per row, broadcast against the direction.
        result = b * s[:, np.newaxis]
        if isinstance(a, pd.DataFrame):
            return pd.DataFrame(result, index=a.index, columns=a.columns)
        return result
    result = s * b
    if isinstance(a, pd.Series):
        return pd.Series(result, index=a.index)
    return result

def hyperplane_projection(a, b):
    """Return ``a`` with its component along ``b`` removed.

    The result is ``a`` minus its orthogonal projection onto ``b``, i.e. the
    part of ``a`` lying in the hyperplane whose normal is ``b``.
    """
    along_b = orthogonal_projection(a, b)
    return a - along_b

# Closing prices only, with each column name cut down to its last 8 characters
# (e.g. 'close_test_EOS_USDT' -> 'EOS_USDT').
m1 = df_1m.filter(regex=r'close.*')
m1 = m1.rename(columns=lambda x: x[-8:])
# m1 = m1['20190115':'20190222']
m1_mean = m1.mean()
# Relative deviation of every price from its own mean.
m1_norm = m1 / m1_mean - 1

c = coint_johansen(m1_norm, det_order=-1, k_ar_diff=1)

# Select eigenvectors whose `lr1` statistic exceeds column 1 of `cvt` while the
# `lr2` statistic stays below column 2 of `cvm`.
# NOTE(review): presumably lr1/lr2 are the trace / max-eigenvalue statistics and
# the critical-value columns are 90/95/99% -- confirm against statsmodels, and
# confirm that `lr2 < cvm` (rather than `>`) is the intended direction.
significant_results = (c.lr1 > c.cvt[:,1]) * (c.lr2 < c.cvm[:,2])
print(significant_results)
A = pd.DataFrame(c.evec[:, significant_results].T, columns=m1.columns)
residuals = [orthogonal_projection(m1_norm, vec) for vec in A.values]

# Mean prices as explicit row/column ndarrays, used to rescale covariances of
# the normalised series back to price units. Multi-dimensional indexing directly
# on the Series (`m1_mean[None, :]`) is not supported by pandas 2.x.
mean_values = np.asarray(m1_mean)
mean_row = mean_values[None, :]
mean_col = mean_values[:, None]

# Add absolute price variance to residual covariance to reduce overconfidence. Would be nice to give this a more
# rigorous treatment
covs = [rs.cov() * mean_row * mean_col + np.diag(m1.var()/10) for rs in residuals]
print(covs)
x = A.sum()
cov_x = orthogonal_projection(m1_norm, x).cov() * mean_row * mean_col

# Persist the fitted model; the context manager makes sure the file is flushed
# and closed even if pickling fails part-way.
with open('coint.p', 'wb') as coint_file:
    pickle.dump({
        'cointegrated_vectors': A,
        'mean_prices': m1_mean,
        'most_recent_data': m1.index[-1],
        'residual_covariances': covs,
        'combined_vector': x,
        'combined_covariance': cov_x
    }, coint_file)

# Normalised prices combined with the summed vector, then with every eigenvector.
C = m1_norm @ x
C.plot()
C2 = m1_norm @ c.evec
C2.plot()

# C.plot(figsize=(16,10))
# C.plot(subplots=True, figsize=(16,16))
# print(x.lr1)
# print(x.cvt)
# print(x.evec)
# print(x.lr2)
# print(x.cvm)
# print(np.cov(x.r0t, rowvar=False))
# print(np.cov(x.rkt, rowvar=False))
# print(np.linalg.norm(x.r0t, axis=1))
# print(x.rkt.sum(axis=0))

In [None]:
# Data from 2019-02-07 onwards, each column centred on its own mean.
m2 = m1.loc['20190207':]
m2_norm = m2.sub(m2.mean())

# Johansen test on the BTC/ETH pair only, using 100 lagged differences.
m22 = m2_norm.filter(regex=r'BTC_USDT|ETH_USDT')
c = coint_johansen(m22, det_order=-1, k_ar_diff=100)

# Centred pair combined with each eigenvector, next to the centred pair itself.
C = m22.dot(c.evec)
C.plot()
m22.plot()

# Eigenvectors (columns) rescaled so that the first component of each is 1.
print(c.evec / c.evec[0])

significant_results = np.logical_and(c.lr1 > c.cvt[:, 1], c.lr2 < c.cvm[:, 2])
print(significant_results)
# C.plot(figsize=(16,10))
# C.plot(subplots=True, figsize=(16,16))
# print(x.lr1)
# print(x.cvt)
# print(x.lr2)
# print(x.cvm)
# print(x.r0t.sum(axis=0))
# print(x.rkt.sum(axis=0))

def scatter_plot(data, x, y):
    """Scatter column ``y`` of ``data`` against column ``x``.

    Each point is coloured with the viridis colormap according to where its
    index value lies between the smallest and largest index values.
    """
    index = data.index
    start, stop = index.min(), index.max()
    # Index values rescaled to the [0, 1] range expected by the colormap.
    fractions = (index - start) / (stop - start)
    colors = list(map(plt.cm.viridis, fractions))
    data.plot.scatter(x=x, y=y, edgecolor='none', color=colors)
    
# Raw BTC/ETH scatter for the window.
scatter_plot(m2, 'BTC_USDT', 'ETH_USDT')

# Same pair with the component along the second eigenvector removed, shifted
# back by the pair's mean prices so both scatters share the same scale.
pair_means = m2.mean().filter(regex=r'BTC_USDT|ETH_USDT')
m2_ = (hyperplane_projection(m22, c.evec[:, 1]) + 0) + pair_means
scatter_plot(m2_, 'BTC_USDT', 'ETH_USDT')

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import sys
sys.path.append('..')
from trader.util.linalg import orthogonal_projection

# Compare two ways of extracting a dominant direction from the price series:
# a 2-component PCA on standardised prices, and a Johansen eigenvector on
# mean-centred prices. Each is fitted on one date window and then applied to a
# second window.
# NOTE: this cell re-binds m1_norm, m2_norm, x and c, which other cells also assign.
pca = PCA(n_components=2)
scaler = StandardScaler()

# Fitting window and follow-up window.
# NOTE(review): both slices use '20190201' as a bound, so the two windows may
# share rows for that date -- confirm whether the overlap is intended.
m12019 = m1['20180101':'20190201'] #.filter(regex=r'BTC_USDT|ETH_USDT')
m12019_2 = m1['20190201':'20190207']
# Index values rescaled to [0, 1] and mapped through viridis, so that points in
# the scatter below are coloured by their position within the window.
ix = m12019.index
ix = (ix - ix.min()) / (ix.max() - ix.min())
colors = [plt.cm.viridis(i) for i in ix]
# Standardise the fitting window (this also fits the scaler) and keep the
# original index and column labels.
m1_norm = pd.DataFrame(scaler.fit_transform(m12019), index=m12019.index, columns=m12019.columns)
# m1_norm.plot.scatter(x=0,y=1,edgecolor='none',c=colors)
pca.fit(m1_norm)
# Coordinates of every row along the two fitted components.
m1_red = m1_norm @ pca.components_.T
m1_red.plot.scatter(x=0,y=1,edgecolor='none',c=colors)
# Component vectors divided by the scaler's per-column scale.
pcs = pd.DataFrame(pca.components_ / scaler.scale_, columns=m1.columns)

# Projection of the standardised data onto the first component, multiplied by
# the scaler's scale again (the scaler's mean is not added back).
x = orthogonal_projection(m1_norm, pca.components_[0]) * scaler.scale_
(x).plot()

# Same projection for the follow-up window, reusing the scaler and component
# fitted on the first window.
m2_norm = pd.DataFrame(scaler.transform(m12019_2), index=m12019_2.index, columns=m12019_2.columns)
x2 = orthogonal_projection(m2_norm, pca.components_[0]) * scaler.scale_
(x2).plot()

# print(pcs)
# C = (m12019 - scaler.mean_) @ pcs.values.T
# C.plot()
# print(pca.components_, pca.explained_variance_ratio_)

# Johansen test on the mean-centred (not rescaled) fitting window with 100
# lagged differences; the selected eigenvectors become the rows of X.
c = coint_johansen(m12019 - scaler.mean_, det_order=-1, k_ar_diff=100)
significant_results = (c.lr1 > c.cvt[:,1]) * (c.lr2 < c.cvm[:,2])
X = c.evec[:,significant_results].T
# X[0] assumes at least one eigenvector was selected; with an all-False mask
# this indexing fails.
y = orthogonal_projection(m12019 - scaler.mean_, X[0])
y.plot()
# Follow-up window centred with the means of the fitting window.
y2 = orthogonal_projection(m12019_2 - scaler.mean_, X[0])
y2.plot()
# C = (m12019 - scaler.mean_) @ c.evec[:,significant_results]
# C.plot()

In [None]:
from statsmodels.tsa.stattools import coint
# 15-minute samples (first value of each bin), limited to 2019-02-01..2019-02-08.
m1_15min = m1.resample("15Min").first()
m15 = m1_15min.loc['20190201':'20190208']

# Cointegration test of BTC against ETH with trend='ct'.
btc_15min, eth_15min = m15['BTC_USDT'], m15['ETH_USDT']
coint_result = coint(btc_15min, eth_15min, trend='ct')
print(coint_result)
scatter_plot(m15, 'BTC_USDT', 'ETH_USDT')
