In [1]:
from QuantConnect import *
from QuantConnect.Data.Market import TradeBar, QuoteBar
from QuantConnect.Research import *
from QuantConnect.Indicators import *
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from scipy.stats import shapiro, pearsonr, linregress
import scipy.stats as ss
import numpy as np
from pykalman import KalmanFilter
import statsmodels.tsa.stattools as sm
import pandas as pd
import math
# --- Research environment and the two securities under study ---
qb = QuantBook()
s1, s2 = qb.AddEquity("msft"), qb.AddEquity("amzn")

# --- Sample window for the history requests below ---
start_date, end_date = datetime(2016, 1, 1), datetime(2018, 1, 1)

# --- Parameters ---
# HEDGE_LOOKBACK: number of trailing observations zscore() standardises over.
HEDGE_LOOKBACK = 15
# dist: handed to get_spreads() as its `length` argument (which that function ignores).
dist = 253
# entry / zstop / exit are not referenced by any cell visible in this notebook.
# NOTE(review): `exit` shadows the Python/IPython builtin of the same name.
entry, zstop, exit = 2.0, 4.5, 0.5

# Significance cutoff applied to the Pearson p-value in correlation().
PVALUE = 0.05

# Daily closing prices for each security, as plain Python lists.
series1, series2 = (
    qb.History(security.Symbol, start_date, end_date, Resolution.Daily).close.values.tolist()
    for security in (s1, s2)
)

In [2]:
def correlation(series1, series2):
    """Return the Pearson correlation of two series if it is significant.

    Parameters
    ----------
    series1, series2 : sequences of numbers of equal length.

    Returns
    -------
    The Pearson r when its p-value is at most the module-level ``PVALUE``;
    NaN otherwise (including when the p-value itself is NaN).
    """
    r, p = pearsonr(series1, series2)
    # Both outcomes are handled here; the original had an unreachable
    # trailing `return r` after the if/else.
    return r if p <= PVALUE else float('NaN')

def cointegration(series1, series2):
    """Return element 1 of statsmodels' ``coint`` result for the two series.

    The test is run with BIC lag selection and the 'ct' trend option.
    Per the statsmodels documentation, element 1 is the test's p-value.
    """
    outcome = sm.coint(series1, series2, autolag='BIC', trend='ct')
    return outcome[1]

def adfuller(series):
    """Return element 1 of statsmodels' ``adfuller`` result for ``series``.

    Lag length is chosen by BIC. Per the statsmodels documentation,
    element 1 is the p-value of the Augmented Dickey-Fuller test.
    """
    outcome = sm.adfuller(series, autolag='BIC')
    return outcome[1]

def hurst(series):
    """Estimate the Hurst exponent of ``series`` by rescaled-range analysis.

    The series is cut into non-overlapping windows of several sizes
    (log-spaced from 10 up to just under the series length, plus the full
    length). For every window the rescaled range R/S of its first
    differences is computed, the R/S values are averaged per window size,
    and the exponent is the slope of log10(mean R/S) against
    log10(window size).

    Parameters
    ----------
    series : array-like of numbers (list, tuple or ndarray), length >= 2.

    Returns
    -------
    The fitted slope H, or NaN when no window produced a usable R/S value
    (for example, a constant series).

    Raises
    ------
    ValueError
        From ``math.log10`` when the series has fewer than two elements.
    """
    # Lists/tuples do not support element-wise subtraction, so normalise the
    # input to a float ndarray up front.
    series = np.asarray(series, dtype=float)
    n_obs = len(series)

    max_window = n_obs - 1
    min_window = 10
    window_sizes = [
        int(10 ** exponent)
        for exponent in np.arange(math.log10(min_window), math.log10(max_window), 0.25)
    ]
    window_sizes.append(n_obs)

    fitted_sizes = []
    RS = []
    for w in window_sizes:
        rs = []
        # Only full windows are used; a trailing partial window is dropped.
        for start in range(0, n_obs - w + 1, w):
            chunk = series[start:start + w]
            incs = np.diff(chunk)

            mean_inc = (chunk[-1] - chunk[0]) / len(incs)
            deviations = incs - mean_inc
            Z = np.cumsum(deviations)
            R = Z.max() - Z.min()
            S = np.std(incs, ddof=1)

            # Degenerate chunks (no range or no dispersion) carry no information.
            if R != 0 and S != 0:
                rs.append(R / S)

        # A window size with no usable chunk would contribute mean([]) == NaN
        # and poison the regression below, so leave it out of the fit.
        if rs:
            fitted_sizes.append(w)
            RS.append(np.mean(rs))

    if not RS:
        return float('NaN')

    A = np.vstack([np.log10(fitted_sizes), np.ones(len(RS))]).T
    H, c = np.linalg.lstsq(A, np.log10(RS), rcond=-1)[0]
    return H

def halflife(series): 
    """Estimate the mean-reversion half-life of ``series``.

    Regresses the one-step change ``series[t] - series[t-1]`` on the lagged
    level ``series[t-1]`` and returns ``-ln(2) / slope``.

    Parameters
    ----------
    series : array-like of numbers with at least two observations.

    Returns
    -------
    The half-life in units of observations. It is negative when the fitted
    slope is positive (no mean reversion).
    """
    values = np.asarray(series, dtype=float)
    # Pair each observation with its true predecessor. np.roll would wrap the
    # last value round to the front and add a bogus (lag=last, change=first-last)
    # point to the regression.
    lagged = values[:-1]
    change = np.diff(values)
    slope = linregress(lagged, change).slope
    return -np.log(2) / slope

def shapirowilke(series):
    """Return the p-value (element 1) of scipy's Shapiro-Wilk test on ``series``."""
    return shapiro(series)[1]

def adfprices(series1, series2):
    """Return the smaller of the two series' ADF results (element 1, BIC lags).

    Each series is passed to statsmodels' ``adfuller`` on its own; per the
    statsmodels documentation element 1 of the result is the p-value.
    """
    return min(
        sm.adfuller(prices, autolag='BIC')[1]
        for prices in (series1, series2)
    )

def zscore(series):
    """Return the absolute z-score of the latest observation.

    Only the trailing ``HEDGE_LOOKBACK`` observations are standardised
    (NaNs omitted); the magnitude of the last standardised value is returned.
    """
    window = series[-HEDGE_LOOKBACK:]
    standardised = ss.zscore(window, nan_policy='omit')
    return abs(standardised[-1])

def alpha(series1, series2):
    """Return the hedge ratio of series1 against series2 if both legs are material.

    The hedge ratio is the slope from regressing ``series1`` on ``series2``.
    A position of +1 share of series1 and -slope shares of series2 is valued
    at the latest prices; the slope is returned only when the smaller of the
    two absolute portfolio weights exceeds ``MIN_WEIGHT``, otherwise NaN.

    NOTE(review): ``MIN_WEIGHT`` is not defined in any cell visible in this
    notebook — confirm it is defined before this function is called.
    """
    hedge_ratio, _ = linreg(series2, series1)

    # Exposure of each leg at the most recent prices.
    y_exposure = 1 * series1[-1]
    x_exposure = -hedge_ratio * series2[-1]
    gross = abs(y_exposure) + abs(x_exposure)

    y_weight = y_exposure / gross
    x_weight = x_exposure / gross

    smallest_leg = min(abs(x_weight), abs(y_weight))
    return hedge_ratio if smallest_leg > MIN_WEIGHT else float('NaN')

def run_kalman(series):
    """Run ``series`` through a pykalman ``KalmanFilter`` and return the result flattened.

    The filter is configured with transition and observation matrices of
    ``[1]``, the first observation as the initial state mean, and fixed
    observation/transition covariances. Element 0 of ``filter()``'s result
    is flattened and returned.
    """
    settings = dict(
        transition_matrices=[1],
        observation_matrices=[1],
        initial_state_mean=series[0],
        observation_covariance=0.001,
        transition_covariance=0.0001,
    )
    smoother = KalmanFilter(**settings)
    filter_output = smoother.filter(series)
    return filter_output[0].flatten()

def get_spreads(series1, series2, length):
    """Return the element-wise ratio ``series1 / series2`` as an ndarray.

    ``length`` is accepted but not used.
    """
    numerator = np.asarray(series1)
    denominator = np.asarray(series2)
    return np.divide(numerator, denominator)

def linreg(series1, series2):
    """Fit ``series2 = slope * series1 + intercept`` and return ``(slope, intercept)``."""
    fit = linregress(series1, series2)
    return fit.slope, fit.intercept

In [3]:
# Spread for this cell: the element-wise ratio series1 / series2, z-scored
# over the whole sample. `dist` is passed through to get_spreads, which
# accepts it but does not use it.
spreads = ss.zscore(get_spreads(series1,series2,dist), nan_policy='omit')

# Diagnostics computed on the two close-price series themselves.
# correlation() prints NaN when the Pearson p-value is above PVALUE.
print('correlation', correlation(series1,series2))
print('cointegration', cointegration(series1,series2))
print('adfprices', adfprices(series1,series2))

# Diagnostics computed on the z-scored ratio spread.
print('adfuller', adfuller(spreads))
print('hurst', hurst(spreads))
print('halflife', halflife(spreads))
print('shapirowilke', shapirowilke(spreads))

In [4]:
# Same diagnostics as the previous cell with the two series swapped:
# the spread is now series2 / series1 (z-scored), and `spreads` is rebound.
# `dist` is passed through to get_spreads, which accepts it but does not use it.
spreads = ss.zscore(get_spreads(series2,series1,dist), nan_policy='omit')

# Diagnostics computed on the two close-price series, arguments reversed.
print('correlation', correlation(series2,series1))
print('cointegration', cointegration(series2,series1))
print('adfprices', adfprices(series2,series1))

# Diagnostics computed on the z-scored series2 / series1 spread.
print('adfuller', adfuller(spreads))
print('hurst', hurst(spreads))
print('halflife', halflife(spreads))
print('shapirowilke', shapirowilke(spreads))