In [3]:
!pip install hurst

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting hurst
  Downloading hurst-0.0.5-py3-none-any.whl (5.9 kB)
Installing collected packages: hurst
Successfully installed hurst-0.0.5


# Half Life of time-seri

In [10]:
import numpy as np
import statsmodels.api as sm
from math import log


def half_life(z_array):
  z_lag = np.roll(z_array,1)
  z_lag[0] = 0
  z_ret = z_array - z_lag
  z_ret[0] = 0

  #adds intercept terms to X variable for regression
  z_lag2 = sm.add_constant(z_lag)

  model = sm.OLS(z_ret,z_lag2)
  res = model.fit()

  halflife = -log(2) / res.params[1]

  return halflife


In [25]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api
from hurst import compute_Hc, random_walk
import statsmodels.api


# data = yf.download(tickers_list , start = "2022-5-1" , end = "2023-5-1")['Close']
# global data


p_values_stocks = {}
hurst_exponent_stocks = {}
half_life_stocks = {}

def johansen_test_2(data , column1 , column2 ):

    result = statsmodels.tsa.vector_ar.vecm.coint_johansen(data[[column1 , column2 ]],
                                                          det_order = 0  ,
                                                          k_ar_diff = 3)


    # print(f'{column1} , {column2} , {column3}')
    # print(f'result.max_eig_stat : {result.max_eig_stat}')

    h1 = result.max_eig_stat[0]
    h2 = result.max_eig_stat[1]


    new_stationary_seri = h1 * data[column1] + h2 * data[column2]
    # new_stationary_seri

    # Evaluate Hurst equation
    hurst , c, data = compute_Hc(new_stationary_seri , kind='price', simplified=True)

    # print(f'-------------  p_value : {p_value}       -----------------')
    p_value = adf(new_stationary_seri)

    key = column1 + ' , ' + column2

    p_values_stocks[key] = p_value
    hurst_exponent_stocks[key] = hurst

    half = half_life(new_stationary_seri)
    half_life_stocks[key] = half

    if p_value < 0.1 :
      print('\lr{ ')
      print(key)
      print(' }')

      print()

      print('\lr{ ')
      print(f'-------------  pValue : {p_value}       -----')
      print(' }')

      print()

      print('\lr{ ')
      print(f'-------------  hurst : {hurst}           -----')
      print(' }')

      print()

      print('\lr{ ')
      print(f'-------------  halfLife : {half}        -----')
      print(' }')

      print()

    return p_value

In [26]:
tickers_list = ['AAPL', 'MSFT', 'AMZN' , 'NVDA' , 'GOOGL' , 'GOOG' ,
                'XOM' , 'META' , 'TSLA' , 'UNH'
]


data = yf.download(tickers_list , start = "2022-5-1" , end = "2023-5-1")['Close']

for col1 in tickers_list:
  for col2 in tickers_list :
    if col1 == col2 :
      continue

    johansen_test_2(data , col1 , col2 )


[*********************100%***********************]  10 of 10 completed
\lr{ 
GOOGL , XOM
 }

\lr{ 
-------------  pValue : 0.07282683300825328       -----
 }

\lr{ 
-------------  hurst : 0.4189310404622397           -----
 }

\lr{ 
-------------  halfLife : 24.341578646977933        -----
 }

\lr{ 
GOOG , XOM
 }

\lr{ 
-------------  pValue : 0.07223405746608141       -----
 }

\lr{ 
-------------  hurst : 0.42245969005834394           -----
 }

\lr{ 
-------------  halfLife : 24.32742179592016        -----
 }

\lr{ 
XOM , TSLA
 }

\lr{ 
-------------  pValue : 0.04978805395169708       -----
 }

\lr{ 
-------------  hurst : 0.3204844136821401           -----
 }

\lr{ 
-------------  halfLife : 23.330996977287143        -----
 }



In [5]:
import numpy as np
from statsmodels.regression.linear_model import OLS
from statsmodels.tsa.tsatools import lagmat, add_trend
from statsmodels.tsa.adfvalues import mackinnonp

def adf(ts, maxlag=1):
    """
    Augmented Dickey-Fuller unit root test
    """
    # make sure we are working with an array, convert if necessary
    ts = np.asarray(ts)

    # Get the dimension of the array
    nobs = ts.shape[0]

    # Calculate the discrete difference
    tsdiff = np.diff(ts)

    # Create a 2d array of lags, trim invalid observations on both sides
    tsdall = lagmat(tsdiff[:, None], maxlag, trim='both', original='in')
    # Get dimension of the array
    nobs = tsdall.shape[0]

    # replace 0 xdiff with level of x
    tsdall[:, 0] = ts[-nobs - 1:-1]
    tsdshort = tsdiff[-nobs:]


    # Calculate the linear regression using an ordinary least squares model
    results = OLS(tsdshort, add_trend(tsdall[:, :maxlag + 1], 'c')).fit()


    # print(results)
    adfstat = results.tvalues[0]

    # Get approx p-value from a precomputed table (from stattools)
    pvalue = mackinnonp(adfstat, 'c', N=1)
    # print(f'pvalue : {pvalue}')
    return pvalue

def cadf(x, y):
    """
    Returns the result of the Cointegrated Augmented Dickey-Fuller Test
    """
    # Calculate the linear regression between the two time series
    ols_result = OLS(x, y).fit()

    # Augmented Dickey-Fuller unit root test
    return adf(ols_result.resid)