### Imports

In [91]:
# DATA FORMATION
import pandas as pd
import numpy as np
from numpy import cumsum, log, polyfit, sqrt, std, subtract
import matplotlib.pyplot as plt


# ML ALGO
import sklearn.mixture as mix

# FETCHING DATA
import yfinance as yf
# from pandas_datareader.data import DataReader



# Efficiency Testing Libraries
from statsmodels.tsa.stattools import bds
from statsmodels.sandbox.stats.runs import runstest_1samp
from statsmodels.tsa.stattools import adfuller
import scipy.stats as sps

### Data Extraction

In [92]:
# Data Extraction
start_date = "2021-12-31"
end_date = "2023-02-21"
symbol = "SPY"
# symbol = "ETH-USD"

yf.pdr_override()
df = yf.download(symbol , start_date , end_date )

df["Returns"] = df["Close"] / df["Close"].shift(1) - 1
df.dropna(inplace=True)
df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-01-03,476.299988,477.850006,473.850006,477.709991,470.083679,72668200,0.005790
2022-01-04,479.220001,479.980011,475.579987,477.549988,469.926239,71178700,-0.000335
2022-01-05,477.160004,477.980011,468.279999,468.380005,460.902618,104538900,-0.019202
2022-01-06,467.890015,470.820007,465.429993,467.940002,460.469635,86858900,-0.000939
2022-01-07,467.950012,469.200012,464.649994,466.089996,458.649200,85111600,-0.003954
...,...,...,...,...,...,...,...
2023-02-13,408.720001,412.970001,408.239990,412.829987,412.829987,64913500,0.011739
2023-02-14,411.239990,415.049988,408.510010,412.640015,412.640015,88389300,-0.000460
2023-02-15,410.350006,414.059998,409.470001,413.980011,413.980011,61685300,0.003247
2023-02-16,408.790009,412.910004,408.140015,408.279999,408.279999,76431500,-0.013769


In [93]:
# Returns extraction
returns = df["Returns"].values.astype(float)

### Runs Test

Base standard test for randomness based on linearity  

- CHECKING RANDOMNESS OF STOCK

In [94]:
# Convert Returns into binary outcomes
# if return is positive return 1 : else return 0
returns_binary = [ 1 if x >= 0 else 0 for x in returns]

In [95]:
(z_stat, p_value) = runstest_1samp(returns_binary[:10], correction=False)
z_stat = round(z_stat, 3)
p_value = round(p_value, 3)
is_reject_runs = True if p_value < 0.05 else False


print(f"Z-Statistic: {z_stat}")
print(f"P-Value: {p_value}")
print(f"Reject Null: {is_reject_runs}")
print(f"Observable Runs Exceeds Excpected Runs by: {z_stat} Standard Deviations")

# OUTCOME
print('\n------------')
print("OUTCOME: Not Random") if is_reject_runs else print("OUTCOME: Random")
print('------------')

Z-Statistic: -0.562
P-Value: 0.574
Reject Null: False
Observable Runs Exceeds Excpected Runs by: -0.562 Standard Deviations

------------
OUTCOME: Random
------------


### BDS Test

Testing for chaos and nonlinearity. Considered as your last line of defence as takes into account non-linear dependancies after running other efficiency tests.

In [100]:
bds_test = bds(returns[-500:], distance=2)
bds_stat = float(bds_test[0])
pvalue = float(bds_test[1])
print("BDS Test Statistic: ", round(bds_stat, 3))
print("BDS P-Value: ", round(pvalue, 3))


# OUTCOME
print('\n------------')
print("OUTCOME: Not Random") if pvalue < 0.05 else print("OUTCOME: Random")
print('------------')

BDS Test Statistic:  -0.334
BDS P-Value:  0.738

------------
OUTCOME: Random
------------


### Hurst Exponent  (sample size greater than 1 year)

"Whether a market tends to trend, mean revert, or is just random is valuable information for a trader. While the Hurst exponent isn't an entry signal in and of itself, it can serve as a filter on top of a system. Given that market regimes can shift over time to favor one approach or the other, overlaying your model with a Hurst filter could help prevent your algorithm from buying a breakout in a mean reverting market or shorting ahead of a pullback when the market is moving to new highs." - *Find Your Best Market to Trade With the Hurst Exponent (referenced below)*

If Hurst = 0.5, then the market is random.

If Hurst > 0.5, then there is evidence of a trending market.

If Hurst < 0.5, then there is evidence of a mean reverting market.

In [97]:
def hurst(ts, min_lag=1, max_lag=100):
    lags = range(min_lag, max_lag)
    tau = [sqrt(std(subtract(ts[lag:], ts[:-lag]))) for lag in lags]
    poly = polyfit(log(lags), log(tau), 1)
    return poly[0]*2.0

In [98]:
prices = df["Close"].values
hurst_res = hurst(prices)
hurst_res

0.29174206144409603

### AD Fuller Test for Stationarity

In [99]:
dftest = adfuller(returns)
p_value = dftest[1]
t_test = dftest[0] < dftest[4]["1%"]
print("If < 0.05 and True then we can reject the null hypothesis and conclude that the index is stationary")
print("\nThis means that if the test is statistically significant, we can assume that the data is not random and has some structure that can be used to make predictions.\n")
print(p_value, t_test)

If < 0.05 and True then we can reject the null hypothesis and conclude that the index is stationary

This means that if the test is statistically significant, we can assume that the data is not random and has some structure that can be used to make predictions.

1.6704741112257375e-29 True


### Resources and Useful References

NEDL YouTube Channel - Hurst Exponent: https://www.youtube.com/watch?v=l08LICz8Ink

NEDL YouTube Channel - Dynamic Hurst Exponent: https://www.youtube.com/watch?v=v0sivj2wGcA

Hurst Exponent Coding: https://raposa.trade/blog/find-your-best-market-to-trade-with-the-hurst-exponent/

More Hurst Exponent Coding: https://www.quantstart.com/articles/Basics-of-Statistical-Mean-Reversion-Testing/