In [4]:
from itertools import combinations
import itertools
from statsmodels.tsa.stattools import adfuller, coint
import pandas as pd
import random
import matplotlib.pyplot as plt

from statsmodels.tsa.vector_ar.vecm import coint_johansen
import numpy as np
import yfinance as yf


Downloadin Data


In [5]:
cryptos = ['BTC-USD', 'ETH-USD', 'USDT-USD', 'BNB-USD', 'SOL-USD', 'XRP-USD', 'USDC-USD', 'STETH-USD', 'ADA-USD',
           'AVAX-USD', 'DOGE-USD', 'TRX-USD', 'WTRX-USD', 'DOT-USD', 'MATIC-USD', 'LINK-USD', 'TON11419-USD',
           'WBTC-USD', 'ICP-USD', 'SHIB-USD', 'DAI-USD', 'LTC-USD', 'BCH-USD', 'WEOS-USD', 'LEO-USD']

def fetch_data(symbol, start_date, end_date):
    df = yf.download(symbol, start=start_date, end=end_date, interval='1h')
    return df['Close']

Resample 4h from 1h interval

In [6]:
start_date = '2022-11-01'
end_date = '2023-11-01'

market_data = pd.DataFrame()
for crypto in cryptos:
    market_data[crypto] = fetch_data(crypto, start_date, end_date)

market_data_4h = market_data.resample('4H').last().dropna()
print(market_data_4h)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******


    is_cointegrated_cadf(df, significance=0.05):
        This function checks if any pair of time series in the given DataFrame df is cointegrated using the Augmented Dickey-Fuller (ADF) test.
        It iterates over all pairs of columns in df, excluding pairs with the same column, and performs the cointegration test.
        If the p-value of the cointegration test is less than the specified significance level, it returns True, indicating cointegration. Otherwise, it returns False.

    half_life(ts):
        This function calculates the half-life of a time series ts using a simple regression-based approach.
        It first creates a lagged version of the time series (lagged_ts) and computes the differences between the original and lagged series (delta_ts).
        Then, it performs a linear regression with lagged values as predictors and the differences as the response variable.
        The negative reciprocal of the coefficient of the lagged variable is returned as the half-life.

    test_stationarity(timeseries):
        This function tests the stationarity of a given time series using the Augmented Dickey-Fuller (ADF) test.
        It returns the ADF statistic and p-value.

    hurst(ts):
        This function calculates the Hurst exponent of a time series ts.
        It uses the R/S analysis (Range/Standard Deviation) method to estimate the Hurst exponent.

    compute_RS(series, lag):
        This helper function computes the Rescaled Range (R/S) statistic for a given time series series and lag lag.
        It calculates the range and standard deviation over a moving window of size lag and returns the mean of R/S.

    hurst_RS(series, max_lag=20):
        This function estimates the Hurst exponent using the R/S analysis method with different lags up to max_lag.
        It calculates the R/S statistic for each lag and performs a linear regression on the log-log plot of lags versus R/S.
        The slope of the regression line is returned as the estimated Hurst exponent.

In [7]:
def is_cointegrated_cadf(df, significance=0.05):
    for coin1 in df.columns:
        for coin2 in df.columns:
            if coin1 != coin2:
                _, p_value, _ = coint(df[coin1], df[coin2])
                if p_value < significance:
                    return True
    return False

def half_life(ts):
    lagged_ts = np.roll(ts, 1)
    lagged_ts[0] = 0
    delta_ts = ts - lagged_ts
    model = np.array([lagged_ts[1:], np.ones_like(lagged_ts[1:])])
    beta = np.linalg.lstsq(model.T, delta_ts[1:], rcond=None)[0]
    return -np.log(2) / beta[0]

def test_stationarity(timeseries):
    result = adfuller(timeseries, autolag='AIC')
    return result[0], result[1]

def hurst(ts):
    lags = range(2, 100)
    epsilon = 1e-5
    variancelog = [np.log(np.var(ts[lag:]-ts[:-lag]) + epsilon) for lag in lags]
    poly = np.polyfit(np.log(lags), variancelog, 1)
    return poly[0] / 2.0

def compute_RS(series, lag):
    N = len(series)
    x = np.asarray(series)
    y = np.zeros(N-lag+1)
    for t in range(lag, N+1):
        R = np.max(x[:t]) - np.min(x[:t])
        S = np.std(x[:t])
        y[t-lag] = R / S
    return y.mean()

def hurst_RS(series, max_lag=20):
    lags = range(2, max_lag)
    RS = [compute_RS(series, lag) for lag in lags]
    poly = np.polyfit(np.log(lags), np.log(RS), 1)
    return poly[0]

Parameters and Containers:

    min_coins = 3 and max_coins = 4: Define the minimum and maximum number of cryptocurrencies in a combination.
    target_combinations = 10: Set the target number of stationary combinations to find.
    found_combinations = 0: Initialize a counter for the number of found stationary combinations.
    new_series_info = []: Create an empty list to store information about the new time series and combined series.

Iteration Over Number of Coins in a Combination:

    The outer loop iterates over different numbers of cryptocurrencies in a combination (num_coins) from min_coins to max_coins.
    Inside this loop, there is an inner loop that iterates over all combinations of cryptocurrencies (crypto_combination) for the given number of coins.

Cointegration Test:

    The code uses the coint_johansen function to perform a Johansen cointegration test on the logarithmic returns of the cryptocurrencies in the current combination.
    It checks whether the eigenvalue statistic is greater than the critical value at a 5% significance level, and it uses the is_cointegrated_cadf function to further check for cointegration.

ADF Test for Stationarity:

    If the combination is found to be cointegrated, the code calculates the weights for the linear combination using the eigenvector from the Johansen test.
    It then calculates the combined time series (combined_series) by multiplying the market data of the cryptocurrencies by their respective weights.
    An Augmented Dickey-Fuller (ADF) test is performed on the combined series to check for stationarity.

Display Results and Store Information:

    If the combined series is stationary, the code increments the counter (found_combinations), calculates the half-life and Hurst exponent, and stores information about the combination in the new_series_info list.

Display Found Stationary Combinations:

    After the iterations, the code prints information about the found stationary combinations, including the combination, weights, ADF statistic, p-value, Hurst exponent, and half-life.

In [10]:
min_coins = 3
max_coins = 4

target_combinations = 10
found_combinations = 0

new_series_info = []

for num_coins in range(min_coins, max_coins + 1):
    print(f'\nTesting combinations of {num_coins} cryptocurrencies:')
    
    for crypto_combination in combinations(cryptos, num_coins):
        result = coint_johansen(np.log(market_data_4h[list(crypto_combination)]).diff().dropna(), det_order=1, k_ar_diff=1)
        
        eigen_statistic = result.lr2
        crit_vals = result.cvt

        if eigen_statistic[0] > crit_vals[1, 1] and is_cointegrated_cadf(market_data_4h[list(crypto_combination)]):
            
            weights = result.evec[:, 0] / np.sum(result.evec[:, 0])
            
            combined_series = np.dot(market_data_4h[list(crypto_combination)].values, weights)
            
            adf_result = adfuller(combined_series)
            # print(f'Results for ADF test on the entire combination:')
            # print(f'ADF Statistic: {adf_result[0]}')
            # print(f'P-Value: {adf_result[1]}')
            
            if adf_result[1] < 0.05:
                print(f'The entire combination is stationary.')
                
                found_combinations += 1
                
                hl = half_life(combined_series)
                hurst = hurst_RS(combined_series)
                
                new_series_info.append({
                    'combination': crypto_combination,
                    'weights': weights,
                    'combined_series': combined_series,
                    'adf_stat': adf_result[0],
                    'p_value': adf_result[1],
                    'half_life': hl,
                    'hurst': hurst,
                })
                
                if found_combinations == target_combinations:
                    break  

    if found_combinations == target_combinations:
        break  

for i, series_info in enumerate(new_series_info):
    combination_str = ' + '.join([f'{coeff:.2f}*{coin}' for coeff, coin in zip(series_info['weights'], series_info['combination'])])
    print(f"Found stationary series: {combination_str}")
    print(f"ADF Statistic: {series_info['adf_stat']}")
    print(f"P-value: {series_info['p_value']}")
    print(f"Hurst Exponent: {series_info['hurst']}")
    print(f"Half-life: {series_info['half_life']}")
    
    # # Plot the combined series
    # plt.figure(figsize=(10, 6))
    # plt.plot(series_info['combined_series'], label=f"Combined Series {i + 1}")
    # plt.title(f"Stationary Combined Series: {combination_str}")
    # plt.xlabel("Time")
    # plt.ylabel("Value")
    # plt.legend()
    # plt.show()



Testing combinations of 3 cryptocurrencies:
The entire combination is stationary.
The entire combination is stationary.
The entire combination is stationary.
The entire combination is stationary.
The entire combination is stationary.
The entire combination is stationary.
The entire combination is stationary.
The entire combination is stationary.
The entire combination is stationary.
The entire combination is stationary.
Found stationary series: 15.80*BTC-USD + 3.88*ETH-USD + -18.68*WBTC-USD
ADF Statistic: -3.1743220307480575
P-value: 0.02151996939725924
Hurst Exponent: 0.004921051167802987
Half-life: 1.7852801154292661
Found stationary series: 0.22*BTC-USD + 1.00*USDT-USD + -0.22*WBTC-USD
ADF Statistic: -4.268228442169944
P-value: 0.0005039811001694201
Hurst Exponent: 0.005208743318302898
Half-life: 1.4496635253610424
Found stationary series: 17.73*BTC-USD + 2.09*BNB-USD + -18.82*WBTC-USD
ADF Statistic: -4.3008589198716205
P-value: 0.00044269345752254115
Hurst Exponent: 0.005078505612