In [67]:
# imports 
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from pytz import timezone
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from pandas.plotting import register_matplotlib_converters
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from hurst import compute_Hc
register_matplotlib_converters()


In [100]:
def calculate_half_life(series):
    # Add a small constant to series
    series = series + 1e-10

    delta_series = np.log(series).diff().dropna()
    lagged_series = delta_series.shift(1).dropna()
    lagged_series = sm.add_constant(lagged_series)
    delta_series_lagged = delta_series[1:]  # Make sure delta_series_lagged is the same length as lagged_series

    # Check if delta_series_lagged and lagged_series are not empty
    if delta_series_lagged.size > 0 and lagged_series.size > 0:
        model = sm.OLS(delta_series_lagged, lagged_series)
        res = model.fit()

        # Calculate half-life
        half_life = -np.log(2) / res.params['const']
        return half_life
    else:
        return "Cannot calculate half-life because the series is empty."


In [69]:

# Function to calculate Hurst exponent
def hurst_exponent(ts):
    lags = range(2, 20)
    tau = [np.std(np.subtract(ts[lag:], ts[:-lag])) for lag in lags]
    hurst = np.polyfit(np.log(lags), np.log(tau), 1)[0]
    return hurst


In [70]:
def download_4h_data(symbols, start_date, end_date):

    try:
        data = yf.download(symbols, start=start_date, end=end_date, interval='1h')['Close']
        # Resample to 4-hour intervals
        data = data.resample('4h').last()
    except: 
        SyntaxError("Exception in downloading data")
    data.replace([np.inf, -np.inf], np.nan, inplace=True)
    data.dropna(inplace=True)
    # data = data.reset_index()
    # timestamp_column_name = data.columns[0]  # Assuming the timestamp is the first column
    # data = data[[timestamp_column_name, 'Close']]
    # data.columns = ['timestamp', 'price']
    # print(symbol, start_date, end_date, interval, data.shape[0])
    return data

In [71]:
def find_cointegration(cryptos, start_date, end_date,counter, res_str=""):
    data = download_4h_data(cryptos, start_date=start_date, end_date=end_date)
    
    result = coint_johansen(data.values, det_order=0, k_ar_diff=1)
    
    eigenvectors = result.evec
    # print(eigenvectors)
    for i in range(eigenvectors.shape[1]):
        # print('**')
        stationary_series = np.dot(data.values, eigenvectors[:, i])
        result = adfuller(stationary_series)
        if result[1] < 0.05:
            hurst = hurst_exponent(stationary_series)
            half_life = calculate_half_life(pd.Series(stationary_series))
            print(*cryptos )
            print(*eigenvectors[:, i])
            print(f'P-Value: {result[1]}')
            print(f'Hurst Exponent: {hurst}')
            print(f'Half-Life: {half_life}')
            print('-------------------------------------------------------------------------------------------------------')
            res_str += str(cryptos).replace('[','').replace(']','') +'\n'
            res_str +=str(eigenvectors[:, i]).replace('[','').replace(']','') + '\n'
            res_str += "----\n"
            counter +=1
    return res_str, counter

In [75]:
res_str = ""
cryptos = ['BTC-USD', 'ETH-USD', 'USDT-USD', 'SOL-USD', 'USDC-USD', 'STETH-USD', 'BNB-USD', 'XRP-USD', 'DOGE-USD']  
    start_date = datetime(2022, 11, 1, tzinfo=timezone('Asia/Tehran'))
    end_date = datetime(2023, 11, 1, tzinfo=timezone('Asia/Tehran'))
counter = 0
i=0    
j=i+1
cryptos_len = len(cryptos)
while( counter <10):
    crypto_group = [cryptos[i], cryptos[j]]
    res_str, counter = find_cointegration( crypto_group, start_date, end_date, counter, res_str)     
    j += 1
    if j==cryptos_len:
        i +=1
        j =i+1

    

[*********************100%%**********************]  2 of 2 completed
[*********************100%%**********************]  2 of 2 completed
BTC-USD USDT-USD
1.646426037523512e-05 -975.2392646241341
P-Value: 4.419942616456486e-05
Hurst Exponent: 0.202684333529463
Half-Life: Cannot calculate half-life because the series is empty.
-------------------------------------------------------------------------------------------------------
[*********************100%%**********************]  2 of 2 completed
BTC-USD SOL-USD
0.0002514638713570391 -0.23745998211005442
P-Value: 0.0021971809980979528
Hurst Exponent: 0.5760562021617769
Half-Life: -1277.8017798570784
-------------------------------------------------------------------------------------------------------
[*********************100%%**********************]  2 of 2 completed
BTC-USD USDC-USD
1.124102972257997e-05 -261.6013552074727
P-Value: 6.4742120866502e-13
Hurst Exponent: 0.20188801313085367
Half-Life: Cannot calculate half-life because t

In [None]:
print(res_str)

In [77]:
file_path = "output.txt"
with open(file_path, 'w') as file:
    file.write(res_str)

In [101]:
find_cointegration(['BTC-USD', 'ETH-USD', 'SOL-USD'],start_date, end_date,0,'')

[*********************100%%**********************]  3 of 3 completed
BTC-USD ETH-USD SOL-USD
0.0004857416180584321 -0.006749029071452546 -0.17347195608095864
P-Value: 0.0007332982345492613
Hurst Exponent: 0.5492012637408994
Half-Life: Cannot calculate half-life because the series is empty.
-------------------------------------------------------------------------------------------------------


("'BTC-USD', 'ETH-USD', 'SOL-USD'\n 0.00048574 -0.00674903 -0.17347196\n----\n",
 1)

In [95]:
new_data = download_4h_data(['BTC-USD', 'ETH-USD', 'SOL-USD'],start_date,end_date)

[*********************100%%**********************]  3 of 3 completed


In [98]:
new_data ['new_price'] = new_data['BTC-USD'] * 0.0004857416180584321 + new_data["ETH-USD"] * (-0.006749029071452546 ) + \
                        new_data['SOL-USD'] * (-0.17347195608095864)

In [99]:
adfuller(new_data['new_price'])[1]

0.0007332982345492455