In [1]:
from warnings import filterwarnings

filterwarnings("ignore")

import pandas as pd
import numpy as np

from statsmodels.tsa.stattools import grangercausalitytests
import matplotlib.pyplot as plt
from itertools import permutations
from tqdm import tqdm

from quant_invest_lab.utils import get_cac40_symbols
from quant_invest_lab.data_provider import build_multi_crypto_dataframe, download_crypto_historical_data

In [2]:
# History filter: a pair is kept only if it has at least
# int(N * MININUM_HISTORY_YEAR) non-missing closes.
MININUM_HISTORY_YEAR = 1
N = 365  # presumably the number of daily observations per year — confirm bar size
# get_list_of_symbols()
symbols = set(
    [
        "OAS-USDT",
        "NEO-BTC",
        "KNC-USDT",
        "LYM-USDT",
        "NIM-ETH",
        "TON-USDT",
        "CFX-ETH",
        "WAX-BTC",
        "DOGE-USDC",
        "CWAR-USDT",
        "ICPUP-USDT",
        "AAVE-USDT",
        "EWT-KCS",
        "ROSE-USDT",
        "AVAX-USDT",
        "UNI-USDT",
        "ATOM-USDT",
        "ICP-USDT",
        "THETA-USDT",
        "VET-USDT",
        "DOGE3S-USDT",
        "BNB-USDT",
        "NEAR-BTC",
        "BTC-USDT",
        "NEAR-USDT",
        "SOLVE-BTC",
        "ETH-USDT",
        "KDA-USDT",
        "SOL-USDT",
        "EGLD-USDT",
        "ALGO-USDT",
        "FTM-USDT",
        "AXS-USDT",
        "ADA-USDT",
        "NKN-USDT",
        "LOOM-BTC",
        "CUSD-USDT",
        "LTC3L-USDT",
        "XPR-USDT",
    ]
)


closes = build_multi_crypto_dataframe(symbols)

# Columns whose number of non-missing closes is below the required history.
min_observations = int(N * MININUM_HISTORY_YEAR)
observed_per_column = closes.count()
cols_to_remove = observed_per_column[
    observed_per_column < min_observations
].index.tolist()


# Reassign instead of dropping in place so the frame returned by the data
# provider is never mutated behind the reader's back.
closes = closes.drop(columns=cols_to_remove)
returns = closes.pct_change().dropna()
# Log returns are the first difference of log-prices: log(P_t) - log(P_{t-1}).
# (The percentage change of log-prices is a different, price-level-dependent quantity.)
log_returns = np.log(closes).diff().dropna()
returns.head()

Unnamed: 0_level_0,LOOM-BTC_Close,ROSE-USDT_Close,VET-USDT_Close,AVAX-USDT_Close,SOLVE-BTC_Close,UNI-USDT_Close,ALGO-USDT_Close,THETA-USDT_Close,ETH-USDT_Close,ATOM-USDT_Close,FTM-USDT_Close,ADA-USDT_Close,BTC-USDT_Close,BNB-USDT_Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2021-03-18,0.210909,-0.113455,0.011595,0.0154,0.285714,-0.041741,-0.024277,0.071096,-0.025642,0.056637,-0.066114,-0.110428,-0.020839,-0.031226
2021-03-19,-0.063063,-0.03522,0.043658,0.123951,0.0,0.110345,-0.00779,-0.022681,0.018789,0.005138,0.039372,0.053323,0.00651,0.006515
2021-03-20,-0.019231,-0.002097,0.1059,-0.084177,-0.146538,-0.039371,-0.032927,0.012084,-0.00178,-0.059096,0.030256,-0.072942,0.001539,0.004986
2021-03-21,0.006536,0.167348,0.046807,-0.042039,0.056604,0.026449,-0.021953,0.151563,-0.01269,-0.01849,-0.000211,-0.011763,-0.013135,0.001433
2021-03-22,0.061688,-0.13601,-0.057433,-0.107483,-0.028571,-0.000113,-0.066576,0.044366,-0.057339,-0.065317,-0.074746,-0.071503,-0.056741,-0.036593


In [3]:


# Keep only the rows where every remaining column has a close, then z-score
# each column: subtract its mean and divide by its standard deviation.
# NOTE(review): these are standardized price *levels*, not returns; Granger
# causality tests assume stationary inputs — confirm this is intended.
closes = closes.dropna()
closes_standard = closes.sub(closes.mean()).div(closes.std())


def get_max_pvalue(
    timeseries: pd.DataFrame, perm: list[str], max_lag: int = 10
) -> tuple[str, float, int]:
    """Run a pairwise Granger causality test and report its most significant lag.

    Despite the function's name, the value returned is the *smallest*
    ``ssr_ftest`` p-value found across the tested lags (ties resolve to the
    first lag encountered), not the largest.

    Per the statsmodels documentation, ``grangercausalitytests`` tests whether
    the second column of the data helps predict the first, so for
    ``perm = [a, b]`` the hypothesis under test is "b Granger-causes a".

    Args:
        timeseries (pd.DataFrame): Frame containing at least the two columns
            named in ``perm``.
        perm (list[str]): Exactly two column names, in the order handed to
            ``grangercausalitytests``.
        max_lag (int): Highest lag to test. Defaults to 10.

    Returns:
        tuple[str, float, int]: The two column names joined with a comma, the
        smallest ``ssr_ftest`` p-value, and the last element of that
        ``ssr_ftest`` tuple, which this code reports as the lag.

    Raises:
        AssertionError: If ``perm`` does not hold exactly two names, if a name
            is missing from ``timeseries``, or if ``max_lag`` is not strictly
            between 0 and ``len(timeseries)``.
    """
    assert len(perm) == 2, "Only 2 columns timeseries are supported"
    assert all(col in timeseries.columns for col in perm), "Columns not in timeseries"
    assert (
        0 < max_lag < len(timeseries)
    ), "max_lag must be greater than 0 and less than the timeseries length"

    results_by_lag = grangercausalitytests(timeseries[perm], max_lag, verbose=False)

    # One (p-value, lag) candidate per tested lag, read from the SSR-based F test.
    candidates = []
    for lag_result in results_by_lag.values():
        f_test = lag_result[0]["ssr_ftest"]
        candidates.append((f_test[1], f_test[-1]))

    best_p_value, best_lag = min(candidates, key=lambda candidate: candidate[0])
    return ",".join(perm), best_p_value, best_lag


# Parameters of the pairwise Granger scan.
GRANGER_MAX_LAG = 20  # highest lag tested for each ordered pair
SIGNIFICANCE_LEVEL = 0.05  # keep pairs whose best p-value is below this
MIN_REPORTED_LAG = 2  # discard pairs whose best lag is shorter than this

# Materialize the ordered pairs first so tqdm knows the total and can show
# a real progress bar with an ETA.
column_pairs = [
    list(pair) for pair in permutations(closes_standard.columns.tolist(), 2)
]

p_vals = [
    get_max_pvalue(closes_standard, perm, GRANGER_MAX_LAG)
    for perm in tqdm(column_pairs)
]

# Lists rather than one-shot `filter` iterators: these names stay usable in
# later cells after `sorted` has read them.
# NOTE(review): no multiple-comparison correction is applied across pairs and
# lags before thresholding — confirm this is acceptable for the analysis.
p_vals_filtered = [res for res in p_vals if res[1] < SIGNIFICANCE_LEVEL]
p_vals_filtered_high_lags = [
    res for res in p_vals_filtered if res[-1] >= MIN_REPORTED_LAG
]
# Sorted by p-value in descending order (weakest surviving result first).
sorted_p_vals = sorted(
    p_vals_filtered_high_lags, key=lambda res: res[1], reverse=True
)

sorted_p_vals

182it [02:37,  1.16it/s]


[('LOOM-BTC_Close,THETA-USDT_Close', 0.04828110402928003, 16),
 ('ALGO-USDT_Close,LOOM-BTC_Close', 0.047886884841091104, 4),
 ('THETA-USDT_Close,AVAX-USDT_Close', 0.04725658826275443, 11),
 ('AVAX-USDT_Close,UNI-USDT_Close', 0.045776620889966306, 5),
 ('FTM-USDT_Close,THETA-USDT_Close', 0.04558798614084004, 15),
 ('ETH-USDT_Close,BNB-USDT_Close', 0.04229745480322453, 13),
 ('ROSE-USDT_Close,ADA-USDT_Close', 0.04153009722587432, 10),
 ('BTC-USDT_Close,ADA-USDT_Close', 0.04131019429004342, 7),
 ('ATOM-USDT_Close,SOLVE-BTC_Close', 0.038290381726750825, 13),
 ('BTC-USDT_Close,LOOM-BTC_Close', 0.03434807848460262, 17),
 ('ADA-USDT_Close,LOOM-BTC_Close', 0.033937656972443275, 4),
 ('LOOM-BTC_Close,BTC-USDT_Close', 0.03372394635860658, 6),
 ('BTC-USDT_Close,SOLVE-BTC_Close', 0.03294269425856433, 19),
 ('LOOM-BTC_Close,AVAX-USDT_Close', 0.03256285867605351, 2),
 ('BTC-USDT_Close,AVAX-USDT_Close', 0.03220351055950523, 18),
 ('UNI-USDT_Close,LOOM-BTC_Close', 0.030327452171089418, 15),
 ('BTC-USD