In [48]:
from itertools import combinations
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller

In [49]:
# Base symbols covered at every timeframe. The order matters: it fixes the
# insertion order of each dict below and therefore the order in which pairs
# are later generated from them.
SYMBOLS = ("SOL", "ETH", "BTC", "AVAX", "NEAR", "OP", "ARB", "LDO", "LINK", "AAVE")


def build_ticker_paths(timeframe_label):
    """Return the ``"<SYMBOL>_<timeframe>" -> CSV path`` mapping for one timeframe.

    Parameters
    ----------
    timeframe_label : str
        Timeframe suffix as it appears in the data directory and file names,
        e.g. ``"1m"`` or ``"4h"``.

    Returns
    -------
    dict[str, str]
        One entry per symbol in ``SYMBOLS`` (in that order), with paths of the
        form ``../Data/<tf>_Data/<SYMBOL>_USDC_USDC_<tf>.csv``.
    """
    return {
        f"{symbol}_{timeframe_label}": (
            f"../Data/{timeframe_label}_Data/{symbol}_USDC_USDC_{timeframe_label}.csv"
        )
        for symbol in SYMBOLS
    }


# One mapping per timeframe, all generated from the same symbol list so that
# adding or removing a symbol is a single edit.
tickers_1m = build_ticker_paths("1m")
tickers_3m = build_ticker_paths("3m")
tickers_5m = build_ticker_paths("5m")
tickers_15m = build_ticker_paths("15m")
tickers_1h = build_ticker_paths("1h")
tickers_4h = build_ticker_paths("4h")


In [50]:
# Lookup from a timeframe label to that timeframe's label -> CSV-path mapping.
# The two tuples are paired positionally, so keep them in the same order.
ticker_map = dict(
    zip(
        ("1m", "3m", "5m", "15m", "1h", "4h"),
        (tickers_1m, tickers_3m, tickers_5m, tickers_15m, tickers_1h, tickers_4h),
    )
)

In [None]:
# Timeframe to analyse; must be one of the keys of `ticker_map`.
timeframe = "4h"
tickers = ticker_map[timeframe]

# One summary dict per successfully processed pair.
results = []

# Every ticker takes part in len(tickers) - 1 pairs, so keep each parsed
# 'close' column instead of re-reading the same CSV for every pair.
close_cache = {}

combination_of_tickers = combinations(tickers.items(), 2)

for (sym1, path1), (sym2, path2) in combination_of_tickers:
    try:
        # Loading stays inside the try so an unreadable file is still reported
        # for each pair it belongs to; a failed read is never cached.
        for path in (path1, path2):
            if path not in close_cache:
                close_cache[path] = pd.read_csv(path)['close']
        close1 = close_cache[path1]
        close2 = close_cache[path2]

        # NOTE(review): the two series are paired row-by-row after truncating
        # to the shorter one; nothing here checks that row i of both files is
        # the same timestamp. If the CSVs can start at different times or have
        # gaps, join on the time column instead — confirm against the data.
        min_len = min(len(close1), len(close2))
        if min_len < 2:
            # Two singular values and the (n - 1) divisor below both need n >= 2.
            raise ValueError(f"need at least 2 rows per series, got {min_len}")
        close1 = close1.iloc[:min_len]
        close2 = close2.iloc[:min_len]

        # (n, 2) matrix of closes, centred column-wise for PCA.
        prices = np.column_stack([close1, close2])
        prices_centered = prices - prices.mean(axis=0)

        # Rows of Vt are the principal directions, ordered by decreasing
        # singular value.
        U, S, Vt = np.linalg.svd(prices_centered, full_matrices=False)
        principal_vector_one = Vt[0]
        principal_vector_two = Vt[1]

        # Eigenvalues of the sample covariance matrix are S**2 / (n - 1).
        singular_vals_squared = S ** 2
        n = prices.shape[0]
        lambda_1 = singular_vals_squared[0] / (n - 1)
        lambda_2 = singular_vals_squared[1] / (n - 1)
        total_var = lambda_1 + lambda_2
        lambda_1_pct = (lambda_1 / total_var) * 100
        lambda_2_pct = (lambda_2 / total_var) * 100

        # Scores along the second (smallest-variance) principal direction; this
        # linear combination of the two series is what gets tested below.
        projecting_onto_pc2 = prices_centered @ principal_vector_two

        # Augmented Dickey-Fuller test (null hypothesis: unit root); element 1
        # of the returned tuple is the p-value.
        adf_result = adfuller(projecting_onto_pc2)
        p_value = adf_result[1]

        results.append({
            "Pair": f"{sym1.split('_')[0]}–{sym2.split('_')[0]}",
            "λ1 %": round(lambda_1_pct, 2),
            "λ2 %": round(lambda_2_pct, 2),
            "ADF p-value": round(p_value, 5)
        })

    except Exception as e:
        # Best effort: report the failing pair and carry on with the rest.
        print(f"Error with {sym1} and {sym2}: {e}")

In [52]:
# Fixing the column set keeps the sort valid even when every pair failed and
# `results` is empty (a column-less frame would raise KeyError on sort).
results_df = pd.DataFrame(
    results, columns=["Pair", "λ1 %", "λ2 %", "ADF p-value"]
).sort_values(by="ADF p-value")

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing.
results_dir = Path("../Data/results")
results_dir.mkdir(parents=True, exist_ok=True)

results_df.to_csv(results_dir / f"pca_adf_results_{timeframe}.csv", index=False)