### Importing Packages & Downloading Data

In [45]:
import os
import yfinance as yf
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import coint

# Universe of tickers to analyse. The order of this list fixes the row/column
# order of the p-value matrix built further down.
tickers = [
    "AMD", "NVDA", "AVGO", "MU", "INTC", "TSM", "QCOM", "MRVL", "ADI", "NET",
    "SNOW", "CRM", "AMZN", "MSFT", "GOOGL", "DDOG", "ESTC", "DOCN", "PLTR", "MDB",
    "OKTA", "CRWD", "ZS", "PANW", "S", "FTNT", "CYBR", "APP", "U",
    "TTD", "ROKU", "PINS", "SNAP", "PUBM", "CFLT", "GTLB", "FROG", "PD",
    "ORCL",
]
# "NBIS" is currently left out of the universe.

# Closing prices for the trailing two years (period="2y" is a rolling window, so
# the sample changes from run to run). auto_adjust is passed explicitly so the
# price basis is pinned in the notebook rather than inherited from the installed
# yfinance default, which also avoids the warning emitted when it is left implicit.
# NOTE(review): True matches the default of recent yfinance releases - confirm
# against the version in use.
data = yf.download(tickers, period="2y", auto_adjust=True)[["Close"]]

# Convert to log prices
log_prices = np.log(data)
log_prices.to_csv("log_price_data.csv")
n_stocks = len(tickers)
# Empty ticker-by-ticker frame that the pairwise cointegration cell fills in.
pvalue_matrix = pd.DataFrame(index=tickers, columns=tickers)

# Drop the outer ("Close") column level so columns are plain ticker symbols.
log_prices_flat = log_prices.droplevel(0, axis=1)
log_prices_flat.to_csv("log_prices_flat.csv")

  data = yf.download(tickers, period="2y")[['Close']]
[*********************100%***********************]  39 of 39 completed


In [46]:
# Sanity-check the log-price panel before any cointegration test is run:
# report its dimensions and, per ticker, how many values are missing or infinite.
panel_shape = log_prices_flat.shape
nan_counts = log_prices_flat.isnull().sum()
inf_counts = np.isinf(log_prices_flat).sum()

print("Data shape:", panel_shape)
print("Any NaN values:", nan_counts)
print("Any inf values:", inf_counts)



Data shape: (502, 39)
Any NaN values: Ticker
ADI      0
AMD      0
AMZN     0
APP      0
AVGO     0
CFLT     0
CRM      0
CRWD     0
CYBR     0
DDOG     0
DOCN     0
ESTC     0
FROG     0
FTNT     0
GOOGL    0
GTLB     0
INTC     0
MDB      0
MRVL     0
MSFT     0
MU       0
NET      0
NVDA     0
OKTA     0
ORCL     0
PANW     0
PD       0
PINS     0
PLTR     0
PUBM     0
QCOM     0
ROKU     0
S        0
SNAP     0
SNOW     0
TSM      0
TTD      0
U        0
ZS       0
dtype: int64
Any inf values: Ticker
ADI      0
AMD      0
AMZN     0
APP      0
AVGO     0
CFLT     0
CRM      0
CRWD     0
CYBR     0
DDOG     0
DOCN     0
ESTC     0
FROG     0
FTNT     0
GOOGL    0
GTLB     0
INTC     0
MDB      0
MRVL     0
MSFT     0
MU       0
NET      0
NVDA     0
OKTA     0
ORCL     0
PANW     0
PD       0
PINS     0
PLTR     0
PUBM     0
QCOM     0
ROKU     0
S        0
SNAP     0
SNOW     0
TSM      0
TTD      0
U        0
ZS       0
dtype: int64


### Pairwise Cointegration Testing

In [49]:
def test_pairs(log_prices_flat: pd.DataFrame, tickers: list) -> pd.DataFrame:
    """Run a pairwise cointegration test for every ordered pair of tickers.

    For each (stock1, stock2) with stock1 != stock2 the p-value returned by
    ``coint(prices1, prices2)`` is stored at row ``stock1``, column ``stock2``.
    Both orderings of a pair are tested because the two directions can give
    different p-values. Diagonal cells are set to 0, as before.

    The matrix is built locally from ``tickers`` (float dtype) instead of
    writing into a module-level frame, then printed, saved to
    ``cointegration_pvalues.csv`` and returned.

    Args:
        log_prices_flat: Frame with one column of log prices per ticker.
        tickers: Symbols to test; also the row/column order of the result.

    Returns:
        Square DataFrame of p-values indexed and labelled by ``tickers``.

    Raises:
        KeyError: If a ticker is not a column of ``log_prices_flat``.
    """
    pvalues = pd.DataFrame(index=tickers, columns=tickers, dtype=float)

    for i, stock1 in enumerate(tickers):
        prices1 = log_prices_flat[stock1]  # loop-invariant for the inner loop
        for j, stock2 in enumerate(tickers):
            if i == j:
                # A series is not tested against itself; keep the 0 placeholder.
                pvalues.loc[stock1, stock2] = 0
                continue
            prices2 = log_prices_flat[stock2]
            _, pvalue, _ = coint(prices1, prices2)
            pvalues.loc[stock1, stock2] = pvalue

    print(pvalues)
    # Save p-value matrix to CSV
    pvalues.to_csv("cointegration_pvalues.csv")
    return pvalues


# Keep the module-level matrix in sync with the freshly computed result.
pvalue_matrix = test_pairs(log_prices_flat, tickers)

            AMD      NVDA      AVGO        MU      INTC       TSM      QCOM  \
AMD           0  0.678008  0.639085  0.668555  0.555618  0.669126  0.727116   
NVDA   0.874178         0  0.672447  0.664363  0.198364  0.009698  0.908462   
AVGO   0.972597  0.793987         0  0.873251  0.446545   0.48653  0.990857   
MU     0.592717  0.534297   0.51105         0  0.523763  0.468117  0.767859   
INTC    0.71109  0.164056  0.258576  0.621728         0  0.281927  0.660254   
TSM    0.926055  0.010871  0.391659  0.730993  0.363439         0   0.98372   
QCOM   0.510407  0.565665  0.638118  0.505423  0.368831  0.660706         0   
MRVL   0.708702  0.613558  0.806941  0.785534  0.412944  0.706049  0.541328   
ADI    0.750899  0.023956  0.041382  0.129671  0.180912  0.011388  0.736013   
NET    0.979855  0.944503   0.36125  0.933107  0.924631  0.844097  0.988659   
SNOW   0.785844   0.78452  0.689287  0.781291  0.807809  0.768301   0.70286   
CRM    0.585388  0.688567  0.754851  0.710244  0.465

### Low P-Value Cointegration Scores

In [52]:
def find_cointegrated_pairs(csv_file: str, significance_level: float = 0.15) -> list:
    """Find and display significantly cointegrated stock pairs.

    Reads a square p-value matrix from ``csv_file`` (first column = row labels),
    scans the cells above the diagonal, and keeps every pair whose p-value is
    strictly below ``significance_level``. Only the (row=i, column=j>i) direction
    of each pair is considered; the reverse-direction p-value is not consulted.

    Rows are aligned to the column labels before scanning, so the names reported
    for a pair always match the cell that was read even if the CSV rows are in a
    different order from its columns. Cells that are missing (NaN) never qualify.

    Args:
        csv_file: Path to the CSV holding the p-value matrix.
        significance_level: Upper bound (exclusive) for a p-value to be kept.

    Returns:
        List of ``(stock1, stock2, p_value)`` tuples sorted by ascending p-value.
        Each pair is also printed as ``stock1-stock2: p`` with four decimals.

    Raises:
        ValueError: If the CSV contains duplicate row labels (cannot be aligned).
    """
    p_value_matrix = pd.read_csv(csv_file, index_col=0)
    stocks = p_value_matrix.columns.tolist()

    # Header labels are always read as strings; normalise the row labels the same
    # way, then put the rows in column order so position i means stocks[i] on
    # both axes.
    p_value_matrix.index = p_value_matrix.index.astype(str)
    aligned = p_value_matrix.reindex(index=stocks)

    significant_pairs = []
    for i in range(len(stocks)):
        for j in range(i + 1, len(stocks)):
            p_val = aligned.iat[i, j]
            if p_val < significance_level:  # False for NaN, so gaps are skipped
                significant_pairs.append((stocks[i], stocks[j], p_val))

    # Sort and display
    significant_pairs.sort(key=lambda pair: pair[2])
    for stock1, stock2, p_val in significant_pairs:
        print(f"{stock1}-{stock2}: {p_val:.4f}")

    return significant_pairs

# Scan the p-value matrix saved as "cointegration_pvalues.csv" using the
# function's default significance level; the hits are kept in `pairs`.
pairs = find_cointegrated_pairs(csv_file="cointegration_pvalues.csv")

NVDA-TSM: 0.0097
TSM-AMZN: 0.0122
AVGO-CYBR: 0.0162
PLTR-PD: 0.0276
ADI-GOOGL: 0.0288
PANW-CYBR: 0.0334
PANW-APP: 0.0338
PANW-PD: 0.0413
MDB-PD: 0.0467
ADI-MSFT: 0.0506
ESTC-SNAP: 0.0573
ADI-PANW: 0.0585
NVDA-ADI: 0.0602
PD-ORCL: 0.0615
CFLT-GTLB: 0.0626
ESTC-CFLT: 0.0626
CFLT-ORCL: 0.0641
PANW-FTNT: 0.0648
AMZN-CYBR: 0.0737
PANW-ORCL: 0.0772
ESTC-PD: 0.0778
DOCN-S: 0.0792
TSM-ADI: 0.0793
S-GTLB: 0.0819
PINS-CFLT: 0.0837
ESTC-GTLB: 0.0850
CFLT-PD: 0.0853
PINS-PD: 0.0875
DOCN-GTLB: 0.0896
ADI-ORCL: 0.0916
PINS-ORCL: 0.0936
SNAP-PD: 0.0949
TSM-PANW: 0.0959
CRWD-FROG: 0.0976
AVGO-PD: 0.1021
SNAP-PUBM: 0.1056
ESTC-PUBM: 0.1094
APP-PD: 0.1102
NVDA-AMZN: 0.1128
MU-GOOGL: 0.1146
CFLT-FROG: 0.1164
CYBR-PD: 0.1174
OKTA-CRWD: 0.1188
AVGO-ORCL: 0.1252
MRVL-DDOG: 0.1310
ADI-CYBR: 0.1319
GTLB-PD: 0.1335
ESTC-ORCL: 0.1379
OKTA-TTD: 0.1405
INTC-APP: 0.1408
ESTC-PINS: 0.1414
CRWD-PD: 0.1419
ADI-AMZN: 0.1431
OKTA-FROG: 0.1474
INTC-PD: 0.1477


In [54]:
# NOTE(review): find_cointegrated_pairs is also defined in an earlier cell of this
# notebook; after Run All this later definition is the one in effect. Consider
# keeping a single definition.
def find_cointegrated_pairs(csv_file: str, significance_level: float = 0.15) -> list:
    """Find and display significantly cointegrated stock pairs.

    Reads a square p-value matrix from ``csv_file`` (first column = row labels),
    scans the cells above the diagonal, and keeps every pair whose p-value is
    strictly below ``significance_level``. Only the (row=i, column=j>i) direction
    of each pair is considered; the reverse-direction p-value is not consulted.

    Rows are aligned to the column labels before scanning, so the names reported
    for a pair always match the cell that was read even if the CSV rows are in a
    different order from its columns. Cells that are missing (NaN) never qualify.

    Args:
        csv_file: Path to the CSV holding the p-value matrix.
        significance_level: Upper bound (exclusive) for a p-value to be kept.

    Returns:
        List of ``(stock1, stock2, p_value)`` tuples sorted by ascending p-value.
        Each pair is also printed as ``stock1-stock2: p`` with four decimals.

    Raises:
        ValueError: If the CSV contains duplicate row labels (cannot be aligned).
    """
    p_value_matrix = pd.read_csv(csv_file, index_col=0)
    stocks = p_value_matrix.columns.tolist()

    # Header labels are always read as strings; normalise the row labels the same
    # way, then put the rows in column order so position i means stocks[i] on
    # both axes.
    p_value_matrix.index = p_value_matrix.index.astype(str)
    aligned = p_value_matrix.reindex(index=stocks)

    significant_pairs = []
    for i in range(len(stocks)):
        for j in range(i + 1, len(stocks)):
            p_val = aligned.iat[i, j]
            if p_val < significance_level:  # False for NaN, so gaps are skipped
                significant_pairs.append((stocks[i], stocks[j], p_val))

    # Sort and display
    significant_pairs.sort(key=lambda pair: pair[2])
    for stock1, stock2, p_val in significant_pairs:
        print(f"{stock1}-{stock2}: {p_val:.4f}")

    return significant_pairs

# Scan the p-value matrix saved as "cointegration_pvalues.csv" using the
# function's default significance level; the hits are kept in `pairs`.
pairs = find_cointegrated_pairs(csv_file="cointegration_pvalues.csv")

NVDA-TSM: 0.0097
TSM-AMZN: 0.0122
AVGO-CYBR: 0.0162
PLTR-PD: 0.0276
ADI-GOOGL: 0.0288
PANW-CYBR: 0.0334
PANW-APP: 0.0338
PANW-PD: 0.0413
MDB-PD: 0.0467
ADI-MSFT: 0.0506
ESTC-SNAP: 0.0573
ADI-PANW: 0.0585
NVDA-ADI: 0.0602
PD-ORCL: 0.0615
CFLT-GTLB: 0.0626
ESTC-CFLT: 0.0626
CFLT-ORCL: 0.0641
PANW-FTNT: 0.0648
AMZN-CYBR: 0.0737
PANW-ORCL: 0.0772
ESTC-PD: 0.0778
DOCN-S: 0.0792
TSM-ADI: 0.0793
S-GTLB: 0.0819
PINS-CFLT: 0.0837
ESTC-GTLB: 0.0850
CFLT-PD: 0.0853
PINS-PD: 0.0875
DOCN-GTLB: 0.0896
ADI-ORCL: 0.0916
PINS-ORCL: 0.0936
SNAP-PD: 0.0949
TSM-PANW: 0.0959
CRWD-FROG: 0.0976
AVGO-PD: 0.1021
SNAP-PUBM: 0.1056
ESTC-PUBM: 0.1094
APP-PD: 0.1102
NVDA-AMZN: 0.1128
MU-GOOGL: 0.1146
CFLT-FROG: 0.1164
CYBR-PD: 0.1174
OKTA-CRWD: 0.1188
AVGO-ORCL: 0.1252
MRVL-DDOG: 0.1310
ADI-CYBR: 0.1319
GTLB-PD: 0.1335
ESTC-ORCL: 0.1379
OKTA-TTD: 0.1405
INTC-APP: 0.1408
ESTC-PINS: 0.1414
CRWD-PD: 0.1419
ADI-AMZN: 0.1431
OKTA-FROG: 0.1474
INTC-PD: 0.1477


In [60]:
from statsmodels.tsa.vector_ar.vecm import coint_johansen

# Basket to test jointly with the Johansen procedure.
# NOTE(review): the variable name says "cybersecurity", but the basket looks
# broader than that - confirm the intended naming.
cybersecurity_symbols = ['AVGO', 'CYBR', 'PANW', 'APP', 'PD']
# Use the flat log-price frame built in the download cell (`log_prices_flat`);
# the previous spelling `log_price_flat` is not defined anywhere in the notebook
# and raises NameError on a fresh kernel.
basket_data = log_prices_flat[cybersecurity_symbols].dropna()

result = coint_johansen(basket_data.values, det_order=0, k_ar_diff=1)

# Trace test, applied sequentially: start at r = 0 and raise the rank while the
# trace statistic exceeds its 95% critical value (column 1 of `cvt`); stop at the
# first hypothesis that is not rejected. Counting every exceedance instead could
# overstate the rank if a later statistic happens to exceed its critical value
# after an earlier one did not.
rank = 0
for trace_stat, critical_95 in zip(result.lr1, result.cvt[:, 1]):
    if trace_stat <= critical_95:
        break
    rank += 1

print(f"Cointegration rank: {rank}")
print("Cointegrated" if rank > 0 else "Not cointegrated")

Cointegration rank: 0
Not cointegrated
