In [6]:

#%pip install joblib
from joblib import Parallel, delayed,cpu_count
import statsmodels.api as sm
import pandas as pd
import math
from tqdm import tqdm
from statsmodels.tsa.stattools import adfuller




# Worker count that keeps one core free, but never drops below one worker.
num_cores = max(1, cpu_count() - 1)
print(num_cores)

# Load the input table; each column label is later used as a series name.
stocks = pd.read_csv("C:/Users/adamt/OneDrive - Wirtschaftsuniversität Wien - IT-SERVICES/Desktop/masterThesis/stocks.csv")

# Placeholder for the list of pairs produced further down.
result = []

# Cut the column labels into two halves: every label in the first half is
# later paired with every label in the second half.
all_columns = stocks.columns
midpoint = len(all_columns) // 2
first_half_stocks, second_half_stocks = all_columns[:midpoint], all_columns[midpoint:]

# Function to process one stock pair
def process_pair(stock1, stock2, stocks):
    """Screen one pair of columns for cointegration with a two-step test.

    Step 1 regresses ``stocks[stock2]`` on ``stocks[stock1]`` (with an
    intercept) by OLS and keeps the pair only if the slope p-value is at
    most 0.05.  Step 2 runs an augmented Dickey-Fuller test on the OLS
    residuals and keeps the pair only if that p-value is below 0.05.

    Args:
        stock1: Column label used as the regressor.
        stock2: Column label used as the dependent variable.
        stocks: DataFrame containing both columns.  The label ``"Date"`` is
            never treated as a series.

    Returns:
        ``[stock1, stock2]`` when both steps pass; ``None`` otherwise.
        ``None`` is also returned when the two labels are equal, when the
        columns share no non-NaN rows, when the regressor does not vary over
        the shared rows, or when either statistical routine raises (the
        error is printed, not propagated).
    """
    if (stock1 == stock2) or (stock1 == "Date") or (stock2 == "Date"):
        return None

    # Keep only the rows where both series have a value.
    data = pd.concat([stocks[stock1], stocks[stock2]], axis=1).dropna()
    if data.empty:
        return None

    # A regressor with fewer than two distinct values has no estimable slope.
    # With a non-zero constant column (e.g. a single overlapping row),
    # add_constant's default behaviour is to skip adding an intercept, so the
    # fitted model would expose only one p-value and the positional lookup
    # below would raise IndexError and abort the whole parallel run.
    if data[stock1].nunique() < 2:
        return None

    # Step 1: OLS of stock2 on stock1 (intercept + slope).
    x = sm.add_constant(data[stock1])
    y = data[stock2]

    try:
        model = sm.OLS(y, x).fit()
    except Exception as e:
        print(f"Error during OLS fit for {stock1} and {stock2}: {e}")
        return None

    # Position 0 is the intercept, position 1 the slope.  Use .iloc because
    # the p-values are indexed by label, and integer keys on a label index
    # are deprecated as positional lookups in pandas.
    ols_pvalue = model.pvalues.iloc[1]

    # If no linear relationship (or the p-value is undefined), skip.
    if ols_pvalue > 0.05 or math.isnan(ols_pvalue):
        return None

    # Step 2: Dickey-Fuller test on residuals.
    # NOTE(review): adfuller reports a p-value for an observed series; for
    # residuals of an estimated regression the Engle-Granger critical values
    # (statsmodels.tsa.stattools.coint) are usually the appropriate ones --
    # confirm that the plain ADF p-value is intended here.
    residuals = pd.Series(model.resid)

    try:
        adf_pvalue = adfuller(residuals)[1]
    except Exception as e:
        print(f"Error during ADF test for {stock1} and {stock2}: {e}")
        return None

    if adf_pvalue < 0.05:
        return [stock1, stock2]  # Cointegrated pair found

    return None

# Parallel execution
# n_jobs=-1 asks joblib for every available core; this assignment replaces
# whatever value num_cores held before this point.
num_cores = -1

# One lazily generated task per (first-half label, second-half label)
# combination.  tqdm wraps only the outer iterable, so the bar advances as
# first-half labels are consumed by the task generator.
tasks = (
    delayed(process_pair)(left, right, stocks)
    for left in tqdm(first_half_stocks)
    for right in second_half_stocks
)
result = Parallel(n_jobs=num_cores)(tasks)

# Discard the None placeholders returned for rejected pairs.
result = list(filter(None, result))

# Persist the surviving pairs, then leave the frame as the cell's last
# expression so the notebook displays it.
result = pd.DataFrame(result)
result.to_csv("C:/Users/adamt/OneDrive - Wirtschaftsuniversität Wien - IT-SERVICES/Desktop/masterThesis/pairs.csv")
result


3





[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A




Unnamed: 0,0,1
0,A,ITW
1,A,J
2,A,JBHT
3,A,JNJ
4,A,KDP
...,...,...
9097,ISRG,WBA
9098,ISRG,WBD
9099,ISRG,WMT
9100,ISRG,WTW
