In [5]:
# Import necessary libraries
import numpy as np
import pandas as pd
import yfinance as yf
from statsmodels.tsa.stattools import coint

# Collect data for NSE stocks
tickers = ["INFY.NS", "TCS.NS", "HDFCBANK.NS", "ICICIBANK.NS"]
data = yf.download(tickers, period="5y", interval="1d", group_by="ticker")
data.to_csv("5_years.csv")
# Clean and preprocess data
data = data["Close"].dropna()
data = data.apply(lambda x: np.log(x))

# Pair selection
pairs = []
for i in range(len(data.columns)):
    for j in range(i+1, len(data.columns)):
        s1 = data.iloc[:,i]
        s2 = data.iloc[:,j]
        score, pvalue, _ = coint(s1, s2)
        if pvalue < 0.05:
            pairs.append((s1.name, s2.name))

# Spread calculation
spread_data = pd.DataFrame()
for pair in pairs:
    s1 = data[pair[0]]
    s2 = data[pair[1]]
    model = np.polyfit(s1, s2, 1)
    spread = s2 - (model[0] * s1 + model[1])
    spread_data[pair] = spread

# Strategy implementation
signals = pd.DataFrame(np.zeros_like(spread_data), index=spread_data.index, columns=spread_data.columns)
for pair in pairs:
    spread = spread_data[pair]
    mean = np.mean(spread)
    std = np.std(spread)
    signals[pair] = np.where(spread < mean - 0.5*std, 1, np.where(spread > mean + 0.5*std, -1, 0))

# Backtesting
returns = pd.DataFrame(np.zeros_like(signals), index=signals.index, columns=signals.columns)
for pair in pairs:
    s1 = data[pair[0]]
    s2 = data[pair[1]]
    position = signals[pair].shift(1)
    returns[pair] = position * (s2 - (model[0] * s1 + model[1])) / s2
returns = returns.sum


[*********************100%***********************]  4 of 4 completed


KeyError: 'Close'

In [11]:
# Import necessary libraries
import numpy as np
import pandas as pd
import yfinance as yf
import os
from statsmodels.tsa.stattools import coint

# Collect data for NSE stocks
tickers = ["INFY.NS", "TCS.NS", "HDFCBANK.NS", "ICICIBANK.NS"]
try:
    if os.path.isfile('5_years.csv'):
        data=pd.read_csv('5_years.csv')
        data = data.stack(level=0).rename_axis(['Date', 'Ticker']).reset_index(level=1)
    else:
        data = yf.download(tickers, period="5y", interval="1d", group_by="ticker")
        data = data.stack(level=0).rename_axis(['Date', 'Ticker']).reset_index(level=1)
  
except:
    print("Error: Failed to download data")

# Clean and preprocess data
try:
    data = data["Close"].dropna()
    data = data.apply(lambda x: np.log(x))
except:
    print("Error: Failed to clean and preprocess data")

# Pair selection
pairs = []
for i in range(len(data.columns)):
    for j in range(i+1, len(data.columns)):
        s1 = data.iloc[:,i]
        s2 = data.iloc[:,j]
        score, pvalue, _ = coint(s1, s2)
        if pvalue < 0.05:
            pairs.append((s1.name, s2.name))

# Spread calculation
spread_data = pd.DataFrame()
for pair in pairs:
    s1 = data[pair[0]]
    s2 = data[pair[1]]
    model = np.polyfit(s1, s2, 1)
    spread = s2 - (model[0] * s1 + model[1])
    spread_data[pair] = spread

# Strategy implementation
signals = pd.DataFrame(np.zeros_like(spread_data), index=spread_data.index, columns=spread_data.columns)
for pair in pairs:
    spread = spread_data[pair]
    mean = np.mean(spread)
    std = np.std(spread)
    signals[pair] = np.where(spread < mean - 0.5*std, 1, np.where(spread > mean + 0.5*std, -1, 0))

# Backtesting
returns = pd.DataFrame(np.zeros_like(signals), index=signals.index, columns=signals.columns)
for pair in pairs:
    s1 = data[pair[0]]
    s2 = data[pair[1]]
    position = signals[pair].shift(1)
    returns[pair] = position * (s2 - (model[0] * s1 + model[1])) / s2
returns = returns.sum()


In [None]:
!pip install yfinance