In [None]:
nse_futures_tickers = [
    "ACC.NS", "ADANIENT.NS", "ADANIPORTS.NS", "AMBUJACEM.NS", "APOLLOHOSP.NS",
    "ASHOKLEY.NS", "ASIANPAINT.NS", "AUROPHARMA.NS", "AXISBANK.NS", "BAJAJ-AUTO.NS",
    "BAJAJFINSV.NS", "BAJFINANCE.NS", "BALKRISIND.NS", "BALRAMCHIN.NS", "BANDHANBNK.NS",
    "BANKBARODA.NS", "BEL.NS", "BERGEPAINT.NS", "BHARATFORG.NS", "BHARTIARTL.NS",
    "BHEL.NS", "BIOCON.NS", "BOSCHLTD.NS", "BPCL.NS", "BRITANNIA.NS",
    "CHOLAFIN.NS", "CIPLA.NS", "COALINDIA.NS", "COFORGE.NS", "COLPAL.NS",
    "CONCOR.NS", "CUB.NS", "CUMMINSIND.NS", "DABUR.NS", "DALBHARAT.NS",
    "DEEPAKNTR.NS", "DIVISLAB.NS", "DLF.NS", "DRREDDY.NS", "EICHERMOT.NS",
    "ESCORTS.NS", "EXIDEIND.NS", "FEDERALBNK.NS", "GAIL.NS", "GLENMARK.NS",
    "GMRINFRA.NS", "GODREJCP.NS", "GRANULES.NS", "GRASIM.NS", "GUJGASLTD.NS",
    "HAVELLS.NS", "HCLTECH.NS", "HDFCBANK.NS", "HDFCLIFE.NS",
    "HEROMOTOCO.NS", "HINDALCO.NS", "HINDCOPPER.NS", "HINDPETRO.NS", "HINDUNILVR.NS",
    "IBULHSGFIN.NS", "ICICIBANK.NS", "ICICIGI.NS", "ICICIPRULI.NS", "IDEA.NS",
    "IDFCFIRSTB.NS", "IEX.NS", "IGL.NS", "INDHOTEL.NS", "INDIACEM.NS",
    "INDIAMART.NS", "INDIGO.NS", "INDUSINDBK.NS", "INDUSTOWER.NS", "INFY.NS",
    "INTELLECT.NS", "IOC.NS", "IRCTC.NS", "ITC.NS", "JINDALSTEL.NS",
    "JSWSTEEL.NS", "JUBLFOOD.NS", "KOTAKBANK.NS", "L&TFH.NS", "LALPATHLAB.NS",
    "LAURUSLABS.NS", "LICI.NS", "LT.NS", "LTI.NS", "LTTS.NS",
    "LUPIN.NS", "M&M.NS", "M&MFIN.NS", "MANAPPURAM.NS", "MARICO.NS",
    "MARUTI.NS", "MCDOWELL-N.NS", "METROPOLIS.NS", "MFSL.NS", "MGL.NS",
    "MOTHERSON.NS", "MPHASIS.NS", "MRF.NS", "MUTHOOTFIN.NS", "NATIONALUM.NS",
    "NAUKRI.NS", "NAVINFLUOR.NS", "NESTLEIND.NS", "NMDC.NS", "NTPC.NS",
    "OBEROIRLTY.NS", "OFSS.NS", "ONGC.NS", "PAGEIND.NS", "PEL.NS",
    "PERSISTENT.NS", "PETRONET.NS", "PIDILITIND.NS", "PIIND.NS", "PNB.NS",
    "POLYCAB.NS", "POWERGRID.NS", "PVRINOX.NS", "RAMCOCEM.NS", "RBLBANK.NS",
    "RECLTD.NS", "RELIANCE.NS", "SAIL.NS", "SBICARD.NS", "SBILIFE.NS",
    "SBIN.NS", "SHREECEM.NS", "SIEMENS.NS", "SRF.NS", "SHRIRAMFIN.NS",
    "SUNPHARMA.NS", "SUNTV.NS", "SYNGENE.NS", "TATACHEM.NS", "TATACOMM.NS",
    "TATACONSUM.NS", "TATAMOTORS.NS", "TATAPOWER.NS", "TATASTEEL.NS", "TCS.NS",
    "TECHM.NS", "TITAN.NS", "TORNTPHARM.NS", "TRENT.NS", "TVSMOTOR.NS",
    "UBL.NS", "ULTRACEMCO.NS", "UPL.NS", "VEDL.NS", "VOLTAS.NS",
    "WIPRO.NS", "ZEEL.NS"
]


In [None]:
import yfinance as yf
import pandas as pd
from itertools import combinations
from tqdm import tqdm

def get_highly_correlated_pairs(tickers, start_date="2023-07-01", end_date="2025-07-01", threshold=0.75, save_path="correlated_pairs.csv"):
    """
    Calculates correlation for all stock pairs in tickers list.
    Stores pairs with correlation > threshold or < -threshold.
    """
    # Step 1: Download adjusted close prices
    print("Fetching data from Yahoo Finance...")
    data = yf.download(tickers, start=start_date, end=end_date)["Close"]

    print("Calculating correlations...")
    correlated_pairs = []

    # Step 2: Iterate over all combinations of 2 stocks
    for stock1, stock2 in tqdm(combinations(tickers, 2)):
        series1 = data[stock1]
        series2 = data[stock2]

        # Drop NaNs
        df_pair = pd.concat([series1, series2], axis=1).dropna()
        if len(df_pair) < 100:  # Skip pairs with very low overlap
            continue

        corr = df_pair.corr().iloc[0, 1]

        if abs(corr) >= threshold:
            get_viable_pair(df_pair, start_date)
            stock_x, stock_y = get_viable_pair(df_pair, start_date)
            correlated_pairs.append({
                "Stock_Y": stock_y,
                "Stock_X": stock_x,
                "Correlation": round(corr, 4),
                "Sign": "positive" if corr > 0 else "negative"
            })

    # Step 3: Save to CSV
    result_df = pd.DataFrame(correlated_pairs)
    result_df.to_csv(save_path, index=False)
    print(f"Saved {len(result_df)} correlated pairs to {save_path}")
    return result_df


In [None]:
lot_sizes = {
    "AXISBANK.NS": 500, "BANKBARODA.NS": 6500, "FEDERALBNK.NS": 8500, "HDFCBANK.NS": 550,
    "ICICIBANK.NS": 1375, "IDFCFIRSTB.NS": 6500, "INDUSINDBK.NS": 700, "KOTAKBANK.NS": 400,
    "PNB.NS": 10000, "SBIN.NS": 1500, "JIOFIN.NS": 1500, "SBILIFE.NS": 300,
    "BAJFINANCE.NS": 125, "BAJAJFINSV.NS": 250, "CHOLAFIN.NS": 1000, "HDFCAMC.NS": 300,
    "HDFC.NS": 300, "ICICIGI.NS": 500, "ICICIPRULI.NS": 1100, "LICHSGFIN.NS": 1100,
    "MUTHOOTFIN.NS": 300, "PEL.NS": 300, "RECLTD.NS": 5000, "SRTRANSFIN.NS": 425,
    "COFORGE.NS": 200, "HCLTECH.NS": 700, "INFY.NS": 400, "LTIM.NS": 200,
    "MPHASIS.NS": 300, "PERSISTENT.NS": 150, "TCS.NS": 150, "TECHM.NS": 700, "WIPRO.NS": 1600,
    "AUROPHARMA.NS": 850, "BIOCON.NS": 2300, "CIPLA.NS": 650, "DIVISLAB.NS": 200, "DRREDDY.NS": 125,
    "GLAND.NS": 200, "GRANULES.NS": 2200, "IPCALAB.NS": 200, "LAURUSLABS.NS": 800, "LUPIN.NS": 500,
    "SUNPHARMA.NS": 700, "TORNTPHARM.NS": 300, "MANKIND.NS": 300, "FORTIS.NS": 1750, "PPLPHARMA.NS": 550,
    "ADANIGREEN.NS": 650, "ADANIENT.NS": 500, "ADANIPORTS.NS": 1000, "BPCL.NS": 1800,
    "GAIL.NS": 6750, "IOC.NS": 9750, "NTPC.NS": 5700, "ONGC.NS": 3850, "PETRONET.NS": 3000,
    "POWERGRID.NS": 2700, "RELIANCE.NS": 250, "TATAPOWER.NS": 2250,
    "COALINDIA.NS": 2700, "HINDALCO.NS": 2150, "JSWSTEEL.NS": 1500, "NALCO.NS": 7500,
    "NMDC.NS": 3000, "SAIL.NS": 4750, "TATASTEEL.NS": 4250, "VEDL.NS": 2000,
    "ASHOKLEY.NS": 2750, "BAJAJ-AUTO.NS": 125, "BALKRISIND.NS": 400, "BOSCHLTD.NS": 25,
    "EICHERMOT.NS": 175, "HEROMOTOCO.NS": 300, "M&M.NS": 700, "MARUTI.NS": 100,
    "TATAMOTORS.NS": 2850, "TVSMOTOR.NS": 1350, "UNOMINDA.NS": 800,
    "BRITANNIA.NS": 150, "COLPAL.NS": 325, "DABUR.NS": 1000, "GODREJCP.NS": 500,
    "HINDUNILVR.NS": 300, "ITC.NS": 3200, "MARICO.NS": 1300, "NESTLEIND.NS": 25,
    "TATACONSUM.NS": 700, "UBL.NS": 350, "UNITDSPR.NS": 300,
    "ACC.NS": 500, "AMBUJACEM.NS": 2500, "ULTRACEMCO.NS": 150, "GRASIM.NS": 475,
    "SHREECEM.NS": 25, "RAMCOCEM.NS": 600,
    "ABB.NS": 250, "BEL.NS": 2850, "BHEL.NS": 5250, "BHARATFORG.NS": 1000, "IRCTC.NS": 875,
    "L&T.NS": 300, "LTTS.NS": 200, "NBCC.NS": 7500, "SIEMENS.NS": 275, "TATAPROJ.NS": 600,
    "APLAPOLLO.NS": 275, "ANGELONE.NS": 425, "KFIN.NS": 700, "AMBER.NS": 150, "PGELECTRO.NS": 500
}


In [None]:
correlated_df = get_highly_correlated_pairs(nse_futures_tickers, start_date="2023-07-01", end_date="2025-07-01")

Fetching data from Yahoo Finance...


  data = yf.download(tickers, start=start_date, end=end_date)["Close"]
[*********************100%***********************]  156 of 156 completed
ERROR:yfinance:
5 Failed downloads:
ERROR:yfinance:['MCDOWELL-N.NS', 'GMRINFRA.NS', 'LTI.NS', 'IBULHSGFIN.NS', 'L&TFH.NS']: YFTzMissingError('possibly delisted; no timezone found')


Calculating correlations...


12090it [00:31, 382.27it/s]

Saved 2865 correlated pairs to correlated_pairs.csv





In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from itertools import combinations
from datetime import timedelta

# Step 1: Download Data with Validation
def download_data(stocks, start_date, end_date):
    df = yf.download(stocks, start=start_date, end=end_date, progress=False, auto_adjust=False, actions=False, keepna=True)['Close']
    df.dropna(inplace=True)
    df.index = pd.to_datetime(df.index)
    return df

# Step 2: Select viable pair based on lowest error ratio
def get_viable_pair(data, test_start_date):
    one_year_data = data.loc[test_start_date:]
    best_ratio = float('inf')
    best_pair = None
    best_model = None
    for s1, s2 in combinations(data.columns, 2):
        for y, x in [(s1, s2), (s2, s1)]:
            x_data = sm.add_constant(one_year_data[x])
            y_data = one_year_data[y]
            model = sm.OLS(y_data, x_data).fit()
            intercept_se = model.bse.iloc[0]
            residuals = model.resid
            std_err = np.std(residuals)
            ratio = intercept_se / std_err
            if ratio < best_ratio:
                best_ratio = ratio
                best_pair = (x, y)
                best_model = model
    stock_x, stock_y = best_pair
    correlation = one_year_data[stock_x].corr(one_year_data[stock_y])
    sign = np.sign(correlation)
    new_data = pd.DataFrame(columns = ["Stock_X", "Stock_Y", "Correlation", "Sign"])
    new_data["Stock_X"] = stock_x
    new_data["Stock_Y"] = stock_y
    new_data["Correlation"] = correlation
    new_data["Sign"] = sign
    # data = data[[stock_x, stock_y]].copy()
    # data.columns = [f'{stock_x}(X)', f'{stock_y}(Y)', ]
    # data.attrs['Correlation'] = correlation
    # data.attrs['Sign'] = '+' if sign > 0 else '-'
    #return data, stock_x, stock_y, correlation, sign
    return stock_x, stock_y
    #return new_data



# Function 1: Initial full analysis and save to CSV
def generate_initial_pair_trades(df):                 #, csv_path to store it in csv
    col_x, col_y = df.columns[0], df.columns[1]

    df = df[[col_x, col_y]].dropna().copy()
    df.columns = ['X', 'Y']  # for regression only

    logs = []
    position = None
    entry_info = {}

    for i in range(252, len(df)):
        current_day = df.index[i]
        window = df.iloc[i-252:i]
        y = window['Y']
        x = sm.add_constant(window['X'])
        model = sm.OLS(y, x).fit()
        intercept = model.params.iloc[0]
        beta = model.params.iloc[1]
        residuals = model.resid
        std_err_resid = np.std(residuals)

        y_i = df.iloc[i]['Y']
        x_i = df.iloc[i]['X']
        predicted_y = intercept + beta * x_i
        resid_i = y_i - predicted_y
        z_score = resid_i / std_err_resid

        if position is None and (z_score > 2.5 or z_score < -2.5):
            adf_p = adfuller(residuals)[1]
            if adf_p < 0.05:
                position = 'long' if z_score < 0 else 'short'
                entry_info = {
                    'Entry Date': current_day,
                    'Stock Y': col_y,
                    'Stock X': col_x,
                    'ADF_PValue_Entry': adf_p,
                    'ZScore Entry': z_score,
                    'Beta': beta,
                    'Intercept': intercept,
                    'StdErr_Residual': std_err_resid,
                    'Position': position
                }

        elif position is not None:
            if (position == 'long' and (z_score > -1 or z_score < -3)) or (position == 'short' and (z_score < 1 or z_score > 3)):
                adf_p_exit = adfuller(residuals)[1]
                entry_info.update({
                    'Exit Date': current_day,
                    'ADF_PValue_Exit': adf_p_exit,
                    'ZScore Exit': z_score
                })
                logs.append(entry_info)
                position = None
                entry_info = {}

    trades_df = pd.DataFrame(logs)
    # trades_df.to_csv(csv_path, index=False)


    #print(f"Saved {len(trades_df)} entries to {csv_path}.")
    return trades_df


# Function 2: Update existing CSV with new analysis for new dates
def update_pair_trades_csv(stock_x, stock_y, analysis_start_date, analysis_end_date):   #, csv_path to store in csv
    start_date = pd.to_datetime(analysis_start_date)
    end_date = pd.to_datetime(analysis_end_date)
    df = download_data([stock_x, stock_y], start_date, end_date)
    df = df[[stock_x, stock_y]].dropna()
    df.columns = ['X', 'Y']

    logs = []
    position = None
    entry_info = {}

    for i in range(252, len(df)):
        current_day = df.index[i]
        if current_day < start_date or current_day > end_date:
            continue

        window = df.iloc[i-252:i]  # past 1 year
        y = window['Y']
        x = sm.add_constant(window['X'])
        model = sm.OLS(y, x).fit()
        intercept = model.params.iloc[0]
        beta = model.params.iloc[1]
        residuals = model.resid
        std_err_resid = np.std(residuals)

        y_i = df.iloc[i]['Y']
        x_i = df.iloc[i]['X']
        predicted_y = intercept + beta * x_i
        resid_i = y_i - predicted_y
        z_score = resid_i / std_err_resid

        if position is None and (z_score > 2.5 or z_score < -2.5):
            adf_p = adfuller(residuals)[1]
            if adf_p < 0.05:
                position = 'long' if z_score < 0 else 'short'
                entry_info = {
                    'Entry Date': current_day,
                    'Stock Y': stock_y,
                    'Stock X': stock_x,
                    'ADF_PValue_Entry': adf_p,
                    'ZScore Entry': z_score,
                    'Beta': beta,
                    'Intercept': intercept,
                    'StdErr_Residual': std_err_resid,
                    'Position': position
                }

        elif position is not None:
            if (position == 'long' and (z_score > -1 or z_score < -3)) or (position == 'short' and (z_score < 1 or z_score > 3)):
                adf_p_exit = adfuller(residuals)[1]
                entry_info.update({
                    'Exit Date': current_day,
                    'ADF_PValue_Exit': adf_p_exit,
                    'ZScore Exit': z_score
                })
                logs.append(entry_info)
                position = None
                entry_info = {}

    trades_df = pd.DataFrame(logs)
    try:
        existing = pd.read_csv(csv_path)
        combined = pd.concat([existing, trades_df]).drop_duplicates(subset=['Entry Date', 'Stock Y', 'Stock X'])
    except FileNotFoundError:
        combined = trades_df
    #combined.to_csv(csv_path, index=False)
    #print(f"Updated {csv_path} with {len(trades_df)} new entries.")
    return combined


def calculate_trade_pnls(trades_df, price_df, lot_sizes):         #, output_csv_path to store it in csv
    results = []
    cumulative_pnl = 0
    # trades_df['Entry Date'] = pd.to_datetime(trades_df['Entry Date']).dt.normalize()
    # trades_df['Exit Date'] = pd.to_datetime(trades_df['Exit Date']).dt.normalize()


    for _, trade in trades_df.iterrows():
        entry_date = pd.to_datetime(trade['Entry Date'])
        exit_date = pd.to_datetime(trade['Exit Date'])
        stock_x = trade['Stock X']
        stock_y = trade['Stock Y']
        position = trade['Position']

        # Strip (X)/(Y) from names for lot size lookup and price access
        name_x = stock_x.replace('(X)', '').strip()
        name_y = stock_y.replace('(Y)', '').strip()

        try:
            entry_px_x = price_df.loc[entry_date, stock_x]
            entry_px_y = price_df.loc[entry_date, stock_y]
            exit_px_x = price_df.loc[exit_date, stock_x]
            exit_px_y = price_df.loc[exit_date, stock_y]
        except KeyError:
            print(f"Missing price data for trade on {entry_date} or {exit_date} - skipping.")
            continue

        lot_x = lot_sizes.get(name_x, 1)
        lot_y = lot_sizes.get(name_y, 1)

        if position == 'long':
            pnl_y = (exit_px_y - entry_px_y) * lot_y
            pnl_x = (entry_px_x - exit_px_x) * lot_x
        else:  # short
            pnl_y = (entry_px_y - exit_px_y) * lot_y
            pnl_x = (exit_px_x - entry_px_x) * lot_x

        trade_pnl = pnl_y + pnl_x
        cumulative_pnl += trade_pnl
        if lot_y > (lot_x*(trade["Beta"])):
           lot_y = lot_y // (lot_x*(trade["Beta"]))

        result = trade.to_dict()
        result.update({
            'Entry Open Price X': entry_px_x,
            'Exit Open Price X': exit_px_x,
            'Entry Open Price Y': entry_px_y,
            'Exit Open Price Y': exit_px_y,
            'Lot Size X': lot_x,
            'Lot Size Y': lot_y,
            'Trade PnL': trade_pnl,
            'Cumulative PnL': cumulative_pnl
        })
        results.append(result)

    final_df = pd.DataFrame(results)
    #final_df.to_csv(output_csv_path, index=False)
    #print(f"Saved detailed trade PnL report to {output_csv_path}")
    return final_df



In [None]:
new_data = pd.DataFrame(columns = ['Entry Date', 'Stock Y', 'Stock X', 'ADF_PValue_Entry', 'ZScore Entry',
       'Beta', 'Intercept', 'StdErr_Residual', 'Position', 'Exit Date',
       'ADF_PValue_Exit', 'ZScore Exit', 'Entry Open Price X',
       'Exit Open Price X', 'Entry Open Price Y', 'Exit Open Price Y',
       'Lot Size X', 'Lot Size Y', 'Trade PnL', 'Cumulative PnL'])
for i in range(20):
  stock_y = correlated_df.iloc[i, 0]
  stock_x = correlated_df.iloc[i, 1]
  df = download_data([stock_y, stock_x], "2023-07-01", "2025-07-01")
  trades_df = generate_initial_pair_trades(df)
  final_trades = calculate_trade_pnls(trades_df, df, lot_sizes)
  new_data = pd.concat([new_data, final_trades], ignore_index=True)

new_data


  new_data = pd.concat([new_data, final_trades], ignore_index=True)


Unnamed: 0,Entry Date,Stock Y,Stock X,ADF_PValue_Entry,ZScore Entry,Beta,Intercept,StdErr_Residual,Position,Exit Date,ADF_PValue_Exit,ZScore Exit,Entry Open Price X,Exit Open Price X,Entry Open Price Y,Exit Open Price Y,Lot Size X,Lot Size Y,Trade PnL,Cumulative PnL
0,2024-11-21,ADANIENT.NS,ACC.NS,0.00161,-3.751839,0.985089,646.40647,122.534054,long,2024-11-22,0.006852,-3.802246,2027.199951,2089.600098,2183.649902,2228.0,500,1.0,-9025.024414,-9025.024414
1,2024-11-25,ADANIENT.NS,ACC.NS,0.010746,-3.910656,0.989592,633.762189,127.584207,long,2024-11-26,0.019321,-4.415577,2145.0,2116.199951,2257.5,2150.5,500,1.0,-39099.975586,-48125.0
2,2024-11-28,ADANIENT.NS,ACC.NS,0.009458,-2.637406,0.988006,634.496413,136.382727,long,2024-12-30,0.033682,-0.080564,2188.550049,2059.899902,2437.100098,2592.350098,500,1.0,141950.073242,93825.073242
3,2024-07-25,GUJGASLTD.NS,ACC.NS,0.018294,2.525557,0.214481,22.679642,29.160511,short,2024-07-30,0.035196,3.261063,2578.899902,2590.75,649.450012,677.25,500,1.0,5897.24884,5897.24884
4,2024-08-02,GUJGASLTD.NS,ACC.NS,0.040594,3.555073,0.228857,-9.599907,30.706332,short,2024-08-05,0.088206,3.42164,2435.300049,2380.600098,656.900024,642.0,500,1.0,-27335.075562,-21437.826721
5,2024-08-12,IOC.NS,ACC.NS,0.011759,2.552436,0.113962,-124.636354,11.805622,short,2024-09-11,0.043181,0.943096,2313.600098,2440.649902,169.160004,169.740005,500,171.0,57869.884491,57869.884491
6,2024-07-24,LICI.NS,ACC.NS,0.003955,3.482354,0.546294,-409.085162,39.829234,short,2024-07-25,0.026031,3.934138,2619.350098,2578.899902,1160.550049,1160.900024,500,1.0,-20225.447632,-20225.447632
7,2024-07-26,LICI.NS,ACC.NS,0.049842,3.896219,0.552837,-423.255322,41.682994,short,2024-07-29,0.094661,3.638579,2614.149902,2604.300049,1184.349976,1173.599976,500,1.0,-4914.176758,-25139.62439
8,2024-08-02,LICI.NS,ACC.NS,0.004288,5.249231,0.569773,-459.633284,47.933711,short,2024-08-05,0.768444,4.18529,2435.300049,2380.600098,1179.550049,1107.650024,500,1.0,-27278.075562,-52417.699951
9,2025-05-28,SAIL.NS,ACC.NS,0.011606,2.569999,0.05407,5.212245,7.753954,short,2025-06-04,0.05201,3.105213,1920.099976,1877.0,128.960007,131.830002,500,175.0,-35182.4646,-35182.4646


In [None]:
final_trades.columns

Index(['Entry Date', 'Stock Y', 'Stock X', 'ADF_PValue_Entry', 'ZScore Entry',
       'Beta', 'Intercept', 'StdErr_Residual', 'Position', 'Exit Date',
       'ADF_PValue_Exit', 'ZScore Exit', 'Entry Open Price X',
       'Exit Open Price X', 'Entry Open Price Y', 'Exit Open Price Y',
       'Lot Size X', 'Lot Size Y', 'Trade PnL', 'Cumulative PnL'],
      dtype='object')

In [None]:
# Create initial DataFrame
df1 = pd.DataFrame(columns = ['col1', 'col2'])

# Create a new DataFrame or Series to append
new_data = pd.DataFrame({'col1': [5], 'col2': [6]})
pd.concat([df1, new_data], ignore_index=True)

Unnamed: 0,col1,col2
0,5,6


In [None]:
stocks = ["TATAMOTORS.NS", "BAJAJ-AUTO.NS"]
df = download_data(stocks, "2023-07-01", "2025-07-01")

In [None]:
stocks = ["NTPC.NS", "TATASTEEL.NS"]
df = download_data(stocks, "2023-07-01", "2025-07-01")

In [None]:
# df_1, x, y, corr, sgn = get_viable_pair(df, "2023-07-01")
x, y = get_viable_pair(df, "2023-07-01")

In [None]:
x, y

('NTPC.NS', 'TATASTEEL.NS')

In [None]:
df_1

Unnamed: 0_level_0,BAJAJ-AUTO.NS(X),TATAMOTORS.NS(Y)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-07-03,4610.649902,590.799988
2023-07-04,4625.399902,591.450012
2023-07-05,4890.049805,588.250000
2023-07-06,4917.700195,600.799988
2023-07-07,4834.000000,618.200012
...,...,...
2025-06-24,8377.000000,673.650024
2025-06-25,8389.500000,674.500000
2025-06-26,8433.000000,682.750000
2025-06-27,8436.000000,686.900024


In [None]:
# trades_df = generate_initial_pair_trades(df_1, f"pair_trades_{stocks[0]}_{stocks[1]}.csv")
trades_df = generate_initial_pair_trades(df_1)

In [None]:
update_pair_trades_csv(x, y, "2025-07-02", "2025-07-10", f"pair_trades_{stocks[0]}_{stocks[1]}.csv")

Updated pair_trades_TATAMOTORS.NS_BAJAJ-AUTO.NS.csv with 0 new entries.


In [None]:
lot_sizes = {
    'HDFCBANK.NS': 550,
    'ICICIBANK.NS': 1375,
    'TATAMOTORS.NS': 2850,
    'BAJAJ-AUTO.NS': 75
}

In [None]:
# calculate_trade_pnls(trades_df, df_1, lot_sizes, "pair_trades_detailed.csv")
calculate_trade_pnls(trades_df, df_1, lot_sizes)

Unnamed: 0,Entry Date,Stock Y,Stock X,ADF_PValue_Entry,ZScore Entry,Beta,Intercept,StdErr_Residual,Position,Exit Date,ADF_PValue_Exit,ZScore Exit,Entry Open Price X,Exit Open Price X,Entry Open Price Y,Exit Open Price Y,Lot Size X,Lot Size Y,Trade PnL,Cumulative PnL
0,2024-07-25,TATAMOTORS.NS(Y),BAJAJ-AUTO.NS(X),0.016967,2.87559,0.084897,208.691958,32.884406,short,2024-07-26,0.052618,3.075202,9278.25,9492.900391,1090.949951,1118.300049,125,2850,-51116.479492,-51116.479492
1,2024-09-09,TATAMOTORS.NS(Y),BAJAJ-AUTO.NS(X),0.04075,-2.515525,0.086803,195.412916,39.082664,long,2024-09-11,0.045825,-5.152296,10847.599609,11420.75,1038.699951,976.299988,125,2850,-249483.694458,-300600.17395
