In [None]:
# import packages
import itertools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.tsa.stattools as ts
import yfinance as yf

In [1]:
# Ticker universes keyed by a market / asset-class label. Every unordered pair of
# tickers inside a list gets screened, so each list should only hold instruments
# that are generally known to be highly correlated and cointegrated.
# Uncomment a line below to enable that universe.
pairs = {
    # 'NASDAQ': ['AAPL', 'GOOGL', 'AMZN', 'TSLA', 'MSFT', 'NFLX'],
    # 'LSE': ['BP', 'SHEL', 'TOT', 'ENI', 'RDSA', 'EQNR'],
    # 'HKEX': ['BABA', 'TCEHY', 'JD', 'PDD', 'NTES', 'BIDU'],
    # 'BSE': ['RELIANCE.BO', 'TCS.BO', 'INFY.BO', 'HDFCBANK.BO', 'ICICIBANK.BO', 'SBIN.BO'],
    # 'TSE': ['SONY', 'TOYOF', 'SNE', 'NTDOY', 'TM', 'NMR'],
    # 'FWB': ['SIEGY', 'SAP', 'DDAIF', 'VOW3', 'BAS', 'ALV'],
    # 'Euronext': ['AIR.PA', 'OR.PA', 'MC.PA', 'BN.PA', 'CAP.PA', 'DG.PA'],
    # 'SIX': ['NVS', 'ROG', 'UBSG', 'CSGN', 'ZURN', 'SCMN'],
    # 'Bovespa': ['VALE', 'PBR', 'ITUB', 'BBD', 'ABEV', 'GGB'],
    # 'ASX': ['BHP', 'CBA', 'NAB', 'WBC', 'ANZ', 'WES'],
    # 'SGX': ['D05.SI', 'U11.SI', 'O39.SI', 'C52.SI', 'J36.SI', 'Z74.SI'],  # DBS, UOB, OCBC, Sembcorp, Jardine Matheson, Singtel
    # 'Commodities': ['GC=F', 'SI=F', 'CL=F', 'NG=F', 'HG=F', 'PA=F'],  # Gold, Silver, Crude Oil, Natural Gas, Copper, Palladium
    # 'Cryptos': ['BTC-USD', 'ETH-USD', 'BNB-USD', 'USDT-USD', 'ADA-USD', 'SOL-USD'],
    # 'ETFs': ['SPY', 'QQQ', 'IVV', 'VTI', 'VOO', 'DIA'],
    'FX': [
        'EURUSD=X',
        'USDCHF=X',
        'GBPUSD=X',
        'USDJPY=X',
        'USDAUD=X',
        'USDCAD=X',
    ],
}

# Pull closing prices for the requested tickers through yfinance
def fetch_data(tickers, start="2022-06-01", end="2024-06-01"):
    """Download price history with yfinance and keep only the 'Close' field.

    Args:
        tickers: Ticker symbol(s), forwarded unchanged to ``yf.download``.
        start: Start date string forwarded as ``start``.
        end: End date string forwarded as ``end``.

    Returns:
        The ``'Close'`` selection of the object returned by ``yf.download``.
    """
    downloaded = yf.download(tickers, start=start, end=end)
    close_prices = downloaded['Close']
    return close_prices

# Utility method to compute PnL diagnostics
def pnl_performance(pnl, label):
    """Print summary statistics for a PnL series and plot its cumulative sum.

    Args:
        pnl: pandas Series of per-period PnL expressed as fractions (0.01 == 1%).
            The annualisation uses 252 periods per year, i.e. the series is
            assumed to be daily.
        label: Name used in the printed header and the plot legend.

    Returns:
        None. Results are printed and shown with matplotlib.
    """
    periods_per_year = 252

    cumpnl = pnl.cumsum(skipna=True)

    # Sharpe is undefined when the volatility is zero or NaN (flat or empty
    # series); report NaN in that case instead of dividing by zero.
    daily_vol = np.std(pnl)
    if daily_vol > 0:
        sharpe = pnl.mean() / daily_vol
        sharpe = sharpe * np.sqrt(periods_per_year)
    else:
        sharpe = float("nan")

    print("\nPERFORMANCE STATISTICS FOR " + label)
    print("Daily annualized sharpe: " + str(sharpe))
    # Mean per-period return scaled to one year, then to percent (x100) so the
    # figure is on the same scale as the other percentage lines below.
    print("Average annual returns: " + str(pnl.mean() * periods_per_year * 100) + "%")
    print("Total returns: " + str(pnl.sum() * 100) + "%")

    # Drawdown = distance of the cumulative PnL below its running maximum.
    highwatermark_df = cumpnl.cummax()
    drawdown_df = cumpnl - highwatermark_df
    maxdrawdown = drawdown_df.min()
    print("Max drawdown: " + str(maxdrawdown * 100) + "%")

    plt.plot(cumpnl.values, label=label)
    plt.legend()
    plt.title("Cumulative PNL chart")
    plt.show()

# Helper: make every row zero-mean and scale it to unit gross exposure
def _neutralize(weights):
    """Subtract each row's mean, then divide the row by the sum of its absolute values."""
    centered = weights.subtract(weights.mean(axis=1), axis='index')
    return centered.divide(centered.abs().sum(axis=1), axis='index')


# Helper: raw signal -> lagged, capped, market-neutral weights
def _market_neutral_weights(raw_signal, max_weight, rounds=3):
    """Neutralise a raw signal, lag it one row, then cap and re-neutralise `rounds` times."""
    weights = _neutralize(raw_signal).shift(1)
    for _ in range(rounds):
        # Cap each weight at +/- max_weight; NaNs are left untouched.
        capped = weights.clip(lower=-max_weight, upper=max_weight)
        weights = _neutralize(capped)
    return weights


# Function to run pairs trading for a given pair
def pairs_trading(data, ticker1, ticker2, period=20, max_weight=0.01,
                  coint_pvalue=0.1, min_obs=200):
    """Screen one ticker pair for cointegration and report two market-neutral strategies.

    Args:
        data: DataFrame of prices with one column per ticker.
        ticker1: Column name of the first leg.
        ticker2: Column name of the second leg.
        period: Long rolling window length. Must be at least 3 because the
            rolling windows are created with ``min_periods=3``.
        max_weight: Absolute cap applied to each weight before re-normalising.
        coint_pvalue: The pair is traded only if the cointegration test
            p-value is strictly below this threshold.
        min_obs: Minimum number of rows (after dropping missing values)
            required to run the test.

    Returns:
        None. Diagnostics are printed and, for cointegrated pairs, forwarded to
        ``pnl_performance`` once per strategy.
    """
    # A ticker that never made it into the frame would raise KeyError on the
    # column selection below; report it the same way as insufficient data.
    if any(ticker not in data.columns for ticker in (ticker1, ticker2)):
        print(f"Not enough data for {ticker1} and {ticker2}")
        return

    # Drop rows with missing values
    data = data[[ticker1, ticker2]].dropna()

    if data.empty or len(data) < min_obs:
        print(f"Not enough data for {ticker1} and {ticker2}")
        return

    # Calculate the cointegration test (element 1 of the result is the p-value)
    coint_result = ts.coint(data[ticker1], data[ticker2])[1]
    print(f"Cointegration test result for {ticker1} and {ticker2}: {coint_result}")

    # Ensure they are cointegrated (a NaN p-value is treated as "not cointegrated")
    if not (coint_result < coint_pvalue):
        print(f"{ticker1} and {ticker2} are not cointegrated")
        return

    # Linear regression for spread. `params` is label-indexed, so use .iloc for
    # positional access: integer keys via [] are deprecated for such a Series.
    result = sm.OLS(data[ticker1], sm.add_constant(data[ticker2])).fit()
    intercept = result.params.iloc[0]
    hedge_ratio = result.params.iloc[1]
    # NOTE(review): the spread is not consumed by either signal below; both are
    # built from the two legs' returns. Confirm whether it should drive a signal.
    spread = (data[ticker1] - hedge_ratio * data[ticker2]) - intercept

    return_df = (data / data.shift(1)) - 1
    # Weights on a row are multiplied by the return on the following row.
    next_return_df = return_df.shift(-1)

    # NOTE(review): with only two columns the weights after neutralising are
    # +/-0.5, so a cap below 0.5 is undone by the re-normalisation that follows.

    # Stochastic Oscillator (Mean Reversion): negated position of the latest
    # return inside its rolling min/max range.
    window = return_df.rolling(period, min_periods=3)
    lowest = window.min()
    highest = window.max()
    stochastic_signal = -(return_df - lowest) / (highest - lowest)
    signal_df = _market_neutral_weights(stochastic_signal, max_weight)
    pnl = (signal_df * next_return_df).sum(axis=1)
    pnl_performance(pnl, f"{ticker1}-{ticker2} MARKET NEUTRAL Stochastic Oscillator")

    # MA Crossover: negated gap between the 3-row and `period`-row mean return,
    # so the raw signal is positive when the short mean is below the long mean.
    short_mean = return_df.rolling(3).mean()
    long_mean = return_df.rolling(period, min_periods=3).mean()
    crossover_signal = -(short_mean - long_mean)
    signal_df = _market_neutral_weights(crossover_signal, max_weight)
    pnl = (signal_df * next_return_df).sum(axis=1)
    pnl_performance(pnl, f"{ticker1}-{ticker2} MARKET NEUTRAL MACD Crossover")

# Fetch each group's prices once, then process every unordered ticker pair in it
for exchange, tickers in pairs.items():
    # One download per group instead of one per pair: pairs_trading selects the
    # two columns it needs and drops missing rows for that pair itself.
    data = fetch_data(tickers)
    # combinations() yields (i, j) with i < j in the same order as nested index loops.
    for ticker1, ticker2 in itertools.combinations(tickers, 2):
        print(f"Running pairs trading for {ticker1} and {ticker2} from {exchange} exchange...")
        pairs_trading(data, ticker1, ticker2)

Running pairs trading for EURUSD=X and USDCHF=X from FX exchange...


NameError: name 'yf' is not defined