
<h1>Research phase</h1>

In [None]:
# importing libs
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from datetime import datetime
from statsmodels.tsa.stattools import coint
import plotly.graph_objects as go

In [None]:
# Ticker symbols of the candidate assets that are compared against each other
assets = [
    "SPY",
    "QQQ",
    "XLK",
    "VGT",
    "IVV",
]

In [None]:
# Date window for the historical price download: fixed start date ...
start_date = "2020-01-01"
# ... and the current local date, as a YYYY-MM-DD string, as the end
end_date = datetime.today().date().isoformat()

In [None]:
# Download daily bars for every candidate, keep only the closing prices, and
# drop every day on which at least one ticker has no close. The date index
# is moved into a regular "Date" column so it can be converted below.
data = yf.download(tickers=assets, start=start_date, end=end_date, interval="1d")['Close'].dropna().reset_index()
# Replace the timestamps with plain calendar dates (vectorised accessor
# instead of a per-row lambda)
data["Date"] = data["Date"].dt.date
# Use the calendar date as the index again
data = data.set_index("Date")
# Show the result
data.head()

In [None]:
# Pairwise correlation between the columns of `data` (one column per ticker)
# NOTE(review): `data` holds closing price levels, not returns — confirm that
# correlating levels is what is intended here
corr_matrix = data.corr()
corr_matrix

In [None]:
# Keep only the coefficients strictly between 0.8 and 1; every other cell
# becomes NaN. The upper limit is there to blank out entries equal to 1,
# such as an asset's correlation with itself.
is_strong = (corr_matrix > 0.8) & (corr_matrix < 1)
strong_corr = corr_matrix.where(is_strong)
strong_corr

In [None]:
# Overlay the price series of every ticker on one chart for visual comparison
price_fig, price_ax = plt.subplots(figsize=(14, 6))
for ticker in data.columns:
    price_ax.plot(data[ticker], label=str(ticker))
price_ax.set_title('Price Comparison')
price_ax.set_xlabel('Date')
price_ax.set_ylabel('Price')
price_ax.legend()
plt.show()


In [None]:
# Empty frame on the price index; it receives one spread column per asset pair
spread_df = pd.DataFrame(index=data.index)

# Pair every asset with each asset that comes after it in the list, so each
# unordered pair is used exactly once and no asset is paired with itself
for pos, asset1 in enumerate(assets):
    for asset2 in assets[pos + 1:]:
        # Spread = raw price difference, stored under "<first>_<second>"
        spread_df[f"{asset1}_{asset2}"] = data[asset1] - data[asset2]

# Display the calculated spreads
spread_df

In [None]:
# Draw every pair's spread as its own line on a shared chart
spreads_fig, spreads_ax = plt.subplots(figsize=(12, 6))
for pair_name in spread_df.columns:
    spreads_ax.plot(spread_df[pair_name], label=pair_name)

spreads_ax.set_title('Spread Between Multiple Assets')
spreads_ax.set_xlabel('Date')
spreads_ax.set_ylabel('Spread Value')
spreads_ax.legend()
plt.show()

In [None]:
# Maps "<asset A> and <asset B>" to the p-value of that pair's cointegration test
pvalue = {}
# Test each unordered pair of assets exactly once
for pos, first in enumerate(assets):
    for second in assets[pos + 1:]:
        # Only the second element of coint's result (the p-value) is needed
        _, p_value, _ = coint(data[first], data[second])
        print(f"Cointegration Test p-value for {first} and {second}: {p_value}")
        pvalue[f"{first} and {second}"] = p_value


In [None]:
# Report every pair whose cointegration p-value is below 0.05.
# Iterating over items() keeps each p-value attached to its own pair; looking
# the pair up again by value would return the wrong name whenever two pairs
# happen to share the same p-value.
for pair, p in pvalue.items():
    if p < 0.05:
        print(f"Spread trading opportunity: {pair} thier p-value: {p} < 0.05")

<h1>Application & Backtesting phase</h1>

In [None]:
# Put the SPY-IVV spread into its own frame under the column name "Spread"
spread_data = spread_df['SPY_IVV'].to_frame("Spread")
# Plot the spread together with a horizontal line at its mean
plt.figure(figsize=(12, 6))
plt.plot(spread_data['Spread'], label='Spread', color='blue')
plt.axhline(spread_data['Spread'].mean(), color='red', linestyle='--', label='Mean Spread')
# The plotted pair is SPY and IVV, so the title names the IVV ticker
plt.title('Spread Between SPY and IVV')
plt.xlabel('Date')
plt.ylabel('Spread Value')
plt.legend()
plt.show()

In [None]:
# Mean and standard deviation of the SPY-IVV spread over the entire sample
pair_spread = spread_df['SPY_IVV']
mean_spread = pair_spread.mean()
std_spread = pair_spread.std()

# Band of one standard deviation on either side of the mean, stored as
# constant columns next to the spread
# NOTE(review): both statistics use the full history, so anything derived
# from these bounds on an early date already reflects later data — confirm
# this is acceptable for the backtest
spread_data['Upper Bound'] = mean_spread + std_spread
spread_data['Lower Bound'] = mean_spread - std_spread


In [None]:
# Start with no position on every day
spread_data['Signal'] = 0
above_band = spread_data['Spread'] > spread_data['Upper Bound']
below_band = spread_data['Spread'] < spread_data['Lower Bound']
# Spread above the upper bound -> short signal
spread_data.loc[above_band, 'Signal'] = -1
# Spread below the lower bound -> long signal
spread_data.loc[below_band, 'Signal'] = 1

# Spread values on the days that carry a signal, used for the markers below
buy_points = spread_data.loc[spread_data['Signal'] == 1, 'Spread']
sell_points = spread_data.loc[spread_data['Signal'] == -1, 'Spread']

# Spread, band and signal markers on one chart
signal_fig, signal_ax = plt.subplots(figsize=(12, 6))
signal_ax.plot(spread_data['Spread'], label='Spread', color='blue')
signal_ax.plot(spread_data['Upper Bound'], label='Upper Bound', color='green', linestyle='--')
signal_ax.plot(spread_data['Lower Bound'], label='Lower Bound', color='orange', linestyle='--')
signal_ax.scatter(buy_points.index, buy_points, color='green', label='Buy Signal', marker='^')
signal_ax.scatter(sell_points.index, sell_points, color='red', label='Sell Signal', marker='v')
signal_ax.set_title('Spread Trading Signals')
signal_ax.set_xlabel('Date')
signal_ax.set_ylabel('Spread Value')
signal_ax.legend()
plt.show()


In [None]:
# Backtest on the spread itself: the previous row's signal is applied to the
# current row's change in the spread, then accumulated over time
held_signal = spread_data['Signal'].shift(1)
spread_change = spread_data['Spread'].diff()
spread_data['PnL'] = held_signal * spread_change
spread_data['Cumulative PnL'] = spread_data['PnL'].cumsum()

# Chart the running total of profit and loss
pnl_fig, pnl_ax = plt.subplots(figsize=(12, 6))
pnl_ax.plot(spread_data['Cumulative PnL'], label='Cumulative PnL', color='purple')
pnl_ax.set_title('Cumulative Profit and Loss')
pnl_ax.set_xlabel('Date')
pnl_ax.set_ylabel('PnL')
pnl_ax.legend()
plt.show()

In [None]:
# Sharpe ratio of the per-row PnL: mean over standard deviation, scaled by
# sqrt(252) (presumably the number of trading days per year — confirm)
pnl = spread_data['PnL']
annualisation = 252 ** 0.5
sharpe_ratio = pnl.mean() / pnl.std() * annualisation
print(f"Sharpe Ratio: {sharpe_ratio:.2f}")

In [None]:
# Frame on the price index that holds positions and returns of both legs
portfolio = pd.DataFrame(index=data.index)
# The two legs always hold opposite positions: SPY follows the signal,
# IVV takes the negated signal
signal = spread_data['Signal']
portfolio['SPY_Position'] = signal
portfolio['IVV_Position'] = -signal

# Per-leg return: the asset's percentage change times the position held on
# the previous row
spy_pct = data['SPY'].pct_change()
ivv_pct = data['IVV'].pct_change()
portfolio['SPY_Return'] = spy_pct * portfolio['SPY_Position'].shift(1)
portfolio['IVV_Return'] = ivv_pct * portfolio['IVV_Position'].shift(1)
# Combined return of both legs
portfolio['Total_Return'] = portfolio['SPY_Return'] + portfolio['IVV_Return']
# Running sum of the combined return
portfolio['Cumulative_Return'] = portfolio['Total_Return'].cumsum()
# Remove rows that contain NaN (the first row has no previous position/price)
portfolio.dropna(inplace=True)

In [None]:
# Plot the per-period strategy return of each leg.
# The stored portfolio columns are used directly: they were computed before
# the NaN rows were dropped, whereas shifting the positions again on the
# already-trimmed frame would blank out the first valid return.
plt.figure(figsize=(12, 6))
plt.plot(portfolio['SPY_Return'], label="SPY Return", color='Red')
plt.plot(portfolio['IVV_Return'], label="IVV Return", color='Blue')
plt.title("Pairs Trading Backtest - SPY & IVV")
plt.xlabel("Date")
# These series are per-period returns, not cumulative ones
plt.ylabel("Return")
plt.legend()
plt.show()

In [None]:
# Chart the cumulative return of the combined two-leg portfolio
total_fig, total_ax = plt.subplots(figsize=(12, 6))
total_ax.plot(portfolio['Cumulative_Return'], label="Cumulative Portfolio Return", color='Green')
total_ax.set_title("Pairs Trading Backtest - SPY & IVV")
total_ax.set_xlabel("Date")
total_ax.set_ylabel("Cumulative Return")
total_ax.legend()
plt.show()

In [None]:
# Plot the cumulative return of each leg.
# The running sum must be taken over the leg's return series as a whole;
# applying .cumsum() to the shifted position alone would multiply each
# period's return by an accumulated position count instead.
plt.figure(figsize=(12, 6))
plt.plot(portfolio['SPY_Return'].cumsum(), label="SPY cumulative return", color='Red')
plt.plot(portfolio['IVV_Return'].cumsum(), label="IVV cumulative return", color='Blue')
plt.title("Pairs Trading Backtest - SPY & IVV")
plt.xlabel("Date")
plt.ylabel("Cumulative Return")
plt.legend()
plt.show()

In [None]:
# Plot the per-period strategy return of each leg.
# The stored portfolio columns are used directly: they were computed before
# the NaN rows were dropped, whereas shifting the positions again on the
# already-trimmed frame would blank out the first valid return.
plt.figure(figsize=(12, 6))
plt.plot(portfolio['SPY_Return'], label="SPY Return", color='Red')
plt.plot(portfolio['IVV_Return'], label="IVV Return", color='Blue')
plt.title("Pairs Trading Backtest - SPY & IVV")
plt.xlabel("Date")
# These series are per-period returns, not cumulative ones
plt.ylabel("Return")
plt.legend()
plt.show()

In [None]:
# Preparing data for plotting.
# Every series is taken from `portfolio`, so each y value shares the index
# used for x; series rebuilt from `data` carry one extra leading row and
# would be drawn one date late against `portfolio.index`.
x = portfolio.index
SPY_Return = portfolio['SPY_Return']
IVV_Return = portfolio['IVV_Return']
# Running sum over each leg's return series (not over the positions)
SPY_CumReturn = SPY_Return.cumsum()
IVV_CumReturn = IVV_Return.cumsum()

# Create a figure
fig = go.Figure()

# One trace per series: cumulative curves as lines, per-period returns as
# lines with markers
fig.add_traces([
    go.Scatter(x=x, y=portfolio['Cumulative_Return'], mode='lines', name='Total cumulative return', line=dict(color='Green')),
    go.Scatter(x=x, y=SPY_CumReturn, mode='lines', name='SPY cumulative return', line=dict(color='Blue')),
    go.Scatter(x=x, y=IVV_CumReturn, mode='lines', name='IVV cumulative return', line=dict(color='Red')),
    go.Scatter(x=x, y=SPY_Return, mode='lines+markers', name='SPY return', line=dict(color='Blue')),
    go.Scatter(x=x, y=IVV_Return, mode='lines+markers', name='IVV return', line=dict(color='Red'))
])

# Titles, hover behaviour, theme and canvas size
fig.update_layout(
    title="Pairs Trading Backtest - Interactive Plot",
    xaxis_title="Date",
    yaxis_title="Cumulative Return",
    hovermode="x unified",
    template="plotly_dark",
    width=1000,
    height=600,
)

# Show the plot
fig.show()
