# TSA Chapter 7: Pairs Trading

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_ch7/TSA_ch7_pairs_trading/TSA_ch7_pairs_trading.ipynb)

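This notebook examines Coca-Cola (KO) and PepsiCo (PEP) as a candidate cointegrated pair for pairs trading, using daily closing prices from Yahoo Finance. It estimates an OLS hedge ratio on log prices, plots the resulting log spread with ±2σ trading bands, and runs the Engle-Granger cointegration test.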

In [None]:
!pip install numpy pandas matplotlib statsmodels pandas_datareader yfinance -q

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller, coint
from statsmodels.regression.linear_model import OLS
import statsmodels.api as sm
import yfinance as yf
import warnings
# Install a blanket 'ignore' filter: every warning raised after this point is
# suppressed (presumably to keep the notebook output uncluttered).
# NOTE(review): this also hides deprecation notices from the imported libraries.
warnings.filterwarnings('ignore')

In [None]:
import os
# Shared colour palette (hex codes) for the charts in this notebook.
COLORS = {
    'blue': '#1A3A6E',
    'red': '#DC3545',
    'green': '#2E7D32',
    'orange': '#E67E22',
    'gray': '#666666',
    'purple': '#8E44AD',
}
# Upper-case shortcuts so plotting code can write BLUE instead of COLORS['blue'].
BLUE, RED, GREEN, ORANGE, GRAY, PURPLE = (
    COLORS[key] for key in ('blue', 'red', 'green', 'orange', 'gray', 'purple')
)

# Global Matplotlib defaults applied to every figure created afterwards.
plt.rcParams.update({
    # Transparent figure/axes backgrounds, on screen and in saved files.
    'figure.facecolor': 'none',
    'axes.facecolor': 'none',
    'savefig.facecolor': 'none',
    'savefig.transparent': True,
    # Minimal frame: no top/right spines, no grid, thin axis lines.
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.grid': False,
    'axes.linewidth': 0.6,
    # Font sizes.
    'font.size': 10,
    'axes.titlesize': 12,
    'axes.labelsize': 10,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 9,
    # Resolution and default line width.
    'figure.dpi': 150,
    'lines.linewidth': 1.2,
    # Frameless, fully transparent legends.
    'legend.facecolor': 'none',
    'legend.framealpha': 0,
    'legend.edgecolor': 'none',
})
def save_chart(fig, name):
    """Save ``fig`` as ``<name>.pdf`` and ``<name>.png``, then report it.

    Both files are written with a tight bounding box, a transparent
    background and 150 dpi. A ``Saved: <name>`` line is printed once both
    files have been written.

    Parameters
    ----------
    fig : object
        Anything exposing ``savefig`` (a Matplotlib figure in this notebook).
    name : str
        Output path without the file extension.
    """
    export_options = {'bbox_inches': 'tight', 'transparent': True, 'dpi': 150}
    # PDF first, then PNG, both with identical export settings.
    for extension in ('pdf', 'png'):
        fig.savefig(f'{name}.{extension}', **export_options)
    print(f'Saved: {name}')

In [None]:
# Fetch real data from Yahoo Finance: 'Close' prices for KO, then PEP, over the same window
ko, pep = (
    yf.download(ticker, start='2019-01-01', end='2024-01-01')['Close']
    for ticker in ('KO', 'PEP')
)

# If a download came back as a table (it has `columns`), keep only its first column
ko = ko.iloc[:, 0] if hasattr(ko, 'columns') else ko
pep = pep.iloc[:, 0] if hasattr(pep, 'columns') else pep

# Align both series on a shared index and drop rows where either price is missing
prices = pd.DataFrame({'KO': ko, 'PEP': pep}).dropna()
first_day = prices.index[0]
last_day = prices.index[-1]
print(f'Period: {first_day.strftime("%Y-%m-%d")} to {last_day.strftime("%Y-%m-%d")}, n={len(prices)}')

# Hedge ratio: regress log(KO) on a constant and log(PEP); the residual is the log spread
log_ko, log_pep = np.log(prices['KO']), np.log(prices['PEP'])
X = sm.add_constant(log_pep)
hedge = sm.OLS(log_ko, X).fit()
spread = hedge.resid
mu = spread.mean()
sigma = spread.std()
print(f'Hedge ratio (beta): {hedge.params.iloc[1]:.4f}')

# Figure with two stacked panels; both legends sit centred below their panel
fig, axes = plt.subplots(2, 1, figsize=(12, 4))
legend_below = {'loc': 'upper center', 'bbox_to_anchor': (0.5, -0.15), 'frameon': False}

# Panel 1: KO on the left y-axis, PEP on a twin right y-axis
ax1 = axes[0]
ax1.plot(prices.index, prices['KO'], color=BLUE, lw=1, label='Coca-Cola (KO)')
ax2 = ax1.twinx()
ax2.plot(prices.index, prices['PEP'], color=RED, lw=1, label='PepsiCo (PEP)')
ax1.set_ylabel('KO ($)', color=BLUE)
ax2.set_ylabel('PEP ($)', color=RED)
ax2.spines['top'].set_visible(False)
# The two lines live on different axes, so merge their handles into one legend
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, ncol=2, **legend_below)
ax1.set_title('KO vs PEP Daily Prices (Yahoo Finance)', fontweight='bold')

# Panel 2: log spread with its mean and a shaded +/- 2 standard deviation band
ax_spread = axes[1]
upper_band = mu + 2*sigma
lower_band = mu - 2*sigma
ax_spread.plot(prices.index, spread, color=GREEN, lw=0.8, label='Log Spread')
ax_spread.axhline(mu, color='black', ls='-', lw=0.8, label=f'Mean = {mu:.4f}')
# Only the upper line carries a label so the band appears once in the legend
ax_spread.axhline(upper_band, color=RED, ls='--', lw=0.8, label=f'+/- 2$\\sigma$')
ax_spread.axhline(lower_band, color=RED, ls='--', lw=0.8)
ax_spread.fill_between(prices.index, lower_band, upper_band, color=GREEN, alpha=0.08)
ax_spread.set_title('Log Spread with Trading Bands', fontweight='bold')
ax_spread.set_ylabel('Spread')
ax_spread.legend(ncol=3, **legend_below)

plt.tight_layout()
save_chart(fig, 'pairs_trading')
plt.show()

# Engle-Granger cointegration test on the two log-price series (third return value unused)
t_stat, p_val, _ = coint(log_ko, log_pep)
print(f'\nEngle-Granger cointegration test: t-stat={t_stat:.3f}, p-value={p_val:.4f}')