# TSA Chapter 7: Quiz 2 - Spurious vs Genuine Regression

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_ch7/TSA_ch7_quiz2_spurious_regression/TSA_ch7_quiz2_spurious_regression.ipynb)

Distinguishing spurious from genuine regression relationships using scatter plots.

In [None]:
!pip install numpy matplotlib scipy -q

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import warnings
# Install a blanket 'ignore' filter: every warning category is suppressed from
# this point on.
# NOTE(review): this also hides deprecation/runtime warnings raised by numpy,
# scipy and matplotlib — confirm that silencing all of them is intended.
warnings.filterwarnings('ignore')

In [None]:
import os
# Colour palette (hex codes) used by the charts in this notebook.
COLORS = dict(
    blue='#1A3A6E',
    red='#DC3545',
    green='#2E7D32',
    orange='#E67E22',
    gray='#666666',
    purple='#8E44AD',
)
# Bare-name shortcuts for the palette entries.
BLUE, RED, GREEN, ORANGE, GRAY, PURPLE = (
    COLORS[key] for key in ('blue', 'red', 'green', 'orange', 'gray', 'purple')
)

# Global matplotlib defaults applied to every figure created afterwards.
plt.rcParams.update({
    # Transparent backgrounds, on screen and in saved files.
    'figure.facecolor': 'none',
    'axes.facecolor': 'none',
    'savefig.facecolor': 'none',
    'savefig.transparent': True,
    # Axes: no top/right spines, no grid.
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.grid': False,
    # Font sizes.
    'font.size': 10,
    'axes.titlesize': 12,
    'axes.labelsize': 10,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 9,
    # Resolution and line weights.
    'figure.dpi': 150,
    'lines.linewidth': 1.2,
    'axes.linewidth': 0.6,
    # Frameless, transparent legends.
    'legend.facecolor': 'none',
    'legend.framealpha': 0,
    'legend.edgecolor': 'none',
})
def save_chart(fig, name):
    """Save ``fig`` as ``<name>.pdf`` and ``<name>.png`` and print a confirmation.

    Both files are written with a tight bounding box, a transparent
    background and 150 dpi.

    Args:
        fig: Object exposing a matplotlib-style ``savefig(path, **kwargs)``.
        name: Output path without extension; ``.pdf`` and ``.png`` are appended.
    """
    export_options = dict(bbox_inches='tight', transparent=True, dpi=150)
    for extension in ('pdf', 'png'):
        fig.savefig(f'{name}.{extension}', **export_options)
    print(f'Saved: {name}')

In [None]:
# Seed NumPy's global generator so each run draws the same sample paths.
np.random.seed(123)
T = 200  # observations per series

# --- Genuine relationship: both series are built from one shared random walk ---
common_trend = np.random.normal(0, 1, T).cumsum()
x_genuine = common_trend + np.random.normal(0, 0.5, T)
y_genuine = 1.5 * common_trend + 3 + np.random.normal(0, 0.8, T)

# --- Spurious relationship: two separately drawn random walks (small drifts) ---
x_spurious = np.random.normal(0.05, 1, T).cumsum()
y_spurious = np.random.normal(0.03, 1, T).cumsum()

# --- OLS fits of Y on X; the fifth returned value (slope std. error) is discarded ---
(slope_g, intercept_g, r_g, p_g, _) = stats.linregress(x_genuine, y_genuine)
(slope_s, intercept_s, r_s, p_s, _) = stats.linregress(x_spurious, y_spurious)

# --- Plot ---
# p-values smaller than this are reported as an upper bound ("p < floor")
# instead of being printed exactly.
P_VALUE_FLOOR = 1e-10


def _p_value_label(p_value, floor=P_VALUE_FLOOR):
    """Return a mathtext label for a p-value.

    Values below ``floor`` are shown as ``p < floor``; anything else is shown
    as ``p = value``. Printing ``p <`` followed by the value itself would state
    a strict bound that the value does not satisfy.
    """
    if p_value < floor:
        return f'$p < {floor:.1e}$'
    return f'$p = {p_value:.1e}$'


def _draw_regression_panel(ax, x, y, slope, intercept, r_value, p_value, title, color):
    """Draw one scatter panel with its fitted line and an R^2 / p-value box.

    Args:
        ax: Matplotlib axes to draw on.
        x, y: 1-D arrays holding the regressor and the response.
        slope, intercept: Coefficients of the fitted line.
        r_value: Correlation coefficient; its square is shown as R^2.
        p_value: p-value reported next to R^2.
        title: Panel title.
        color: Colour of the scatter markers.

    Returns:
        The 100-point x grid on which the fitted line was drawn.
    """
    ax.scatter(x, y, alpha=0.4, s=15, color=color, edgecolors='none')
    x_line = np.linspace(x.min(), x.max(), 100)
    ax.plot(x_line, slope * x_line + intercept, color=RED, lw=1.5)
    ax.set_title(title, fontweight='bold')
    ax.set_xlabel('$X_t$')
    ax.set_ylabel('$Y_t$')
    ax.text(0.05, 0.95, f'$R^2 = {r_value**2:.3f}$\n{_p_value_label(p_value)}',
            transform=ax.transAxes, va='top', fontsize=9,
            bbox=dict(boxstyle='round,pad=0.3', facecolor='white', edgecolor=GRAY, alpha=0.7))
    return x_line


fig, axes = plt.subplots(1, 2, figsize=(10, 4.5))

# Genuine (neutral title so the figure does not give the quiz answer away)
x_fit = _draw_regression_panel(axes[0], x_genuine, y_genuine, slope_g, intercept_g,
                               r_g, p_g, 'Regression A', BLUE)

# Spurious
x_fit2 = _draw_regression_panel(axes[1], x_spurious, y_spurious, slope_s, intercept_s,
                                r_s, p_s, 'Regression B', ORANGE)

fig.tight_layout()
save_chart(fig, 'ch7_quiz2_spurious_regression')
plt.show()

# Quiz question, answer options and hint, emitted with a single print call.
print(
    '\nQuiz: Which regression is spurious?',
    '  (a) Regression A  (b) Regression B  (c) Both  (d) Neither',
    '\nHint: High R-squared between independent random walks is a hallmark of spurious regression.',
    sep='\n',
)