In [None]:
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import numpy as np

def fetch_close(tickers, start="2010-01-01", end=None) -> pd.DataFrame:
    """
    Fetch auto-adjusted close prices from Yahoo Finance.

    Parameters
    ----------
    tickers : str or iterable of str
        One symbol (e.g. "SPY") or several symbols (e.g. ["SPY", "TLT"]).
    start, end :
        Passed straight through to ``yf.download``.

    Returns
    -------
    pd.DataFrame
        One column per ticker; rows where every column is NaN are dropped.
    """
    # A bare string must be wrapped: list("SPY") would yield ["S", "P", "Y"].
    symbols = [tickers] if isinstance(tickers, str) else list(tickers)

    data = yf.download(
        tickers=symbols,
        start=start,
        end=end,
        auto_adjust=True,
        progress=False,
        group_by="column"
    )

    if isinstance(data.columns, pd.MultiIndex):
        # Two-level columns: selecting "Close" leaves a frame keyed by ticker.
        close = data["Close"].copy()
    else:
        # Flat columns (single-ticker case): label the column with the ticker.
        close = data[["Close"]].copy()
        close.columns = symbols

    return close.dropna(how="all")


def build_20d_return_spread(
    stock_ticker="SPY",
    bond_ticker="TLT",
    start="2018-01-01",
    end=None,
    lookback=20
) -> pd.DataFrame:
    """
    Trailing-return spread between a stock and a bond ticker.

    For each date shared by both price series:
        spread = pct_change(stock, lookback) - pct_change(bond, lookback)

    Returns a DataFrame holding both trailing returns and their spread,
    with rows containing any NaN removed.
    """
    prices = fetch_close([stock_ticker, bond_ticker], start=start, end=end)

    equity_px = prices[stock_ticker].dropna()
    bond_px = prices[bond_ticker].dropna()

    # Keep only dates on which both instruments have a price.
    common_dates = equity_px.index.intersection(bond_px.index)

    equity_ret = equity_px.reindex(common_dates).pct_change(lookback)
    bond_ret = bond_px.reindex(common_dates).pct_change(lookback)

    columns = {
        f"{stock_ticker}_ret_{lookback}d": equity_ret,
        f"{bond_ticker}_ret_{lookback}d": bond_ret,
        f"spread_{lookback}d": equity_ret - bond_ret,
    }
    return pd.DataFrame(columns).dropna()

In [None]:
# Build the SPY/TLT 20-day return spread from 2020 onwards.
df = build_20d_return_spread(
    stock_ticker="SPY", bond_ticker="TLT", start="2020-01-01", end=None, lookback=20
)

print(df.tail())

# Line chart of the spread over time.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df["spread_20d"])
ax.set_ylabel("return (stock 20D - bond 20D)")
plt.show()

In [None]:
# Histogram (50 bins) of the "spread_20d" column of the `df` built in the previous cell.
df["spread_20d"].hist(bins=50)

In [None]:
def spread_20d_from_series(stock_close: pd.Series, bond_close: pd.Series, lookback: int = 20) -> pd.Series:
    """
    Trailing-return spread of two price series over their shared dates.

    spread(t) = pct_change(stock, lookback)(t) - pct_change(bond, lookback)(t)

    The result is named "spread_{lookback}d"; the first `lookback` entries
    are NaN because no trailing return exists for them.
    """
    shared_dates = stock_close.index.intersection(bond_close.index)

    stock_ret = stock_close.reindex(shared_dates).pct_change(lookback)
    bond_ret = bond_close.reindex(shared_dates).pct_change(lookback)

    spread = stock_ret - bond_ret
    return spread.rename(f"spread_{lookback}d")


def backtest_spread_threshold_strategy(
    stock_ticker: str = "SPY",
    bond_ticker: str = "TLT",
    start: str = "2010-01-01",
    end: str | None = None,
    lookback: int = 20,
    upper: float = 0.02,
    exit_band: float = 0.00,
    fee_bps: float = 2.0
) -> pd.DataFrame:
    """
    Long/flat strategy on the stock, driven by the stock-minus-bond return spread.

    Rules
    -----
      - enter long when the spread rises above `upper`
      - exit to cash when the spread falls below `exit_band` (hysteresis)
    The position decided on day t earns the stock return of day t+1
    (one-bar shift), so there is no look-ahead. The strategy is treated as
    flat before the first bar.

    Parameters
    ----------
    stock_ticker, bond_ticker : symbols passed to `fetch_close`.
    start, end : date range passed to `fetch_close`.
    lookback : window (in rows) of the trailing returns behind the spread.
    upper : entry threshold on the spread.
    exit_band : exit threshold on the spread.
    fee_bps : cost in basis points charged per unit of position change.

    Returns
    -------
    pd.DataFrame with columns: stock_close, spread_{lookback}d, stock_ret_1d,
    position, turnover, cost, strategy_ret_1d, equity, buy_hold_equity.
    Both equity curves are compounded over exactly the same rows.
    """
    close = fetch_close([stock_ticker, bond_ticker], start=start, end=end)

    stock_close = close[stock_ticker].dropna()
    bond_close = close[bond_ticker].dropna()

    spread = spread_20d_from_series(stock_close, bond_close, lookback=lookback)
    stock_ret_1d = stock_close.pct_change().rename("stock_ret_1d")

    df = pd.concat(
        [stock_close.rename("stock_close"), spread, stock_ret_1d],
        axis=1
    ).dropna()

    # Hysteresis state machine. Iterating over a NumPy array avoids a
    # label-based .loc lookup per row, which is slow and fails on duplicate labels.
    spread_values = df[spread.name].to_numpy()
    held = np.zeros(len(df), dtype=float)
    in_long = False
    for i, value in enumerate(spread_values):
        if (not in_long) and (value > upper):
            in_long = True
        elif in_long and (value < exit_band):
            in_long = False
        held[i] = 1.0 if in_long else 0.0
    pos = pd.Series(held, index=df.index, name="position")

    # Position carried into each bar; flat (0) before the first bar, so the
    # first row stays defined instead of becoming NaN and being dropped.
    prev_pos = pos.shift(1, fill_value=0.0)

    # Trading cost on turnover; an entry on the very first bar is charged too.
    turnover = (pos - prev_pos).abs()
    cost = turnover * (fee_bps / 10000.0)

    # Strategy return (no look-ahead)
    strat_ret = (prev_pos * df["stock_ret_1d"] - cost).rename("strategy_ret_1d")

    equity = (1 + strat_ret).cumprod().rename("equity")
    bh_equity = (1 + df["stock_ret_1d"]).cumprod().rename("buy_hold_equity")

    out = pd.concat([df, pos, turnover.rename("turnover"), cost.rename("cost"),
                     strat_ret, equity, bh_equity], axis=1).dropna()

    return out


def perf_summary(daily_ret: pd.Series, trading_days: int = 252) -> dict:
    """
    Summarise a series of daily returns.

    NaN entries are ignored. Returns an empty dict when nothing is left,
    otherwise a dict with:
      ann_return   - compounded growth annualised with `trading_days`
      ann_vol      - population std (ddof=0) scaled by sqrt(trading_days)
      sharpe       - ann_return / ann_vol (NaN when ann_vol is zero)
      max_drawdown - most negative equity / running-peak - 1
      hit_rate     - share of days with a strictly positive return
      n_days       - number of non-NaN observations
    """
    returns = daily_ret.dropna()
    n_obs = len(returns)
    if n_obs == 0:
        return {}

    curve = returns.add(1).cumprod()
    drawdown = curve.div(curve.cummax()).sub(1.0)

    annual_return = curve.iloc[-1] ** (trading_days / n_obs) - 1.0
    annual_vol = returns.std(ddof=0) * np.sqrt(trading_days)
    if annual_vol == 0:
        sharpe_ratio = np.nan
    else:
        sharpe_ratio = annual_return / annual_vol

    return dict(
        ann_return=float(annual_return),
        ann_vol=float(annual_vol),
        sharpe=float(sharpe_ratio),
        max_drawdown=float(drawdown.min()),
        hit_rate=float(returns.gt(0).mean()),
        n_days=int(n_obs),
    )


# --- Run example ---
# SPY vs TLT from 2024: enter when the 20-day spread is above 0, exit below -10%, no fees.
backtest_params = dict(
    stock_ticker="SPY",
    bond_ticker="TLT",
    start="2024-01-01",
    lookback=20,
    upper=0.0,
    exit_band=-0.1,
    fee_bps=0.0,
)
res = backtest_spread_threshold_strategy(**backtest_params)

# Same summary statistics for the strategy and for simply holding the stock.
for label, column in (("Strategy:", "strategy_ret_1d"), ("Buy&Hold:", "stock_ret_1d")):
    print(label, perf_summary(res[column]))

# Optional plot
res[["equity", "buy_hold_equity"]].plot(title="Equity vs Buy&Hold")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# assuming `res` is the DataFrame returned by backtest_spread_threshold_strategy(...)
# it contains:
# - "stock_close"
# - "spread_20d" (or generally "spread_{lookback}d")

# Take the first column of `res` named like "spread_...d";
# next() raises StopIteration when no such column exists.
spread_col = next(
    name
    for name in res.columns
    if name.startswith("spread_") and name.endswith("d")
)

fig, ax1 = plt.subplots(figsize=(12, 6))

# Left axis: stock price in gray
ax1.plot(res.index, res["stock_close"], linewidth=1.5, color='gray')
ax1.set(ylabel="SPY (auto-adjusted close)", title=f"SPY Price vs {spread_col}")

# Right axis: return spread, sharing the same x axis
ax2 = ax1.twinx()
ax2.plot(res.index, res[spread_col], linewidth=1.2)
ax2.set(ylabel=f"{spread_col} (stock 20D - bond 20D)")

plt.show()


In [None]:
# pip install yfinance pandas numpy matplotlib

import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt


def fetch_close(ticker, start="2010-01-01", end=None):
    """
    Fetch auto-adjusted close prices from Yahoo Finance.

    This definition replaces the `fetch_close` defined in the first cell of the
    notebook once this cell has run. So that helpers which pass a list of
    tickers keep working afterwards, both calling conventions are accepted:

      - `ticker` is a str: returns a 1D Series named "close" (NaNs dropped).
      - `ticker` is an iterable of symbols: returns a DataFrame with one
        column per symbol (rows that are entirely NaN dropped).
    """
    single = isinstance(ticker, str)
    symbols = ticker if single else list(ticker)

    df = yf.download(symbols, start=start, end=end, auto_adjust=True, progress=False)
    close = df["Close"]

    if not single:
        # Flat-column results give a Series here; label it with its ticker.
        if isinstance(close, pd.Series):
            close = close.to_frame(name=symbols[0])
        return close.dropna(how="all")

    # close can be Series or 1-col DataFrame depending on yfinance/pandas versions
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]
    close = close.dropna()
    close.name = "close"
    return close


def rsi_wilder(close: pd.Series, n: int = 14) -> pd.Series:
    """
    Relative Strength Index with Wilder-style smoothing.

    Gains and losses of the one-step price change are each smoothed with an
    exponential moving average using alpha = 1/n (adjust=False,
    min_periods=n); then RSI = 100 - 100 / (1 + avg_gain / avg_loss).
    The returned Series is named "rsi_{n}".
    """
    change = close.diff()
    smoothing = dict(alpha=1 / n, adjust=False, min_periods=n)

    up_avg = change.clip(lower=0.0).ewm(**smoothing).mean()
    down_avg = (-change).clip(lower=0.0).ewm(**smoothing).mean()

    strength = up_avg / down_avg
    return (100 - (100 / (1 + strength))).rename(f"rsi_{n}")


def forward_returns(close: pd.Series, horizons=(5, 10, 20, 40, 60)) -> pd.DataFrame:
    """
    Forward simple returns of `close` for each horizon H (in rows).

    Column "fwd_{H}d" holds close(t+H) / close(t) - 1; the last H rows of
    each column are NaN because no future price exists for them.
    """
    return pd.DataFrame(
        {f"fwd_{h}d": close.shift(-h) / close - 1.0 for h in horizons}
    )


def event_study(signal: pd.Series, fwd: pd.DataFrame) -> pd.DataFrame:
    """
    Summarise forward returns on the dates where `signal` is True.

    For every column of `fwd`, the rows at the signal dates are selected,
    NaNs are dropped, and one result row is produced with: horizon (the
    column name), n, mean, hit_rate (share > 0), p10, p50 and p90.
    All statistics are NaN when a column has no usable observations.
    """
    event_dates = signal[signal].index

    def _describe(column):
        # Statistics for one horizon, restricted to the event dates.
        sample = fwd.loc[event_dates, column].dropna()
        has_data = len(sample) > 0
        return {
            "horizon": column,
            "n": int(sample.shape[0]),
            "mean": float(sample.mean()) if has_data else np.nan,
            "hit_rate": float((sample > 0).mean()) if has_data else np.nan,
            "p10": float(sample.quantile(0.10)) if has_data else np.nan,
            "p50": float(sample.quantile(0.50)) if has_data else np.nan,
            "p90": float(sample.quantile(0.90)) if has_data else np.nan,
        }

    return pd.DataFrame([_describe(column) for column in fwd.columns])


def random_baseline(close: pd.Series, n_events: int, horizons=(5, 10, 20, 40, 60), seed=42) -> pd.DataFrame:
    """
    Event study on randomly drawn dates, for comparison with a real signal.

    Dates are sampled without replacement (seeded by `seed`) from the rows
    where every forward-return horizon is available; `n_events` is capped at
    the number of such rows. The result has the layout of `event_study`.
    """
    complete_fwd = forward_returns(close, horizons=horizons).dropna()
    candidates = complete_fwd.index.to_numpy()

    generator = np.random.default_rng(seed)
    sample_size = min(n_events, len(candidates))
    chosen = generator.choice(candidates, size=sample_size, replace=False)

    # Boolean event flag aligned with the forward-return rows.
    random_signal = pd.Series(False, index=complete_fwd.index)
    random_signal.loc[chosen] = True

    return event_study(random_signal, complete_fwd)


# ---------------------------
# Run: RSI(1) < thr test
# ---------------------------

ticker = "^GSPC"
start = "2010-01-01"
end = None

close = fetch_close(ticker, start=start, end=end)

rsi1 = rsi_wilder(close, n=1)
thr = 22
# Name the signal after the threshold so labels follow `thr` when it changes.
sig = (rsi1 < thr).rename(f"rsi1_lt_{thr}")

horizons = (5, 10, 20, 40, 60)
fwd = forward_returns(close, horizons=horizons)

# Forward returns conditional on the signal vs. the same number of random dates.
summary = event_study(sig, fwd)
baseline = random_baseline(close, n_events=int(sig.sum()), horizons=horizons, seed=42)

print(f"=== RSI(1) < {thr}: Conditional forward returns ===")
print(summary.to_string(index=False))
print("\n=== Random dates (same count): Baseline ===")
print(baseline.to_string(index=False))

# ---------------------------
# Plot: index price + red dots at signal dates
# ---------------------------

df_plot = pd.concat([close, rsi1, sig], axis=1).dropna()

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df_plot.index, df_plot["close"], linewidth=1.5)

sig_idx = df_plot.index[df_plot[sig.name]]
# Colour is set explicitly: scatter's default is the next colour-cycle entry, not red.
ax.scatter(sig_idx, df_plot.loc[sig_idx, "close"], s=18, color="red", zorder=3)

ax.set_title(f"{ticker} Price with RSI(1) < {thr} signals")
ax.set_ylabel("Price (auto-adjusted close)")
plt.show()

# Optional: RSI(1) plot
fig, ax = plt.subplots(figsize=(12, 3))
ax.plot(df_plot.index, df_plot[rsi1.name], linewidth=1.0)
ax.axhline(thr, linestyle="--")
ax.set_title(f"RSI(1) with threshold {thr}")
ax.set_ylabel("RSI(1)")
plt.show()