# Lightweight Backtesting Debug Notebook (No RL)

This notebook provides a compact, debuggable version of the framework focused on:
- Deterministic rules
- Probabilistic strategies
- Simple event-driven backtesting

It intentionally **excludes RL** to stay lightweight and interactive.


## 1) Imports & Settings

In [None]:
from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's 'whitegrid' theme for the plots drawn in this notebook.
sns.set_theme(style='whitegrid')
# Let pandas display up to 200 columns before eliding them.
pd.set_option('display.max_columns', 200)


## 2) Data Loading
Reads one parquet file per ticker from `../data/raw/{ticker}.parquet` when that directory exists, otherwise from `data/raw/{ticker}.parquet`.


In [None]:
# Prefer the parent-relative raw-data directory when it exists; otherwise fall
# back to `data/raw` relative to the current working directory.
_parent_raw_dir = Path('../data/raw')
RAW_DIR = _parent_raw_dir if _parent_raw_dir.exists() else Path('data/raw')

def load_ticker_data(ticker: str) -> pd.DataFrame:
    """Load the parquet file for ``ticker`` from ``RAW_DIR``, ordered by date.

    Args:
        ticker: Symbol used to build the file name ``{ticker}.parquet``.

    Returns:
        The file contents sorted by the ``date`` column with a fresh 0..n-1 index.

    Raises:
        FileNotFoundError: If the parquet file does not exist.
        ValueError: If the loaded frame has no ``date`` column.
    """
    path = RAW_DIR / f'{ticker}.parquet'
    if path.exists():
        frame = pd.read_parquet(path)
        if 'date' in frame.columns:
            ordered = frame.sort_values('date')
            return ordered.reset_index(drop=True)
        raise ValueError('Expected `date` column in parquet input.')
    raise FileNotFoundError(f'Missing {path}. Run data download first.')

# Symbols processed by the run cell; each is loaded via load_ticker_data(),
# so a matching `{ticker}.parquet` file must exist under RAW_DIR.
TICKERS = ['TTD', 'AAPL', 'MSFT', 'NVDA']


## 3) Feature Engineering

In [None]:
def compute_indicators(
    df: pd.DataFrame,
    return_window: int = 20,
    vol_window: int = 20,
    momentum_window: int = 10,
    atr_window: int = 14,
    liquidity_window: int = 20,
) -> pd.DataFrame:
    """Return a date-sorted copy of ``df`` with indicator columns appended.

    The input must provide ``date``, ``close``, ``high``, ``low`` and ``volume``
    columns. The input frame itself is not modified.

    Added columns, in this order:
        returns: One-bar percentage change of ``close`` (first row 0.0).
        rolling_mean: Mean of ``close`` over ``return_window`` bars. Rows that
            do not yet have a full window use the mean of the bars available
            so far, so no value depends on later rows.
        rolling_volatility: Population std (``ddof=0``) of ``returns`` over
            ``vol_window`` bars; 0.0 until the window is full.
        zscore: ``(close - full-window mean) / full-window std`` of ``close``;
            0.0 where the window is incomplete or the std is zero.
        drawdown: ``close`` relative to its running maximum, minus one.
        momentum: Percentage change of ``close`` over ``momentum_window`` bars.
        atr: Rolling mean over ``atr_window`` bars of the true range
            (max of high-low, |high-prev close|, |low-prev close|).
        liquidity_proxy: Dollar volume divided by its ``liquidity_window``
            rolling mean; 0.0 where undefined.

    Args:
        df: Price/volume frame for one instrument.
        return_window: Window for ``rolling_mean`` and ``zscore``.
        vol_window: Window for ``rolling_volatility``.
        momentum_window: Lag used for ``momentum``.
        atr_window: Window for ``atr``.
        liquidity_window: Window for the ``liquidity_proxy`` denominator.

    Returns:
        A new DataFrame sorted by ``date`` with a 0..n-1 index.
    """
    # sort_values already returns a new frame, so the caller's data is untouched.
    data = df.sort_values('date').reset_index(drop=True)
    close = data['close']

    data['returns'] = close.pct_change().fillna(0.0)

    # Strict (full-window) mean drives the z-score. For the published column the
    # warm-up rows fall back to a partial-window mean of *past* bars only;
    # back-filling from the first complete window would leak future prices
    # into earlier rows.
    strict_mean = close.rolling(return_window).mean()
    partial_mean = close.rolling(return_window, min_periods=1).mean()
    data['rolling_mean'] = strict_mean.fillna(partial_mean)

    data['rolling_volatility'] = data['returns'].rolling(vol_window).std(ddof=0).fillna(0.0)

    # A zero std would divide by zero; treat it as undefined and report 0.0.
    rolling_std_price = close.rolling(return_window).std(ddof=0).replace(0, np.nan)
    data['zscore'] = ((close - strict_mean) / rolling_std_price).replace([np.inf, -np.inf], np.nan).fillna(0.0)

    cum_max = close.cummax().replace(0, np.nan)
    data['drawdown'] = ((close - cum_max) / cum_max).fillna(0.0)

    data['momentum'] = close.pct_change(momentum_window).fillna(0.0)

    prev_close = close.shift(1)
    tr1 = data['high'] - data['low']
    tr2 = (data['high'] - prev_close).abs()
    tr3 = (data['low'] - prev_close).abs()
    true_range = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
    data['atr'] = true_range.rolling(atr_window).mean().fillna(0.0)

    dollar_volume = close * data['volume']
    data['liquidity_proxy'] = (
        dollar_volume / dollar_volume.rolling(liquidity_window).mean().replace(0, np.nan)
    ).replace([np.inf, -np.inf], np.nan).fillna(0.0)

    return data


## 4) Strategy Interface + Deterministic/Probabilistic Strategies

In [None]:
@dataclass
class StrategyState:
    """Per-bar snapshot passed to ``Strategy.decide``.

    ``run_backtest`` builds one instance per data row from that row's values
    and the running portfolio before asking the strategy for a target.
    """

    price: float  # close of the current bar
    returns: float  # the row's `returns` value (0.0 if the column is absent)
    zscore: float  # the row's `zscore` value (0.0 if the column is absent)
    volatility: float  # the row's `rolling_volatility` value (0.0 if absent)
    momentum: float  # the row's `momentum` value (0.0 if the column is absent)
    position: float  # share value / portfolio value before this bar's trade
    cash: float  # cash balance before this bar's trade


class Strategy:
    """Base class for strategies: subclasses override ``decide``."""

    name: str = 'base'

    def decide(self, state: StrategyState) -> float:
        """Return the desired position as a fraction of portfolio value.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    @staticmethod
    def clamp(value: float, low: float=-1.0, high: float=1.0) -> float:
        """Limit ``value`` (converted to float) to the range ``[low, high]``.

        NOTE: because of how ``min``/``max`` compare, a NaN ``value`` comes
        back as ``high`` rather than NaN.
        """
        capped = min(high, float(value))
        return max(low, capped)


@dataclass
class BuyAndHoldStrategy(Strategy):
    """Baseline that always requests a fully invested long position."""

    name: str = 'buy_and_hold'

    def decide(self, state: StrategyState) -> float:
        """Return a target fraction of 1.0 regardless of ``state``."""
        fully_invested = 1.0
        return fully_invested


@dataclass
class DeterministicThresholdStrategy(Strategy):
    """Go fully long after a large one-bar drop, go flat after a large rise.

    The sign of either threshold is ignored (``abs`` is applied).
    """

    buy_drop_pct: float = 0.02
    sell_rise_pct: float = 0.02
    name: str = 'deterministic_threshold'

    def decide(self, state: StrategyState) -> float:
        """Return 1.0 on a drop signal, 0.0 on a rise signal, else the current position."""
        bar_return = state.returns
        drop_trigger = -abs(self.buy_drop_pct)
        rise_trigger = abs(self.sell_rise_pct)

        # The drop check comes first, so it wins if both conditions hold.
        if bar_return <= drop_trigger:
            return 1.0
        return 0.0 if bar_return >= rise_trigger else state.position


@dataclass
class VolatilityScaledThresholdStrategy(Strategy):
    """Threshold rule whose band is ``k`` times the state's volatility."""

    k: float = 1.0
    name: str = 'volatility_scaled_threshold'

    def decide(self, state: StrategyState) -> float:
        """Return 1.0 below the band, 0.0 above it, else the current position."""
        # The band never shrinks below 1e-6, so zero volatility cannot make
        # every bar a signal.
        band = max(1e-6, state.volatility * self.k)
        move = state.returns

        if move <= -band:
            return 1.0
        return 0.0 if move >= band else state.position


@dataclass
class ProbabilisticLinearStrategy(Strategy):
    """Buy with a probability that grows linearly with the size of the drop.

    The probability is ``drop / threshold`` clipped to ``[0, 1]``. Each call
    to ``decide`` consumes exactly one draw from the seeded generator.
    """

    threshold: float = 0.03
    seed: int = 42
    name: str = 'prob_linear'
    rng: np.random.Generator = field(init=False)

    def __post_init__(self) -> None:
        """Create the random generator from ``seed``."""
        self.rng = np.random.default_rng(self.seed)

    def decide(self, state: StrategyState) -> float:
        """Return 1.0 with probability ``p_buy``, otherwise 0.0."""
        decline = max(0.0, -state.returns)
        # Floor the divisor so a zero or negative threshold cannot divide by zero.
        scale = max(self.threshold, 1e-9)
        p_buy = float(np.clip(decline / scale, 0.0, 1.0))

        draw = self.rng.random()
        if draw < p_buy:
            return 1.0
        return 0.0


@dataclass
class ProbabilisticSigmoidStrategy(Strategy):
    """Buy with probability ``sigmoid(zscore * alpha)``.

    With the default negative ``alpha`` the probability rises as the z-score
    falls. Each call to ``decide`` consumes exactly one draw from the seeded
    generator.
    """

    alpha: float = -1.5
    seed: int = 42
    name: str = 'prob_sigmoid'
    rng: np.random.Generator = field(init=False)

    def __post_init__(self) -> None:
        """Create the random generator from ``seed``."""
        self.rng = np.random.default_rng(self.seed)

    def decide(self, state: StrategyState) -> float:
        """Return 1.0 with probability ``p_buy``, otherwise 0.0."""
        logit = state.zscore * self.alpha
        p_buy = 1.0 / (1.0 + np.exp(-logit))
        draw = self.rng.random()
        return 1.0 if draw < p_buy else 0.0


@dataclass
class ZScoreMeanReversionStrategy(Strategy):
    """Long when the z-score is very low, short when very high, flat near zero."""

    entry_z: float = 1.0
    exit_z: float = 0.2
    name: str = 'zscore_mean_reversion'

    def decide(self, state: StrategyState) -> float:
        """Map the z-score to +1.0, -1.0 or 0.0, or keep the current position.

        Between the exit band and the entry levels the existing position is
        returned unchanged.
        """
        z = state.zscore

        if z <= -self.entry_z:
            return 1.0
        if z >= self.entry_z:
            return -1.0

        inside_exit_band = abs(z) <= self.exit_z
        return 0.0 if inside_exit_band else state.position


@dataclass
class KellyMeanReversionStrategy(Strategy):
    """Size the position as ``edge / variance``, limited to ``±max_leverage``.

    The edge is ``-zscore * lookback_mean``. The variance is the largest of
    ``lookback_var``, the squared state volatility and ``1e-8``.
    """

    lookback_mean: float = 0.001
    lookback_var: float = 0.0004
    max_leverage: float = 1.5
    name: str = 'kelly_mean_reversion'

    def decide(self, state: StrategyState) -> float:
        """Return the clamped ``edge / variance`` target fraction."""
        edge = -state.zscore * self.lookback_mean
        # The floor keeps the divisor strictly positive.
        variance_floor = max(self.lookback_var, state.volatility**2, 1e-8)
        leverage_cap = self.max_leverage
        return self.clamp(edge / variance_floor, -leverage_cap, leverage_cap)


## 5) Lightweight Backtester

In [None]:
@dataclass
class BacktestConfig:
    """Settings read by ``run_backtest``."""

    initial_cash: float = 100_000.0  # starting cash balance
    transaction_cost_bps: float = 5.0  # cost per trade, in basis points of |traded notional|
    slippage_bps: float = 2.0  # slippage per trade, in basis points of |traded notional|
    max_leverage: float = 1.5  # strategy targets are limited to [-max_leverage, +max_leverage]


def run_backtest(df: pd.DataFrame, strategy: Strategy, ticker: str, config: BacktestConfig) -> dict[str, Any]:
    """Replay ``df`` bar by bar, letting ``strategy`` choose a target position.

    For each row (in ``date`` order) the function builds a ``StrategyState``,
    asks the strategy for a target fraction of portfolio value, limits it to
    ``[-config.max_leverage, +config.max_leverage]`` and trades the difference
    at that row's ``close``. Slippage and transaction cost are each charged as
    basis points of the absolute traded notional and taken out of cash after
    the trade has been sized, so cash can end up slightly negative.

    Args:
        df: Frame with ``date`` and ``close`` columns. The optional columns
            ``returns``, ``zscore``, ``rolling_volatility`` and ``momentum``
            are passed to the strategy and default to 0.0 when absent.
        strategy: Object whose ``decide(state)`` returns the target fraction.
        ticker: Label copied into every output row.
        config: Cash, cost and leverage settings.

    Returns:
        A dict with two DataFrames:
        ``'performance'`` (one row per bar: date, equity, daily_return,
        position, strategy, ticker) and ``'trade_log'`` (one row per executed
        trade). Both always carry their full column set, even when empty.
        The first bar's ``daily_return`` is always 0.0.
    """
    perf_columns = ['date', 'equity', 'daily_return', 'position', 'strategy', 'ticker']
    trade_columns = [
        'date', 'ticker', 'strategy', 'price',
        'trade_notional', 'trade_shares', 'tx_cost', 'slippage',
    ]

    # sort_values returns a new frame, so the caller's data is left untouched.
    data = df.sort_values('date').reset_index(drop=True)

    cash = config.initial_cash
    shares = 0.0
    prev_equity = config.initial_cash

    rows = []
    trades = []

    # After reset_index the label `i` is the 0-based bar number.
    for i, row in data.iterrows():
        close = float(row['close'])
        date = pd.to_datetime(row['date'])

        portfolio_value = cash + shares * close
        current_frac = 0.0 if portfolio_value == 0 else (shares * close) / portfolio_value

        state = StrategyState(
            price=close,
            returns=float(row.get('returns', 0.0)),
            zscore=float(row.get('zscore', 0.0)),
            volatility=float(row.get('rolling_volatility', 0.0)),
            momentum=float(row.get('momentum', 0.0)),
            position=current_frac,
            cash=cash,
        )

        target_frac = float(strategy.decide(state))
        if np.isnan(target_frac):
            # min()/max() would silently turn a NaN target into +max_leverage;
            # an undefined signal should leave the holdings alone instead.
            delta_notional = 0.0
        else:
            target_frac = max(-config.max_leverage, min(config.max_leverage, target_frac))
            target_notional = target_frac * portfolio_value
            current_notional = shares * close
            delta_notional = target_notional - current_notional

        if abs(delta_notional) > 1e-12:
            trade_shares = delta_notional / close
            slippage = abs(delta_notional) * (config.slippage_bps / 10_000)
            tx_cost = abs(delta_notional) * (config.transaction_cost_bps / 10_000)

            cash -= delta_notional + slippage + tx_cost
            shares += trade_shares

            trades.append({
                'date': date,
                'ticker': ticker,
                'strategy': strategy.name,
                'price': close,
                'trade_notional': delta_notional,
                'trade_shares': trade_shares,
                'tx_cost': tx_cost,
                'slippage': slippage,
            })

        equity = cash + shares * close
        daily_return = 0.0 if i == 0 else (equity / prev_equity) - 1.0
        prev_equity = equity

        rows.append({
            'date': date,
            'equity': equity,
            'daily_return': daily_return,
            'position': 0.0 if equity == 0 else (shares * close) / equity,
            'strategy': strategy.name,
            'ticker': ticker,
        })

    # Explicit columns keep the schema stable when there are no bars or the
    # strategy never trades; an empty list alone yields a column-less frame.
    perf = pd.DataFrame(rows, columns=perf_columns)
    trade_log = pd.DataFrame(trades, columns=trade_columns)
    return {'performance': perf, 'trade_log': trade_log}


## 6) Metrics & Plot Helpers

In [None]:
def compute_metrics(perf: pd.DataFrame, periods_per_year: int = 252) -> dict[str, float]:
    """Summarise an equity curve as CAGR, Sharpe, max drawdown and win rate.

    Args:
        perf: Frame with ``equity`` and ``daily_return`` columns, one row per
            period, in chronological order.
        periods_per_year: Number of rows that make up one year.

    Returns:
        A dict with the keys ``CAGR``, ``Sharpe``, ``MaxDrawdown`` and
        ``WinRate``. Every metric is 0.0 when fewer than two rows are given.
        ``CAGR`` is 0.0 when the starting equity is not positive (growth is
        undefined) and -1.0 when the ending equity is zero or negative (total
        loss). ``Sharpe`` is 0.0 when the returns have no dispersion.
    """
    equity = perf['equity']
    returns = perf['daily_return']

    if len(equity) < 2:
        return {'CAGR': 0.0, 'Sharpe': 0.0, 'MaxDrawdown': 0.0, 'WinRate': 0.0}

    years = len(equity) / periods_per_year
    start_equity = equity.iloc[0]
    end_equity = equity.iloc[-1]
    if years <= 0 or start_equity <= 0:
        cagr = 0.0
    elif end_equity <= 0:
        # A non-positive ratio raised to a fractional power is NaN; a wiped-out
        # account is reported as a 100% loss instead.
        cagr = -1.0
    else:
        cagr = float((end_equity / start_equity) ** (1 / years) - 1)

    r = returns.dropna()
    r_std = r.std(ddof=0)
    sharpe = float(np.sqrt(periods_per_year) * r.mean() / r_std) if r_std > 0 else 0.0

    dd = equity / equity.cummax() - 1
    max_dd = float(dd.min())

    win_rate = float((r > 0).mean()) if len(r) else 0.0

    return {'CAGR': cagr, 'Sharpe': sharpe, 'MaxDrawdown': max_dd, 'WinRate': win_rate}


def plot_equity_and_drawdown(curves: pd.DataFrame):
    """Draw equity curves (top) and drawdowns (bottom), one line per strategy.

    ``curves`` needs ``strategy``, ``date`` and ``equity`` columns. Rows are
    grouped by ``strategy`` only, so filter to a single ticker beforehand if
    the frame holds several.
    """
    fig, (equity_ax, drawdown_ax) = plt.subplots(2, 1, figsize=(12, 8), sharex=True)

    for strategy_name, group in curves.groupby('strategy'):
        ordered = group.sort_values('date')
        running_peak = ordered['equity'].cummax()
        drawdown = ordered['equity'] / running_peak - 1

        equity_ax.plot(ordered['date'], ordered['equity'], label=strategy_name)
        drawdown_ax.plot(ordered['date'], drawdown, label=strategy_name)

    equity_ax.set_title('Equity Curves')
    drawdown_ax.set_title('Drawdowns')
    for ax in (equity_ax, drawdown_ax):
        ax.legend(loc='best', fontsize=8)

    plt.tight_layout()
    plt.show()


## 7) Run Backtest (No RL)

In [None]:
config = BacktestConfig()


def make_strategies() -> list[Strategy]:
    """Return a newly constructed instance of every strategy under test."""
    return [
        BuyAndHoldStrategy(),
        DeterministicThresholdStrategy(),
        VolatilityScaledThresholdStrategy(),
        ProbabilisticLinearStrategy(),
        ProbabilisticSigmoidStrategy(),
        ZScoreMeanReversionStrategy(),
        KellyMeanReversionStrategy(),
    ]


# Still exposed as a module-level list in case other cells inspect it.
strategies = make_strategies()

all_perf = []
summary_rows = []

for ticker in TICKERS:
    raw = load_ticker_data(ticker)
    df = compute_indicators(raw)

    # Light split: test on final 20% only
    split = int(len(df) * 0.8)
    test_df = df.iloc[split:].copy()

    # Build fresh instances for every ticker. The probabilistic strategies own
    # a seeded random generator; reusing one instance would carry its state
    # over, making each ticker's result depend on which tickers ran before it.
    for strat in make_strategies():
        out = run_backtest(test_df, strategy=strat, ticker=ticker, config=config)
        perf = out['performance']
        all_perf.append(perf)

        m = compute_metrics(perf)
        m.update({'ticker': ticker, 'strategy': strat.name})
        summary_rows.append(m)

curves_df = pd.concat(all_perf, ignore_index=True)
summary_df = pd.DataFrame(summary_rows).sort_values(['ticker', 'Sharpe'], ascending=[True, False])

summary_df.head(20)


## 8) Visual Debugging

In [None]:
# Chart a single ticker so the per-strategy lines stay readable.
plot_ticker = 'AAPL'
is_plot_ticker = curves_df['ticker'] == plot_ticker
plot_df = curves_df[is_plot_ticker].copy()
plot_equity_and_drawdown(plot_df)


## 9) Save Outputs (Optional)

In [None]:
# Write into ../results when that directory already exists; otherwise use
# ./results, creating it if needed.
_parent_results_dir = Path('../results')
OUT_DIR = _parent_results_dir if _parent_results_dir.exists() else Path('results')
OUT_DIR.mkdir(parents=True, exist_ok=True)

summary_path = OUT_DIR / 'summary_notebook.csv'
curves_path = OUT_DIR / 'equity_curves_notebook.parquet'

# Persist the metric table as CSV and the equity curves as parquet.
summary_df.to_csv(summary_path, index=False)
curves_df.to_parquet(curves_path, index=False)

for saved_path in (summary_path, curves_path):
    print('Saved:', saved_path)
