# Backtesting with Backtrader

## What is Backtesting?
Backtesting simulates how a trading strategy would have performed on *historical* data.
It is essential for evaluating whether model predictions are actually actionable.

## Strategy Logic
- **Signal**: If the model predicts tomorrow's Close > today's Close → **BUY**
- **Exit**:   If the model predicts tomorrow's Close < today's Close → **SELL**
- We compare against a simple **Buy & Hold** benchmark.

## Caveats
- This is for **educational purposes only**.
- Real trading incurs slippage, transaction costs, and market impact; this backtest models only a flat 0.1% commission per order.
- Past performance does not guarantee future results.


In [None]:
import sys; sys.path.insert(0, '..')
import warnings; warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import backtrader as bt
import joblib

from src.data_loader import (
    fetch_stock_data, time_series_split,
    scale_features, build_sequences
)
from src.sentiment_analyzer import add_sentiment_to_df
from src.evaluator import sharpe_ratio, max_drawdown, plot_equity_curve

# IPython magic: only valid when this cell runs inside a notebook kernel,
# not when the file is executed as plain Python.
%matplotlib inline
# Apply a named matplotlib style sheet to every figure created afterwards.
# NOTE(review): the 'seaborn-v0_8-*' names presumably need a recent matplotlib – confirm.
plt.style.use('seaborn-v0_8-darkgrid')
print('Setup complete')

In [None]:
# ── Config ────────────────────────────────────────────────────────────────────
# Ticker symbol and date window handed to the data-loading helpers.
TICKER, START, END = 'AAPL', '2015-01-01', '2024-12-31'

# Cash the simulated broker starts with; also scales the Buy & Hold benchmark.
INITIAL_CASH = 10_000.0

# Candidate model inputs, listed group by group. The order of the resulting
# list is significant because it fixes the column order of the feature matrix.
FEATURE_COLS = (
    ['Open', 'High', 'Low', 'Volume']
    + ['SMA_10', 'SMA_20', 'SMA_50']
    + ['RSI_14', 'RSI_7']
    + ['MACD', 'MACD_Signal', 'MACD_Hist']
    + ['BB_Width', 'ATR_14', 'Vol_Change', 'OBV']
    + ['Log_Return', 'Pct_Change', 'Sentiment']
    + ['Close_Lag_1', 'Close_Lag_2', 'Close_Lag_3', 'Close_Lag_5', 'Close_Lag_10']
)

In [None]:
# ── 1. Load data & generate predictions ──────────────────────────────────────
df = fetch_stock_data(TICKER, START, END)
df = add_sentiment_to_df(df, TICKER, START, END)

# Keep only the configured features that the loaded frame actually contains.
feature_cols = [c for c in FEATURE_COLS if c in df.columns]

# Only the second element of the split is backtested.
# NOTE(review): 0.80 is presumably the training fraction – confirm in src.data_loader.
_, test_df = time_series_split(df, 0.80)

# Load trained RF regressor (run random_forest_model.ipynb first).
# NOTE: joblib.load unpickles the file, so only load artefacts you produced yourself.
# The try body holds just the load, so a missing model file is the only thing
# that triggers the fallback; errors raised while predicting surface normally.
try:
    rf = joblib.load('../results/rf_regressor.pkl')
except FileNotFoundError:
    # Fallback: use a trivial lag-1 predictor.
    # `.bfill()` replaces `fillna(method='bfill')`, whose `method` argument is
    # deprecated in pandas 2.x.
    # NOTE(review): with this fallback Pred_Close equals the previous Close, so
    # the strict `>` comparison below is never true and the strategy never trades.
    print('RF model not found – using lag-1 fallback')
    preds = test_df['Close'].shift(1).bfill().values
else:
    X_test = test_df[feature_cols].values
    preds  = rf.predict(X_test)
    print('Loaded RF regressor predictions')

# Merge predictions back into the test dataframe (on a copy, so the object
# returned by the split is left untouched).
test_df = test_df.copy()
test_df['Pred_Close'] = preds
# Signal is 1 where the prediction exceeds the previous row's Close, else 0.
# The first row compares against NaN and therefore gets 0.
# NOTE(review): the notebook intro describes the baseline as "today's Close";
# confirm which day the regressor's target refers to before changing this.
test_df['Signal']     = (test_df['Pred_Close'] > test_df['Close'].shift(1)).astype(int)

In [None]:
# ── 2. Build Backtrader DataFeed ──────────────────────────────────────────────
# Backtrader is given its own frame: the five OHLCV columns under lower-case
# names, indexed by datetimes. `test_df` itself is not modified.

bt_data = test_df[['Open', 'High', 'Low', 'Close', 'Volume']].rename(columns=str.lower)
bt_data.index = pd.to_datetime(bt_data.index)

data_feed = bt.feeds.PandasData(dataname=bt_data)

In [None]:
# ── 3. Define Trading Strategy ────────────────────────────────────────────────

class MLSignalStrategy(bt.Strategy):
    """
    Trades based on an externally computed signal array.
    Signal = 1 → BUY all-in
    Signal = 0 → SELL (exit position)

    Params
    ------
    signals : sequence of int, optional
        One entry per bar, consumed in bar order. Bars beyond the end of the
        sequence (or a missing sequence) are treated as signal 0.
    printlog : bool
        When true, ``log`` prints its messages; otherwise it is silent.

    Attributes
    ----------
    portfolio_values : list
        Broker value recorded once per call to ``next`` (one entry per bar).
    """
    params = dict(signals=None, printlog=False)

    def log(self, txt, dt=None):
        """Print ``txt`` prefixed by ``dt`` (default: current bar's date) if ``printlog`` is set."""
        if self.params.printlog:
            dt = dt or self.datas[0].datetime.date(0)
            print(f'{dt} {txt}')

    def __init__(self):
        self.idx      = 0
        # `is not None` rather than truthiness: a numpy array has no single truth value.
        signals       = self.params.signals
        self.signals  = signals if signals is not None else []
        self.order    = None
        self.portfolio_values = []

    def next(self):
        """Record the portfolio value, then act on this bar's signal."""
        self.portfolio_values.append(self.broker.getvalue())

        # Consume this bar's signal before any early return so that the signal
        # index always advances in step with the bars.
        signal = self.signals[self.idx] if self.idx < len(self.signals) else 0
        self.idx += 1

        if self.order:
            return   # wait for pending order

        if signal == 1 and not self.position:
            # NOTE(review): sizing uses the current close and ignores commission,
            # so the broker may answer with Margin; notify_order then clears the
            # order and a later bar can try again.
            size = int(self.broker.getvalue() / self.data.close[0])
            if size > 0:   # nothing affordable → place (and log) nothing
                self.order = self.buy(size=size)
                self.log(f'BUY  @{self.data.close[0]:.2f} x{size}')

        elif signal == 0 and self.position:
            self.order = self.sell(size=self.position.size)
            self.log(f'SELL @{self.data.close[0]:.2f}')

    def notify_order(self, order):
        """Clear the pending-order marker once the order reaches a final state."""
        # Rejected and Expired are final states too; without them a refused
        # order would block every later bar in next().
        if order.status in [order.Completed, order.Canceled, order.Margin,
                            order.Rejected, order.Expired]:
            self.order = None

    def stop(self):
        """Log the final portfolio value."""
        # No extra append here: next() already recorded a value for every bar,
        # so portfolio_values stays the same length as the bar index.
        self.log(f'Final portfolio value: {self.broker.getvalue():.2f}')

In [None]:
# ── 4. Run Backtest ───────────────────────────────────────────────────────────
# One signal per row of the test frame, in the same order as the bars in the feed.
signals_arr = test_df['Signal'].values

cerebro = bt.Cerebro()
cerebro.adddata(data_feed)
cerebro.addstrategy(MLSignalStrategy, signals=signals_arr, printlog=True)
cerebro.broker.setcash(INITIAL_CASH)
cerebro.broker.setcommission(commission=0.001)   # 0.1 % per trade

# Analyser: Sharpe Ratio via backtrader built-in.
# `riskfreerate` is expressed in annual terms (4 %) and is converted to the
# chosen timeframe by the analyser itself, so it must not be pre-divided by 252.
# Daily returns are used and the ratio is annualised.
cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='sharpe',
                    timeframe=bt.TimeFrame.Days,
                    riskfreerate=0.04,
                    annualize=True)
cerebro.addanalyzer(bt.analyzers.DrawDown,   _name='drawdown')
cerebro.addanalyzer(bt.analyzers.Returns,    _name='returns')

print(f'Starting Portfolio Value: ${cerebro.broker.getvalue():,.2f}')
results = cerebro.run()
strat   = results[0]   # the single strategy instance added above
final_val = cerebro.broker.getvalue()
print(f'Ending Portfolio Value:   ${final_val:,.2f}')
print(f'Total Return:             {(final_val/INITIAL_CASH - 1)*100:.2f}%')

In [None]:
# ── 5. Analyser Results ───────────────────────────────────────────────────────
sharpe_bt = strat.analyzers.sharpe.get_analysis()
dd_bt     = strat.analyzers.drawdown.get_analysis()

# The 'sharperatio' entry can be present but None when the ratio could not be
# computed, so test the value itself instead of relying on a .get() default.
sharpe_val = sharpe_bt.get('sharperatio')
sharpe_txt = 'N/A' if sharpe_val is None else f'{sharpe_val:.3f}'
print(f"Sharpe Ratio (bt):  {sharpe_txt}")
print(f"Max Drawdown:       {dd_bt.max.drawdown:.2f}%")

In [None]:
# ── 6. Plot Equity Curve vs Buy & Hold ────────────────────────────────────────
# pd.Series needs values and index of equal length. The recorded value list can
# be longer than the test index, so truncate both to the common length.
n_points = min(len(strat.portfolio_values), len(test_df.index))
portfolio_vals = pd.Series(
    strat.portfolio_values[:n_points],
    index=test_df.index[:n_points]
)

# Buy & Hold benchmark: Close rescaled so the first row equals INITIAL_CASH.
bh = (test_df['Close'] / test_df['Close'].iloc[0]) * INITIAL_CASH

plot_equity_curve(portfolio_vals, label='ML Strategy', benchmark=bh)

In [None]:
# ── 7. Plot Backtrader Chart ──────────────────────────────────────────────────
# Note: backtrader's built-in plot requires matplotlib backend
# Draws the finished run held by `cerebro` (created in step 4) as a candlestick chart.
# NOTE(review): iplot=False presumably turns off backtrader's own notebook-inline
# plotting mode – confirm against the backtrader plotting documentation.
cerebro.plot(style='candlestick', iplot=False)