# RL vs. Baseline Backtest Comparison

This notebook loads price & feature data, applies both the RL policy and the XGBoost baseline, runs them through the backtester, and compares final PnL and trade logs.

In [None]:
# 1. Imports & Setup
import pandas as pd
from joblib import load
import matplotlib.pyplot as plt
from src.backtest.backtester import Backtester
from src.rl.evaluate_rl import evaluate  # returns total_reward & (optionally) per-step preds

# --- Notebook configuration: edit these to match your local layout ---

# Input table (features + prices) and the feature columns handed to both
# the baseline model and the trading environment.
DATA_PATH = "data/features/batch/technical_with_timeidx.parquet"
FEATURE_COLS = [
    "ma_5",
    "rsi_14",
    "close",
]

# Model artifacts: the baseline model file loaded with joblib and the
# RL checkpoint restored by the PPO trainer.
BASELINE_MODEL_PATH = "models/baseline_xgb.pkl"
RL_CHECKPOINT = "logs/rl/PPO_Trading/checkpoint_000050/checkpoint-50"

## 2. Load Data & Prepare

In [None]:
# 2.1 / 2.2 Read the historical feature+price table, order it by symbol and
# then by time index, and give it a fresh 0..n-1 row index.
df = (
    pd.read_parquet(DATA_PATH)
    .sort_values(["symbol", "time_idx"])
    .reset_index(drop=True)
)

# 2.3 Preview the first rows
display(df.head())

## 3. Generate Predictions

- **RL Policy**: we'll step through the environment and record `pred_rl`  
- **Baseline**: call the loaded model's `predict` directly on the feature columns

In [None]:
# 3.1 Baseline predictions: score every row with the pre-trained baseline model.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
baseline = load(BASELINE_MODEL_PATH)
X = df[FEATURE_COLS]
df["pred_baseline"] = baseline.predict(X)

# 3.2 RL predictions: step the trading environment once from reset until it
#     reports `done`, recording the action taken at each step as that step's
#     prediction. The policy is the restored PPO checkpoint when it exists on
#     disk, otherwise uniformly sampled random actions.
import os

from src.rl.env import TradingEnv


def _action_to_signal(action):
    """Translate an environment action into a signed prediction.

    Returns +1 when ``action == 1``, -1 when ``action == 2`` and 0 otherwise.
    NOTE(review): the original notebook comment describes these as buy / sell /
    hold -- confirm the action encoding against TradingEnv.
    """
    return 1 if action == 1 else (-1 if action == 2 else 0)


env = TradingEnv(df, feature_cols=FEATURE_COLS)
obs = env.reset()
preds = []
done = False

if not os.path.exists(RL_CHECKPOINT):
    print(f"RL checkpoint not found: {RL_CHECKPOINT}")
    print("Using random actions instead")

    # NOTE(review): the random fallback is unseeded, so its results differ
    # between runs; seed the action space if reproducibility matters.
    def choose_action(observation):
        """Ignore the observation and sample a random action."""
        return env.action_space.sample()
else:
    # Ray is imported lazily so the notebook still runs without it when no
    # checkpoint is available.
    import ray
    from ray.rllib.agents.ppo import PPOTrainer
    from ray.tune.registry import register_env

    def env_creator(cfg):
        """Build a TradingEnv from the parquet path and feature columns in cfg."""
        frame = pd.read_parquet(cfg["data_path"])
        return TradingEnv(frame, feature_cols=cfg["feature_cols"])

    # Initialize Ray (a no-op error-wise if it is already running)
    ray.init(ignore_reinit_error=True)

    # Register the environment under the name the trainer config refers to
    register_env("trading_env", env_creator)

    # Rebuild the trainer and load the trained weights from the checkpoint
    trainer = PPOTrainer(env="trading_env", config={
        "env_config":{"data_path": DATA_PATH, "feature_cols": FEATURE_COLS},
        "framework":"torch"
    })
    trainer.restore(RL_CHECKPOINT)

    choose_action = trainer.compute_action

# Single rollout shared by both policies
while not done:
    action = choose_action(obs)
    preds.append(_action_to_signal(action))
    obs, reward, done, _ = env.step(action)

# Keep only as many rows as the rollout produced predictions for.
# NOTE(review): this aligns predictions to rows purely by position -- confirm
# that TradingEnv walks the frame in the same row order.
df = df.iloc[:len(preds)].copy()
df["pred_rl"] = preds

## 4. Backtest Both Strategies

In [None]:
def _unpack_backtest(results, frame):
    """Normalize a Backtester.run() result to (final_capital, trades, portfolio).

    A 3-element result is returned as-is. A 2-element result carries no
    portfolio history, so a flat placeholder is built: one row per unique
    timestamp in ``frame``, each holding the final capital.
    NOTE: metrics and plots derived from that placeholder are flat and do
    not reflect the real equity path.
    """
    if len(results) == 3:
        final_capital, trades, portfolio = results
        return final_capital, trades, portfolio

    final_capital, trades = results
    timestamps = frame["timestamp"].unique()
    portfolio = pd.DataFrame({
        "timestamp": timestamps,
        "portfolio_value": [final_capital] * len(timestamps),
    })
    return final_capital, trades, portfolio


# 4.1 Backtest baseline (the backtester reads its signal from the `pred` column)
bt_base = Backtester(df.assign(pred=df["pred_baseline"]), None)
results_base = bt_base.run()
final_base, trades_base, portfolio_base = _unpack_backtest(results_base, df)
print(f"Baseline final capital: {final_base:.2f}")
display(trades_base.head())

# 4.2 Backtest RL
bt_rl = Backtester(df.assign(pred=df["pred_rl"]), None)
results_rl = bt_rl.run()
final_rl, trades_rl, portfolio_rl = _unpack_backtest(results_rl, df)
print(f"RL final capital: {final_rl:.2f}")
display(trades_rl.head())

## 5. Visual Comparison

In [None]:
# 5.1 Equity Curves
# Simulate equity over time for each strategy
def equity_curve(df, trades, initial_cash=100_000):
    """Replay a trade log over the price rows and return the equity at each row.

    Parameters
    ----------
    df : DataFrame with ``timestamp`` and ``close`` columns, in replay order.
    trades : DataFrame with ``timestamp``, ``action`` and ``price`` columns,
        in execution order. ``BUY`` converts all cash into position at the
        trade price; ``SELL`` and ``FINAL_CLOSE`` convert the whole position
        back into cash. Other actions are skipped. May be empty.
    initial_cash : starting cash balance (defaults to 100,000).

    Returns
    -------
    Series of ``cash + position * close`` per row, indexed by ``df["timestamp"]``.
    """
    cash = initial_cash
    pos = 0
    curve = []

    # Materialize the trade log once instead of calling .iloc on every row.
    # An empty log may have no columns at all, so guard the column access.
    if len(trades) > 0:
        trade_log = list(zip(trades["timestamp"], trades["action"], trades["price"]))
    else:
        trade_log = []
    trade_idx = 0

    for timestamp, close in zip(df["timestamp"], df["close"]):
        # Apply every trade stamped with this timestamp. Using `while` keeps
        # the cursor from stalling when two trades share a timestamp.
        while trade_idx < len(trade_log) and trade_log[trade_idx][0] == timestamp:
            _, action, price = trade_log[trade_idx]
            if action == "BUY":
                # Accumulate, so a BUY while already invested keeps the position.
                pos += cash / price
                cash = 0
            elif action in ("SELL", "FINAL_CLOSE"):
                # Accumulate, so closing while flat does not wipe out the cash.
                cash += pos * price
                pos = 0
            trade_idx += 1
        curve.append(cash + pos * close)

    return pd.Series(curve, index=df["timestamp"])

# Plot the backtester's own portfolio history when both results carry one;
# otherwise rebuild each curve by replaying the trade log.
both_have_history = all(
    "portfolio_value" in history.columns
    for history in (portfolio_base, portfolio_rl)
)

if both_have_history:
    plot_specs = [
        ((portfolio_base["timestamp"], portfolio_base["portfolio_value"]), "Baseline"),
        ((portfolio_rl["timestamp"], portfolio_rl["portfolio_value"]), "RL"),
    ]
else:
    # Calculate equity curves
    eq_base = equity_curve(df, trades_base)
    eq_rl   = equity_curve(df, trades_rl)
    plot_specs = [((eq_base,), "Baseline"), ((eq_rl,), "RL")]

# Shared figure setup for whichever data source was chosen
plt.figure(figsize=(12,6))
for plot_args, curve_label in plot_specs:
    plt.plot(*plot_args, label=curve_label)
plt.legend()
plt.title("Equity Curve Comparison")
plt.ylabel("Portfolio Value")
plt.grid(True)

## 6. Performance Metrics

In [None]:
# Calculate performance metrics
def calculate_metrics(portfolio_df, trades_df, risk_free_rate=0.02, periods_per_year=252):
    """Summarize a backtest as a dictionary of performance metrics.

    Parameters
    ----------
    portfolio_df : DataFrame with a ``portfolio_value`` column, one row per
        period in chronological order. It is not modified.
    trades_df : DataFrame of trades. The win rate reads its ``pnl`` column;
        when the frame is empty or has no ``pnl`` column the win rate is 0.
    risk_free_rate : annual risk-free rate used for the Sharpe ratio (2% default).
    periods_per_year : number of portfolio rows per year used to annualize
        the Sharpe ratio (252 default, i.e. the rows are treated as daily).

    Returns
    -------
    dict with ``total_return`` (%), ``sharpe_ratio`` (annualized; NaN when the
    return volatility is zero or undefined), ``max_drawdown`` (%, <= 0),
    ``win_rate`` (%) and ``num_trades``.
    """
    # Work on local Series so the caller's frame gains no helper columns.
    values = portfolio_df["portfolio_value"]
    returns = values.pct_change()

    # Total return over the whole history
    initial_value = values.iloc[0]
    final_value = values.iloc[-1]
    total_return = (final_value / initial_value - 1) * 100

    # Sharpe ratio. Subtracting a constant does not change the standard
    # deviation, so the volatility of excess returns equals that of returns.
    period_risk_free = (1 + risk_free_rate) ** (1 / periods_per_year) - 1
    volatility = returns.std()
    if volatility > 0:  # False for both 0.0 and NaN
        sharpe_ratio = (
            (returns.mean() - period_risk_free) / volatility * (periods_per_year ** 0.5)
        )
    else:
        sharpe_ratio = float("nan")

    # Max drawdown measured against the running peak of the portfolio value
    # itself, so the starting value counts as the first peak.
    drawdown = (values / values.cummax() - 1) * 100
    max_drawdown = drawdown.min()

    # Win rate: share of rows in the trade log with positive pnl.
    # NOTE(review): the denominator counts every row of trades_df; if entry
    # rows carry no pnl this understates the win rate -- confirm the schema.
    if len(trades_df) > 0 and "pnl" in trades_df.columns:
        winning_trades = int((trades_df["pnl"] > 0).sum())
        win_rate = winning_trades / len(trades_df) * 100
    else:
        win_rate = 0

    return {
        "total_return": total_return,
        "sharpe_ratio": sharpe_ratio,
        "max_drawdown": max_drawdown,
        "win_rate": win_rate,
        "num_trades": len(trades_df)
    }

# Compute the metric dictionaries for both strategies
base_metrics = calculate_metrics(portfolio_base, trades_base)
rl_metrics = calculate_metrics(portfolio_rl, trades_rl)


def _print_metrics_report(title, final_capital, metrics):
    """Print one strategy's final capital and metric values as an indented list."""
    print(f"{title} Metrics:")
    print(f"  Final Capital: ${final_capital:.2f}")
    print(f"  Total Return: {metrics['total_return']:.2f}%")
    print(f"  Sharpe Ratio: {metrics['sharpe_ratio']:.2f}")
    print(f"  Max Drawdown: {metrics['max_drawdown']:.2f}%")
    print(f"  Win Rate: {metrics['win_rate']:.2f}%")
    print(f"  Number of Trades: {metrics['num_trades']}")


# Baseline first, a blank separator line, then RL
_print_metrics_report("Baseline", final_base, base_metrics)
print()
_print_metrics_report("RL", final_rl, rl_metrics)

## 7. Summary & Insights

> **Note:** Markdown cells do not evaluate Python expressions, so the `{...}` placeholders below are templates. Replace them with the values printed by the metrics cell above.

- **Final Capital**:  
  - Baseline: $ {final_base:.2f}  
  - RL:       $ {final_rl:.2f}

- **Performance Metrics**:
  - Baseline: Total Return: {base_metrics['total_return']:.2f}%, Sharpe Ratio: {base_metrics['sharpe_ratio']:.2f}, Win Rate: {base_metrics['win_rate']:.2f}%
  - RL: Total Return: {rl_metrics['total_return']:.2f}%, Sharpe Ratio: {rl_metrics['sharpe_ratio']:.2f}, Win Rate: {rl_metrics['win_rate']:.2f}%

_Reflect on which strategy performed better and why._

In principle, the RL strategy has several potential advantages over the baseline (check them against the metrics above before drawing conclusions):

1. **Adaptability**: The RL agent can adapt to changing market conditions by learning from its interactions with the environment.

2. **Sequential Decision Making**: Unlike the baseline model which makes independent predictions, the RL agent considers the entire sequence of decisions and their long-term impact.

3. **Risk Management**: The RL agent can learn to manage risk by balancing between aggressive trading for higher returns and conservative approaches to minimize drawdowns.

4. **Direct Optimization**: The RL agent directly optimizes for the trading objective (portfolio value) rather than a proxy like price prediction accuracy.

Areas for improvement:

1. **More Training Data**: The RL agent could benefit from training on more diverse market conditions.

2. **Hyperparameter Tuning**: Further tuning of the RL algorithm hyperparameters could improve performance.

3. **Feature Engineering**: Adding more features or using different feature representations could help the RL agent make better decisions.

4. **Reward Function Design**: Experimenting with different reward functions that better align with trading objectives could enhance performance.