# Backtest Results

This notebook runs a backtest of our baseline model and visualizes the results.

In [None]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import joblib

# Add src directory to path
sys.path.append('../src')

# Import local modules
from models.data_loader import load_training_data
from backtest.backtester import Backtester, load_model, prepare_backtest_data

# Notebook-wide plot defaults: seaborn's whitegrid theme and a 12x8 inch default figure
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 8)})

## 1. Load Model and Data

First, let's load the trained model and prepare the data for backtesting.

In [None]:
# Location of the serialized baseline model artifact
model_path = "../models/baseline_xgb.pkl"

# `model` is what gets handed to prepare_backtest_data further down
model = load_model(model_path)

# Read the same file again with joblib to look for metadata stored with the model.
# NOTE(review): joblib.load unpickles the file — only point model_path at trusted artifacts.
model_package = joblib.load(model_path)
has_metadata = isinstance(model_package, dict) and "metadata" in model_package

# The summary is printed only when the artifact is a dict carrying a "metadata" entry
if has_metadata:
    metadata = model_package["metadata"]
    print("Model metadata:")
    print(f"Features: {metadata['features']}")
    print(f"Training metrics: RMSE={metadata['metrics']['train_rmse']:.5f}, R²={metadata['metrics']['train_r2']:.5f}")
    print(f"Test metrics: RMSE={metadata['metrics']['test_rmse']:.5f}, R²={metadata['metrics']['test_r2']:.5f}")
    print(f"Directional accuracy: {metadata['metrics']['directional_accuracy']:.2f}%")
    print(f"Sharpe ratio: {metadata['metrics']['sharpe_ratio']:.2f}")

In [None]:
# Symbol universe and date window for the backtest
symbols = ['AAPL', 'MSFT', 'GOOGL']
start_date, end_date = '2025-04-01', '2025-04-17'

# Loader arguments gathered in one mapping, then passed by keyword
load_kwargs = {
    "start_date": start_date,
    "end_date": end_date,
    "symbols": symbols,
    "use_feast": False,  # Set to True if using Feast
    "label_horizon": 1,
    "label_type": 'return',
}
df = load_training_data(**load_kwargs)

print(f"Loaded {len(df)} records for backtesting")

In [None]:
# Prepare data for backtesting
# prepare_backtest_data receives the loaded frame and the model. Presumably it attaches the
# model's predictions (the "pred" column the Backtester is later told to read) — TODO confirm
# against backtest/backtester.py.
backtest_df = prepare_backtest_data(df, model)

# Display first few rows (as the cell's last expression this renders as its output)
backtest_df.head()

## 2. Run Backtest

Now, let's run the backtest to evaluate the model's performance in a simulated trading environment.

In [None]:
# Initialize backtester
# The prepared frame is passed positionally; every other setting is passed by keyword.
# The same argument values are repeated for each run of the threshold sweep in Section 5.
backtester = Backtester(
    backtest_df,
    model=None,  # We've already generated predictions
    capital=100000,  # presumably the starting cash; equals create_benchmark's default initial_capital
    commission=0.001,  # 0.1% commission
    slippage=0.001,    # 0.1% slippage
    prediction_column="pred"  # column of backtest_df the backtester is told to read predictions from
)

In [None]:
# Run backtest with default strategy
# run() returns three values, unpacked here as the final capital, the trade log and the
# portfolio history. Later cells read "pnl", "return", "symbol" and "action" from trades_df
# and "timestamp" and "portfolio_value" from portfolio_df.
final_capital, trades_df, portfolio_df = backtester.run(
    threshold=0.001,  # Only enter positions if predicted return > 0.1%
    max_position_size=0.2,  # Allocate 20% of capital per position
    stop_loss=0.02,  # 2% stop loss
    take_profit=0.05  # 5% take profit
)

## 3. Analyze Backtest Results

Let's analyze the results of the backtest to understand the model's performance.

In [None]:
# Plot backtest results
# plot_results is given the portfolio history and the trade log produced by the run above.
backtester.plot_results(portfolio_df, trades_df)

In [None]:
# Analyze trades
# Reads the "pnl", "return", "symbol" and "action" columns of trades_df.
if len(trades_df) > 0:
    print(f"Total trades: {len(trades_df)}")

    # Split the trade log by PnL sign; break-even trades (pnl == 0) land in neither bucket
    winning_trades = trades_df[trades_df["pnl"] > 0]
    losing_trades = trades_df[trades_df["pnl"] < 0]

    # Win rate: share of all trades with strictly positive PnL
    win_rate = len(winning_trades) / len(trades_df) * 100
    print(f"Win rate: {win_rate:.2f}%")

    # mean() of an empty selection is NaN, so report 0 when a bucket is empty
    avg_profit = winning_trades["pnl"].mean() if len(winning_trades) > 0 else 0
    avg_loss = losing_trades["pnl"].mean() if len(losing_trades) > 0 else 0
    print(f"Average profit: ${avg_profit:.2f}")
    print(f"Average loss: ${avg_loss:.2f}")

    # Profit factor = gross profit / gross loss.
    # With no losing trades the ratio is unbounded: report inf (or 0 when there is no
    # profit either) instead of dividing by a tiny sentinel, which prints a huge but
    # meaningless finite number.
    total_profit = winning_trades["pnl"].sum()
    total_loss = abs(losing_trades["pnl"].sum())
    if total_loss > 0:
        profit_factor = total_profit / total_loss
    else:
        profit_factor = float("inf") if total_profit > 0 else 0.0
    print(f"Profit factor: {profit_factor:.2f}")

    # Per-symbol breakdown: total / mean / count of PnL and mean trade return
    trades_by_symbol = trades_df.groupby("symbol").agg({
        "pnl": ["sum", "mean", "count"],
        "return": "mean"
    })
    print("\nTrades by symbol:")
    display(trades_by_symbol)

    # Same breakdown grouped by the "action" column
    trades_by_action = trades_df.groupby("action").agg({
        "pnl": ["sum", "mean", "count"],
        "return": "mean"
    })
    print("\nTrades by action:")
    display(trades_by_action)

    # Display sample trades
    print("\nSample trades:")
    display(trades_df.head())
else:
    print("No trades were executed during the backtest.")

In [None]:
# Analyze portfolio performance
# Reads "timestamp" and "portfolio_value" from portfolio_df and adds the columns
# "daily_return", "cumulative_return", "cumulative_max" and "drawdown" to it.

# Defaults so the comparison cell in Section 4 can still print these metrics when
# there is no portfolio history (previously that path ended in a NameError).
total_return = annualized_return = sharpe_ratio = max_drawdown = np.nan

if len(portfolio_df) > 0:
    # Row-over-row returns; called "daily" on the assumption of one row per day — TODO confirm
    portfolio_df["daily_return"] = portfolio_df["portfolio_value"].pct_change()

    # Calculate total return
    initial_value = portfolio_df["portfolio_value"].iloc[0]
    final_value = portfolio_df["portfolio_value"].iloc[-1]
    total_return = (final_value / initial_value - 1) * 100
    print(f"Total return: {total_return:.2f}%")

    # Calculate annualized return.
    # A single-row (or zero-span) history has no elapsed time to annualize over, and
    # 365 / 0.0 raises ZeroDivisionError, so report NaN in that case.
    days = (portfolio_df["timestamp"].iloc[-1] - portfolio_df["timestamp"].iloc[0]).total_seconds() / 86400
    if days > 0:
        annualized_return = ((final_value / initial_value) ** (365 / days) - 1) * 100
    else:
        annualized_return = np.nan
    print(f"Annualized return: {annualized_return:.2f}%")

    # Calculate Sharpe ratio
    # NOTE(review): the risk-free rate is de-annualized over 365 days while the ratio is
    # annualized with sqrt(252); kept as-is to stay comparable with the threshold sweep.
    risk_free_rate = 0.02  # 2% annual risk-free rate
    daily_risk_free = (1 + risk_free_rate) ** (1 / 365) - 1
    excess_returns = portfolio_df["daily_return"].dropna() - daily_risk_free
    # Same guard as the threshold sweep: a zero or undefined std yields 0, not inf/NaN
    excess_std = excess_returns.std()
    sharpe_ratio = excess_returns.mean() / excess_std * np.sqrt(252) if excess_std > 0 else 0
    print(f"Sharpe ratio: {sharpe_ratio:.2f}")

    # Calculate max drawdown: worst percentage drop of the compounded curve from its running peak
    portfolio_df["cumulative_return"] = (1 + portfolio_df["daily_return"].fillna(0)).cumprod()
    portfolio_df["cumulative_max"] = portfolio_df["cumulative_return"].cummax()
    portfolio_df["drawdown"] = (portfolio_df["cumulative_return"] / portfolio_df["cumulative_max"] - 1) * 100
    max_drawdown = portfolio_df["drawdown"].min()
    print(f"Max drawdown: {max_drawdown:.2f}%")

    # Plot daily returns (the first row has no prior value, so it is drawn as 0)
    plt.figure(figsize=(12, 6))
    plt.plot(portfolio_df["timestamp"], portfolio_df["daily_return"].fillna(0) * 100)
    plt.title("Daily Returns")
    plt.xlabel("Date")
    plt.ylabel("Return (%)")
    plt.grid(True)
    plt.show()

    # Plot cumulative returns
    plt.figure(figsize=(12, 6))
    plt.plot(portfolio_df["timestamp"], (portfolio_df["cumulative_return"] - 1) * 100)
    plt.title("Cumulative Returns")
    plt.xlabel("Date")
    plt.ylabel("Return (%)")
    plt.grid(True)
    plt.show()
else:
    print("No portfolio data available.")

## 4. Compare with Benchmark

Let's compare our strategy's performance with a benchmark: an equal-weight buy-and-hold portfolio of the same symbols, bought at each symbol's first available close.

In [None]:
# Create a simple buy and hold benchmark
def create_benchmark(df, initial_capital=100000):
    """Build an equal-weight buy-and-hold benchmark from per-symbol close prices.

    The capital is split evenly across the symbols that have price data. Each
    symbol is bought at its first available close and held to the end.

    Timestamps at which a symbol has no row are valued at that symbol's last
    known close instead of being dropped from the total, so data gaps do not
    show up as drawdowns. Before a symbol's first price, its share of the
    capital is counted as uninvested cash.

    Args:
        df: Frame with "timestamp", "symbol" and "close" columns. If a
            (timestamp, symbol) pair occurs more than once, the first row wins.
        initial_capital: Total capital to split across the symbols.

    Returns:
        DataFrame with columns "timestamp" and "portfolio_value", one row per
        distinct timestamp in ascending order. Empty (same columns) when `df`
        has no rows or no usable close prices.
    """
    columns = ["timestamp", "portfolio_value"]
    if len(df) == 0:
        return pd.DataFrame(columns=columns)

    # Wide price table: one row per timestamp, one column per symbol
    prices = df.pivot_table(
        index="timestamp", columns="symbol", values="close", aggfunc="first"
    ).sort_index()
    if prices.empty:
        return pd.DataFrame(columns=columns)

    capital_per_symbol = initial_capital / prices.shape[1]

    # Back-filling the first row yields each symbol's first observed close
    entry_prices = prices.bfill().iloc[0]
    quantities = capital_per_symbol / entry_prices

    # Carry the last known close across gaps; anything still missing lies before
    # the symbol's first price and is held as cash
    held_prices = prices.ffill()
    position_values = held_prices.mul(quantities, axis=1).fillna(capital_per_symbol)

    benchmark = position_values.sum(axis=1).rename("portfolio_value").reset_index()
    return benchmark[columns]

# Create benchmark from the raw loaded data (default initial capital of 100000)
benchmark_df = create_benchmark(df)

# Plot strategy vs benchmark
plt.figure(figsize=(12, 6))
plt.plot(portfolio_df["timestamp"], portfolio_df["portfolio_value"], label="Strategy")
plt.plot(benchmark_df["timestamp"], benchmark_df["portfolio_value"], label="Benchmark (Buy & Hold)")
plt.title("Strategy vs Benchmark")
plt.xlabel("Date")
plt.ylabel("Portfolio Value ($)")
plt.legend()
plt.grid(True)
plt.show()

# Calculate benchmark metrics, using the same formulas as the strategy metrics in Section 3
benchmark_df["daily_return"] = benchmark_df["portfolio_value"].pct_change()
benchmark_initial_value = benchmark_df["portfolio_value"].iloc[0]
benchmark_final_value = benchmark_df["portfolio_value"].iloc[-1]
benchmark_total_return = (benchmark_final_value / benchmark_initial_value - 1) * 100

# Annualize only when time has actually elapsed; 365 / 0.0 would raise ZeroDivisionError
benchmark_days = (benchmark_df["timestamp"].iloc[-1] - benchmark_df["timestamp"].iloc[0]).total_seconds() / 86400
if benchmark_days > 0:
    benchmark_annualized_return = ((benchmark_final_value / benchmark_initial_value) ** (365 / benchmark_days) - 1) * 100
else:
    benchmark_annualized_return = np.nan

risk_free_rate = 0.02  # 2% annual risk-free rate
daily_risk_free = (1 + risk_free_rate) ** (1 / 365) - 1
benchmark_excess_returns = benchmark_df["daily_return"].dropna() - daily_risk_free
# Same guard as the threshold sweep: a zero or undefined std yields 0, not inf/NaN
benchmark_excess_std = benchmark_excess_returns.std()
benchmark_sharpe_ratio = (
    benchmark_excess_returns.mean() / benchmark_excess_std * np.sqrt(252)  # Annualized
    if benchmark_excess_std > 0
    else 0
)

print(f"Benchmark total return: {benchmark_total_return:.2f}%")
print(f"Benchmark annualized return: {benchmark_annualized_return:.2f}%")
print(f"Benchmark Sharpe ratio: {benchmark_sharpe_ratio:.2f}")

# Compare strategy vs benchmark (strategy figures come from the portfolio analysis cell)
print("\nStrategy vs Benchmark:")
print(f"Total return: {total_return:.2f}% vs {benchmark_total_return:.2f}%")
print(f"Annualized return: {annualized_return:.2f}% vs {benchmark_annualized_return:.2f}%")
print(f"Sharpe ratio: {sharpe_ratio:.2f} vs {benchmark_sharpe_ratio:.2f}")

## 5. Parameter Sensitivity Analysis

Let's analyze how different parameters affect the backtest results.

In [None]:
# Test different threshold values
#
# Everything created per run uses a sweep_ prefix (or lives inside the helper), so the
# baseline objects from the earlier sections (backtester, final_capital, trades_df,
# portfolio_df, total_return, annualized_return, sharpe_ratio) keep their values.
# The save cell at the end of the notebook writes trades_df / portfolio_df to the
# baseline_* files and must not pick up the last sweep run instead.

THRESHOLD_RESULT_COLUMNS = [
    "threshold",
    "final_capital",
    "total_return",
    "annualized_return",
    "sharpe_ratio",
    "max_drawdown",
    "win_rate",
    "num_trades",
]


def _summarize_threshold_run(threshold, run_trades, run_portfolio):
    """Compute the summary row for one threshold run.

    Args:
        threshold: Prediction threshold used for the run.
        run_trades: Trade log of the run; only its length and "pnl" column are read.
        run_portfolio: Non-empty portfolio history with "timestamp" and
            "portfolio_value" columns. It is not modified.

    Returns:
        dict with one entry per name in THRESHOLD_RESULT_COLUMNS. Returns and
        drawdown are percentages; "annualized_return" is NaN when the history
        spans no elapsed time.
    """
    values = run_portfolio["portfolio_value"]
    start_value = values.iloc[0]
    end_value = values.iloc[-1]
    period_returns = values.pct_change()

    # Annualize only when time has elapsed; 365 / 0.0 would raise ZeroDivisionError
    elapsed = run_portfolio["timestamp"].iloc[-1] - run_portfolio["timestamp"].iloc[0]
    elapsed_days = elapsed.total_seconds() / 86400
    if elapsed_days > 0:
        annualized = ((end_value / start_value) ** (365 / elapsed_days) - 1) * 100
    else:
        annualized = np.nan

    # Sharpe ratio against a 2% annual risk-free rate; 0 when the std is zero or undefined
    per_period_risk_free = (1 + 0.02) ** (1 / 365) - 1
    excess = period_returns.dropna() - per_period_risk_free
    excess_std = excess.std()
    sharpe = excess.mean() / excess_std * np.sqrt(252) if excess_std > 0 else 0

    # Max drawdown: worst percentage drop of the compounded curve from its running peak
    equity_curve = (1 + period_returns.fillna(0)).cumprod()
    drawdown_pct = (equity_curve / equity_curve.cummax() - 1) * 100

    # Win rate: share of trades with strictly positive PnL (0 when nothing traded)
    if len(run_trades) > 0:
        win_pct = len(run_trades[run_trades["pnl"] > 0]) / len(run_trades) * 100
    else:
        win_pct = 0

    return {
        "threshold": threshold,
        "final_capital": end_value,
        "total_return": (end_value / start_value - 1) * 100,
        "annualized_return": annualized,
        "sharpe_ratio": sharpe,
        "max_drawdown": drawdown_pct.min(),
        "win_rate": win_pct,
        "num_trades": len(run_trades),
    }


thresholds = [0.0, 0.001, 0.002, 0.005, 0.01]
threshold_results = []

for threshold in thresholds:
    # Fresh backtester per threshold, configured like the baseline run
    sweep_backtester = Backtester(
        backtest_df,
        model=None,
        capital=100000,
        commission=0.001,
        slippage=0.001,
        prediction_column="pred"
    )

    _, sweep_trades_df, sweep_portfolio_df = sweep_backtester.run(
        threshold=threshold,
        max_position_size=0.2,
        stop_loss=0.02,
        take_profit=0.05
    )

    # Runs that produced no portfolio history are left out of the table
    if len(sweep_portfolio_df) > 0:
        threshold_results.append(
            _summarize_threshold_run(threshold, sweep_trades_df, sweep_portfolio_df)
        )

# Display results (explicit columns keep the table well-formed even if every run was skipped)
threshold_results_df = pd.DataFrame(threshold_results, columns=THRESHOLD_RESULT_COLUMNS)
display(threshold_results_df)

# Plot results: a 2x2 grid with one metric per panel, each against the threshold
threshold_panels = [
    ("total_return", "Total Return vs Threshold", "Total Return (%)"),
    ("sharpe_ratio", "Sharpe Ratio vs Threshold", "Sharpe Ratio"),
    ("win_rate", "Win Rate vs Threshold", "Win Rate (%)"),
    ("num_trades", "Number of Trades vs Threshold", "Number of Trades"),
]

plt.figure(figsize=(14, 10))

for panel_number, (metric_column, panel_title, y_label) in enumerate(threshold_panels, start=1):
    plt.subplot(2, 2, panel_number)
    plt.plot(threshold_results_df["threshold"], threshold_results_df[metric_column], marker="o")
    plt.title(panel_title)
    plt.xlabel("Threshold")
    plt.ylabel(y_label)
    plt.grid(True)

plt.tight_layout()
plt.show()

## 6. Save Backtest Results

Let's save the backtest results for future reference.

In [None]:
# Make sure the output directory exists before writing
os.makedirs("../results", exist_ok=True)

# Destination file -> frame to write. The frames are whatever these names are bound to
# when this cell runs, so any later cell that rebinds trades_df / portfolio_df changes
# what lands in the baseline_* files.
result_files = {
    "../results/baseline_trades.csv": trades_df,
    "../results/baseline_portfolio.csv": portfolio_df,
    "../results/baseline_benchmark.csv": benchmark_df,
    "../results/baseline_threshold_analysis.csv": threshold_results_df,
}
for csv_path, frame in result_files.items():
    frame.to_csv(csv_path, index=False)

print("Backtest results saved to ../results/")

## 7. Summary and Next Steps

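We've backtested our baseline XGBoost model and analyzed its performance. **TODO:** the values marked X, Y, Z and W below, and the bracketed choice, are placeholders — replace them with the figures produced by Sections 3–5 after running the notebook end to end.

The model achieves a total return of X% over the test period, with a Sharpe ratio of Y.

Key findings:
1. The model [outperforms/underperforms] the buy-and-hold benchmark
2. The optimal prediction threshold is around Z
3. The model has a win rate of W%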

Next steps:
1. Implement more sophisticated trading strategies
2. Incorporate position sizing based on prediction confidence
3. Test the model on a longer time period and more symbols
4. Implement more advanced models (e.g., deep learning models)
5. Incorporate alternative data sources (e.g., sentiment analysis from news and social media)