# Objective: 
- To translate the ML model's predictions into actionable trading signals and simulate their performance over historical data using a robust backtesting framework. This includes both deterministic and Monte Carlo backtesting.

# Load Backtesting Data

In [None]:
import os
import pandas as pd
import yaml

def load_backtest_inputs(
    processed_dir="data/processed",
    metrics_dir="results/ml_metrics",
    backtest_config_path="config/backtest_config.yaml"
):
    """
    Gather everything the backtest needs from disk.

    :param processed_dir: directory scanned for test-period Parquet files
    :param metrics_dir: directory expected to contain "test_predictions.csv"
    :param backtest_config_path: path of the YAML file with backtest parameters
    :return: tuple (market_data, predictions, backtest_config) where
             market_data maps each file name (without ".parquet") to its DataFrame,
             predictions is a DataFrame or None when the CSV is missing,
             and backtest_config is the parsed YAML content
    """
    parquet_suffix = ".parquet"

    # Market data: every Parquet file whose name mentions "test".
    # ML features are assumed to live in these same files; adjust if stored separately.
    market_data = {
        entry.replace(parquet_suffix, ""): pd.read_parquet(os.path.join(processed_dir, entry))
        for entry in os.listdir(processed_dir)
        if entry.endswith(parquet_suffix) and "test" in entry
    }

    # Model predictions are optional: warn instead of failing when absent.
    preds_path = os.path.join(metrics_dir, "test_predictions.csv")
    if not os.path.exists(preds_path):
        print(f"Warning: Predictions file not found at {preds_path}")
        predictions = None
    else:
        predictions = pd.read_csv(preds_path)

    # Backtesting parameters
    with open(backtest_config_path, "r") as config_file:
        backtest_config = yaml.safe_load(config_file)

    return market_data, predictions, backtest_config

if __name__ == "__main__":
    # Smoke-check the loader with its default paths and report what came back.
    market_data, predictions, backtest_config = load_backtest_inputs()
    print("Loaded market data keys:", list(market_data))
    if predictions is not None:
        print("Loaded predictions shape:", predictions.shape)

# Strategy Signal Generation

In [None]:
import pandas as pd
import numpy as np

def _event_key_mask(signals, events, date_col, ticker_col):
    """Return a boolean array marking rows of `signals` whose (date, ticker) occurs in `events`."""
    # Work on a private copy of the key columns so the caller's frame is never modified.
    keys = events[[date_col, ticker_col]].copy()
    keys[date_col] = pd.to_datetime(keys[date_col])
    # Several events on the same (date, ticker) must not duplicate signal rows in the merge.
    keys = keys.drop_duplicates()
    merged = signals[[date_col, ticker_col]].merge(
        keys, on=[date_col, ticker_col], how='left', indicator=True
    )
    return (merged['_merge'] == 'both').to_numpy()


def generate_trading_signals(predictions_df, 
                             long_threshold=0.01, 
                             short_threshold=-0.01,
                             corporate_actions_df=None,
                             index_rebalance_df=None,
                             date_col='date',
                             ticker_col='ticker'):
    """
    Convert raw ML predictions into trading signals and apply event-driven logic.

    Neither `predictions_df` nor the optional event frames are modified; the result
    is a copy of `predictions_df` with an added 'signal' column. When an event frame
    is supplied, the date column of the result is converted with `pd.to_datetime`
    so it can be matched against the event dates.

    :param predictions_df: DataFrame with at least ['date', 'ticker', 'prediction']
    :param long_threshold: float, predictions strictly above this become long signals
    :param short_threshold: float, predictions strictly below this become short signals
    :param corporate_actions_df: DataFrame with ['date', 'ticker', 'action'] (optional);
                                 any matching (date, ticker) forces the signal to 0
    :param index_rebalance_df: DataFrame with ['date', 'ticker', 'rebalance_flag'] (optional);
                               a matching row with rebalance_flag == 1 halves the signal
    :param date_col: name of the date column shared by all frames
    :param ticker_col: name of the ticker column shared by all frames
    :return: DataFrame with trading signals (1=long, -1=short, 0=flat, +/-0.5 when halved)
    """
    signals = predictions_df.copy()
    # Basic signal rules
    signals['signal'] = 0
    signals.loc[signals['prediction'] > long_threshold, 'signal'] = 1
    signals.loc[signals['prediction'] < short_threshold, 'signal'] = -1

    # Event-driven logic: Avoid trades around corporate actions
    if corporate_actions_df is not None:
        signals[date_col] = pd.to_datetime(signals[date_col])
        has_action = _event_key_mask(signals, corporate_actions_df, date_col, ticker_col)
        # If a corporate action is present, set signal to 0 (flat)
        signals['signal'] = np.where(has_action, 0, signals['signal'])

    # Event-driven logic: Adjust for index rebalances (e.g., reduce position size or avoid trading)
    if index_rebalance_df is not None:
        signals[date_col] = pd.to_datetime(signals[date_col])
        flagged = index_rebalance_df[index_rebalance_df['rebalance_flag'] == 1]
        is_rebalance = _event_key_mask(signals, flagged, date_col, ticker_col)
        # Example: If rebalance_flag==1, halve the position size
        signals['signal'] = np.where(is_rebalance, signals['signal'] * 0.5, signals['signal'])

    return signals

# Example usage:
# predictions = pd.read_csv("results/ml_metrics/test_predictions.csv")
# corp_actions = pd.read_csv("data/processed/corporate_actions.csv")
# index_reb = pd.read_csv("data/processed/index_rebalance_flags.csv")
# signals = generate_trading_signals(predictions, 
#                                    long_threshold=0.01, 
#                                    short_threshold=-0.01,
#                                    corporate_actions_df=corp_actions,
#                                    index_rebalance_df=index_reb)

# Deterministic Backtesting

In [None]:
import pandas as pd
import numpy as np
import yaml
from src.backtesting.backtester import Backtester
from src.execution_sim.simulated_execution import execute_order

def run_backtest_with_execution(
    signals_df,
    market_data_df,
    backtest_config_path="config/backtest_config.yaml",
    results_dir="results"
):
    """
    Replay daily trading signals through the execution simulator and log the outcome.

    Dates are processed in ascending order. On each date, a row with signal == 1
    buys as many shares as the current cash (times leverage) affords at that day's
    close including transaction cost, and a row with signal == -1 sells the whole
    existing long position in that ticker. Any other signal value is ignored.
    Rows whose ticker has no usable close price that day (missing, NaN, zero or
    negative) are skipped, as are buy orders that would be for zero shares.

    Writes "backtest_trade_log.csv" and "backtest_portfolio_snapshots.csv" into
    `results_dir`, creating the directory when needed.

    :param signals_df: DataFrame with columns ['date', 'ticker', 'signal']
    :param market_data_df: DataFrame with columns ['date', 'ticker', 'close', 'volume']
    :param backtest_config_path: YAML file; the keys read are initial_cash,
                                 transaction_cost, slippage and leverage
    :param results_dir: directory receiving the two CSV files
    :return: None
    """
    import os  # local import: this cell's import list does not bring in os

    # Load backtest configuration (an empty YAML file parses to None)
    with open(backtest_config_path, "r") as f:
        config = yaml.safe_load(f) or {}
    initial_cash = config.get("initial_cash", 1_000_000)
    transaction_cost = config.get("transaction_cost", 0.001)
    slippage = config.get("slippage", 0.0005)
    leverage = config.get("leverage", 1.0)

    # Initialize backtester
    backtester = Backtester(
        initial_cash=initial_cash,
        transaction_cost=transaction_cost,
        slippage=slippage
    )

    # Prepare logging
    trade_log = []
    portfolio_snapshots = []

    # Ensure data is sorted by date
    dates = sorted(signals_df['date'].unique())
    for date in dates:
        day_signals = signals_df[signals_df['date'] == date]
        day_market = market_data_df[market_data_df['date'] == date]
        prices = day_market.set_index('ticker')['close'].to_dict()
        volumes = day_market.set_index('ticker')['volume'].to_dict()

        # Execute trades for each ticker
        for _, row in day_signals.iterrows():
            ticker = row['ticker']
            signal = row['signal']
            if ticker not in prices:
                continue
            price = prices[ticker]
            # A NaN or non-positive close cannot be used to size or fill an order
            if np.isnan(price) or price <= 0:
                continue

            if signal == 1:
                # Determine order size (example: full notional, can be customized)
                order_size = int((backtester.cash * leverage) // (price * (1 + transaction_cost)))
                if order_size <= 0:
                    # Not enough cash for a single share: nothing to send or log
                    continue
                order_details = {
                    'symbol': ticker,
                    'side': 'buy',
                    'size': order_size,
                    'order_type': 'market'
                }
            elif signal == -1 and backtester.positions.get(ticker, 0) > 0:
                order_details = {
                    'symbol': ticker,
                    'side': 'sell',
                    'size': backtester.positions[ticker],
                    'order_type': 'market'
                }
            else:
                continue

            # Only the close is available, so it stands in for price, bid and ask
            current_market_data = {
                'price': price,
                'volume': volumes.get(ticker, 0),
                'bid': price,
                'ask': price
            }
            exec_result = execute_order(order_details, current_market_data)
            trade_log.append({
                'date': date,
                'ticker': ticker,
                'side': order_details['side'],
                'size': exec_result['filled_size'],
                'fill_price': exec_result['avg_fill_price'],
                'slippage': exec_result['slippage']
            })
            # Update backtester positions and cash
            if order_details['side'] == 'buy':
                cost = exec_result['filled_size'] * exec_result['avg_fill_price'] * (1 + transaction_cost)
                backtester.cash -= cost
                backtester.positions[ticker] = backtester.positions.get(ticker, 0) + exec_result['filled_size']
            elif order_details['side'] == 'sell':
                proceeds = exec_result['filled_size'] * exec_result['avg_fill_price'] * (1 - transaction_cost)
                backtester.cash += proceeds
                backtester.positions[ticker] = 0

        # Update portfolio value
        backtester._update_portfolio_value(prices)
        portfolio_snapshots.append({
            'date': date,
            'cash': backtester.cash,
            'positions': dict(backtester.positions),  # copy: the live dict keeps changing
            'portfolio_value': backtester.portfolio_value_history[-1]
        })

    # Save logs (to_csv does not create missing directories)
    if results_dir:
        os.makedirs(results_dir, exist_ok=True)
    pd.DataFrame(trade_log).to_csv(os.path.join(results_dir, "backtest_trade_log.csv"), index=False)
    pd.DataFrame(portfolio_snapshots).to_csv(
        os.path.join(results_dir, "backtest_portfolio_snapshots.csv"), index=False
    )
    print("Backtest complete. Trade log and portfolio snapshots saved.")

if __name__ == "__main__":
    # Example loading
    signals = pd.read_csv("results/ml_metrics/test_predictions.csv")  # Should have columns: date, ticker, signal
    # The backtest loop reads a 'signal' column; when the file only holds raw
    # predictions, derive the signals first with the default thresholds.
    if "signal" not in signals.columns:
        signals = generate_trading_signals(signals)
    market_data = pd.read_parquet("data/processed/final_features_test.parquet")  # Should have columns: date, ticker, close, volume
    run_backtest_with_execution(signals, market_data)

# Monte Carlo Backtesting Setup

In [None]:
import yaml
from src.backtesting.monte_carlo_backtester import MonteCarloBacktester

def load_backtest_config(config_path="config/backtest_config.yaml"):
    """Read the YAML file at `config_path` and return its parsed contents."""
    with open(config_path, "r") as config_file:
        parsed = yaml.safe_load(config_file)
    return parsed

def historical_bootstrap_path_generator(market_data):
    """
    Simple historical bootstrapping: randomly sample rows with replacement.

    Rows are drawn by position rather than by index label, so the result always
    has exactly as many rows as the input even when the input index contains
    repeated labels.

    :param market_data: DataFrame of historical observations, one per row
    :return: DataFrame with len(market_data) resampled rows and a fresh 0..n-1 index
    """
    import numpy as np
    n_rows = len(market_data)
    if n_rows == 0:
        # Nothing to sample from; hand back an empty frame with a clean index
        return market_data.reset_index(drop=True)
    sampled_pos = np.random.choice(n_rows, size=n_rows, replace=True)
    return market_data.iloc[sampled_pos].reset_index(drop=True)

def configure_and_run_monte_carlo(backtest_config, strategy_signals, market_data):
    """
    Run the Monte Carlo backtest described by the "monte_carlo" section of the config.

    :param backtest_config: parsed backtest configuration (dict)
    :param strategy_signals: signals forwarded unchanged to the Monte Carlo backtester
    :param market_data: market data forwarded unchanged to the Monte Carlo backtester
    :return: whatever run_monte_carlo_simulations returns, or None when the
             "monte_carlo" section is absent or not enabled
    :raises ValueError: if the configured scenario method is not recognised
    """
    mc_section = backtest_config.get("monte_carlo", {})

    # Guard clause: nothing to do unless the section explicitly enables it.
    if not mc_section.get("enabled", False):
        print("Monte Carlo backtesting is not enabled in the config.")
        return None

    scenario_method = mc_section.get("scenario_method", "historical_bootstrap")
    num_paths = mc_section.get("num_paths", 100)

    # Resolve the scenario generator; optional backends are imported only when selected.
    if scenario_method == "historical_bootstrap":
        make_path = historical_bootstrap_path_generator
    elif scenario_method == "garch":
        from src.backtesting.monte_carlo_backtester import garch_path_generator as make_path
    elif scenario_method == "pytorch_generative":
        from src.models.pytorch_models import generate_synthetic_paths as make_path
    elif scenario_method == "tensorflow_generative":
        from src.models.tensorflow_models import generate_synthetic_paths as make_path
    else:
        raise ValueError(f"Unknown scenario method: {scenario_method}")

    engine = MonteCarloBacktester(backtester_config=mc_section.get("backtester_config", {}))
    run_config = mc_section.get("backtester_config", {})
    outcome = engine.run_monte_carlo_simulations(
        num_paths=num_paths,
        path_generator=make_path,
        strategy_signals=strategy_signals,
        market_data=market_data,
        config=run_config
    )
    print(f"Monte Carlo simulation complete. Ran {num_paths} paths.")
    return outcome

if __name__ == "__main__":
    # Parse the default backtest configuration file.
    backtest_config = load_backtest_config()
    # Load or define strategy_signals and market_data here
    # strategy_signals = ...
    # market_data = ...
    # results = configure_and_run_monte_carlo(backtest_config, strategy_signals, market_data)

# Run Monte Carlo Simulations

In [None]:
import os
import pandas as pd
import numpy as np
from src.backtesting.monte_carlo_backtester import MonteCarloBacktester
import yaml

def run_and_store_monte_carlo(
    strategy_signals,
    market_data,
    backtest_config_path="config/backtest_config.yaml",
    results_dir="results/monte_carlo"
):
    """
    Run the configured Monte Carlo backtest and write its outputs to disk.

    The "monte_carlo" section of the YAML file supplies num_paths (default 1000),
    scenario_method (default "historical_bootstrap") and backtester_config.
    Each entry of results['equity_curves'] is written to
    "equity_curve_<k>.csv" (k starting at 1) and results['metrics'] is written to
    "monte_carlo_metrics.csv", all inside `results_dir`.

    :param strategy_signals: signals forwarded unchanged to the Monte Carlo backtester
    :param market_data: market data forwarded to the backtester and the path generator
    :param backtest_config_path: path of the YAML backtest configuration
    :param results_dir: output directory, created when missing
    :return: None
    :raises ValueError: if the configured scenario method is not recognised
    """
    # Load config (an empty YAML file parses to None)
    with open(backtest_config_path, "r") as f:
        backtest_config = yaml.safe_load(f) or {}
    mc_cfg = backtest_config.get("monte_carlo", {})
    num_paths = mc_cfg.get("num_paths", 1000)
    scenario_method = mc_cfg.get("scenario_method", "historical_bootstrap")

    # Select scenario generation method
    if scenario_method == "historical_bootstrap":
        def path_generator(md):
            # Sample by position: label-based lookup would return extra rows
            # whenever the index contains repeated labels.
            n_rows = len(md)
            if n_rows == 0:
                return md.reset_index(drop=True)
            pos = np.random.choice(n_rows, size=n_rows, replace=True)
            return md.iloc[pos].reset_index(drop=True)
    elif scenario_method == "garch":
        from src.backtesting.monte_carlo_backtester import garch_path_generator
        path_generator = garch_path_generator
    elif scenario_method == "pytorch_generative":
        from src.models.pytorch_models import generate_synthetic_paths
        path_generator = generate_synthetic_paths
    elif scenario_method == "tensorflow_generative":
        from src.models.tensorflow_models import generate_synthetic_paths
        path_generator = generate_synthetic_paths
    else:
        raise ValueError(f"Unknown scenario method: {scenario_method}")

    # Run Monte Carlo simulations
    mc_backtester = MonteCarloBacktester(backtester_config=mc_cfg.get("backtester_config", {}))
    results = mc_backtester.run_monte_carlo_simulations(
        num_paths=num_paths,
        path_generator=path_generator,
        strategy_signals=strategy_signals,
        market_data=market_data,
        config=mc_cfg.get("backtester_config", {})
    )

    # Store results
    os.makedirs(results_dir, exist_ok=True)
    # Save equity curves (the index is kept in the CSV)
    for i, eq_curve in enumerate(results['equity_curves']):
        eq_curve.to_csv(os.path.join(results_dir, f"equity_curve_{i+1}.csv"))
    # Save metrics
    metrics_df = pd.DataFrame(results['metrics'])
    metrics_df.to_csv(os.path.join(results_dir, "monte_carlo_metrics.csv"), index=False)
    print(f"Saved {num_paths} equity curves and metrics to {results_dir}")

if __name__ == "__main__":
    # Load or define strategy_signals and market_data before running
    # strategy_signals = ...
    # market_data = ...
    # run_and_store_monte_carlo(strategy_signals, market_data)
    pass  # keeps the block valid: an `if` body made only of comments is a syntax error

# Save Backtest Results

In [None]:
import os
import pandas as pd

def save_deterministic_backtest_results(trade_log, position_log, equity_curve, output_dir="results/backtest_reports"):
    """
    Write the three deterministic-backtest tables as CSV files (without index).

    :param trade_log: DataFrame written to "trade_log.csv"
    :param position_log: DataFrame written to "position_log.csv"
    :param equity_curve: DataFrame written to "equity_curve.csv"
    :param output_dir: destination directory, created when missing
    """
    os.makedirs(output_dir, exist_ok=True)
    outputs = (
        ("trade_log.csv", trade_log),
        ("position_log.csv", position_log),
        ("equity_curve.csv", equity_curve),
    )
    for file_name, table in outputs:
        table.to_csv(os.path.join(output_dir, file_name), index=False)
    print(f"Deterministic backtest results saved to {output_dir}")

def save_monte_carlo_results(equity_curves, metrics_df, output_dir="results/monte_carlo_sims"):
    """
    Write every Monte Carlo equity curve plus the aggregated metrics as CSV files.

    :param equity_curves: iterable of DataFrames; the k-th (1-based) is written
                          to "equity_curve_<k>.csv" without its index
    :param metrics_df: DataFrame written to "monte_carlo_metrics.csv" without its index
    :param output_dir: destination directory, created when missing
    """
    os.makedirs(output_dir, exist_ok=True)

    # One file per simulated path, numbered from 1
    for path_number, curve in enumerate(equity_curves, start=1):
        curve_target = os.path.join(output_dir, f"equity_curve_{path_number}.csv")
        curve.to_csv(curve_target, index=False)

    # Aggregated metrics
    metrics_target = os.path.join(output_dir, "monte_carlo_metrics.csv")
    metrics_df.to_csv(metrics_target, index=False)
    print(f"Monte Carlo simulation results saved to {output_dir}")

# Example usage:
# save_deterministic_backtest_results(trade_log_df, position_log_df, equity_curve_df)
#