# Simulations
 - For mass-testing the strategy on random samples of stocks

### Imports

In [1]:
import numpy as np

from functions.data_collection import get_current_sp500_ticker_sample, get_historical_data, get_sp500_data_in_date_range
from functions.computing import compute_momentum, compute_mean_reversion, compute_signal_returns
from functions.signals import generate_signals_momentum, generate_signals_meanreversion, generate_final_signal
from functions.trading import build_strategy_choice_label, build_feature_matrix, track_strategy_chosen_signals
from functions.training import train_strategy_chooser
from functions.risk_management import apply_stop_loss

import warnings
warnings.filterwarnings("ignore")

## Sim 1
- Running a batch of simulations on randomly chosen S&P 500 companies

### Parameters

In [3]:
# PARAMETERS
# n_simulations: how many independent random samples to run.
# sample_size:   how many stocks to draw per sample (SPY is added on top).
n_simulations, sample_size = 10, 15
# Date window used for downloading and slicing the price history.
start_date, end_date = "2015-01-01", "2025-01-01"
# Starting cash passed to the performance tracker.
initial_capital = 10_000.0

### Running Sims

In [4]:
# Containers for the final portfolio values of every simulation that completes
# (each value is taken from the last date of that simulation's result frame).
final_strategy_values = []
final_spy_values = []
final_equal_values = []

for sim in range(n_simulations):
    print(f"\n--- Running Simulation {sim+1} of {n_simulations} ---")
    # Get a random sample of S&P 500 tickers. Copy it into a fresh list so that
    # appending the benchmark below works for any iterable the helper returns
    # and never mutates an object the helper may still hold.
    tickers = list(get_current_sp500_ticker_sample(n=sample_size))
    # Always add SPY as benchmark
    if "SPY" not in tickers:
        tickers.append("SPY")

    # Download historical data for the chosen tickers, then drop every ticker
    # (column) that has at least one missing value inside the date window.
    data = get_historical_data(tickers, start=start_date, end=end_date)
    data = data.loc[start_date:end_date].dropna(axis=1, how='any')
    if data.empty or data.shape[0] < 100:
        print("Insufficient data returned for this sample. Skipping simulation.")
        continue

    # The column-wise dropna above can remove SPY itself (e.g. a failed or
    # partial download). Skip the run instead of letting data["SPY"] raise a
    # KeyError that would abort the loop and lose the earlier simulations.
    if "SPY" not in data.columns:
        print("SPY benchmark data unavailable for this sample. Skipping simulation.")
        continue

    # NOTE(review): `data` still contains the SPY column, so SPY is handed to the
    # strategy together with the sampled stocks, whereas Sim 2 passes only the
    # sampled stocks — confirm which universe is intended.

    # Compute momentum and mean-reversion (z-score) using your windows
    momentum_df = compute_momentum(data, window=63)    # About 3 months of trading days
    zscore_df   = compute_mean_reversion(data, window=20)  # 20-day rolling window

    # Generate signals for each approach
    momentum_signals = generate_signals_momentum(momentum_df, threshold=0.10)
    meanrev_signals  = generate_signals_meanreversion(zscore_df, z_entry=1.0)

    # Compute next-day returns based on the signals (used for choosing the best strategy)
    momentum_returns = compute_signal_returns(data, momentum_signals)
    meanrev_returns  = compute_signal_returns(data, meanrev_signals)

    # 1 if momentum outperformed mean-reversion on that day, 0 otherwise.
    label = build_strategy_choice_label(momentum_returns, meanrev_returns)

    # Build features for the strategy chooser (averaging factors across stocks, plus volatility etc.)
    features = build_feature_matrix(data, momentum_df, zscore_df, vol_window=20)

    # TRAIN THE STRATEGY CHOOSER
    # Broad catch is deliberate: any training failure skips this sample only.
    try:
        clf = train_strategy_chooser(features, label)
    except Exception as e:
        print(f"Training classifier failed with error: {e}\nSkipping simulation.")
        continue

    # GENERATE FINAL SIGNALS & APPLY STOP-LOSS
    # NOTE(review): the classifier is asked to predict on the same `features` it
    # was just trained on. Unless the helpers hold data out internally, the
    # strategy values recorded below are in-sample — TODO confirm.
    signals = generate_final_signal(clf, features, momentum_signals, meanrev_signals)
    final_signals = apply_stop_loss(signals, data, stop_loss_pct=0.05)

    # TRACK PERFORMANCE
    # Use the SPY column from your data as the benchmark series
    spy_series = data["SPY"]
    result_df = track_strategy_chosen_signals(data, final_signals, spy_series, initial_capital=initial_capital)

    # Make sure we have results; use the last available date to record final portfolio values.
    if result_df.empty:
        print("No results available from tracking performance. Skipping simulation.")
        continue

    final_date = result_df.index[-1]
    final_strategy = result_df.at[final_date, "Strategy"]
    final_spy = result_df.at[final_date, "SPY_BuyHold"]
    final_equal = result_df.at[final_date, "EqualWeight_BuyHold"]

    final_strategy_values.append(final_strategy)
    final_spy_values.append(final_spy)
    final_equal_values.append(final_equal)

    print(f"Simulation {sim+1} final values (Date: {final_date.date()}):")
    print(f"  ML Strategy:           ${final_strategy:,.2f}")
    print(f"  SPY Buy & Hold:        ${final_spy:,.2f}")
    print(f"  Equal Weight Buy & Hold: ${final_equal:,.2f}")

# COMPUTE AVERAGES AND MEDIANS ACROSS SIMULATIONS
if not final_strategy_values:
    # Every simulation was skipped, so there is nothing to aggregate.
    print("No successful simulations to report statistics.")
else:
    # SPY benchmark: only the mean is reported.
    avg_spy = np.mean(final_spy_values)

    # Equal-weight buy & hold: mean and median of the final values.
    avg_equal = np.mean(final_equal_values)
    median_equal = np.median(final_equal_values)

    # ML strategy: mean, median and the extremes across simulations.
    avg_strategy = np.mean(final_strategy_values)
    median_strategy = np.median(final_strategy_values)
    max_strategy = np.max(final_strategy_values)
    min_strategy = np.min(final_strategy_values)

    # Assemble the report line by line; the empty strings are the blank
    # separator lines between the three portfolio sections.
    report_lines = [
        "\n============================================",
        "Final Portfolio Statistics (over simulations):",
        f"  SPY Buy & Hold:                    ${avg_spy:,.2f}",
        "",
        f"  Equal Weight Buy & Hold - Average: ${avg_equal:,.2f}",
        f"  Equal Weight Buy & Hold - Median:  ${median_equal:,.2f}",
        "",
        f"  ML Strategy - Average:             ${avg_strategy:,.2f}",
        f"  ML Strategy - Median:              ${median_strategy:,.2f}",
        f"  ML Strategy - Max:                 ${max_strategy:,.2f}",
        f"  ML Strategy - Min:                 ${min_strategy:,.2f}",
        "============================================\n",
    ]
    print("\n".join(report_lines))


--- Running Simulation 1 of 10 ---


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Best Params: {'max_depth': 3, 'min_samples_split': 2, 'n_estimators': 100}
Best CV Score: 0.49183673469387756
Simulation 1 final values (Date: 2024-12-31):
  ML Strategy:           $52,856.21
  SPY Buy & Hold:        $33,957.37
  Equal Weight Buy & Hold: $29,971.64

--- Running Simulation 2 of 10 ---


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Best Params: {'max_depth': 3, 'min_samples_split': 5, 'n_estimators': 100}
Best CV Score: 0.5015306122448979
Simulation 2 final values (Date: 2024-12-31):
  ML Strategy:           $26,206.13
  SPY Buy & Hold:        $33,957.37
  Equal Weight Buy & Hold: $41,947.88

--- Running Simulation 3 of 10 ---


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Best Params: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Best CV Score: 0.5066326530612245
Simulation 3 final values (Date: 2024-12-31):
  ML Strategy:           $14,199,812.76
  SPY Buy & Hold:        $33,957.37
  Equal Weight Buy & Hold: $36,743.50

--- Running Simulation 4 of 10 ---


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Best Params: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Best CV Score: 0.5061224489795918


[*********************100%***********************]  1 of 1 completed

Simulation 4 final values (Date: 2024-12-31):
  ML Strategy:           $23,698,728.71
  SPY Buy & Hold:        $33,957.37
  Equal Weight Buy & Hold: $38,056.63

--- Running Simulation 5 of 10 ---



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%*******

Best Params: {'max_depth': 5, 'min_samples_split': 5, 'n_estimators': 50}
Best CV Score: 0.5239795918367347


[*********************100%***********************]  1 of 1 completed

Simulation 5 final values (Date: 2024-12-31):
  ML Strategy:           $202,051.66
  SPY Buy & Hold:        $33,957.37
  Equal Weight Buy & Hold: $36,755.50

--- Running Simulation 6 of 10 ---



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%*******

Best Params: {'max_depth': 5, 'min_samples_split': 5, 'n_estimators': 50}
Best CV Score: 0.5265306122448979


[*********************100%***********************]  1 of 1 completed

Simulation 6 final values (Date: 2024-12-31):
  ML Strategy:           $136,726.64
  SPY Buy & Hold:        $33,957.37
  Equal Weight Buy & Hold: $41,645.96

--- Running Simulation 7 of 10 ---



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%*******

Best Params: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Best CV Score: 0.4959183673469387
Simulation 7 final values (Date: 2024-12-31):
  ML Strategy:           $15,810,855.77
  SPY Buy & Hold:        $33,957.37
  Equal Weight Buy & Hold: $41,920.60

--- Running Simulation 8 of 10 ---


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Best Params: {'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 100}
Best CV Score: 0.5209183673469387


[*********************100%***********************]  1 of 1 completed

Simulation 8 final values (Date: 2024-12-31):
  ML Strategy:           $91,142.47
  SPY Buy & Hold:        $33,957.37
  Equal Weight Buy & Hold: $42,041.49

--- Running Simulation 9 of 10 ---



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%*******

Best Params: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}
Best CV Score: 0.5301020408163265
Simulation 9 final values (Date: 2024-12-31):
  ML Strategy:           $12,801,151.27
  SPY Buy & Hold:        $33,957.37
  Equal Weight Buy & Hold: $40,660.87

--- Running Simulation 10 of 10 ---


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Best Params: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Best CV Score: 0.5035714285714286
Simulation 10 final values (Date: 2024-12-31):
  ML Strategy:           $6,855,093.99
  SPY Buy & Hold:        $33,957.37
  Equal Weight Buy & Hold: $28,685.95

Final Portfolio Statistics (over simulations):
  SPY Buy & Hold:                    $33,957.37

  Equal Weight Buy & Hold - Average: $37,843.00
  Equal Weight Buy & Hold - Median:  $39,358.75

  ML Strategy - Average:             $7,387,462.56
  ML Strategy - Median:              $3,528,572.83
  ML Strategy - Max:                 $23,698,728.71
  ML Strategy - Min:                 $26,206.13



## Sim 2
- Using the S&P 500 constituents as of the start date

In [5]:
# Date Range
start_date = "2015-01-01"
end_date   = "2025-01-01"

# Load Data
data_all = get_sp500_data_in_date_range(start_date, end_date, data_csv="data/SPY_500_data.csv", composition_csv="data/SPY_500_historical_stocks.csv")
data_all = data_all.loc[start_date:end_date].dropna(axis=1, how='any')

Data already downloaded. Returning existing data slice.


In [6]:
# SPY serves as the benchmark, so fail fast when its column is missing.
if "SPY" not in data_all.columns:
    raise ValueError("SPY not found in the saved data. Please ensure SPY is included in the composition.")

# Parameters
n_simulations, sample_size = 10, 15
start_date, end_date = "2015-01-01", "2025-01-01"
initial_capital = 10000.0

# Limit the data to the desired date range
data_all = data_all.loc[start_date:end_date]

# Tickers that may be sampled: every column except the SPY benchmark that
# holds at least one non-missing value.
available_tickers = [
    symbol
    for symbol in data_all.columns
    if symbol != "SPY" and data_all[symbol].notna().any()
]

In [7]:
# Containers to store the final portfolio values of each completed simulation.
final_strategy_values = []
final_spy_values      = []
final_equal_values    = []

# Full result frame of every simulation that produced results.
simulation_results = []

# Use SPY from the saved data as the benchmark. It is the same series for
# every run, so look it up once instead of on each iteration.
benchmark_series = data_all["SPY"]

for sim in range(n_simulations):
    print(f"\n=== Running Simulation {sim+1} of {n_simulations} ===")

    if len(available_tickers) < sample_size:
        print("Not enough tickers available for simulation.")
        break

    # Randomly sample companies (without replacement) and take a private copy
    # of their columns.
    sample_tickers = np.random.choice(available_tickers, size=sample_size, replace=False)
    sample_data = data_all[list(sample_tickers)].copy()

    print("Tickers selected for simulation:", sample_tickers)

    # Run the Strategy
    # Compute momentum and mean-reversion indicators
    momentum_df = compute_momentum(sample_data, window=63)   # 3-month window
    zscore_df   = compute_mean_reversion(sample_data, window=20)  # 20-day window

    # Generate momentum and mean-reversion signals
    momentum_signals = generate_signals_momentum(momentum_df, threshold=0.10)
    meanrev_signals  = generate_signals_meanreversion(zscore_df, z_entry=1.0)

    # Compute next-day return
    momentum_returns = compute_signal_returns(sample_data, momentum_signals)
    meanrev_returns  = compute_signal_returns(sample_data, meanrev_signals)

    # Build a label: 1 if momentum outperformed mean-reversion; 0 otherwise
    label = build_strategy_choice_label(momentum_returns, meanrev_returns)

    # Build the feature matrix for the strategy chooser
    features = build_feature_matrix(sample_data, momentum_df, zscore_df, vol_window=20)

    # Train the strategy chooser classifier.
    # Broad catch is deliberate: any training failure skips this sample only.
    try:
        clf = train_strategy_chooser(features, label)
    except Exception as e:
        print(f"Error training classifier in simulation {sim+1}: {e}")
        continue  # Skip this simulation if training fails

    # Generate final signals based on classifier predictions.
    # NOTE(review): predictions are made on the same `features` the classifier
    # was just trained on. Unless the helpers hold data out internally, the
    # strategy values recorded below are in-sample — TODO confirm.
    signals = generate_final_signal(clf, features, momentum_signals, meanrev_signals)

    # Apply a stop-loss rule
    final_signals = apply_stop_loss(signals, sample_data, stop_loss_pct=0.05)

    # Track strategy performance
    result_df = track_strategy_chosen_signals(sample_data, final_signals, benchmark_series, initial_capital=initial_capital)

    # An empty result frame has no last row: result_df.index[-1] would raise
    # IndexError and abort the remaining simulations. Skip it, as Sim 1 does,
    # and keep it out of simulation_results.
    if result_df.empty:
        print("No results available from tracking performance. Skipping simulation.")
        continue

    simulation_results.append(result_df)

    # Extract final portfolio values for each strategy
    final_date = result_df.index[-1]
    final_strategy = result_df.loc[final_date, "Strategy"]
    final_spy      = result_df.loc[final_date, "SPY_BuyHold"]
    final_equal    = result_df.loc[final_date, "EqualWeight_BuyHold"]

    final_strategy_values.append(final_strategy)
    final_spy_values.append(final_spy)
    final_equal_values.append(final_equal)

    print(f"Simulation {sim+1} final values on {final_date.date()}:")
    print(f"  ML Strategy:           ${final_strategy:,.2f}")
    print(f"  SPY Buy & Hold:        ${final_spy:,.2f}")
    print(f"  Equal Weight BuyHold:  ${final_equal:,.2f}")

# Overall Statistics
if not final_strategy_values:
    # No simulation recorded a final value, so there is nothing to aggregate.
    print("No simulation results to report overall statistics.")
else:
    # SPY benchmark: only the mean is reported.
    mean_spy = np.mean(final_spy_values)

    # Equal-weight buy & hold: mean and median of the final values.
    mean_equal, median_equal = np.mean(final_equal_values), np.median(final_equal_values)

    # ML strategy: mean, median and the extremes across simulations.
    mean_ml, median_ml = np.mean(final_strategy_values), np.median(final_strategy_values)
    max_ml, min_ml = np.max(final_strategy_values), np.min(final_strategy_values)

    # Assemble the report line by line; the empty strings are the blank
    # separator lines between the three portfolio sections.
    summary_lines = [
        "\n============================================",
        "Final Portfolio Statistics (over simulations):",
        f"  SPY Buy & Hold:                    ${mean_spy:,.2f}",
        "",
        f"  Equal Weight Buy & Hold - Average: ${mean_equal:,.2f}",
        f"  Equal Weight Buy & Hold - Median:  ${median_equal:,.2f}",
        "",
        f"  ML Strategy - Average:             ${mean_ml:,.2f}",
        f"  ML Strategy - Median:              ${median_ml:,.2f}",
        f"  ML Strategy - Max:                 ${max_ml:,.2f}",
        f"  ML Strategy - Min:                 ${min_ml:,.2f}",
        "============================================\n",
    ]
    print("\n".join(summary_lines))


=== Running Simulation 1 of 10 ===
Tickers selected for simulation: ['HSY' 'QCOM' 'XRX' 'HRL' 'MU' 'FIS' 'UNH' 'JWN' 'PNW' 'INTC' 'ITT' 'AFL'
 'ADSK' 'EOG' 'APH']
Best Params: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Best CV Score: 0.5076530612244898
Simulation 1 final values on 2024-12-31:
  ML Strategy:           $36,686,848.84
  SPY Buy & Hold:        $33,957.37
  Equal Weight BuyHold:  $29,474.57

=== Running Simulation 2 of 10 ===
Tickers selected for simulation: ['DHR' 'VMC' 'VIAV' 'WELL' 'PHM' 'CSCO' 'MAS' 'MRK' 'DOV' 'MET' 'STT'
 'NOC' 'FHN' 'FSLR' 'CLX']
Best Params: {'max_depth': 3, 'min_samples_split': 5, 'n_estimators': 50}
Best CV Score: 0.523469387755102
Simulation 2 final values on 2024-12-31:
  ML Strategy:           $26,090.96
  SPY Buy & Hold:        $33,957.37
  Equal Weight BuyHold:  $40,683.58

=== Running Simulation 3 of 10 ===
Tickers selected for simulation: ['DTE' 'NDAQ' 'TXN' 'PG' 'HAL' 'SO' 'EMR' 'FMC' 'FE' 'A' 'PSA' 'CHRW'
 'SYK' 'ADB