Introduction 

In [37]:
import yfinance as yf
import talib
import numpy as np 
import pandas as pd 
import vectorbt as vbt 
import warnings
from scipy import stats 
import matplotlib.pyplot as plt

In [38]:
#Download stock data from 2018 using yfinance

#Change these variable when needed
TICKER = "BTC-USD" #Ticker symbol (e.g. "QQQ", "SPY", "GLD")
START_DATE = "2018-01-01" #Start date (YYYY-MM-DD) format

#Download data from start date to today
stock_data = yf.download(TICKER, start=START_DATE, interval="1d")

#check if data was downloaded successfully (not empty)
if not stock_data.empty:
    print(f"Successfully downloaded {len(stock_data)} records for {TICKER} from {START_DATE} to today")
    print(f"Data range: {stock_data.index.min().date()} to {stock_data.index.max().date()}")
    print(stock_data.head())
else:
    print(f"Failed to download data for {TICKER} from yfinance")

#Display downloaded data
stock_data

[*********************100%***********************]  1 of 1 completed

Successfully downloaded 2923 records for BTC-USD from 2018-01-01 to today
Data range: 2018-01-01 to 2026-01-01
Price              Close          High           Low          Open  \
Ticker           BTC-USD       BTC-USD       BTC-USD       BTC-USD   
Date                                                                 
2018-01-01  13657.200195  14112.200195  13154.700195  14112.200195   
2018-01-02  14982.099609  15444.599609  13163.599609  13625.000000   
2018-01-03  15201.000000  15572.799805  14844.500000  14978.200195   
2018-01-04  15599.200195  15739.700195  14522.200195  15270.700195   
2018-01-05  17429.500000  17705.199219  15202.799805  15477.200195   

Price            Volume  
Ticker          BTC-USD  
Date                     
2018-01-01  10291200000  
2018-01-02  16846600192  
2018-01-03  16871900160  
2018-01-04  21783199744  
2018-01-05  23840899072  





Price,Close,High,Low,Open,Volume
Ticker,BTC-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2018-01-01,13657.200195,14112.200195,13154.700195,14112.200195,10291200000
2018-01-02,14982.099609,15444.599609,13163.599609,13625.000000,16846600192
2018-01-03,15201.000000,15572.799805,14844.500000,14978.200195,16871900160
2018-01-04,15599.200195,15739.700195,14522.200195,15270.700195,21783199744
2018-01-05,17429.500000,17705.199219,15202.799805,15477.200195,23840899072
...,...,...,...,...,...
2025-12-28,87835.835938,87986.890625,87394.953125,87799.343750,15156557929
2025-12-29,87138.140625,90299.156250,86717.914062,87835.789062,48411625849
2025-12-30,88430.132812,89297.937500,86735.546875,87134.351562,35586356225
2025-12-31,87508.828125,89080.289062,87130.562500,88429.585938,33830210616


In [39]:
#Technical analysis indicators using talib

#Make sure stock_data is available from previous cell
if "stock_data" not in locals():
    raise ValueError("stock_data is not defined. Please run the previous cell first.")

#EXTRACT OHLCV data 
# yfinance returns either:
# 1) MultiIndex columns when multiple tickers are downloaded, e.g. ("Close", "QQQ")
# 2) Single-level columns when a single ticker is downloaded, e.g. "Close"
#
# This block handles both cases and extracts OHLCV data
# as NumPy arrays (open_, high, low, close, volume),
# which are required inputs for TA-Lib indicator functions.

if isinstance(stock_data.columns, pd.MultiIndex):
    # MultiIndex case
    open_ = stock_data["Open", TICKER].values
    high = stock_data["High", TICKER].values
    low = stock_data["Low", TICKER].values
    close = stock_data["Close", TICKER].values
    volume = stock_data["Volume", TICKER].values
else:
    # Single-level case
    open_ = stock_data["Open"].values
    high = stock_data["High"].values
    low = stock_data["Low"].values
    close = stock_data["Close"].values
    volume = stock_data["Volume"].values

    print(f"Calculating technical indicators using TA-Lib for {TICKER} from {START_DATE} to today")

#first calculate simple moving averages (SMA)
#defines the trend
sma_20 = talib.SMA(close, timeperiod=20)
sma_50 = talib.SMA(close, timeperiod=50)

#Exponential moving averages (EMA)
#good for signal timing, time the trend
ema_12 = talib.EMA(close, timeperiod=12)
ema_26 = talib.EMA(close, timeperiod=26)

#Moving average convergence divergence (MACD) 
#shows whether momentum is increasing or decreasing by comparing fast and slow trends
macd, macd_signal, macd_hist = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)

#RSI (Relative Strength Index)
#RSI shows whether recent momentum is stretched, not whether price is cheap or expensive
rsi = talib.RSI(close, timeperiod=14)

#Stochastic RSI
#shows how extreme RSI is relative to its recent range, making it a fast timing tool—not a trend indicator
stochrsi_k, stochrsi_d = talib.STOCHRSI(close, timeperiod=14, fastk_period=3, fastd_period=3, fastd_matype=0)

#VWAP (Volume Weighted Average Price)
#the volume-weighted average price up to the current point
#where high-volume trades influence the average more than low-volume trades.
typical_price = (high + low + close) / 3
price_volume = typical_price * volume
cumulative_price_volume = np.cumsum(price_volume)
cumulative_volume = np.cumsum(volume)
vwap = cumulative_price_volume / cumulative_volume

#schaff trend cycle (STC)
#STC turns MACD into a fast, normalized momentum cycle that highlights trend accelerations and slowdowns earlier than MACD alone
cycle_period = 10

# Smooth MACD to reduce noise
macd_cycle = talib.EMA(macd, timeperiod=cycle_period)
macd_smooth = talib.EMA(macd_cycle, timeperiod=cycle_period)

# Find recent high and low of the smoothed MACD
highest_macd = talib.MAX(macd_smooth, timeperiod=cycle_period)
lowest_macd = talib.MIN(macd_smooth, timeperiod=cycle_period)

# Scale MACD into a 0–100 range 
stc_k = 100 * (macd_smooth - lowest_macd) / (highest_macd - lowest_macd) #stochastic k

# Smooth the cycle for cleaner signals
stc_d = talib.EMA(stc_k, timeperiod=3) 

# Unpack Stochastic RSI (returns fastk and fastd)

indicators_df = pd.DataFrame({
    "Date": stock_data.index,
    "Close": close,
    "SMA_20": sma_20,
    "SMA_50": sma_50,
    "EMA_12": ema_12,
    "EMA_26": ema_26,
    "MACD": macd,
    "MACD_Signal": macd_signal,
    "MACD_Hist": macd_hist,
    "RSI": rsi,
    "StochRSI_K": stochrsi_k,
    "StochRSI_D": stochrsi_d,
    "VWAP": vwap,
    "STC_K": stc_k,
    "STC_D": stc_d
})

print("All technical indicators calculated!")
print(f"Data shape: {indicators_df.shape}")
indicators_df.tail(5)

All technical indicators calculated!
Data shape: (2923, 15)


Unnamed: 0,Date,Close,SMA_20,SMA_50,EMA_12,EMA_26,MACD,MACD_Signal,MACD_Hist,RSI,StochRSI_K,StochRSI_D,VWAP,STC_K,STC_D
2918,2025-12-28,87835.835938,88529.880078,90842.188125,87901.028962,89042.928767,-1141.899805,-1378.713216,236.813411,44.75406,100.0,75.054344,50828.602557,100.0,99.999996
2919,2025-12-29,87138.140625,88252.201563,90490.558125,87783.661526,88901.833349,-1118.171823,-1326.604938,208.433114,42.401671,0.0,66.666667,50849.029233,100.0,99.999998
2920,2025-12-30,88430.132812,88072.660937,90139.228906,87883.118647,88866.892569,-983.773922,-1258.038735,274.264813,47.866453,100.0,66.666667,50864.07161,100.0,99.999999
2921,2025-12-31,87508.828125,87822.535547,89829.456094,87825.53549,88766.295203,-940.759713,-1194.58293,253.823217,44.615736,40.515166,46.838389,50878.265399,100.0,100.0
2922,2026-01-01,87716.195312,87694.824609,89550.51625,87808.713924,88688.510025,-879.796102,-1131.625564,251.829463,45.512679,27.592157,56.035774,50891.446071,100.0,100.0


In [40]:
#prepare price series 
#we need to split the data into training and testing data (in sample and out of sample)
#for robustness our ratio of IS to OOS will be 60/40 or 70/30
#suppress warnings from appearing in the output
warnings.filterwarnings("ignore", message="Degrees of freedom <= 0 for slice", category=RuntimeWarning)
warnings.filterwarnings("ignore", message="invalid value encountered in scalar divide", category=RuntimeWarning)

# Function to extract the 'Close' price series from a DataFrame
# This function handles different DataFrame column structures (MultiIndex vs single-level)
# Expect stock_data and TICKER already exist from previous cells
def select_close_series(df, ticker):
    """
    Extracts the 'Close' price column from a DataFrame, handling both MultiIndex and single-level columns.
    
    Parameters:
    -----------
    df : pandas.DataFrame
        The stock data DataFrame (from yfinance)
    ticker : str
        The ticker symbol (e.g., "BTC-USD", "QQQ")
    
    Returns:
    --------
    pandas.Series
        The Close price series as a float Series
    """
    # Check if DataFrame has MultiIndex columns (happens when downloading multiple tickers)
    if isinstance(df.columns, pd.MultiIndex):
        # Try to select the Close column for the specific ticker
        if ("Close", ticker) in df.columns:
            s = df[("Close", ticker)]
        else:
            # If ticker-specific column not found, search for any column containing 'Close'
            cols = [c for c in df.columns if 'Close' in str(c)]
            if not cols:
                raise KeyError("Close not found")
            s = df[cols[0]]
    else:
        # Single-level columns case - search for any column containing 'Close'
        cols = [c for c in df.columns if 'Close' in str(c)]
        if not cols:
            raise KeyError("Close not found")
        s = df[cols[0]]
    
    # Convert to float and squeeze to ensure it's a 1D Series (remove any extra dimensions)
    return s.astype(float).squeeze()

# Extract the Close price series using our function
# This gives us a clean pandas Series with just the closing prices
close = select_close_series(stock_data, TICKER)

# Rename the series to 'price' for clarity in subsequent analysis
close.name = 'price'

# Simple split: Divide data into training and validation sets
# TRAIN_RATIO determines what percentage of data is used for training
# 0.6 means 60% training, 40% validation
TRAIN_RATIO = 0.6

# Calculate the index where we'll split the data
# int() truncates to ensure we get a valid integer index
split_idx = int(len(close) * TRAIN_RATIO)

# Create training set: all data from the beginning up to split_idx
# .copy() ensures we get an independent copy, not a view
train_close = close.iloc[:split_idx].copy()

# Create validation set: all data from split_idx to the end
# This is our out-of-sample data for testing model performance
val_close = close.iloc[split_idx:].copy()

# Print the date ranges for both datasets to verify the split
# This helps us understand what time periods we're training and validating on
print(f"Data ready: train={train_close.index[0].date()} → {train_close.index[-1].date()} | val={val_close.index[0].date()} → {val_close.index[-1].date()}")

Data ready: train=2018-01-01 → 2022-10-19 | val=2022-10-20 → 2026-01-01


## TRIPLE EMA CROSSOVER GRID SEARCH - TRAINING SET

This section performs a comprehensive grid search optimization for the **Triple EMA Crossover Strategy** using only the **training data**.

The goal is to find the optimal EMA1/EMA2/EMA3 combination that maximizes the Sharpe ratio on OOS data.

**Strategy Logic:** Buy when any EMA crosses above another (EMA1 > EMA2, EMA1 > EMA3, or EMA2 > EMA3). Sell when any EMA crosses below another.

In [41]:
# Define Parameter Ranges for Triple EMA Crossover
# EMA periods for triple crossover strategy
ema1_periods = list(range(4, 40, 1))   # Fast EMA (shortest-term)
ema2_periods = list(range(50, 90, 2))  # Medium EMA (medium-term)
ema3_periods = list(range(120, 250, 3)) # Slow EMA (longest-term)

print("EMA1 Periods (fast - shortest-term):")
for i, period in enumerate(ema1_periods, 1):
    print(f" {i}. {period} periods")

print("EMA2 Periods (medium - medium-term):")
for i, period in enumerate(ema2_periods, 1):
    print(f" {i}. {period} periods")

print("EMA3 Periods (slow - longest-term):")
for i, period in enumerate(ema3_periods, 1):
    print(f" {i}. {period} periods")

# Generate all valid combinations (ema1 < ema2 < ema3)
ema_combinations = []
for ema1 in ema1_periods:
    for ema2 in ema2_periods:
        for ema3 in ema3_periods:
            if ema1 < ema2 < ema3:
                ema_combinations.append((ema1, ema2, ema3))

print(f"Generated {len(ema_combinations)} valid Triple EMA combinations")
print("\n First 10 combinations preview:")
for i, (ema1, ema2, ema3) in enumerate(ema_combinations[:10], 1):
    print(f" {i:2d}. EMA1: {ema1:2d} | EMA2: {ema2:2d} | EMA3: {ema3:2d}")
if len(ema_combinations) > 10:
    print(f" ... and {len(ema_combinations) - 10} more combinations")

print("Ready to test all combinations on training data!")

EMA1 Periods (fast - shortest-term):
 1. 4 periods
 2. 5 periods
 3. 6 periods
 4. 7 periods
 5. 8 periods
 6. 9 periods
 7. 10 periods
 8. 11 periods
 9. 12 periods
 10. 13 periods
 11. 14 periods
 12. 15 periods
 13. 16 periods
 14. 17 periods
 15. 18 periods
 16. 19 periods
 17. 20 periods
 18. 21 periods
 19. 22 periods
 20. 23 periods
 21. 24 periods
 22. 25 periods
 23. 26 periods
 24. 27 periods
 25. 28 periods
 26. 29 periods
 27. 30 periods
 28. 31 periods
 29. 32 periods
 30. 33 periods
 31. 34 periods
 32. 35 periods
 33. 36 periods
 34. 37 periods
 35. 38 periods
 36. 39 periods
EMA2 Periods (medium - medium-term):
 1. 50 periods
 2. 52 periods
 3. 54 periods
 4. 56 periods
 5. 58 periods
 6. 60 periods
 7. 62 periods
 8. 64 periods
 9. 66 periods
 10. 68 periods
 11. 70 periods
 12. 72 periods
 13. 74 periods
 14. 76 periods
 15. 78 periods
 16. 80 periods
 17. 82 periods
 18. 84 periods
 19. 86 periods
 20. 88 periods
EMA3 Periods (slow - longest-term):
 1. 120 periods
 2

In [42]:
# Create Triple EMA Results Collection System

# Create empty list to store all backtest results
grid_search_results = []

print("Triple EMA Results Collection System Initialized")
# Note: 'ema_combinations' variable must exist from the previous step
print(f" - Will test {len(ema_combinations)} Triple EMA combinations")
print(" - Results will be stored in 'grid_search_results' list")

# Define what metrics we will collect (All TradingView-style metrics)
metrics_to_collect = [
    # Strategy Parameters
    "ema1_period",
    "ema2_period",
    "ema3_period",

    # Return Metrics
    "total_return",         # The total percentage gain or loss over the entire backtest period
    "annualized_return",    # The geometric average return calculated on a yearly basis (CAGR)
    "total_profit",         # The absolute monetary value gained (Net Profit)

    # Risk-Adjusted Return Metrics
    "sharpe_ratio",         # Return divided by volatility. Measures how much return you get per unit of risk
    "sortino_ratio",        # Similar to Sharpe, but only penalizes "bad" (downside) volatility
    "calmar_ratio",         # Annualized Return divided by Maximum Drawdown. Measures return relative to the worst run of trades
    "omega_ratio",          # Probability-weighted ratio of gains versus losses (captures the entire distribution)
    "information_ratio",    # Active return (vs benchmark) divided by tracking error (volatility of active return)
    "tail_ratio",           # Ratio of the 95th percentile of gains to the 95th percentile of losses (measures outliers)
    "deflated_sharpe_ratio",# Adjusts the Sharpe Ratio to account for the probability of finding a good result by luck (multiple testing bias)


    # Risk Metrics
    "max_drawdown",         # The largest percentage drop from a peak to a trough (worst-case scenario).
    "volatility",           # Standard deviation of returns; measures how wildly prices swing.
    "ulcer_index",          # Measures the depth and duration of drawdowns (stress indicator).

    # Trade Performance Metrics
    "win_rate",             # Percentage of trades that were profitable.
    "total_trades",         # The total count of trades executed during the backtest.
    "avg_trade_duration",   # The average amount of time a trade is held open.
    "expectancy",           # The average amount of money you can expect to make (or lose) per trade.
    "profit_factor",        # Gross Profit divided by Gross Loss. (>1 is profitable, >1.5 is good).
    "sqn",                  # System Quality Number; measures the relationship between expectancy and the standard deviation of results.

    # Win/Loss Analysis
    "payoff_ratio",         # Average Win Amount divided by Average Loss Amount.
    "largest_win",          # The single biggest profit from one trade.
    "largest_loss",         # The single biggest loss from one trade.
    "avg_win_amount",       # The average profit of all winning trades.
    "avg_loss_amount",      # The average loss of all losing trades.
    "winning_streak",       # The maximum number of consecutive winning trades.
    "losing_streak",        # The maximum number of consecutive losing trades.

    # Additional Ratios
    "recovery_factor",      # Net Profit divided by Max Drawdown (how fast the strategy recovers from losses).
    "gain_to_pain_ratio",   # Sum of all returns divided by the absolute value of all negative returns.
    "serenity_index"        # A risk-adjusted return metric that penalizes volatility and drawdowns heavily.
]

print("Metrics to collect for each Triple EMA combination:")
for i, metric in enumerate(metrics_to_collect, 1):
    print(f" {i}. {metric.replace('_', ' ').title()}")

print("Ready to start the Triple EMA grid search!")




Triple EMA Results Collection System Initialized
 - Will test 31680 Triple EMA combinations
 - Results will be stored in 'grid_search_results' list
Metrics to collect for each Triple EMA combination:
 1. Ema1 Period
 2. Ema2 Period
 3. Ema3 Period
 4. Total Return
 5. Annualized Return
 6. Total Profit
 7. Sharpe Ratio
 8. Sortino Ratio
 9. Calmar Ratio
 10. Omega Ratio
 11. Information Ratio
 12. Tail Ratio
 13. Deflated Sharpe Ratio
 14. Max Drawdown
 15. Volatility
 16. Ulcer Index
 17. Win Rate
 18. Total Trades
 19. Avg Trade Duration
 20. Expectancy
 21. Profit Factor
 22. Sqn
 23. Payoff Ratio
 24. Largest Win
 25. Largest Loss
 26. Avg Win Amount
 27. Avg Loss Amount
 28. Winning Streak
 29. Losing Streak
 30. Recovery Factor
 31. Gain To Pain Ratio
 32. Serenity Index
Ready to start the Triple EMA grid search!


In [43]:
# TRIPLE EMA CROSSOVER GRID SEARCH ON TRAINING DATA

print("INITIATING TRIPLE EMA CROSSOVER GRID SEARCH OPTIMIZATION")
print("=" * 70)
print(f"Testing Strategy: Triple Exponential Moving Average Crossover")
print(f"Training Period: {train_close.index[0].date()} -> {train_close.index[-1].date()}")
print(f"Initial Capital: $100,000")
print(f"Transaction Costs: 0.05% per trade (fees + slippage)")
print(f"Optimization Metric: Sharpe Ratio (risk-adjusted returns)")
print("=" * 70)

# Ensure results container exists
if 'grid_search_results' not in locals():
    grid_search_results = []

total_combinations = len(ema_combinations)
successful_tests = 0
failed_tests = 0

print(f"Starting grid search across {total_combinations} parameter combos... \n")

# Iterate through every combination of EMA periods we generated earlier
# 'enumerate(..., 1)' starts the counter 'i' at 1 instead of 0
for i, (ema1_period, ema2_period, ema3_period) in enumerate(ema_combinations, 1):
    try:
        # --- 1. INDICATORS ---
        # Calculate the 3 EMA lines for the current combination using VectorBT
        # ewm=True ensures it calculates an Exponential Moving Average (not Simple)
        ema1 = vbt.MA.run(train_close, ema1_period, ewm=True)
        ema2 = vbt.MA.run(train_close, ema2_period, ewm=True)
        ema3 = vbt.MA.run(train_close, ema3_period, ewm=True)

        # --- 2. SIGNALS (LOGIC) ---
        # BUY LOGIC (Entry):
        # We buy if ANY of the faster EMAs cross above a slower EMA.
        # The '|' operator means "OR".
        entries_raw = (
            ema1.ma_crossed_above(ema2.ma) |  # Fast crosses above Medium
            ema1.ma_crossed_above(ema3.ma) |  # Fast crosses above Slow
            ema2.ma_crossed_above(ema3.ma)    # Medium crosses above Slow
        )

        # SELL LOGIC (Exit):
        # We sell if ANY of the faster EMAs cross below a slower EMA.
        exits_raw = (
            ema1.ma_crossed_below(ema2.ma) |  # Fast crosses below Medium
            ema1.ma_crossed_below(ema3.ma) |  # Fast crosses below Slow
            ema2.ma_crossed_below(ema3.ma)    # Medium crosses below Slow
        )

        # --- 3. DATA CLEANING ---
        # Clean up the signals to ensure they are simple 1D Boolean Series (True/False)
        # VectorBT sometimes returns complex structures; this forces them into a standard list format
        # compatible with the portfolio simulation engine.
        entries = pd.Series(np.asarray(entries_raw).ravel(), index=train_close.index, dtype=bool)
        exits   = pd.Series(np.asarray(exits_raw).ravel(),   index=train_close.index, dtype=bool)

        # --- 4. BACKTEST SIMULATION ---
        # Run the portfolio simulation using vectorbt's engine
        portfolio = vbt.Portfolio.from_signals(
            
            # The Data
            close=train_close,    #The price array used to execute trades. Price to pay
            
            # The Signals
            entries=entries,      # A list of True/False values. True = "Buy/Open a position right now".
            exits=exits,          # A list of True/False values. True = "Sell/Close the position right now".
            
            # The Money
            init_cash=100_000,    # We start the simulation with $100k virtual cash.
            
            # Realism Parameters (Crucial!)
            # 0.0005 = 0.05%. We pay this fee on EVERY trade (buy and sell). 
            # add this! high-frequency strategies will look fake/profitable when they aren't.
            fees=0.0005,
            
            # 0.0005 = 0.05%. This simulates "bad luck" in execution.
            # accounts for the spread and market movement.
            slippage=0.0005,
            
            # Time Settings
            # "D" tells vectorbt that our data is Daily.
            # This is required to correctly calculate Annualized Return and Sharpe Ratio.
            freq="D"
        )

        # --- 5. CALCULATE PORTFOLIO METRICS ---
        # We wrap values in float() to ensure they are standard Python numbers, not complex arrays.
        
        # Basic Performance Metrics
        total_return = float(portfolio.total_return())
        
        # 'freq="D"' is CRITICAL here. It tells the system to scale the return to a yearly basis (Annualized).
        annualized_return = float(portfolio.annualized_return(freq='D'))
        
        # Max Drawdown: The worst percentage drop from a peak. (e.g., -0.20 means a 20% crash)
        max_drawdown = float(portfolio.max_drawdown())
        
        # Volatility: How 'bumpy' the ride is. Higher = riskier.
        volatility = float(portfolio.annualized_volatility(freq='D'))
        
        # Sharpe Ratio: Reward per unit of risk. > 1.0 is generally good.
        sharpe_ratio = float(portfolio.sharpe_ratio(freq='D'))
        
        # Sortino Ratio: Like Sharpe, but only penalizes 'bad' volatility (downside crashes).
        sortino_ratio = float(portfolio.sortino_ratio(freq='D'))

        # --- ADVANCED METRICS (Safe Mode) ---
        # These complex metrics can sometimes crash (e.g., if there are 0 trades or flat returns).
        # We use try/except blocks to set them to "NaN" (Not a Number) instead of crashing the whole script.

        try:
            # Information Ratio: Returns relative to a benchmark. 
            information_ratio = float(portfolio.information_ratio(freq='D'))
        except Exception:
            information_ratio = np.nan

        try:
            # Tail Ratio: Measures extreme wins vs extreme losses (outlier analysis).
            tail_ratio = float(portfolio.tail_ratio(freq='D'))
        except Exception:
            tail_ratio = np.nan

        try:
            # Deflated Sharpe: Adjusts Sharpe for "p-hacking" (finding a strategy by luck/over-testing).
            deflated_sharpe_ratio = float(portfolio.deflated_sharpe_ratio(freq='D'))
        except Exception:
            deflated_sharpe_ratio = np.nan

        # --- ULCER INDEX CALCULATION ---
        # The Ulcer Index measures the "stress" of holding an asset.
        # It penalizes both the DEPTH of a crash and how LONG it takes to recover.
        # Since VectorBT might not have a direct function for this, we calculate it manually:
        
        returns = portfolio.returns()
        cum = (1 + returns).cumprod()      # Calculate cumulative growth curve
        peak = cum.cummax()                # Track the "all-time high" at every point
        dd = (cum - peak) / peak           # Calculate drawdown percentage
        
        # Formula: Square Root of the Mean of Squared Drawdowns
        ulcer_index = float(np.sqrt((dd.pow(2)).mean())) if len(dd) else np.nan

        # --- 6. TRADE METRICS ---
        # Get the list of all individual trades executed
        trades = portfolio.trades
        
        # Count total number of completed trades
        total_trades = int(len(trades))
        
        # Initialize these variables with placeholders (NaN or 0.0).
        # We will calculate the actual values in the next block.
        win_rate_pct = np.nan
        profit_factor = np.nan
        expectancy = 0.0
        avg_win_amount = 0.0
        avg_loss_amount = 0.0
        # --- 7. WIN/LOSS ANALYSIS ---
        # We only run this logic if the strategy actually made trades. 
        # If total_trades is 0, we skip this to avoid "Division by Zero" errors.
        if total_trades > 0:
    
         # Extract the returns (profit/loss %) for every individual trade.
            # We use 'hasattr' to check if the data is a Pandas Series or a Numpy array 
            # and handle it correctly to prevent format errors.
            tr = trades.returns.values if hasattr(trades.returns, 'values') else np.array(trades.returns)

            # Double check that the returns array isn't empty
            if tr.size > 0:
                # Separate the trades into two piles: Winners (Positive) and Losers (Negative)
                pos = tr[tr > 0]
                neg = tr[tr < 0]

                # Win Rate: The percentage of trades that were profitable.
             # Formula: (Count of Winners / Total Count of Trades) * 100
                win_rate_pct = (len(pos) / len(tr)) * 100.0

                # Calculate Total Gross Profit (Sum of all wins)
                gains = pos.sum() if len(pos) else 0.0

                # Calculate Total Gross Loss (Sum of all losses, absolute value)
                losses = abs(neg.sum()) if len(neg) else 0.0
        
                # Profit Factor: The "Gold Standard" metric for strategy robustness.
                # Formula: Gross Profit / Gross Loss. 
                # > 1.5 is usually good. > 3.0 is excellent.
                # If we have 0 losses (perfect strategy), we set it to Infinity (np.inf).
                profit_factor = (gains / losses) if losses > 0 else np.inf

                # Expectancy: The average percent return you can expect per trade.
                expectancy = float(tr.mean())

                # Average Win vs Average Loss
                # This tells us the "Payoff Ratio" later. Ideally, Avg Win > Avg Loss.
                avg_win_amount = float(pos.mean()) if len(pos) else 0.0
                avg_loss_amount = float(abs(neg.mean())) if len(neg) else 0.0

        # --- 7. FREQUENCY & STABILITY ANALYSIS ---
        
        # Calculate the total duration of the backtest in years.
        # We subtract the last date from the first date to get days, then divide by 365.25.
        # "1e-9" is a safety hack: if the data is empty, we divide by this tiny number 
        # instead of 0 to avoid a "ZeroDivisionError" crash.
        years = max((train_close.index[-1] - train_close.index[0]).days / 365.25, 1e-9)
        
        # Trades Per Year: This tells us how active the strategy is.
        trades_per_year = total_trades / years

        # --- 8. QUALITY FILTER ("The Soft Skip") ---
        # This is a critical step for optimization speed and quality.
        # If a strategy trades less than 2 times a year, it is not statistically significant.
        # We "continue" (skip) immediately, ignoring this combination and moving to the next one.
        if trades_per_year < 2:
            continue

        # --- 9. ADVANCED RATIOS ---
        
        # Payoff Ratio: (Avg Win / Avg Loss).
        # Measures if your wins are big enough to cover your losses.
        # We check "if avg_loss_amount not in (0.0, np.nan)" to avoid dividing by zero.
        payoff_ratio = (avg_win_amount / avg_loss_amount) if avg_loss_amount not in (0.0, np.nan) else np.inf

        # Streaks: Consecutive wins or losses.
        # This helps identify "psychological pain" (e.g., losing 10 times in a row).
        try:
            # vectorbt has built-in functions for this, but they crash if there are 0 trades.
            winning_streak = int(trades.winning_streak())
            losing_streak = int(trades.losing_streak())
        except Exception:
            # If the calculation fails, just set them to "Not a Number" (NaN).
            winning_streak = np.nan
            losing_streak = np.nan
        
        # --- 10. STORE RESULTS ---
        # We append all the calculated metrics for this specific combination into our main list.
        # This builds a "row" of data that we will later turn into DF
        # We use a Dictionary format: {"Column Name": Value}
        grid_search_results.append({
            "ema1_period": ema1_period,
            "ema2_period": ema2_period,
            "ema3_period": ema3_period,
            "total_return": total_return,
            "annualized_return": annualized_return,
            "max_drawdown": max_drawdown,
            "volatility": volatility,
            "sharpe_ratio": sharpe_ratio,
            "sortino_ratio": sortino_ratio,
            "information_ratio": information_ratio,
            "tail_ratio": tail_ratio,
            "deflated_sharpe_ratio": deflated_sharpe_ratio,
            "ulcer_index": ulcer_index,
            "total_trades": total_trades,
            "win_rate": win_rate_pct,
            "profit_factor": profit_factor,
            "expectancy": expectancy,
            "avg_win_amount": avg_win_amount,
            "avg_loss_amount": avg_loss_amount,
            "payoff_ratio": payoff_ratio,
            "winning_streak": winning_streak,
            "losing_streak": losing_streak,
            "trades_per_year": trades_per_year
        })

        # Increment our success counter so we can track valid tests
        successful_tests += 1

        #Get progress (to avoid staring at timer worried)
        if i % 10 == 0 or i == total_combinations:
            progress_pct = (i / total_combinations) * 100
            print(f"Progress: {i}/{total_combinations} ({progress_pct:1f}%)")
            print(f"Successful: {successful_tests} | Failed: {failed_tests}")
            print(f"    Latest: EMA({ema1_period},{ema2_period},{ema3_period}) -> Sharpe: {sharpe_ratio:.1f}, Return: {total_return:.1%}")
            print(f"    Trades: {total_trades}, Win Rate: {win_rate_pct:.1f}%, Max DD: {max_drawdown:.1%}\n")

    # --- 13. ERROR HANDLING ---
    # This block runs ONLY if something crashes inside the "try" block above.
    except Exception as e:
        failed_tests += 1
        
        # We still want to see progress updates even if things are failing.
        # This checks if we are at a "milestone" (every 10th item) or the very last item.
        if i % 10 == 0 or i == total_combinations:
            print(f"Error with EMA({ema1_period},{ema2_period},{ema3_period}): {str(e)[:80]}...")

# --- 14. FINAL SUMMARY ---
# This code runs once the loop has finished checking ALL combinations.
print("\nGRID SEARCH COMPLETED!")

print(f"Total combinations attempted: {total_combinations}")
print(f"Successfully completed: {successful_tests}")
print(f"Failed: {failed_tests}")

# Calculate success rate safely (avoid division by zero if total_combinations is 0)
if total_combinations > 0:
    success_rate = (successful_tests / total_combinations) * 100
    print(f"Success rate: {success_rate:.1f}%")

print("\nResults stored in 'grid_search_results'")


#we get 47.3% success the rest being under desired minimum trades of 2/year



INITIATING TRIPLE EMA CROSSOVER GRID SEARCH OPTIMIZATION
Testing Strategy: Triple Exponential Moving Average Crossover
Training Period: 2018-01-01 -> 2022-10-19
Initial Capital: $100,000
Transaction Costs: 0.05% per trade (fees + slippage)
Optimization Metric: Sharpe Ratio (risk-adjusted returns)
Starting grid search across 31680 parameter combos... 

Progress: 10/31680 (0.031566%)
Successful: 10 | Failed: 0
    Latest: EMA(4,50,147) -> Sharpe: 1.0, Return: 437.6%
    Trades: 30, Win Rate: 33.3%, Max DD: -49.6%

Progress: 20/31680 (0.063131%)
Successful: 20 | Failed: 0
    Latest: EMA(4,50,177) -> Sharpe: 1.1, Return: 553.1%
    Trades: 27, Win Rate: 29.6%, Max DD: -49.6%

Progress: 30/31680 (0.094697%)
Successful: 30 | Failed: 0
    Latest: EMA(4,50,207) -> Sharpe: 1.0, Return: 437.7%
    Trades: 28, Win Rate: 28.6%, Max DD: -52.9%

Progress: 40/31680 (0.126263%)
Successful: 40 | Failed: 0
    Latest: EMA(4,50,237) -> Sharpe: 1.0, Return: 413.9%
    Trades: 29, Win Rate: 27.6%, Max DD

In [44]:
# --- ANALYZE TRIPLE EMA GRID SEARCH RESULTS ---

# Convert the list of results (dictionaries) into a Pandas DataFrame.
results_df = pd.DataFrame(grid_search_results)
print(results_df.head())

print("Grid Search Results Analysis")
print("=" * 50)
print(f"Total combinations tested: {len(results_df)}")
print(f"Results shape: {results_df.shape}")

# --- COMPREHENSIVE STATISTICS ---
print("\nComprehensive Performance Statistics:")
print("-" * 50)

#Return Metrics
# We use .max() to find the single highest number in the column.
# We use .mean() to see how the strategy performs "on average" across all settings.
print("Return Metrics:")
print(f"   Best Total Return: {results_df['total_return'].max():.2%}")
print(f"   Average Total Return: {results_df['total_return'].mean():.2%}")
print(f"   Best Annualized Return: {results_df['annualized_return'].max():.2%}")

#Risk-Adjusted Metrics
print("\nRisk-Adjusted Metrics:")
print(f"   Best Sharpe Ratio: {results_df['sharpe_ratio'].max():.3f}")
print(f"   Best Sortino Ratio: {results_df['sortino_ratio'].max():.3f}")

# --- RISK METRICS ---
print("Risk Metrics:")

# We look at the 'mean' to see if the strategy family is generally risky, 
# and 'min'/'max' to see the extreme outliers.
print(f"   Average Max Drawdown: {results_df['max_drawdown'].mean():.2%}")

# Note: Since drawdowns are usually negative numbers (e.g., -0.20), .min() actually finds 
# the DEEPEST (worst) crash. .max() finds the smallest (best) drawdown.
# This line shows the worst-case scenario across all combinations.
print(f"   Best Max Drawdown: {results_df['max_drawdown'].min():.2%}")

print(f"   Average Volatility: {results_df['volatility'].mean():.2%}")

# --- TRADE PERFORMANCE ---
print("Trade Performance:")

# Win Rate is stored as a float 0-100 (e.g., 55.5), so we use ':.1f}%' formatting.
print(f"   Best Win Rate: {results_df['win_rate'].max():.1f}%") 
print(f"   Average Win Rate: {results_df['win_rate'].mean():.1f}%")

# Profit Factor: Gross Profit / Gross Loss. Higher is better.
print(f"   Best Profit Factor: {results_df['profit_factor'].max():.2f}")

# This gives us a range: Did strategies trade 5 times or 500 times?
print(f"   Total Trades Range: {results_df['total_trades'].min():.0f} - {results_df['total_trades'].max():.0f}")

# --- ADDITIONAL METRICS ---
print("Additional Metrics:")

# Expectancy: The average return per trade.
print(f"   Best Expectancy: {results_df['expectancy'].max():.4f}")

# How active is the strategy on average?
print(f"   Average Trades per Year: {results_df['trades_per_year'].mean():.1f}")

# --- FIND THE WINNER ---
# This is the most critical step.
# .idxmax() scans the 'sharpe_ratio' column and returns the INDEX (Row ID) of the highest value.
# .loc[] then grabs that entire row of data so we can see the parameters.
best_result = results_df.loc[results_df['sharpe_ratio'].idxmax()]

print("BEST STRATEGY (by Sharpe Ratio)")
print("-" * 50)
print(f"EMA1 Period (Fast): {int(best_result['ema1_period'])}")
print(f"EMA2 Period (Medium): {int(best_result['ema2_period'])}")
print(f"EMA3 Period (Slow): {int(best_result['ema3_period'])}")
print(f"Total Return: {best_result['total_return']:.2%}")
print(f"Sharpe Ratio: {best_result['sharpe_ratio']:.3f}")
print(f"Max Drawdown: {best_result['max_drawdown']:.2%}")
print(f"Trades per year: {best_result['trades_per_year']}")

# We use .1f here because Win Rate is stored as 0-100 (e.g., 55.5), not 0.55
print(f"Win Rate: {best_result['win_rate']:.1f}%") 
print(f"Total Trades: {int(best_result['total_trades'])}")

print("\nTop 5 Best Performers:")
# Find the 5 rows with the highest Sharpe Ratio
top_5 = results_df.nlargest(5, "sharpe_ratio")

# Iterate through the top 5 to print a nice leaderboard
# .iterrows() lets us loop through the DataFrame row by row
for i, (_, row) in enumerate(top_5.iterrows(), 1):
    print(f"{i}. EMA({int(row['ema1_period'])},{int(row['ema2_period'])},{int(row['ema3_period'])}) -> Sharpe: {row['sharpe_ratio']:.3f}, Return: {row['total_return']:.1%}")

print("\nAnalysis complete! Best strategy identified.")

   ema1_period  ema2_period  ema3_period  total_return  annualized_return  \
0            4           50          120      4.144487           0.406409   
1            4           50          123      4.144487           0.406409   
2            4           50          126      4.183755           0.408638   
3            4           50          129      4.154241           0.406964   
4            4           50          132      4.305956           0.415488   

   max_drawdown  volatility  sharpe_ratio  sortino_ratio  information_ratio  \
0     -0.510097    0.474649      0.954667       1.458823           0.009553   
1     -0.510097    0.474649      0.954667       1.458823           0.009553   
2     -0.510097    0.474638      0.958019       1.463961           0.009698   
3     -0.510097    0.474646      0.955503       1.460105           0.009589   
4     -0.510097    0.474946      0.967926       1.479903           0.010162   

   ...  total_trades   win_rate  profit_factor  expectancy  av

In [None]:
#COMPARE WITH BENCHMARK: BEST STRATEGY vs. MARKET (Buy and hold) using a T-test

if results_df.empty:
    print("No results to compare to benchmark")
else:
    #retrive the best strategy (highest sharpe here)
    best = results_df.loc[results_df["sharpe_ratio"].idxmax()]
    ema1, ema2, ema3 = int(best["ema1_period"]), int(best["ema2_period"]), int(best["ema3_period"])

print(f"BENCHMARK ANALYSIS: Best EMA({ema1},{ema2},{ema3})")

#Recreate the Best strategy portfolio in order to get the return to then conduct a t-test
ema1_na
    

SyntaxError: f-string: missing conversion character (1080554466.py, line 10)