In [4]:
import sys
import yfinance as yf
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta
from sklearn.model_selection import ParameterSampler
from joblib import Parallel, delayed

# ——— Core indicator & backtest functions ———

def download_prices(ticker, start="2015-01-01"):
    """Download auto-adjusted closing prices for ``ticker`` via yfinance.

    Parameters
    ----------
    ticker : str
        Symbol passed straight to ``yf.download``.
    start : str, default "2015-01-01"
        First date requested from ``yf.download``.

    Returns
    -------
    pandas.DataFrame
        A frame holding only the ``Close`` column selection, with rows that
        contain NaN removed. If the downloaded columns form a MultiIndex, they
        are flattened to their first level.
    """
    raw = yf.download(ticker, start=start, auto_adjust=True, progress=False)
    # Build a fresh frame instead of calling dropna(inplace=True) on a
    # column selection: in-place mutation of a derived frame is what triggers
    # pandas' chained-assignment warnings and gains nothing here.
    prices = raw[['Close']].dropna()
    if isinstance(prices.columns, pd.MultiIndex):
        prices.columns = prices.columns.get_level_values(0)
    return prices

def compute_rsi(close, window):
    """Relative Strength Index of ``close`` on a 0-100 scale.

    Up-moves and down-moves of the one-step difference are each smoothed with
    an exponentially weighted mean using ``alpha = 1 / window``; values are NaN
    until ``window`` observations are available.
    """
    change = close.diff()
    smoothing = {'alpha': 1/window, 'min_periods': window}
    ups = change.clip(lower=0)
    downs = -change.clip(upper=0)
    mean_up = ups.ewm(**smoothing).mean()
    mean_down = downs.ewm(**smoothing).mean()
    strength = mean_up / mean_down
    return 100 - 100/(1 + strength)

def compute_macd(close, fast, slow, signal_smooth):
    """Return ``(macd_line, signal_line)`` for ``close``.

    The MACD line is the fast-span EMA minus the slow-span EMA; the signal
    line is the EMA of the MACD line over ``signal_smooth``. All EMAs use the
    recursive (``adjust=False``) form.
    """
    def ema(series, span):
        return series.ewm(span=span, adjust=False).mean()

    macd_line = ema(close, fast) - ema(close, slow)
    return macd_line, ema(macd_line, signal_smooth)

def compute_vol(close, vol_window):
    """Rolling standard deviation of simple returns over ``vol_window`` rows."""
    returns = close.pct_change()
    rolling_returns = returns.rolling(vol_window)
    return rolling_returns.std()

def compute_bb(close, window, num_std):
    """Distance of ``close`` from its rolling mean, in rolling standard deviations.

    Note: ``num_std`` is accepted but not used by the computation; the result
    depends only on ``close`` and ``window``.
    """
    band = close.rolling(window)
    centre = band.mean()
    width = band.std()
    return (close - centre) / width

def rolling_zscore(series, thr_window):
    """Standardise ``series`` against its own trailing ``thr_window`` rows.

    Each value has the rolling mean subtracted and is divided by the rolling
    standard deviation; the result is NaN until the window is full.
    """
    trailing = series.rolling(thr_window)
    centred = series - trailing.mean()
    return centred / trailing.std()

def generate_signal(z, long_z, short_z):
    """Turn a z-score series into a -1 / 0 / +1 position signal.

    The signal is a small state machine driven by threshold crossings between
    the previous and the current z value:

    * flat -> long   when z crosses below ``long_z``
    * flat -> short  when z crosses above ``short_z``
    * long -> flat   when z crosses back to or above ``long_z``
    * short -> flat  when z crosses back to or below ``short_z``

    Rows where the current or previous z is NaN leave the state unchanged.

    Parameters
    ----------
    z : pandas.Series
        Numeric series to evaluate.
    long_z, short_z : float
        Entry/exit thresholds for the long and short side.

    Returns
    -------
    pandas.Series
        Integer series on ``z.index`` holding the state after each row.
    """
    # Iterate positionally over plain arrays: label-based ``.at`` lookups are
    # slow in a Python loop and return a Series (not a scalar) when the index
    # contains duplicate labels.
    current = z.to_numpy(dtype=float)
    previous = z.shift(1).to_numpy(dtype=float)
    states = np.zeros(len(current), dtype=int)

    state = 0
    for i in range(len(current)):
        c, p = current[i], previous[i]
        if not (np.isnan(c) or np.isnan(p)):
            if state == 0:
                if c < long_z and p >= long_z:
                    state = 1
                elif c > short_z and p <= short_z:
                    state = -1
            elif state == 1:
                if c >= long_z and p < long_z:
                    state = 0
            else:  # state == -1
                if c <= short_z and p > short_z:
                    state = 0
        states[i] = state

    return pd.Series(states, index=z.index).astype(int)

def backtest(df, sig, fee_per_trade=0.001):
    """Evaluate a position signal against the ``Close`` column of ``df``.

    The signal is aligned to ``df.index`` (missing rows count as flat) and
    lagged by one row, so the position held on a row is the signal from the
    previous row. ``fee_per_trade`` is subtracted from the strategy return on
    every row where the position changes to a non-zero value.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``Close`` column. The average-holding-period metric
        subtracts index values and reads ``.days``, so a datetime-like index
        is expected.
    sig : pandas.Series
        Position signal; reindexed to ``df.index`` internally.
    fee_per_trade : float, default 0.001
        Return deducted on each entry row.

    Returns
    -------
    dict
        Keys: 'Total Return', 'CAGR', 'Ann Vol', 'Sharpe', 'Max Drawdown',
        'Trades', 'Hit Rate', 'Avg Hold Days', 'Data Points'. For an empty
        ``df`` all ratios are NaN and both counts are 0.
    """
    days = len(df)
    if days == 0:
        # No rows: there is no final equity value to read, so report an
        # explicit "nothing happened" result instead of raising IndexError.
        return {
            'Total Return':  np.nan,
            'CAGR':          np.nan,
            'Ann Vol':       np.nan,
            'Sharpe':        np.nan,
            'Max Drawdown':  np.nan,
            'Trades':        0,
            'Hit Rate':      np.nan,
            'Avg Hold Days': np.nan,
            'Data Points':   0
        }

    # Align the signal to the backtest window, then lag it one row.
    sig   = sig.reindex(df.index).fillna(0).astype(int)
    pos   = sig.shift(1).fillna(0).astype(int)
    ret   = df['Close'].pct_change().fillna(0)
    strat = pos * ret

    # A row is an entry when the position changed and is now non-zero.
    changes = pos != pos.shift(1)
    entries = changes & (pos != 0)
    strat.loc[entries] -= fee_per_trade

    equity    = (1 + strat).cumprod()
    final_eq  = equity.iloc[-1]
    total     = final_eq - 1
    cagr      = final_eq**(252/days) - 1
    daily_std = strat.std()
    ann_vol   = daily_std * np.sqrt(252)
    sharpe    = (strat.mean()/daily_std*np.sqrt(252)) if daily_std != 0 else np.nan
    max_dd    = (equity / equity.cummax() - 1).min()
    nz        = strat[strat != 0]
    hit_rate  = (nz > 0).sum() / len(nz) if len(nz) else np.nan

    # Average holding period in calendar days: measured from the row where the
    # position leaves zero to the row where it returns to zero.
    dates = df.index[changes]
    vals  = pos[changes]
    hold, prev, ent = [], 0, None
    for d, v in zip(dates, vals):
        if v != 0 and prev == 0:
            ent = d
        if v == 0 and prev != 0 and ent is not None:
            hold.append((d - ent).days)
            ent = None
        prev = v
    if prev != 0 and ent is not None:
        # Position still open at the end of the window: close it on the last row.
        hold.append((df.index[-1] - ent).days)
    avg_hold = np.mean(hold) if hold else np.nan

    return {
        'Total Return':  total,
        'CAGR':          cagr,
        'Ann Vol':       ann_vol,
        'Sharpe':        sharpe,
        'Max Drawdown':  max_dd,
        'Trades':        int(entries.sum()),
        'Hit Rate':      hit_rate,
        'Avg Hold Days': avg_hold,
        'Data Points':   days
    }


# ——— Hyperparameter grids ———

# Every strategy searches the same entry thresholds on its z-score.
_LONG_Z_LEVELS  = (-2.0, -1.5, -1.0)
_SHORT_Z_LEVELS = ( 1.0,  1.5,  2.0)

# Strategy name -> {parameter name -> list of candidate values}.
# The outer key order is the order in which strategies are evaluated.
param_grids = dict(
    RSI=dict(
        rsi_window=[10, 14, 20],
        long_z=list(_LONG_Z_LEVELS),
        short_z=list(_SHORT_Z_LEVELS),
    ),
    MACD=dict(
        fast=[8, 12, 16],
        slow=[20, 26, 30],
        signal_smooth=[6, 9, 12],
        long_z=list(_LONG_Z_LEVELS),
        short_z=list(_SHORT_Z_LEVELS),
    ),
    Volatility=dict(
        vol_window=[10, 20, 30],
        thr_window=[126, 252],
        long_z=list(_LONG_Z_LEVELS),
        short_z=list(_SHORT_Z_LEVELS),
    ),
    BollingerBands=dict(
        window=[10, 20, 30],
        num_std=[1.5, 2.0, 2.5],
        long_z=list(_LONG_Z_LEVELS),
        short_z=list(_SHORT_Z_LEVELS),
    ),
)


def process_ticker(ticker):
    """Run an expanding-window walk-forward study of every strategy for one ticker.

    Prices are fetched with ``download_prices(ticker)``. The first training
    window covers five calendar years from the first available date; each fold
    tests on the following calendar year, after which the training window is
    extended to the end of that test year (its start never moves).

    For every fold and every strategy in ``param_grids``:

    1. up to 30 parameter sets are drawn with ``ParameterSampler`` (fixed
       ``random_state=0``, so the same sets are used in every fold);
    2. each set is scored by the Sharpe ratio of ``backtest`` on the training
       window, and the highest finite Sharpe wins;
    3. the winning set is re-evaluated on the test window, with the signal
       state machine started fresh at the first test row.

    Parameters
    ----------
    ticker : str
        Symbol forwarded to ``download_prices``.

    Returns
    -------
    list of dict
        One ``backtest`` result per (fold, strategy), extended with 'Ticker',
        'Strategy', the four window boundary dates and ``best_<param>``
        entries. Empty when no prices were downloaded or there is not enough
        history for a single fold.
    """
    df = download_prices(ticker)
    if df.empty:
        # Nothing to work with (min()/max() of an empty index are NaT).
        return []
    dates = df.index
    close = df['Close']

    # Indicator caches: each distinct window/span is computed once.
    rsi_cache = {w: compute_rsi(close, w) for w in param_grids['RSI']['rsi_window']}
    vol_cache = {w: compute_vol(close, w) for w in param_grids['Volatility']['vol_window']}
    # compute_bb ignores its third argument, so only the window matters here.
    bb_cache  = {w: compute_bb(close, w, None) for w in param_grids['BollingerBands']['window']}
    spans     = set(param_grids['MACD']['fast'] + param_grids['MACD']['slow'])
    ema_cache = {s: close.ewm(span=s, adjust=False).mean() for s in spans}

    def zscore_for(strat, p):
        """Full-history rolling z-score of the indicator behind ``strat``."""
        if strat == 'RSI':
            return rolling_zscore(rsi_cache[p['rsi_window']], thr_window=252)
        if strat == 'MACD':
            macd = ema_cache[p['fast']] - ema_cache[p['slow']]
            smoothed = macd.ewm(span=p['signal_smooth'], adjust=False).mean()
            return rolling_zscore(macd - smoothed, thr_window=252)
        if strat == 'Volatility':
            return rolling_zscore(vol_cache[p['vol_window']], thr_window=p['thr_window'])
        # BollingerBands
        return rolling_zscore(bb_cache[p['window']], thr_window=252)

    # The sampled parameter sets and their full-history signals do not depend
    # on the fold, so they are built once per strategy on first use.
    signal_cache = {}

    def candidates_for(strat, grid):
        """Return ``[(params, full_history_signal), ...]`` for ``strat``."""
        if strat not in signal_cache:
            grid_size = int(np.prod([len(values) for values in grid.values()]))
            # Cap n_iter at the grid size: asking for more than exist only
            # makes the sampler warn and fall back to the full grid.
            sampler = ParameterSampler(grid, n_iter=min(30, grid_size), random_state=0)
            signal_cache[strat] = [
                (p, generate_signal(zscore_for(strat, p), p['long_z'], p['short_z']))
                for p in sampler
            ]
        return signal_cache[strat]

    results = []
    train_start = dates.min()
    train_end   = train_start + relativedelta(years=5) - pd.Timedelta(days=1)

    while True:
        test_start = train_end + pd.Timedelta(days=1)
        test_end   = test_start + relativedelta(years=1) - pd.Timedelta(days=1)
        if test_start > dates.max():
            break

        train_df = df.loc[train_start:train_end]
        test_df  = df.loc[test_start:test_end]

        if test_df.empty:
            # A gap in the data left this test year without rows; move on.
            train_end = test_end
            continue

        for strat, grid in param_grids.items():
            scored = candidates_for(strat, grid)
            if not scored:
                continue

            # ——— Randomized tuning on the TRAIN window ———
            # Start from the first candidate so a winner always exists, even
            # when every Sharpe is NaN (e.g. no candidate traded in training).
            best_sh, best_p = -np.inf, scored[0][0].copy()
            for p, sig in scored:
                sharpe = backtest(train_df, sig)['Sharpe']
                if pd.notna(sharpe) and sharpe > best_sh:
                    best_sh, best_p = sharpe, p.copy()

            # ——— Evaluate best params on the TEST window ———
            z_t   = zscore_for(strat, best_p).loc[test_start:test_end]
            sig_t = generate_signal(z_t, best_p['long_z'], best_p['short_z'])
            mets  = backtest(test_df, sig_t)

            mets.update({
                'Ticker':      ticker,
                'Strategy':    strat,
                'train_start': train_start.date(),
                'train_end':   train_end.date(),
                'test_start':  test_start.date(),
                'test_end':    test_end.date(),
                **{f"best_{k}": v for k, v in best_p.items()}
            })
            results.append(mets)

        train_end = test_end

    return results


if __name__ == "__main__":
    tickers = ['SOXX','XLF','XLV','XLRE','GDX','GUSH','FDN']

    # Fan the per-ticker walk-forward study out over all available cores;
    # each job returns that ticker's list of per-fold result dicts.
    nested = Parallel(n_jobs=-1)(delayed(process_ticker)(t) for t in tickers)

    # Flatten the per-ticker lists into one list of result rows.
    all_results = [row for per_ticker in nested for row in per_ticker]

    # One row per (ticker, strategy, fold), keyed by the window boundaries.
    df_results = pd.DataFrame(all_results).set_index(
        ['Ticker','Strategy','train_start','train_end','test_start','test_end']
    )
    print(df_results)


                                                                    Total Return  \
Ticker Strategy       train_start train_end  test_start test_end                   
SOXX   RSI            2015-01-02  2020-01-01 2020-01-02 2021-01-01      0.303405   
       MACD           2015-01-02  2020-01-01 2020-01-02 2021-01-01      0.149055   
       Volatility     2015-01-02  2020-01-01 2020-01-02 2021-01-01     -0.093599   
       BollingerBands 2015-01-02  2020-01-01 2020-01-02 2021-01-01      0.163581   
       RSI            2015-01-02  2021-01-01 2021-01-02 2022-01-01      0.084662   
...                                                                          ...   
FDN    BollingerBands 2015-01-02  2024-01-01 2024-01-02 2025-01-01      0.006699   
       RSI            2015-01-02  2025-01-01 2025-01-02 2026-01-01      0.010137   
       MACD           2015-01-02  2025-01-01 2025-01-02 2026-01-01     -0.048528   
       Volatility     2015-01-02  2025-01-01 2025-01-02 2026-01-01     -0.13

In [5]:
# ——— Final aggregation across all folds ———
# Collapse the per-fold rows to one row per (Ticker, Strategy): trade counts
# are summed over the folds, every other metric is averaged.
agg_spec = {
    **dict.fromkeys(['Total Return', 'CAGR', 'Ann Vol', 'Sharpe', 'Max Drawdown'], 'mean'),
    'Trades': 'sum',
    **dict.fromkeys(['Hit Rate', 'Avg Hold Days'], 'mean'),
}
per_pair = df_results.groupby(level=['Ticker','Strategy'])
final_metrics = per_pair.agg(agg_spec).round(4)

print("\n=== Final Aggregated Metrics ===")
print(final_metrics)



=== Final Aggregated Metrics ===
                       Total Return    CAGR  Ann Vol  Sharpe  Max Drawdown  \
Ticker Strategy                                                              
FDN    BollingerBands        0.0376  0.0379   0.1052  0.3949       -0.0513   
       MACD                 -0.0255 -0.0380   0.1743 -0.0184       -0.1340   
       RSI                   0.0239  0.0267   0.1355  0.2821       -0.0902   
       Volatility           -0.0249 -0.0562   0.2191 -0.2767       -0.1880   
GDX    BollingerBands        0.0462  0.0451   0.1237  0.2862       -0.0844   
       MACD                  0.0986  0.1997   0.2054  0.4835       -0.1666   
       RSI                   0.0763  0.0755   0.1581  0.4196       -0.1149   
       Volatility           -0.1196 -0.1333   0.2180 -0.5458       -0.2200   
GUSH   BollingerBands       -0.1568 -0.1591   0.2528 -0.5397       -0.2819   
       MACD                  0.0227  0.0237   0.2194  0.6595       -0.1672   
       RSI                  -0