In [12]:
import sys
import yfinance as yf
import pandas as pd
import numpy as np
from itertools import product
from dateutil.relativedelta import relativedelta

# ——— 1) Core functions ———

def download_prices(ticker, start="2015-01-01"):
    df = yf.download(ticker, start=start, auto_adjust=True, progress=False)[['Close']]
    df.dropna(inplace=True)
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    return df

def compute_rsi(close, window):
    delta    = close.diff()
    gain     = delta.clip(lower=0)
    loss     = -delta.clip(upper=0)
    avg_gain = gain.ewm(alpha=1/window, min_periods=window).mean()
    avg_loss = loss.ewm(alpha=1/window, min_periods=window).mean()
    return 100 - 100/(1 + avg_gain/avg_loss)

def compute_macd(close, fast, slow, signal_smooth):
    ema_f     = close.ewm(span=fast, adjust=False).mean()
    ema_s     = close.ewm(span=slow, adjust=False).mean()
    macd_line = ema_f - ema_s
    sig_line  = macd_line.ewm(span=signal_smooth, adjust=False).mean()
    return macd_line, sig_line

def compute_vol(close, vol_window):
    return close.pct_change().rolling(vol_window).std()

def compute_bb(close, window, num_std):
    ma = close.rolling(window).mean()
    sd = close.rolling(window).std()
    return (close - ma) / sd

def rolling_zscore(series, thr_window):
    m = series.rolling(thr_window).mean()
    s = series.rolling(thr_window).std()
    return (series - m) / s

def generate_signal(z, long_z, short_z):
    sig, state = pd.Series(0, index=z.index), 0
    prev = z.shift(1)
    for t in z.index:
        c, p = z.at[t], prev.at[t]
        if pd.notna(c) and pd.notna(p):
            if   state == 0 and c <  long_z  and p >= long_z:   state =  1
            elif state == 0 and c >  short_z and p <= short_z:  state = -1
            elif state == 1 and c >= long_z  and p <  long_z:    state =  0
            elif state == -1 and c <= short_z and p >  short_z: state =  0
        sig.at[t] = state
    return sig.astype(int)

def backtest(df, sig, fee_per_trade=0.001):
    pos    = sig.shift(1).fillna(0).astype(int)
    ret    = df['Close'].pct_change().fillna(0)
    strat  = pos * ret
    entries = (pos != pos.shift(1)) & (pos != 0)
    strat.loc[entries] -= fee_per_trade

    equity   = (1 + strat).cumprod()
    days     = len(df)
    total    = equity.iloc[-1] - 1
    cagr     = equity.iloc[-1]**(252/days) - 1
    ann_vol  = strat.std() * np.sqrt(252)
    sharpe   = (strat.mean()/strat.std()*np.sqrt(252)) if strat.std() != 0 else np.nan
    max_dd   = (equity / equity.cummax() - 1).min()
    nz       = strat[strat != 0]
    hit_rate = (nz > 0).sum() / len(nz) if len(nz) else np.nan

    # compute avg holding days
    changes = pos != pos.shift(1)
    dates   = df.index[changes]
    vals    = pos[changes]
    hold, prev, ent = [], 0, None
    for d, v in zip(dates, vals):
        if v != 0 and prev == 0:
            ent = d
        if v == 0 and prev != 0 and ent is not None:
            hold.append((d - ent).days)
            ent = None
        prev = v
    if prev != 0 and ent is not None:
        hold.append((df.index[-1] - ent).days)
    avg_hold = np.mean(hold) if hold else np.nan

    return {
        'Total Return':  total,
        'CAGR':          cagr,
        'Ann Vol':       ann_vol,
        'Sharpe':        sharpe,
        'Max Drawdown':  max_dd,
        'Trades':        int(entries.sum()),
        'Hit Rate':      hit_rate,
        'Avg Hold Days': avg_hold,
        'Data Points':   days
    }

# ——— 2) Hyperparameter grids ———

param_grids = {
    'RSI': {
        'rsi_window': [10,14,20],
        'long_z':     [-2.0,-1.5,-1.0],
        'short_z':    [ 1.0, 1.5, 2.0]
    },
    'MACD': {
        'fast':          [ 8, 12, 16],
        'slow':          [20, 26, 30],
        'signal_smooth': [ 6,  9, 12],
        'long_z':        [-2.0,-1.5,-1.0],
        'short_z':       [ 1.0, 1.5, 2.0]
    },
    'Volatility': {
        'vol_window': [10,20,30],
        'thr_window': [126,252],
        'long_z':     [-2.0,-1.5,-1.0],
        'short_z':    [ 1.0, 1.5, 2.0]
    },
    'BollingerBands': {
        'window':  [10,20,30],
        'num_std': [1.5,2.0,2.5],
        'long_z':  [-2.0,-1.5,-1.0],
        'short_z': [ 1.0, 1.5, 2.0]
    }
}

# ——— 3) Walk-forward with fix & 1-fold break ———

tickers     = ['SOXX','XLF','XLV','XLRE','GDX','GUSH','FDN']
all_results = []

for ticker in tickers:
    df = download_prices(ticker, start="2015-01-01")

    # initial 5-year training window
    train_start = df.index.min()
    train_end   = train_start + relativedelta(years=5) - pd.Timedelta(days=1)

    while True:
        # define the next 1-year test period
        test_start = train_end + pd.Timedelta(days=1)
        test_end   = test_start + relativedelta(years=1) - pd.Timedelta(days=1)
        if test_start > df.index.max():
            break

        train_df = df.loc[train_start:train_end]
        test_df  = df.loc[test_start:test_end]

        for strat, grid in param_grids.items():
            # — tune hyper-params on train_df —
            best_sh, best_p = -np.inf, None
            for vals in product(*(grid[k] for k in grid)):
                p = dict(zip(grid.keys(), vals))

                # compute train-z
                if strat == 'RSI':
                    z = rolling_zscore(compute_rsi(train_df['Close'], p['rsi_window']), thr_window=252)
                elif strat == 'MACD':
                    m, sgn = compute_macd(train_df['Close'], p['fast'], p['slow'], p['signal_smooth'])
                    z      = rolling_zscore(m - sgn, thr_window=252)
                elif strat == 'Volatility':
                    z = rolling_zscore(compute_vol(train_df['Close'], p['vol_window']), thr_window=p['thr_window'])
                else:
                    dist = compute_bb(train_df['Close'], p['window'], p['num_std'])
                    z    = rolling_zscore(dist, thr_window=252)

                sig   = generate_signal(z, p['long_z'], p['short_z'])
                stats = backtest(train_df, sig)
                if stats['Sharpe'] > best_sh:
                    best_sh, best_p = stats['Sharpe'], p.copy()

            # — compute z over full history through test_end, then slice test:
            full_hist = df.loc[:test_end]

            if strat == 'RSI':
                full_hist['RSI'] = compute_rsi(full_hist['Close'], best_p['rsi_window'])
                z_full = rolling_zscore(full_hist['RSI'], thr_window=252)
            elif strat == 'MACD':
                m_f, s_f = compute_macd(full_hist['Close'],
                                        best_p['fast'], best_p['slow'], best_p['signal_smooth'])
                z_full   = rolling_zscore(m_f - s_f, thr_window=252)
            elif strat == 'Volatility':
                full_hist['Vol'] = compute_vol(full_hist['Close'], best_p['vol_window'])
                z_full          = rolling_zscore(full_hist['Vol'], thr_window=best_p['thr_window'])
            else:
                dist_full = compute_bb(full_hist['Close'],
                                       window=best_p['window'],
                                       num_std=best_p['num_std'])
                z_full    = rolling_zscore(dist_full, thr_window=252)

            # slice the test-period, generate signals & backtest
            z_t   = z_full.loc[test_start:test_end]
            sig_t = generate_signal(z_t, best_p['long_z'], best_p['short_z'])
            mets  = backtest(test_df, sig_t)

            # append and print **only the first fold**, then exit
            mets.update({
                'Ticker':      ticker,
                'Strategy':    strat,
                'train_start': train_start.date(),
                'train_end':   train_end.date(),
                'test_start':  test_start.date(),
                'test_end':    test_end.date(),
                **{f"best_{k}": v for k, v in best_p.items()}
            })
            all_results.append(mets)

        # expand train window to include the test year
        train_end = test_end


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  full_hist['RSI'] = compute_rsi(full_hist['Close'], best_p['rsi_window'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  full_hist['Vol'] = compute_vol(full_hist['Close'], best_p['vol_window'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  full_hist['RSI'] = compute_rsi(full_hist['Close'], best_p['

In [13]:
all_results

[{'Total Return': np.float64(0.3034028192394971),
  'CAGR': np.float64(0.30203842085506905),
  'Ann Vol': np.float64(0.16373234669081616),
  'Sharpe': np.float64(1.6905175872209937),
  'Max Drawdown': -0.046160803816129414,
  'Trades': 6,
  'Hit Rate': np.float64(0.7),
  'Avg Hold Days': np.float64(2.3333333333333335),
  'Data Points': 253,
  'Ticker': 'SOXX',
  'Strategy': 'RSI',
  'train_start': datetime.date(2015, 1, 2),
  'train_end': datetime.date(2020, 1, 1),
  'test_start': datetime.date(2020, 1, 2),
  'test_end': datetime.date(2021, 1, 1),
  'best_rsi_window': 14,
  'best_long_z': -2.0,
  'best_short_z': 2.0},
 {'Total Return': np.float64(-0.04182854291560756),
  'CAGR': np.float64(-0.04166670604696465),
  'Ann Vol': np.float64(0.34451934158645386),
  'Sharpe': np.float64(0.051519847099190275),
  'Max Drawdown': -0.27413921377123907,
  'Trades': 7,
  'Hit Rate': np.float64(0.6046511627906976),
  'Avg Hold Days': np.float64(8.285714285714286),
  'Data Points': 253,
  'Ticker': '

In [14]:
df_results = pd.DataFrame(all_results)

In [15]:
df_results.head()

Unnamed: 0,Total Return,CAGR,Ann Vol,Sharpe,Max Drawdown,Trades,Hit Rate,Avg Hold Days,Data Points,Ticker,...,best_rsi_window,best_long_z,best_short_z,best_fast,best_slow,best_signal_smooth,best_vol_window,best_thr_window,best_window,best_num_std
0,0.303403,0.302038,0.163732,1.690518,-0.046161,6,0.7,2.333333,253,SOXX,...,14.0,-2.0,2.0,,,,,,,
1,-0.041829,-0.041667,0.344519,0.05152,-0.274139,7,0.604651,8.285714,253,SOXX,...,,-1.5,1.5,16.0,30.0,9.0,,,,
2,-0.093598,-0.093246,0.355696,-0.098436,-0.273587,3,0.513514,17.333333,253,SOXX,...,,-1.5,2.0,,,,20.0,252.0,,
3,0.163579,0.162883,0.203228,0.843216,-0.110641,5,0.727273,3.0,253,SOXX,...,,-2.0,2.0,,,,,,30.0,1.5
4,0.084662,0.084662,0.102118,0.846149,-0.040738,8,0.578947,3.875,252,SOXX,...,14.0,-2.0,2.0,,,,,,,


In [17]:
agg = (
    df_results
      .groupby(['Ticker','Strategy'])
      .agg({
          'Total Return':  'mean',
          'CAGR':          'mean',
          'Ann Vol':       'mean',
          'Sharpe':        'mean',
          'Max Drawdown':  'mean',
          'Trades':        'sum',    # total across folds
          'Hit Rate':      'mean',
          'Avg Hold Days': 'mean'
      })
      .round(4)
      .reset_index()
)

print("\n=== Final Aggregated Metrics ===")
print(agg)


=== Final Aggregated Metrics ===
   Ticker        Strategy  Total Return    CAGR  Ann Vol  Sharpe  \
0     FDN  BollingerBands        0.0376  0.0379   0.1052  0.3949   
1     FDN            MACD       -0.0167 -0.0435   0.1770  0.0506   
2     FDN             RSI        0.0239  0.0267   0.1355  0.2821   
3     FDN      Volatility       -0.0249 -0.0562   0.2191 -0.2767   
4     GDX  BollingerBands        0.0462  0.0451   0.1237  0.2862   
5     GDX            MACD        0.0256  0.0681   0.2298  0.2759   
6     GDX             RSI        0.0763  0.0755   0.1581  0.4196   
7     GDX      Volatility       -0.1139 -0.1276   0.2200 -0.5258   
8    GUSH  BollingerBands       -0.1568 -0.1591   0.2528 -0.5397   
9    GUSH            MACD        0.0423  0.0417   0.2407  0.5028   
10   GUSH             RSI       -0.1646 -0.1637   0.3357 -0.0209   
11   GUSH      Volatility       -0.1752 -0.1758   0.3368 -0.4851   
12   SOXX  BollingerBands        0.0180  0.0179   0.0760  0.3156   
13   SOXX     

In [18]:
agg

Unnamed: 0,Ticker,Strategy,Total Return,CAGR,Ann Vol,Sharpe,Max Drawdown,Trades,Hit Rate,Avg Hold Days
0,FDN,BollingerBands,0.0376,0.0379,0.1052,0.3949,-0.0513,36,0.6005,3.5881
1,FDN,MACD,-0.0167,-0.0435,0.177,0.0506,-0.129,56,0.4979,10.6258
2,FDN,RSI,0.0239,0.0267,0.1355,0.2821,-0.0902,51,0.5257,5.1435
3,FDN,Volatility,-0.0249,-0.0562,0.2191,-0.2767,-0.188,59,0.4762,23.4869
4,GDX,BollingerBands,0.0462,0.0451,0.1237,0.2862,-0.0844,73,0.539,3.1703
5,GDX,MACD,0.0256,0.0681,0.2298,0.2759,-0.1777,85,0.5318,7.0574
6,GDX,RSI,0.0763,0.0755,0.1581,0.4196,-0.1149,53,0.5345,5.6759
7,GDX,Volatility,-0.1139,-0.1276,0.22,-0.5258,-0.2231,74,0.4728,7.1728
8,GUSH,BollingerBands,-0.1568,-0.1591,0.2528,-0.5397,-0.2819,61,0.4987,2.064
9,GUSH,MACD,0.0423,0.0417,0.2407,0.5028,-0.183,44,0.5606,5.6493
