In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long  # adjust import as needed

def compute_ma_regimes(prices: pd.Series,
                       short_w: int,
                       long_w: int) -> np.ndarray:
    """
    Label every observation with a simple-moving-average crossover regime.

    Both averages are taken over log(prices) with ``min_periods=1``, so the
    earliest rows are averaged over whatever history exists and no row is
    left unlabelled.

    Parameters
    ----------
    prices : pd.Series
        Price series of a single instrument.
    short_w : int
        Window length of the fast average.
    long_w : int
        Window length of the slow average.

    Returns
    -------
    np.ndarray
        0 where the slow average is strictly above the fast one, 2
        everywhere else (ties included).
    """
    log_prices = np.log(prices)
    fast_ma, slow_ma = (
        log_prices.rolling(window=span, min_periods=1).mean()
        for span in (short_w, long_w)
    )
    slow_on_top = slow_ma > fast_ma
    return np.where(slow_on_top, 0, 2)

def grid_search_windows(price_file: str,
                        short_range: range,
                        long_range: range,
                        inst: int = 0) -> pd.DataFrame:
    """
    Grid-search MA-crossover window pairs against the autolabeller's regimes.

    Parameters
    ----------
    price_file : str
        Whitespace-separated price matrix without a header row.
    short_range, long_range : iterable of int
        Candidate fast / slow window lengths. Pairs with
        ``long_w <= short_w`` are skipped.
    inst : int, default 0
        Column of the price matrix to evaluate. The default keeps the
        previous hard-coded behaviour (first column).

    Returns
    -------
    pd.DataFrame
        Columns ``short_w``, ``long_w``, ``accuracy``, sorted by accuracy in
        descending order. Empty (but with those columns) when no pair
        qualifies.
    """
    # 1) load full price matrix
    df = pd.read_csv(price_file, sep=r"\s+", header=None)
    T = df.shape[0]

    # 2) reference regimes from the autolabeller
    #    NOTE(review): assumed to return one label per row, possibly fewer
    #    than T — confirm against precision_labeller.
    true_regs = plot_all_regimes_long(end_point=T, plot_graph=False, inst=inst)
    prices = df.iloc[:, inst]

    # 3) loop over window pairs
    rows = []
    for short_w in short_range:
        for long_w in long_range:
            if long_w <= short_w:
                continue
            # trim predictions to the number of reference labels
            pred_regs = compute_ma_regimes(prices, short_w, long_w)[: len(true_regs)]
            rows.append({"short_w": short_w,
                         "long_w":  long_w,
                         "accuracy": accuracy_score(true_regs, pred_regs)})

    # Explicit columns keep sort_values from raising KeyError on an empty grid.
    results = pd.DataFrame(rows, columns=["short_w", "long_w", "accuracy"])
    return results.sort_values("accuracy", ascending=False)

# ─── Example usage ───────────────────────────────────────────────
# Candidate window lengths; pairs with long_w <= short_w are skipped inside
# grid_search_windows.
shorts = range(5, 51, 5)    # try 5,10,…,50
longs  = range(20, 201, 10) # try 20,30,…,200

# Score every remaining pair on "prices.txt" and show the ten best
# (the result is already sorted by accuracy, best first).
df_grid = grid_search_windows("prices.txt", shorts, longs)
print(df_grid.head(10))


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long  # your autolabeller

def compute_ema_regimes(prices: pd.Series,
                        short_span: int,
                        long_span: int) -> np.ndarray:
    """
    Label every observation with an EMA-crossover regime.

    Both exponential averages are recursive (``adjust=False``) and are taken
    over log(prices).

    Parameters
    ----------
    prices : pd.Series
        Price series of a single instrument.
    short_span, long_span : int
        ``span`` of the fast and slow EMA respectively.

    Returns
    -------
    np.ndarray
        2 (bull) where the fast EMA is strictly above the slow EMA, else
        0 (bear); ties therefore count as bear.
    """
    log_prices = np.log(prices)

    def _ema(span):
        # recursive EMA of the log-price series for the given span
        return log_prices.ewm(span=span, adjust=False).mean()

    fast_above_slow = _ema(short_span) > _ema(long_span)
    return np.where(fast_above_slow, 2, 0)

def grid_search_ema(price_file: str,
                    short_range: range,
                    long_range: range,
                    inst: int = 0) -> pd.DataFrame:
    """
    Grid-search EMA-crossover span pairs against the autolabeller's regimes.

    Reads a whitespace-separated, header-less price matrix from
    ``price_file``, takes column ``inst`` and scores every
    (short_span, long_span) pair with ``long_span > short_span``.

    Returns a DataFrame with columns ``short_span``, ``long_span`` and
    ``accuracy``, sorted by accuracy in descending order.
    """
    price_matrix = pd.read_csv(price_file, sep=r"\s+", header=None)
    true_regs = plot_all_regimes_long(end_point=price_matrix.shape[0],
                                      plot_graph=False,
                                      inst=inst)
    series = price_matrix.iloc[:, inst]
    n_true = len(true_regs)

    def _score(fast, slow):
        # predictions are cut to the number of reference labels before scoring
        predicted = compute_ema_regimes(series, fast, slow)[:n_true]
        return accuracy_score(true_regs, predicted)

    rows = [
        {"short_span": fast, "long_span": slow, "accuracy": _score(fast, slow)}
        for fast in short_range
        for slow in long_range
        if slow > fast
    ]
    return pd.DataFrame(rows).sort_values("accuracy", ascending=False)

# ─── Example usage ───────────────────────────────────────────────
# Candidate EMA spans; pairs with long_span <= short_span are skipped inside
# grid_search_ema.
shorts = range(5, 51, 5)    # try spans 5,10,…,50
longs  = range(20, 201, 10) # try spans 20,30,…,200

# Evaluate instrument column 0 of "prices.txt" and show the ten best pairs.
df_ema = grid_search_ema("prices.txt", shorts, longs, inst=0)
print(df_ema.head(10))


In [None]:
import numpy as np
import pandas as pd

def compute_slope_vol(prices: pd.Series,
                      slope_win: int,
                      vol_win:   int) -> pd.DataFrame:
    """
    Build the two features used by the slope/volatility regime rule.

    Columns of the returned DataFrame:
      - 'slope': least-squares slope of log(price) against 0..slope_win-1
        over the trailing ``slope_win`` observations; NaN until a full
        window is available.
      - 'vol': sample standard deviation of one-step log-returns over the
        trailing ``vol_win`` returns; NaN until a full window is available.
    """
    log_prices = pd.Series(np.log(prices))

    # Design matrix [t, 1] is the same for every window, so build it once.
    steps = np.arange(slope_win)
    design = np.array([steps, np.ones_like(steps)]).T

    def _fit_slope(window_vals):
        # rolling() only calls this with complete windows (min_periods below)
        coeffs = np.linalg.lstsq(design, window_vals, rcond=None)[0]
        slope_coef, _intercept = coeffs
        return slope_coef

    rolling_slope = (
        log_prices
        .rolling(window=slope_win, min_periods=slope_win)
        .apply(_fit_slope, raw=True)
    )

    log_returns = log_prices.diff()
    rolling_vol = log_returns.rolling(window=vol_win, min_periods=vol_win).std()

    return pd.DataFrame({"slope": rolling_slope, "vol": rolling_vol})
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long

def grid_search_slope_vol(price_file: str,
                          slope_range: range,
                          vol_range:   range,
                          inst: int = 0) -> pd.DataFrame:
    """
    Grid-search slope / volatility window lengths for a bull/bear rule.

    For each (slope_win, vol_win) pair the rule predicts bull (2) when the
    rolling slope is positive AND the rolling volatility is below that
    pair's median volatility, otherwise bear (0). Rows where either feature
    is NaN, or whose index has no reference label, are left out.

    Returns a DataFrame with columns ``slope_win``, ``vol_win`` and
    ``accuracy``, sorted by accuracy in descending order.
    """
    price_matrix = pd.read_csv(price_file, sep=r"\s+", header=None)
    true = plot_all_regimes_long(end_point=price_matrix.shape[0],
                                 plot_graph=False,
                                 inst=inst)
    series = price_matrix.iloc[:, inst]

    def _evaluate(sw, vw):
        feats = compute_slope_vol(series, sw, vw).dropna()

        # keep only positions the reference labels actually cover
        keep = feats.index[feats.index < len(true)]
        feats = feats.loc[keep]
        reference = true[keep]

        # the median is taken over the rows that survived the filtering above
        rising = feats["slope"] > 0
        calm = feats["vol"] < feats["vol"].median()
        predicted = np.where(rising & calm, 2, 0)
        return accuracy_score(reference, predicted)

    rows = [
        {"slope_win": sw, "vol_win": vw, "accuracy": _evaluate(sw, vw)}
        for sw in slope_range
        for vw in vol_range
    ]
    return pd.DataFrame(rows).sort_values("accuracy", ascending=False)




# ─── Example ────────────────────────────────────────────
# Every slope window is paired with every volatility window (no pair is skipped).
slopes = range(30, 101, 10)   # 30,40,…,100 days
vols   = range(30, 101, 10)   # same for vol
# Evaluate instrument column 0 of "prices.txt" and show the ten best pairs.
df_best = grid_search_slope_vol("prices.txt", slopes, vols, inst=0)
print(df_best.head(10))


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long  # your autolabeller

def compute_macd_regimes(prices: pd.Series,
                         short_span: int,
                         long_span:  int,
                         signal_span: int,
                         drop_last:   int = 10) -> tuple:
    """
    Compute MACD crossover regimes on log-prices:
      1) EMA_short  = EMA(logP, span=short_span)
      2) EMA_long   = EMA(logP, span=long_span)
      3) MACD_line  = EMA_short - EMA_long
      4) Signal_line= EMA(MACD_line, span=signal_span)
      5) regs = 2 if MACD_line > Signal_line else 0

    Returns
    -------
    (regs, valid) : tuple of np.ndarray
        ``valid`` is a boolean mask of length ``len(prices) - drop_last``
        that is False wherever the MACD or signal line is undefined (NaN),
        e.g. for leading missing prices. ``regs`` holds the labels of the
        valid positions only, so ``len(regs) == valid.sum()``.
    """
    logp       = np.log(prices)
    ema_s      = logp.ewm(span=short_span, adjust=False).mean()
    ema_l      = logp.ewm(span=long_span,  adjust=False).mean()
    macd_line  = ema_s - ema_l
    signal_ln  = macd_line.ewm(span=signal_span, adjust=False).mean()

    # drop last points to match labeller
    n_keep    = len(prices) - drop_last
    regs_trim = np.where(macd_line > signal_ln, 2, 0)[:n_keep]

    # The mask must come from the indicator lines themselves: the label array
    # is integer-valued and never contains NaN, so testing it for NaN would
    # silently keep undefined rows (they compare False and look like "bear").
    defined = (macd_line.notna() & signal_ln.notna()).to_numpy()
    valid   = defined[:n_keep]
    return regs_trim[valid], valid

def grid_search_macd(price_file: str,
                     short_range: range,
                     long_range:  range,
                     signal_range: range,
                     inst: int = 0,
                     drop_last: int = 10) -> pd.DataFrame:
    """
    Grid-search MACD (short, long, signal) spans against the autolabeller.

    Reads a whitespace-separated, header-less price matrix, takes column
    ``inst`` and scores every span triple with ``long_span > short_span``.
    Both predictions and reference labels exclude the final ``drop_last``
    rows.

    Returns a DataFrame with columns ``short_span``, ``long_span``,
    ``signal_span`` and ``accuracy``, sorted by accuracy in descending order.
    """
    price_matrix = pd.read_csv(price_file, sep=r"\s+", header=None)
    n_rows = price_matrix.shape[0]
    reference = plot_all_regimes_long(
        end_point=n_rows,
        plot_graph=False,
        inst=inst,
    )
    reference = reference[: n_rows - drop_last]
    series = price_matrix.iloc[:, inst]

    def _score(fast, slow, sig):
        # the mask returned with the predictions selects the matching labels
        predicted, keep = compute_macd_regimes(series, fast, slow, sig, drop_last)
        return accuracy_score(reference[keep], predicted)

    rows = [
        {
            "short_span":  fast,
            "long_span":   slow,
            "signal_span": sig,
            "accuracy":    _score(fast, slow, sig),
        }
        for fast in short_range
        for slow in long_range
        if slow > fast
        for sig in signal_range
    ]
    return pd.DataFrame(rows).sort_values("accuracy", ascending=False)

# ─── Example usage ────────────────────────────────────
# Candidate spans; (short, long) pairs with long <= short are skipped inside
# grid_search_macd, and every surviving pair is combined with every signal span.
shorts  = range(5, 51, 5)    # 5,10,...,50
longs   = range(20, 201, 10) # 20,30,...,200
signals = range(5, 41, 5)    # 5,10,...,40

# Evaluate instrument column 0, ignoring the final 10 rows, and show the ten best triples.
df_macd = grid_search_macd("prices.txt", shorts, longs, signals, inst=0, drop_last=10)
print(df_macd.head(10))


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long  # your autolabeller

def compute_kalman_regimes(prices: pd.Series,
                           process_var:     float,
                           measurement_var: float,
                           drop_last:       int = 10) -> (np.ndarray, np.ndarray):
    """
    Regimes from a scalar (random-walk) Kalman filter over log-prices.

    The filter state is a trend estimate and each log-price is a noisy
    measurement of it; ``process_var`` plays the role of Q and
    ``measurement_var`` of R. The trend starts at the first log-price with
    variance 1.0.

    Returns
    -------
    (regs, valid)
        ``regs`` is 2 (bull) where the log-price is strictly above the
        filtered trend and 0 (bear) otherwise, with the final ``drop_last``
        entries removed. ``valid`` is an all-True boolean mask of the same
        length (the filter yields no NaNs of its own).
    """
    log_prices = np.log(prices).values
    n_obs = len(log_prices)

    trend = np.zeros(n_obs)
    trend[0] = log_prices[0]
    # kept as a NumPy scalar so the arithmetic follows NumPy float rules
    variance = np.float64(1.0)

    for i in range(1, n_obs):
        # predict: the trend is carried over, its uncertainty grows by Q
        prior_trend = trend[i - 1]
        prior_var = variance + process_var

        # update: blend prior and measurement using the Kalman gain
        gain = prior_var / (prior_var + measurement_var)
        trend[i] = prior_trend + gain * (log_prices[i] - prior_trend)
        variance = (1 - gain) * prior_var

    above_trend = log_prices > trend
    labels = np.where(above_trend, 2, 0)[: n_obs - drop_last]

    return labels, np.ones(labels.shape, dtype=bool)

def grid_search_kalman(price_file:      str,
                       process_vars:    list,
                       measurement_vars: list,
                       inst:            int = 0,
                       drop_last:       int = 10) -> pd.DataFrame:
    """
    Grid-search Kalman process / measurement variances against the autolabeller.

    Reads a whitespace-separated, header-less price matrix, takes column
    ``inst`` and scores every (Q, R) combination. Both predictions and
    reference labels exclude the final ``drop_last`` rows.

    Returns a DataFrame with columns ``process_var (Q)``,
    ``measurement_var (R)`` and ``accuracy``, sorted by accuracy in
    descending order.
    """
    price_matrix = pd.read_csv(price_file, sep=r"\s+", header=None)
    n_rows = price_matrix.shape[0]
    reference = plot_all_regimes_long(
        end_point=n_rows,
        plot_graph=False,
        inst=inst,
    )[: n_rows - drop_last]
    series = price_matrix.iloc[:, inst]

    results = []
    for q_var in process_vars:
        for r_var in measurement_vars:
            predicted, keep = compute_kalman_regimes(series, q_var, r_var, drop_last)
            score = accuracy_score(reference[keep], predicted)
            results.append({
                "process_var (Q)":     q_var,
                "measurement_var (R)": r_var,
                "accuracy":            score,
            })

    table = pd.DataFrame(results)
    return table.sort_values("accuracy", ascending=False)

# ─── Example usage ───────────────────────────────────────────────
# Candidate variances; every Q is paired with every R.
process_vars     = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
measurement_vars = [1e-2, 1e-1, 1.0, 10.0]

# Evaluate instrument column 0, ignoring the final 10 rows.
df_kalman = grid_search_kalman(
    "prices.txt",
    process_vars,
    measurement_vars,
    inst=0,
    drop_last=10
)
# Results are already sorted by accuracy, so head(10) is the ten best combos.
print(df_kalman.head(10))


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long  # your autolabeller

def compute_fib_regimes(prices: pd.Series,
                        window:       int,
                        lower_ratio:  float,
                        upper_ratio:  float,
                        drop_last:    int = 10):
    """
    Regimes from the price's position inside its rolling high/low range.

    Parameters
    ----------
    prices : pd.Series
        Price series of a single instrument (used as-is, no log).
    window : int
        Look-back used for the rolling high and low; a full window is required.
    lower_ratio, upper_ratio : float
        Retracement fractions of the range, e.g. 0.382 and 0.618
        (expected lower_ratio < upper_ratio).
    drop_last : int
        Number of trailing observations removed from the output.

    Returns
    -------
    (regs, valid)
        ``valid`` is a boolean Series over the first ``len(prices) - drop_last``
        rows, False where the rolling range is not yet defined. ``regs`` holds
        the labels of the valid rows: 2 (bull) above low + upper_ratio*range,
        0 (bear) below low + lower_ratio*range, 1 (neutral) in between.
    """
    lookback = prices.rolling(window=window, min_periods=window)
    top = lookback.max()
    bottom = lookback.min()
    span = top - bottom

    bear_cut = bottom + lower_ratio * span
    bull_cut = bottom + upper_ratio * span

    # first matching condition wins, so the bull test takes precedence
    labels = np.select([prices > bull_cut, prices < bear_cut], [2, 0], default=1)

    n_keep = len(prices) - drop_last
    valid = bear_cut.iloc[:n_keep].notna()
    return labels[:n_keep][valid], valid

def grid_search_fib(price_file:     str,
                    window_range:   range,
                    ratio_range:    list,
                    inst:           int = 0,
                    drop_last:      int = 10) -> pd.DataFrame:
    """
    Grid-search the rolling-range ("Fibonacci") regime rule against the autolabeller.

    Every look-back in ``window_range`` is combined with every ordered pair
    of ratios from ``ratio_range`` whose upper ratio is strictly larger than
    the lower one. Both predictions and reference labels exclude the final
    ``drop_last`` rows.

    Returns a DataFrame with columns ``window``, ``lower_ratio``,
    ``upper_ratio`` and ``accuracy``, sorted by accuracy in descending order.
    """
    price_matrix = pd.read_csv(price_file, sep=r"\s+", header=None)
    n_rows = price_matrix.shape[0]
    reference = plot_all_regimes_long(
        end_point=n_rows,
        plot_graph=False,
        inst=inst,
    )[: n_rows - drop_last]
    series = price_matrix.iloc[:, inst]

    candidates = [
        (lookback, low_r, high_r)
        for lookback in window_range
        for low_r in ratio_range
        for high_r in ratio_range
        if high_r > low_r
    ]

    scored = []
    for lookback, low_r, high_r in candidates:
        predicted, keep = compute_fib_regimes(
            prices=series,
            window=lookback,
            lower_ratio=low_r,
            upper_ratio=high_r,
            drop_last=drop_last,
        )
        scored.append({
            "window":      lookback,
            "lower_ratio": low_r,
            "upper_ratio": high_r,
            "accuracy":    accuracy_score(reference[keep], predicted),
        })

    return pd.DataFrame(scored).sort_values("accuracy", ascending=False)

# ─── Example usage ────────────────────────────────────────
# The guard is true when the cell is run in a notebook kernel or as a script.
if __name__ == "__main__":
    windows     = range(20, 121, 10)  # 20,30,...,120-day lookbacks
    # candidate retracement ratios; only pairs with upper > lower are scored
    fib_levels  = [0.236, 0.382, 0.5, 0.618, 0.786]

    df_fib = grid_search_fib(
        "prices.txt",
        windows,
        fib_levels,
        inst=0,
        drop_last=10
    )

    # top 10
    print("Top 10 Fibonacci grid-search results:")
    print(df_fib.head(10).to_string(index=False))

    # best single combo (first row, because the results are sorted by accuracy)
    best = df_fib.iloc[0]
    print(f"\nOptimal Fib params → window={best.window}, "
          f"lower={best.lower_ratio}, upper={best.upper_ratio}, "
          f"accuracy={best.accuracy:.4f}")


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long  # your autolabeller

def compute_psar_regimes(high: pd.Series,
                        low: pd.Series,
                        close: pd.Series,
                        step: float,
                        max_step: float,
                        drop_last: int = 10):
    """
    Regimes from a simplified Parabolic SAR computed on the prices as given
    (no log transform is applied in this function).

    Parameters:
    - high, low, close: per-bar series, read positionally and assumed to
      have equal length.
    - step:      initial acceleration factor (e.g. 0.02)
    - max_step:  maximum acceleration factor (e.g. 0.20)
    - drop_last: number of trailing bars removed from the output.

    Algorithm:
      * the trend starts as "up"; PSAR_0 = low_0 and EP (extreme price) = high_0
      * PSAR_t = PSAR_{t-1} + AF * (EP - PSAR_{t-1})
      * uptrend:   if low_t < PSAR_t the trend flips down, PSAR_t is reset to
                   the old EP, EP becomes low_t and AF is reset to `step`;
                   otherwise a new high raises EP and increases AF by `step`
                   (capped at `max_step`)
      * downtrend: mirror image, using high_t for the flip and low_t for EP
    Regime = 2 if close > PSAR, else 0

    Returns (regs, valid): `valid` is a boolean mask over the first
    len(close) - drop_last bars marking non-NaN PSAR values, and `regs` holds
    the regimes of those bars.
    """
    # initialize
    n      = len(close)
    psar   = np.zeros(n)
    trend_up = True
    af       = step
    ep       = high.iloc[0]   # extreme point
    # trend_up is always True at this point, so this takes low.iloc[0]
    psar[0]  = low.iloc[0] if trend_up else high.iloc[0]

    # iterative PSAR
    for t in range(1, n):
        prev_psar = psar[t-1]
        # tentative value; overwritten below if the trend reverses on this bar
        psar[t]   = prev_psar + af * (ep - prev_psar)

        if trend_up:
            # check for reversal
            if low.iloc[t] < psar[t]:
                trend_up = False
                psar[t]  = ep            # the SAR jumps to the old extreme
                ep       = low.iloc[t]
                af       = step
            else:
                # update EP & AF
                if high.iloc[t] > ep:
                    ep = high.iloc[t]
                    af = min(af + step, max_step)
        else:
            # downtrend: reversal when the high pierces the SAR from below
            if high.iloc[t] > psar[t]:
                trend_up = True
                psar[t]  = ep            # the SAR jumps to the old extreme
                ep       = high.iloc[t]
                af       = step
            else:
                # new low extends the downtrend: update EP & AF
                if low.iloc[t] < ep:
                    ep = low.iloc[t]
                    af = min(af + step, max_step)

    # build regimes
    regs_full = np.where(close.values > psar, 2, 0)

    # trim tail to match labeller’s drop_last
    N         = n - drop_last
    regs_trim = regs_full[:N]
    psar_trim = psar[:N]

    # psar starts as zeros, so NaNs can only come from NaNs in the inputs
    valid = ~np.isnan(psar_trim)
    return regs_trim[valid], valid

def grid_search_psar(price_file: str,
                     step_range:   list,
                     max_range:    list,
                     inst:         int = 0,
                     drop_last:    int = 10,
                     high_col:     int = None,
                     low_col:      int = None) -> pd.DataFrame:
    """
    Grid-search Parabolic SAR (step, max_step) pairs against the autolabeller.

    Parameters
    ----------
    price_file : str
        Whitespace-separated price matrix without a header row.
    step_range, max_range : iterable of float
        Candidate initial / maximum acceleration factors. Pairs with
        ``max_step <= step`` are skipped.
    inst : int, default 0
        Column holding the close prices; also passed to the autolabeller.
    drop_last : int, default 10
        Trailing rows excluded from both predictions and reference labels.
    high_col, low_col : int, optional
        Columns holding the highs and lows. When omitted they default to
        ``inst + 1`` and ``inst + 2`` (the original layout assumption).
        NOTE(review): the other searches in this notebook treat every column
        of the same file as a separate instrument's price; if the file has no
        high/low columns, pass ``high_col=inst, low_col=inst`` so the
        neighbouring instruments are not used as highs and lows.

    Returns
    -------
    pd.DataFrame
        Columns ``step``, ``max_step``, ``accuracy``, sorted by accuracy in
        descending order. Empty (but with those columns) when no pair
        qualifies.
    """
    # 1) load price data & true regimes
    df        = pd.read_csv(price_file, sep=r"\s+", header=None)
    T         = df.shape[0]
    true_regs = plot_all_regimes_long(
                    end_point=T,
                    plot_graph=False,
                    inst=inst,
                )[: T - drop_last]

    # 2) pick the price columns (defaults reproduce the previous behaviour)
    if high_col is None:
        high_col = inst + 1
    if low_col is None:
        low_col = inst + 2
    close = df.iloc[:, inst]
    high  = df.iloc[:, high_col]
    low   = df.iloc[:, low_col]

    # 3) grid over step × max_step
    rows = []
    for step in step_range:
        for max_step in max_range:
            if max_step <= step:
                continue
            pred, valid = compute_psar_regimes(
                high=high,
                low=low,
                close=close,
                step=step,
                max_step=max_step,
                drop_last=drop_last
            )
            acc = accuracy_score(true_regs[valid], pred)
            rows.append({
                "step":     step,
                "max_step": max_step,
                "accuracy": acc
            })

    # Explicit columns keep sort_values from raising KeyError on an empty grid.
    results = pd.DataFrame(rows, columns=["step", "max_step", "accuracy"])
    return results.sort_values("accuracy", ascending=False)

# Example usage; the guard is true when the cell is run in a notebook kernel
# or as a script.
if __name__ == "__main__":
    # parameter ranges to try (pairs with max_step <= step are skipped by the search)
    step_range = [0.01, 0.02, 0.03, 0.04, 0.05]
    max_range  = [0.1, 0.15, 0.2, 0.25, 0.3]

    df_psar = grid_search_psar(
        "prices.txt",
        step_range,
        max_range,
        inst=0,
        drop_last=10
    )

    # print top 10 and best combo
    print("Top 10 Parabolic SAR parameter combos:")
    print(df_psar.head(10).to_string(index=False))

    # first row is the best one because the results are sorted by accuracy
    best = df_psar.iloc[0]
    print(f"\nOptimal PSAR params → step={best.step}, "
          f"max_step={best.max_step}, accuracy={best.accuracy:.4f}")


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long  # your autolabeller

def compute_zscore_regimes(prices: pd.Series,
                           window: int,
                           threshold: float,
                           drop_last: int = 10):
    """
    Regimes from the rolling z-score of the price.

    The z-score is (price - rolling mean) / rolling std, both taken over
    ``window`` observations and only once a full window is available.

    Labels:
      - 2 (bull)    when z >  threshold
      - 0 (bear)    when z < -threshold
      - 1 (neutral) otherwise

    Returns
    -------
    (regs, valid)
        ``valid`` is a boolean Series over the first ``len(prices) - drop_last``
        rows, False where the z-score is NaN; ``regs`` holds the labels of
        the valid rows only.
    """
    lookback = prices.rolling(window=window, min_periods=window)
    zscores = (prices - lookback.mean()) / lookback.std()

    # first matching condition wins, so the bull test takes precedence
    labels = np.select([zscores > threshold, zscores < -threshold], [2, 0], default=1)

    n_keep = len(prices) - drop_last
    valid = zscores.iloc[:n_keep].notna()
    return labels[:n_keep][valid], valid

def grid_search_zscore(price_file: str,
                       window_range: range,
                       threshold_range: list,
                       inst: int = 0,
                       drop_last: int = 10) -> pd.DataFrame:
    """
    Grid-search rolling z-score (window, threshold) pairs against the autolabeller.

    Neutral predictions (label 1) are scored like any other label. Both
    predictions and reference labels exclude the final ``drop_last`` rows.

    Returns a DataFrame with columns ``window``, ``threshold`` and
    ``accuracy``, sorted by accuracy in descending order.
    """
    price_matrix = pd.read_csv(price_file, sep=r"\s+", header=None)
    n_rows = price_matrix.shape[0]
    reference = plot_all_regimes_long(
        end_point=n_rows,
        plot_graph=False,
        inst=inst,
    )[: n_rows - drop_last]
    series = price_matrix.iloc[:, inst]

    def _score(lookback, cutoff):
        # the mask returned with the predictions selects the matching labels
        predicted, keep = compute_zscore_regimes(
            prices=series,
            window=lookback,
            threshold=cutoff,
            drop_last=drop_last,
        )
        return accuracy_score(reference[keep], predicted)

    records = [
        {"window": lookback, "threshold": cutoff, "accuracy": _score(lookback, cutoff)}
        for lookback in window_range
        for cutoff in threshold_range
    ]
    return pd.DataFrame(records).sort_values("accuracy", ascending=False)

# Example usage; the guard is true when the cell is run in a notebook kernel
# or as a script.
if __name__ == "__main__":
    windows = range(10, 201, 10)          # e.g., 10,20,...,200
    thresholds = [0.5, 1.0, 1.5, 2.0]      # z-score thresholds

    # Evaluate instrument column 0, ignoring the final 10 rows.
    df_z = grid_search_zscore(
        "prices.txt",
        windows,
        thresholds,
        inst=0,
        drop_last=10
    )
    print("Top Z-score regime parameter combos:")
    print(df_z.head(10).to_string(index=False))

    # first row is the best one because the results are sorted by accuracy
    best = df_z.iloc[0]
    print(f"\nOptimal Z-score params → window={best.window}, "
          f"threshold={best.threshold}, accuracy={best.accuracy:.4f}")


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long  # your autolabeller

def compute_zscore_regimes(prices: pd.Series,
                           window: int,
                           threshold: float,
                           drop_last: int = 10):
    """
    Classify each observation by its rolling z-score.

    z_t = (price_t - mean_t) / std_t, with mean and std taken over the
    trailing ``window`` prices (NaN until a full window exists).

    Labels: 2 (bull) if z_t > threshold, 0 (bear) if z_t < -threshold,
    1 (neutral) otherwise. This function only produces the labels; whether
    neutral rows are scored is up to the caller.

    Returns
    -------
    (regs, valid)
        ``valid``: boolean Series over the first ``len(prices) - drop_last``
        rows marking non-NaN z-scores. ``regs``: labels of the valid rows.
    """
    rolling_mean = prices.rolling(window=window, min_periods=window).mean()
    rolling_std = prices.rolling(window=window, min_periods=window).std()
    z = (prices - rolling_mean) / rolling_std
    z_vals = z.to_numpy()

    # Start neutral, then stamp bear and finally bull so that bull wins if
    # both tests could ever hold (only possible for a negative threshold).
    labels = np.full(z_vals.shape, 1)
    labels[z_vals < -threshold] = 0
    labels[z_vals > threshold] = 2

    n_keep = len(prices) - drop_last
    z_head = z[:n_keep]
    valid = ~z_head.isna()
    return labels[:n_keep][valid], valid

def grid_search_zscore(price_file: str,
                       window_range: range,
                       threshold_range: list,
                       inst: int = 0,
                       drop_last: int = 10) -> pd.DataFrame:
    """
    Grid-search rolling z-score (window, threshold) pairs, scoring only the
    rows where the rule commits to bull (2) or bear (0).

    Neutral predictions (label 1) are dropped before scoring, so each
    accuracy is computed on a different number of rows; that number is
    reported as ``num_compared``. When a combination never predicts
    bull/bear its accuracy is NaN.

    Returns
    -------
    pd.DataFrame
        Columns ``window``, ``threshold``, ``accuracy``, ``num_compared``,
        sorted by accuracy in descending order (NaN accuracies last). Empty
        (but with those columns) when the grid is empty.
    """
    # Load price data and true regimes
    df = pd.read_csv(price_file, sep=r"\s+", header=None)
    T  = df.shape[0]
    true_regs = plot_all_regimes_long(
        end_point=T,
        plot_graph=False,
        inst=inst,
    )[: T - drop_last]

    # Work on plain arrays: wrapping an existing Series in
    # pd.Series(..., index=...) re-indexes by label instead of relabelling,
    # which would misalign (or NaN-fill) the reference labels.
    true_arr = np.asarray(true_regs)

    prices0 = df.iloc[:, inst]
    records = []

    for w in window_range:
        for th in threshold_range:
            pred, valid = compute_zscore_regimes(
                prices=prices0,
                window=w,
                threshold=th,
                drop_last=drop_last
            )
            pred_arr  = np.asarray(pred)
            true_trim = true_arr[np.asarray(valid, dtype=bool)]

            # Filter out neutral predictions (label=1)
            decided      = pred_arr != 1
            num_compared = int(decided.sum())

            if num_compared == 0:
                acc = np.nan  # no bull/bear predictions to score
            else:
                acc = accuracy_score(true_trim[decided], pred_arr[decided])

            records.append({
                "window": w,
                "threshold": th,
                "accuracy": acc,
                "num_compared": num_compared
            })

    # Explicit columns keep sort_values from raising KeyError on an empty grid.
    results = pd.DataFrame(
        records,
        columns=["window", "threshold", "accuracy", "num_compared"],
    )
    return results.sort_values("accuracy", ascending=False)

# Example usage; the guard is true when the cell is run in a notebook kernel
# or as a script.
if __name__ == "__main__":
    windows = range(10, 201, 10)         # e.g., 10,20,...,200
    thresholds = [0.5, 1.0, 1.5, 2.0]    # Z-score thresholds

    # Evaluate instrument column 0, ignoring the final 10 rows.
    df_z = grid_search_zscore(
        "prices.txt",
        windows,
        thresholds,
        inst=0,
        drop_last=10
    )

    print("Top Z-score regime parameter combos (bull/bear only):")
    print(df_z.head(10).to_string(index=False))

    # first row is the best one because the results are sorted by accuracy;
    # num_compared shows how many rows that accuracy was measured on
    best = df_z.iloc[0]
    print(f"\nOptimal binary Z-score params → window={best.window}, "
          f"threshold={best.threshold}, accuracy={best.accuracy:.4f}, "
          f"compared on {int(best.num_compared)} samples")


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long  # or wherever you have it

def compute_weighted_return_regimes(prices: pd.Series,
                                    window: int,
                                    power:  float,
                                    drop_last: int = 10):
    """
    Regimes from a recency-weighted average of one-step returns.

    Parameters
    ----------
    prices : pd.Series
        Price series of a single instrument.
    window : int
        Look-back, in observations.
    power : float or int
        Exponent on the linear weights: w_i = i**power for i = 1..window,
        normalised to sum to 1. The most recent return gets i = window.
        Integer and negative exponents are accepted.
    drop_last : int
        Number of trailing observations removed from the output.

    Returns
    -------
    (regs, valid)
        ``valid`` is a boolean Series over the first ``len(prices) - drop_last``
        rows, False where fewer than ``window`` returns are available.
        ``regs`` holds the labels of the valid rows: 2 (bull) if the weighted
        return is > 0, 0 (bear) if < 0, 1 (neutral) if exactly 0.
    """
    # 1) 1-day returns
    r = prices.pct_change()

    # 2) build weights [1**p, 2**p, …, window**p] normalized.
    #    The base must be float: with an integer `power`, an integer base
    #    yields an integer array and the normalising division cannot be
    #    stored back into it (and negative integer powers are rejected).
    idx = np.arange(1, window + 1, dtype=float)
    w   = idx ** power
    w   = w / w.sum()

    # 3) rolling weighted sum; each call receives the last `window` returns,
    #    oldest first, so the largest index weight lands on the newest return
    def wavg(arr):
        return np.dot(arr, w)

    wr = r.rolling(window=window, min_periods=window).apply(wavg, raw=True)

    # 4) regime: bull if >0, bear if <0, else neutral
    regs_full = np.where(wr >  0, 2,
                 np.where(wr <  0, 0, 1))

    # 5) trim tail & head
    N        = len(prices) - drop_last
    regs_t   = regs_full[:N]
    wr_t     = wr[:N]
    valid    = ~np.isnan(wr_t)

    return regs_t[valid], valid

def grid_search_weighted_returns(price_file:    str,
                                 window_range:  range,
                                 power_range:   list,
                                 inst:          int = 0,
                                 drop_last:     int = 10) -> pd.DataFrame:
    """
    Grid-search the weighted-return regime rule against the autolabeller.

    Every look-back in ``window_range`` is combined with every weight
    exponent in ``power_range``. Both predictions and reference labels
    exclude the final ``drop_last`` rows.

    Returns a DataFrame with columns ``window``, ``power`` and ``accuracy``,
    sorted by accuracy in descending order.
    """
    price_matrix = pd.read_csv(price_file, sep=r"\s+", header=None)
    n_rows = price_matrix.shape[0]
    reference = plot_all_regimes_long(
        end_point=n_rows,
        plot_graph=False,
        inst=inst,
    )[: n_rows - drop_last]
    series = price_matrix.iloc[:, inst]

    scored = []
    for lookback in window_range:
        for exponent in power_range:
            predicted, keep = compute_weighted_return_regimes(
                prices=series,
                window=lookback,
                power=exponent,
                drop_last=drop_last,
            )
            # the mask returned with the predictions selects the matching labels
            score = accuracy_score(reference[keep], predicted)
            scored.append({"window": lookback, "power": exponent, "accuracy": score})

    table = pd.DataFrame(scored)
    return table.sort_values("accuracy", ascending=False)

# ─── Example usage ────────────────────────────────────────────
# The guard is true when the cell is run in a notebook kernel or as a script.
if __name__ == "__main__":
    windows = range(5, 51, 5)     # try 5,10,…,50-day lookbacks
    powers  = [0.5, 1.0, 1.5, 2.0] # weight exponents

    # Evaluate instrument column 0, ignoring the final 10 rows.
    df_wr = grid_search_weighted_returns(
        "prices.txt",
        windows,
        powers,
        inst=0,
        drop_last=10
    )
    print("Top weighted-returns regime parameter combos:")
    print(df_wr.head(10).to_string(index=False))

    # first row is the best one because the results are sorted by accuracy
    best = df_wr.iloc[0]
    print(f"\nOptimal params → window={best.window}, power={best.power}, "
          f"accuracy={best.accuracy:.4f}")


In [None]:
To save your Jupyter notebook as a PDF:

1. In the Jupyter notebook interface, go to the menu bar and click on **File**.
2. Select **Download as** > **PDF via LaTeX (.pdf)**.

If you do not see the PDF option, you may need to install TeX (such as MiKTeX for Windows or MacTeX for Mac) and the `nbconvert` dependencies.  
Alternatively, you can:

- Go to **File** > **Print Preview**, then use your browser’s print dialog to "Save as PDF".

With the **Download as** route, the browser saves the PDF to its default downloads folder; with the print dialog, you choose the destination when saving.