In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long  # your autolabeller

def compute_ma_regimes(prices: pd.Series,
                       short_w: int,
                       long_w:  int) -> np.ndarray:
    """
    Label each bar by a moving-average crossover on log prices.

    Both averages are simple rolling means with ``min_periods=1``, so the
    result has exactly one label per input bar. A bar is 0 (bear) when the
    long average is strictly above the short one and 2 (bull) otherwise,
    which means ties are labelled bull.
    """
    log_prices = np.log(prices)

    def rolling_mean(window: int) -> pd.Series:
        return log_prices.rolling(window=window, min_periods=1).mean()

    fast = rolling_mean(short_w)
    slow = rolling_mean(long_w)
    return np.where(slow > fast, 0, 2)

def per_inst_ma_grid(df_prices: pd.DataFrame,
                     inst: int,
                     endpoint: int,
                     short_range: range,
                     long_range:  range) -> dict:
    """
    Find the MA window pair that best reproduces the autolabeller's regimes
    for one instrument, using only the first `endpoint` rows of its column.

    Only pairs with long_w > short_w are scored, in short-major order, and a
    later pair replaces the current best only when strictly more accurate.
    Returns a dict with keys "short_w", "long_w" and "accuracy"; if no pair
    qualifies the windows stay None and the accuracy stays -1.
    """
    series = df_prices.iloc[:endpoint, inst]
    labels = plot_all_regimes_long(end_point=endpoint,
                                   plot_graph=False,
                                   inst=inst)
    n_labels = len(labels)

    candidates = (
        (fast_w, slow_w)
        for fast_w in short_range
        for slow_w in long_range
        if slow_w > fast_w
    )

    winner = {"short_w": None, "long_w": None, "accuracy": -1}
    for fast_w, slow_w in candidates:
        # predictions are cut to the number of labels the autolabeller returned
        guess = compute_ma_regimes(series, fast_w, slow_w)[:n_labels]
        score = accuracy_score(labels, guess)
        if score > winner["accuracy"]:
            winner = {"short_w": fast_w, "long_w": slow_w, "accuracy": score}
    return winner

def compare_ma_two_endpoints(price_file: str,
                             short_range: range,
                             long_range:  range,
                             endpoint1: int,
                             endpoint2: int,
                             n_inst:   int = 50) -> pd.DataFrame:
    """
    Grid-search the best MA windows for instruments 0..n_inst-1 at two
    history lengths and tabulate how the optimum moves between them.

    `price_file` is read as a whitespace-separated table without a header,
    one column per instrument. The returned DataFrame has one row per
    instrument holding the best windows and accuracy at each endpoint plus
    the endpoint2-minus-endpoint1 differences (the Δ columns).
    """
    price_table = pd.read_csv(price_file, sep=r"\s+", header=None)

    def search(inst: int, endpoint: int) -> dict:
        # progress line first, then the (slow) grid search itself
        print(f"Instrument {inst:02d} → endpoint {endpoint}")
        return per_inst_ma_grid(price_table, inst, endpoint, short_range, long_range)

    rows = []
    for inst in range(n_inst):
        first = search(inst, endpoint1)
        second = search(inst, endpoint2)

        row = {
            "inst":    inst,
            "sw_ep1":  first["short_w"],
            "lw_ep1":  first["long_w"],
            "acc_ep1": first["accuracy"],
            "sw_ep2":  second["short_w"],
            "lw_ep2":  second["long_w"],
            "acc_ep2": second["accuracy"],
        }
        row["Δsw"] = second["short_w"] - first["short_w"]
        row["Δlw"] = second["long_w"] - first["long_w"]
        row["Δacc"] = second["accuracy"] - first["accuracy"]
        rows.append(row)

    return pd.DataFrame(rows)

# ─── Example usage ────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Whitespace-separated price table, one column per instrument.
    price_file   = "prices.txt"
    # Search grid: short windows 5..100 in steps of 5, long windows 20..100
    # in steps of 5 (pairs with long <= short are skipped by the search).
    short_range  = range(5,  101,  5)
    long_range   = range(20, 101, 5)
    # The two history lengths (in rows) whose optima are compared.
    ep1, ep2     = 500, 750

    df_compare = compare_ma_two_endpoints(
        price_file,
        short_range,
        long_range,
        endpoint1=ep1,
        endpoint2=ep2,
        n_inst=50
    )

    # Print the full per-instrument comparison table without the row index.
    print(df_compare.to_string(index=False))
   


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long

def compute_ma_regimes(prices: pd.Series, short_w: int, long_w: int) -> np.ndarray:
    """
    MA-crossover regimes on log prices, one label per bar.

    Returns 0 (bear) where the long rolling mean is strictly above the short
    rolling mean and 2 (bull) everywhere else. Both means use min_periods=1.
    """
    log_px = np.log(prices)
    fast, slow = (
        log_px.rolling(window=width, min_periods=1).mean()
        for width in (short_w, long_w)
    )
    bearish = slow > fast
    return np.where(bearish, 0, 2)

def per_inst_ma_best(df_prices: pd.DataFrame, inst: int, endpoint: int,
                     short_range: range, long_range: range, drop_last: int=0):
    """
    Grid-search MA windows for one instrument over its first `endpoint` rows
    and report the pair that agrees best with the autolabeller.

    Only pairs with long_w > short_w are scored. Prints a one-line summary
    and returns a dict with keys 'short_w', 'long_w' and 'acc' (None/None/-1
    when no pair qualifies). `drop_last` is accepted but not used here.
    """
    window = df_prices.iloc[:endpoint, inst]
    labels = plot_all_regimes_long(end_point=endpoint, plot_graph=False, inst=inst)

    top_sw, top_lw, top_acc = None, None, -1
    for sw in short_range:
        for lw in long_range:
            if lw > sw:
                # cut predictions to the number of labels returned
                guess = compute_ma_regimes(window, sw, lw)[:len(labels)]
                score = accuracy_score(labels, guess)
                if score > top_acc:
                    top_sw, top_lw, top_acc = sw, lw, score

    best = {'short_w': top_sw, 'long_w': top_lw, 'acc': top_acc}
    print(f"  Instrument {inst:02d} @ {endpoint}d → best (sw, lw) = ({best['short_w']}, {best['long_w']}) with acc={best['acc']:.4f}")
    return best

def find_universal_ma(price_file: str, short_range: range, long_range: range,
                      ep1: int, ep2: int, tol: float=0.1, n_inst: int=50):
    """
    Pick one (short_w, long_w) MA pair that works across "stable" instruments.

    Steps:
      1) per instrument, grid-search the best pair at endpoints `ep1` and `ep2`;
      2) keep the instruments whose best short and long windows each moved by
         at most `tol`, measured relative to the `ep1` value;
      3) over those instruments, score every pair at `ep2` and rank the pairs
         by mean accuracy against the autolabeller.

    `price_file` is read as a whitespace-separated table without a header,
    one column per instrument.

    Returns (dfc, df_univ, univ):
      dfc     - per-instrument results of step 1
      df_univ - one row per tested pair, sorted by mean_acc descending
      univ    - dict for the top pair with keys short_w, long_w, mean_acc and
                n_stable; the windows are plain ints so they can be passed
                straight to rolling(). When no instrument is stable, df_univ
                is empty and univ holds None windows, NaN mean_acc and
                n_stable 0 instead of an arbitrary pair.

    Raises ValueError if the ranges contain no pair with long_w > short_w.
    """
    # Fail fast: without a valid pair the per-instrument search returns None
    # windows and the differences below would die with an opaque TypeError.
    if not any(lw > sw for sw in short_range for lw in long_range):
        raise ValueError("no (short_w, long_w) pair with long_w > short_w in the given ranges")

    df = pd.read_csv(price_file, sep=r"\s+", header=None)

    # step 1: per-inst best at ep1 and ep2
    print(f"=== Step 1: per-instrument grid search at endpoints {ep1} and {ep2} ===")
    rec = []
    for inst in range(n_inst):
        print(f"\n>> Instrument {inst:02d}")
        b1 = per_inst_ma_best(df, inst, ep1, short_range, long_range)
        b2 = per_inst_ma_best(df, inst, ep2, short_range, long_range)
        rec.append({
            'inst': inst,
            'sw1': b1['short_w'], 'lw1': b1['long_w'], 'acc1': b1['acc'],
            'sw2': b2['short_w'], 'lw2': b2['long_w'], 'acc2': b2['acc'],
            'd_sw': abs(b2['short_w']-b1['short_w'])/b1['short_w'],
            'd_lw': abs(b2['long_w']-b1['long_w'])/b1['long_w']
        })
    # explicit columns keep the frame usable even when n_inst == 0
    dfc = pd.DataFrame(rec, columns=['inst', 'sw1', 'lw1', 'acc1',
                                     'sw2', 'lw2', 'acc2', 'd_sw', 'd_lw'])

    # step 2: select stable instruments
    stable = dfc[(dfc['d_sw'] <= tol) & (dfc['d_lw'] <= tol)]['inst'].tolist()
    print(f"\n=== Step 2: {len(stable)}/{n_inst} instruments are stable (≤{tol*100:.0f}% change) ===")

    # step 3: grid search on ep2 across stable instruments
    print(f"\n=== Step 3: global grid search at endpoint {ep2} using only stable instruments ===")
    univ_cols = ['short_w', 'long_w', 'mean_acc', 'n_stable']
    if not stable:
        # Averaging over zero instruments would give NaN for every pair and
        # the "best" row would be arbitrary, so report that nothing was found.
        print("  No stable instruments - skipping the global search.")
        no_univ = {'short_w': None, 'long_w': None,
                   'mean_acc': float('nan'), 'n_stable': 0}
        return dfc, pd.DataFrame(columns=univ_cols), no_univ

    # Prices and true labels depend only on the instrument, so fetch them once
    # instead of re-running the autolabeller for every (sw, lw) pair.
    labelled = [
        (df.iloc[:ep2, inst],
         plot_all_regimes_long(end_point=ep2, plot_graph=False, inst=inst))
        for inst in stable
    ]

    records = []
    for sw in short_range:
        best_for_sw = None
        for lw in long_range:
            if lw <= sw:
                continue
            accs = [
                accuracy_score(true, compute_ma_regimes(prices, sw, lw)[:len(true)])
                for prices, true in labelled
            ]
            mean_acc = np.mean(accs)
            records.append({'short_w': sw, 'long_w': lw, 'mean_acc': mean_acc, 'n_stable': len(stable)})
            if best_for_sw is None or mean_acc > best_for_sw:
                best_for_sw = mean_acc
        # a short window with no longer partner has nothing to report
        if best_for_sw is not None:
            print(f"  Tested short_w={sw}, top mean_acc so far={best_for_sw:.4f}")
    df_univ = pd.DataFrame(records, columns=univ_cols).sort_values('mean_acc', ascending=False)

    # A row of a mixed int/float frame comes back as floats; restore the ints.
    top = df_univ.iloc[0]
    univ = {
        'short_w': int(top['short_w']),
        'long_w': int(top['long_w']),
        'mean_acc': float(top['mean_acc']),
        'n_stable': int(top['n_stable']),
    }
    print("\n=== Universal MA params selected ===")
    print(f"short_w={univ['short_w']}, long_w={univ['long_w']}, mean_acc={univ['mean_acc']:.4f}, using {univ['n_stable']} instruments")

    return dfc, df_univ, univ


In [None]:
# Search grid: short windows 5..50 step 5, long windows 20..200 step 10.
shorts = range(5,  51,  5)
longs  = range(20, 201, 10)
# Per-instrument search at 500 and 750 rows, then a global search restricted
# to instruments whose best windows changed by at most 10% between the two.
df_compare, df_univ, universal = find_universal_ma(
    price_file="prices.txt",
    short_range=shorts,
    long_range=longs,
    ep1=500,
    ep2=750,
    tol=0.10,
    n_inst=50
)

print("Universal MA params:", universal)
# Notebook display: the ten pairs with the highest mean accuracy.
df_univ.head(10)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def compute_ma_regimes(prices: pd.Series, short_w: int, long_w: int) -> pd.Series:
    """
    MA-crossover regimes on log prices, returned on the index of `prices`.

    A bar is 2 (bull) when the short rolling mean is strictly above the long
    rolling mean and 0 (bear) otherwise, so ties are labelled bear. Both
    means use min_periods=1, giving one label per input bar.
    """
    log_px = np.log(prices)

    def smooth(window: int) -> pd.Series:
        return log_px.rolling(window=window, min_periods=1).mean()

    bullish = smooth(short_w) > smooth(long_w)
    labels = np.where(bullish, 2, 0)
    return pd.Series(labels, index=prices.index)

def plot_all_ma_regimes(price_file: str, short_w: int, long_w: int):
    """
    Plot every instrument (column) of `price_file` with MA-regime shading.

    One figure per instrument: the price line in black over a green
    background where the MA(short_w, long_w) regime is bull (2) and a red
    background where it is bear (0). The file is read as a
    whitespace-separated table without a header.
    """
    table = pd.read_csv(price_file, sep=r'\s+', header=None)

    for inst in range(table.shape[1]):
        series = table.iloc[:, inst]
        labels = compute_ma_regimes(series, short_w, long_w)

        fig, ax = plt.subplots(figsize=(12, 4))

        # Bull spans are drawn first, then bear spans, then the price line.
        for code, shade in ((2, 'green'), (0, 'red')):
            in_regime = labels == code
            # a run starts where the previous bar is outside the regime and
            # ends where the next bar is outside it
            prev_in = in_regime.shift(1, fill_value=False)
            next_in = in_regime.shift(-1, fill_value=False)
            first_bars = np.flatnonzero(in_regime & ~prev_in)
            last_bars = np.flatnonzero(in_regime & ~next_in)
            # NOTE: spans run from the first to the last bar of a run, so a
            # one-bar run produces a zero-width span.
            for lo, hi in zip(first_bars, last_bars):
                ax.axvspan(lo, hi, color=shade, alpha=0.3)

        ax.plot(series.values, color='black', linewidth=1)
        ax.set_title(f"Instrument {inst} — MA({short_w},{long_w}) Regimes")
        ax.set_ylabel("Price")
        ax.set_xlabel("Time")
        plt.tight_layout()
        plt.show()


In [None]:

# Draw the MA(5, 40) crossover regimes for every instrument in prices.txt.
plot_all_ma_regimes("prices.txt", short_w=5, long_w=40)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def compute_macd_regimes(prices: pd.Series,
                         short_span:  int,
                         long_span:   int,
                         signal_span: int) -> pd.Series:
    """
    MACD regimes on log prices, returned on the index of `prices`.

    The MACD line is EMA(short_span) - EMA(long_span) of the log price and
    the signal line is EMA(signal_span) of the MACD line (all EMAs use
    adjust=False). A bar is 2 (bull) when the MACD line is strictly above
    the signal line and 0 (bear) otherwise.
    """
    def ema(series: pd.Series, span: int) -> pd.Series:
        return series.ewm(span=span, adjust=False).mean()

    log_px = np.log(prices)
    macd = ema(log_px, short_span) - ema(log_px, long_span)
    signal = ema(macd, signal_span)

    bullish = macd > signal
    return pd.Series(np.where(bullish, 2, 0), index=prices.index)

def plot_all_macd_regimes(price_file: str,
                          short_span:  int,
                          long_span:   int,
                          signal_span: int):
    """
    Plot every instrument (column) of `price_file` with MACD-regime shading.

    One figure per instrument: the price line in black, green background
    where the MACD regime is bull (2), red where it is bear (0). The file is
    read as a whitespace-separated table without a header.
    """
    def run_bounds(mask: pd.Series):
        # Pair the first and last positional index of each run of True.
        opens = mask & ~mask.shift(1, fill_value=False)
        closes = mask & ~mask.shift(-1, fill_value=False)
        return zip(np.flatnonzero(opens), np.flatnonzero(closes))

    table = pd.read_csv(price_file, sep=r'\s+', header=None)

    for inst in range(table.shape[1]):
        series = table.iloc[:, inst]
        labels = compute_macd_regimes(series, short_span, long_span, signal_span)

        fig, ax = plt.subplots(figsize=(12, 4))

        # bull background first, then bear, then the price on top
        for lo, hi in run_bounds(labels == 2):
            ax.axvspan(lo, hi, color='green', alpha=0.3)
        for lo, hi in run_bounds(labels == 0):
            ax.axvspan(lo, hi, color='red', alpha=0.3)

        ax.plot(series.values, color='black', linewidth=1)
        ax.set_title(f"Instrument {inst} — MACD({short_span},{long_span},{signal_span}) Regimes")
        ax.set_ylabel("Price")
        ax.set_xlabel("Time")
        plt.tight_layout()
        plt.show()


In [None]:
# Draw MACD(50, 90, 40) regimes for every instrument in prices.txt;
# substitute the spans found by your own search.
plot_all_macd_regimes(
    "prices.txt",
    short_span=50,
    long_span=90,
    signal_span=40
)


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long

# ─── Regime-computation functions ─────────────────────────────────────────

def compute_dema_regimes(prices: pd.Series, short_span: int, long_span: int) -> np.ndarray:
    """
    Double-EMA crossover regimes on log prices, one label per bar.

    DEMA(span) = 2*EMA(span) - EMA(EMA(span)), with adjust=False throughout.
    A bar is 2 when DEMA(short_span) is strictly above DEMA(long_span) and 0
    otherwise.
    """
    log_px = np.log(prices)

    def dema(span: int) -> pd.Series:
        once = log_px.ewm(span=span, adjust=False).mean()
        twice = once.ewm(span=span, adjust=False).mean()
        return 2 * once - twice

    fast = dema(short_span)
    slow = dema(long_span)
    return np.where(fast > slow, 2, 0)

def compute_tema_regimes(prices: pd.Series, short_span: int, long_span: int) -> np.ndarray:
    """
    Triple-EMA crossover regimes on log prices, one label per bar.

    TEMA(span) = 3*EMA1 - 3*EMA2 + EMA3, where each EMA smooths the previous
    one with the same span (adjust=False). A bar is 2 when TEMA(short_span)
    is strictly above TEMA(long_span) and 0 otherwise.
    """
    log_px = np.log(prices)

    def tema(span: int) -> pd.Series:
        # three successive smoothings of the log price with one span
        stages = []
        current = log_px
        for _ in range(3):
            current = current.ewm(span=span, adjust=False).mean()
            stages.append(current)
        first, second, third = stages
        return 3*first - 3*second + third

    fast = tema(short_span)
    slow = tema(long_span)
    return np.where(fast > slow, 2, 0)

def compute_hma_regimes(prices: pd.Series, length: int) -> np.ndarray:
    """
    Hull-style moving-average slope regimes on log prices, one label per bar.

    The average is built from EMAs (adjust=False):
        hma = EMA(2*EMA(length // 2) - EMA(length), span=int(sqrt(length)))
    A bar is 2 when hma rose versus the previous bar and 0 otherwise; the
    first bar has no previous value and is labelled 0.

    The half and smoothing spans are clamped to at least 1 so that length=1
    works (it would otherwise ask for an EMA of span 0). The full-length EMA
    still uses `length` unchanged, so a length below 1 is rejected by pandas.
    """
    logp = np.log(prices)

    half_span = max(1, length // 2)
    ema_half = logp.ewm(span=half_span, adjust=False).mean()
    ema_full = logp.ewm(span=length, adjust=False).mean()
    raw = 2*ema_half - ema_full

    smooth_span = max(1, int(np.sqrt(length)))
    hma = raw.ewm(span=smooth_span, adjust=False).mean()

    # diff() leaves NaN on the first bar; treat that as "not rising"
    slope = hma.diff().fillna(0)
    return np.where(slope > 0, 2, 0)

# ─── Per-instrument grid search ──────────────────────────────────────────

def per_inst_indicator_best(df: pd.DataFrame,
                            inst: int,
                            endpoint: int,
                            compute_fn,
                            param_grid: list) -> dict:
    """
    Try every parameter dict in `param_grid` for one instrument and keep the
    one whose regimes agree best with the autolabeller.

    `compute_fn(prices, **params)` is called on the first `endpoint` rows of
    column `inst`. Progress is printed whenever a new best is found. Returns
    {'params': <copy of the winning dict or None>, 'acc': <accuracy or -1.>}.
    """
    series = df.iloc[:endpoint, inst]
    labels = plot_all_regimes_long(end_point=endpoint, plot_graph=False, inst=inst)
    n_labels = len(labels)

    top_params, top_acc = None, -1.

    print(f"    → Searching inst={inst}, endpoint={endpoint}")
    for params in param_grid:
        # predictions are cut to the number of labels returned
        guess = compute_fn(series, **params)[:n_labels]
        acc = accuracy_score(labels, guess)
        if acc <= top_acc:
            continue
        # copy so later changes to the grid entry cannot alter the result
        top_params, top_acc = params.copy(), acc
        print(f"        New best {params} → acc={acc:.4f}")

    best = {'params': top_params, 'acc': top_acc}
    print(f"    ← Best inst={inst}, ep={endpoint}: {best['params']} acc={best['acc']:.4f}\n")
    return best

# ─── Compare across two endpoints ────────────────────────────────────────

def compare_indicator_two_endpoints(price_file: str,
                                    compute_fn,
                                    param_grid: list,
                                    ep1: int,
                                    ep2: int,
                                    n_inst: int = 50) -> pd.DataFrame:
    """
    Run the per-instrument parameter search for `compute_fn` at endpoints
    `ep1` and `ep2` for instruments 0..n_inst-1.

    `price_file` is read as a whitespace-separated table without a header.
    Returns a DataFrame with columns inst, params1, acc1, params2, acc2,
    where the 1/2 suffix refers to ep1/ep2.
    """
    price_table = pd.read_csv(price_file, sep=r"\s+", header=None)

    def summarise(inst: int) -> dict:
        print(f">>> Instrument {inst:02d} <<<")
        at_ep1 = per_inst_indicator_best(price_table, inst, ep1, compute_fn, param_grid)
        at_ep2 = per_inst_indicator_best(price_table, inst, ep2, compute_fn, param_grid)
        return {
            'inst':    inst,
            'params1': at_ep1['params'],
            'acc1':    at_ep1['acc'],
            'params2': at_ep2['params'],
            'acc2':    at_ep2['acc'],
        }

    return pd.DataFrame([summarise(inst) for inst in range(n_inst)])

# ─── Build parameter grids ──────────────────────────────────────────────

# DEMA / TEMA grid: every (short_span, long_span) pair with long > short,
# short spans 5..50 step 5 and long spans 20..200 step 10.
shorts = range(5,  51,  5)
longs  = range(20, 201, 10)
dema_grid = [{'short_span': s, 'long_span': l} for s in shorts for l in longs if l > s]
# Shallow copy: a separate list that shares the same parameter dicts.
tema_grid = dema_grid.copy()

# HMA grid: a single `length` parameter, 5..100 step 5.
hma_grid = [{'length': L} for L in range(5, 101, 5)]

# ─── Example usage ─────────────────────────────────────────────────────

if __name__ == "__main__":
    # Whitespace-separated price table, one column per instrument.
    price_file = "prices.txt"

    # Each search compares the best parameters at 500 rows against the best
    # at 750 rows for instruments 0..49; the resulting frames are consumed
    # by the reporting code that follows.
    print("== DEMA Grid Search ==")
    df_dema = compare_indicator_two_endpoints(
        price_file, compute_dema_regimes, dema_grid, ep1=500, ep2=750, n_inst=50
    )

    print("== TEMA Grid Search ==")
    df_tema = compare_indicator_two_endpoints(
        price_file, compute_tema_regimes, tema_grid, ep1=500, ep2=750, n_inst=50
    )

    print("== HMA Grid Search ==")
    df_hma  = compare_indicator_two_endpoints(
        price_file, compute_hma_regimes, hma_grid, ep1=500, ep2=750, n_inst=50
    )


In [None]:
def print_hyperparam_diff(df, param_names, name, ep1=500, ep2=750):
    """
    Print, per instrument, the best parameters at two endpoints and how they
    (and the accuracy) changed between them.

    df: DataFrame with columns ['inst','params1','acc1','params2','acc2']
    param_names: list of keys in the params dict to compare, e.g. ['short_span','long_span']
    name: string label for this indicator ('DEMA','TEMA','HMA')
    ep1, ep2: endpoint labels used in the column names and the header; they
        default to 500 and 750 and should match the endpoints `df` was
        built with

    Differences are computed as endpoint-2 value minus endpoint-1 value.
    Nothing is returned; the table is written to stdout.
    """
    rows = []
    for _, row in df.iterrows():
        p1, p2 = row['params1'], row['params2']
        # column order: inst, values at ep1, values at ep2, deltas, accuracies
        entry = {'inst': row['inst']}
        entry.update({f"{name}_{k}_{ep1}": p1[k] for k in param_names})
        entry.update({f"{name}_{k}_{ep2}": p2[k] for k in param_names})
        entry.update({f"Δ{name}_{k}": p2[k] - p1[k] for k in param_names})
        entry[f"{name}_acc{ep1}"] = row['acc1']
        entry[f"{name}_acc{ep2}"] = row['acc2']
        entry[f"Δ{name}_acc"] = row['acc2'] - row['acc1']
        rows.append(entry)
    df_diff = pd.DataFrame(rows)
    print(f"\n--- {name} Hyperparameter Differences ({ep1} vs {ep2}) ---")
    print(df_diff.to_string(index=False))


# Requires df_dema, df_tema and df_hma, each produced by its own call to
# compare_indicator_two_endpoints(...) with the matching compute function.

# DEMA uses 'short_span' and 'long_span'
print_hyperparam_diff(df_dema, ['short_span','long_span'], name='DEMA')

# TEMA likewise
print_hyperparam_diff(df_tema, ['short_span','long_span'], name='TEMA')

# HMA uses just 'length'
print_hyperparam_diff(df_hma, ['length'], name='HMA')


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from precision_labeller import plot_all_regimes_long

def compute_dema_regimes(prices: pd.Series, short_span: int, long_span: int) -> np.ndarray:
    """
    Double-EMA (DEMA) crossover regimes on log prices.

    For each span, DEMA = 2*EMA - EMA(EMA), with adjust=False for every EMA.
    Returns one label per bar: 2 where the short-span DEMA is strictly above
    the long-span DEMA, else 0.
    """
    log_px = np.log(prices)

    demas = []
    for span in (short_span, long_span):
        single = log_px.ewm(span=span, adjust=False).mean()
        double = single.ewm(span=span, adjust=False).mean()
        demas.append(2*single - double)

    fast, slow = demas
    return np.where(fast > slow, 2, 0)

def plot_dema_vs_autolabeller(price_file: str,
                              inst: int,
                              short_span: int,
                              long_span: int,
                              drop_last: int = 10):
    """
    Compare autolabeller regimes to DEMA regimes for one instrument.

    The autolabeller is run on the full series and its labels are cut to the
    first T - drop_last bars (T = number of price rows). Prices, true labels
    and DEMA labels are then all trimmed to the same length, so the plot
    still works when the autolabeller returns fewer labels than that.

    Plots three panels sharing the x axis:
      1) Price with DEMA bull/bear shading
      2) Autolabeller regimes
      3) DEMA predicted regimes
    In every panel a regime of 2 is shaded green and anything else red.
    """
    # Load data
    df = pd.read_csv(price_file, sep=r"\s+", header=None)
    prices = df.iloc[:, inst]
    T = len(prices)

    # True regimes
    true_regs = np.array(plot_all_regimes_long(end_point=T, plot_graph=False, inst=inst)[:T-drop_last])

    # DEMA regimes (integer labels, so there are no NaNs to filter out)
    dema_regs = np.asarray(compute_dema_regimes(prices, short_span, long_span))

    # Align all three series on the shortest one; slicing prices by
    # T - drop_last alone breaks when fewer labels came back.
    n = min(len(true_regs), len(dema_regs))
    price_trim = prices.iloc[:n].to_numpy()
    true_trim = true_regs[:n]
    pred_trim = dema_regs[:n]
    time = np.arange(n)

    # Plot
    fig, axes = plt.subplots(3, 1, figsize=(14, 8), sharex=True)

    def shade(ax, regimes, title=None):
        regimes = np.asarray(regimes)
        if regimes.size:
            # One patch per run of equal labels rather than one per bar; the
            # run starting at bar lo and ending before bar hi covers [lo, hi].
            starts = np.concatenate(([0], np.flatnonzero(regimes[1:] != regimes[:-1]) + 1))
            stops = np.concatenate((starts[1:], [regimes.size]))
            for lo, hi in zip(starts, stops):
                color = 'green' if regimes[lo] == 2 else 'red'
                ax.axvspan(lo, hi, color=color, alpha=0.3)
        if title:
            ax.set_title(title)
        ax.set_yticks([])

    # Price with DEMA shading
    shade(axes[0], pred_trim)
    axes[0].plot(time, price_trim, color='black')
    axes[0].set_title(f"Inst {inst} Price with DEMA({short_span},{long_span}) Regimes")

    # Autolabeller
    shade(axes[1], true_trim, title="Autolabeller Regimes")

    # DEMA regimes
    shade(axes[2], pred_trim, title=f"DEMA Predicted Regimes (short={short_span}, long={long_span})")

    axes[2].set_xlabel("Time step")
    plt.tight_layout()
    plt.show()

# Example usage: one DEMA(40, 200) vs autolabeller figure for each of
# instruments 0..49, leaving out the last 10 bars of every series.
for i in range(50):
    plot_dema_vs_autolabeller("prices.txt", inst=i, short_span= 40, long_span=200, drop_last=10)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
# point this at your autolabeller
from precision_labeller import plot_all_regimes_long  

def compute_area_regimes(prices: pd.Series,
                         win: int,
                         thresh: float) -> pd.Series:
    """
    'Area under returns' regimes, returned on the index of `prices`.

    The area at a bar is the sum of the last `win` log returns (fewer at the
    start of the series; the first bar's return is taken as 0). A bar is
    2 (bull) when its area is strictly above `thresh` and 0 (bear) otherwise.
    """
    log_ret = np.log(prices).diff().fillna(0)
    rolling_area = log_ret.rolling(window=win, min_periods=1).sum()
    bullish = rolling_area > thresh
    return pd.Series(np.where(bullish, 2, 0), index=prices.index)

def find_best_params(prices: pd.Series,
                     inst: int,
                     area_ws: range,
                     f_list: list[float]):
    """
    Grid-search the area-regime window and threshold factor for one
    instrument, maximising accuracy against the autolabeller.

    For each window the threshold is f * std(area), where the area is the
    rolling sum of log returns restricted to the bars that have a label.
    Returns (best_win, best_f, best_threshold, best_score); when either grid
    is empty this is (None, None, None, -1.0).
    """
    # autolabeller output for the whole series, indexed like the prices
    labels = np.array(
        plot_all_regimes_long(end_point=len(prices),
                              plot_graph=False,
                              inst=inst)
    )
    n_labels = len(labels)
    label_index = prices.index[:n_labels]
    truth = pd.Series(labels, index=label_index)

    # log returns are shared by every window, so compute them once
    log_ret = np.log(prices).diff().fillna(0)

    winner = (None, None, None, -1.0)  # (win, f, thresh, score)
    for win in area_ws:
        area = log_ret.rolling(win, min_periods=1).sum().iloc[:n_labels]
        spread = area.std()
        for f in f_list:
            cutoff = f * spread
            guess = pd.Series(np.where(area > cutoff, 2, 0), index=label_index)
            score = accuracy_score(truth, guess)
            # strict '>' keeps the earliest combination on ties
            if score > winner[3]:
                winner = (win, f, cutoff, score)

    return winner

def plot_comparison(inst: int,
                    prices: pd.Series,
                    best_win: int,
                    best_thresh: float):
    """
    Side-by-side comparison for one instrument:
      LEFT  = autolabeller regimes as background shading
      RIGHT = area-based regimes (window `best_win`, threshold `best_thresh`)
    Both panels overlay the log price (the y axis is nevertheless labelled
    "Price") and share the y axis. Regime 2 is green and regime 0 is red.
    """
    # autolabeller output for the whole series
    labels = np.array(
        plot_all_regimes_long(end_point=len(prices),
                              plot_graph=False,
                              inst=inst)
    )
    n_labels = len(labels)
    truth = pd.Series(labels, index=prices.index[:n_labels])

    # area-based prediction and log price, both cut to the labelled bars
    predicted = compute_area_regimes(prices, best_win, best_thresh).iloc[:n_labels]
    log_price = np.log(prices.iloc[:n_labels]).values

    fig, (left, right) = plt.subplots(1, 2, figsize=(14, 4), sharey=True)
    panels = (
        (left,  truth,     "Autolabeller"),
        (right, predicted, f"Area({best_win}) Regime"),
    )
    for ax, regs, title in panels:
        for code, shade in ((2, "green"), (0, "red")):
            in_regime = regs == code
            first_bars = np.flatnonzero(in_regime & ~in_regime.shift(1, fill_value=False))
            last_bars = np.flatnonzero(in_regime & ~in_regime.shift(-1, fill_value=False))
            for lo, hi in zip(first_bars, last_bars):
                ax.axvspan(lo, hi, color=shade, alpha=0.3)
        # price line on top of the shading
        ax.plot(log_price, 'k-', lw=1)
        ax.set_title(f"Instrument {inst} — {title}")
        ax.set_xlabel("Time")
    left.set_ylabel("Price")
    plt.tight_layout()
    plt.show()

def run_all(price_file: str):
    """
    For every instrument (column) in `price_file`: find the best area-regime
    window and threshold factor, print them, plot the autolabeller against
    the area-based regimes, and finally print a summary table.

    The file is read as a whitespace-separated table without a header.
    Nothing is returned.
    """
    table = pd.read_csv(price_file, sep=r'\s+', header=None)

    # search grid: windows 20, 40, ..., 500 and factors 0.0, 0.1, ..., 1.0
    # (the threshold tried is factor * std(area))
    window_grid = range(20, 501, 20)
    factor_grid = np.linspace(0, 1.0, 11).tolist()

    rows = []
    for inst in range(table.shape[1]):
        series = table.iloc[:, inst]

        best = find_best_params(series, inst, window_grid, factor_grid)
        win, f, thresh, score = best
        print(f"✔ Inst {inst:2d}: win={win}, f={f:.2f}, thr={thresh:.4f}, acc={score:.3f}")

        plot_comparison(inst, series, win, thresh)

        rows.append((inst, *best))

    summary_df = pd.DataFrame(
        rows,
        columns=["inst", "win", "f", "threshold", "accuracy"],
    )
    print("\n=== Optimal Area‐Regime Parameters ===")
    print(summary_df.to_string(index=False))

# ── Usage ──────────────────────────────────────────────────────────────────────
# Just call:
#    run_all("prices.txt")
# and you’ll get, for each instrument:
#  • the optimal window N and threshold-factor f (with accuracy)
#  • a two-panel plot: autolabeller vs. your surface-area regimes
#  • a final summary table of (inst, N, f, thresh, accuracy)


In [None]:
# Run the area-regime search, plots and summary table for every instrument.
run_all("prices.txt")