In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def plot_ma_regimes(price_file: str,
                    short_window: int = 20,
                    long_window: int  = 50):
    """
    Plot log-prices and two rolling means for every instrument in a price
    file, shading the background by moving-average regime.

    One figure is produced per column of the file. The background is red
    where the long moving average is above the short one (bear, coded 0)
    and green otherwise (bull, coded 2).

    Parameters
    ----------
    price_file : str
        Path to a whitespace-separated, header-less price table
        (rows = time steps, columns = instruments).
    short_window, long_window : int
        Rolling-mean window lengths. The means are taken over log-prices
        with ``min_periods=1``, so they are defined from the first row on.
    """
    # 1) load prices from the path the caller asked for
    df = pd.read_csv(price_file, sep=r"\s+", header=None)
    n_inst = df.shape[1]
    dates  = np.arange(df.shape[0])

    def shade_background(ax, regs):
        """Shade red where regs==0, green where regs==2."""
        if len(regs) == 0:
            return  # nothing to shade for an empty series
        cur, seg0 = regs[0], dates[0]
        for i, r in enumerate(regs[1:], 1):
            if r != cur:
                # regime flipped: close the running segment at this step
                color = "red" if cur == 0 else "green"
                ax.axvspan(seg0, dates[i], facecolor=color, alpha=0.3)
                cur, seg0 = r, dates[i]
        # final segment
        color = "red" if cur == 0 else "green"
        ax.axvspan(seg0, dates[-1], facecolor=color, alpha=0.3)

    # 2) loop instruments
    for inst in range(n_inst):
        log_prices = np.log(df.iloc[:, inst])

        ma_short = log_prices.rolling(window=short_window, min_periods=1).mean()
        ma_long  = log_prices.rolling(window=long_window,  min_periods=1).mean()

        # 3) regime: bear=0 when long>short, else bull=2
        regs = np.where(ma_long > ma_short, 0, 2)

        # 4) plot (everything drawn here is on the log scale)
        fig, ax = plt.subplots(figsize=(10, 4))
        ax.plot(dates, log_prices, color="black",  lw=1, label="log(Price)")
        ax.plot(dates, ma_short,   color="blue",   lw=1, label=f"MA{short_window}")
        ax.plot(dates, ma_long,    color="orange", lw=1, label=f"MA{long_window}")
        shade_background(ax, regs)

        ax.set_title(f"Instrument {inst+1} — MA{long_window} vs MA{short_window} regimes")
        ax.set_xlabel("Time")
        ax.set_ylabel("log(Price)")
        ax.legend(loc="upper left")
        plt.tight_layout()
        plt.show()

# ─── Example usage in a notebook ───────────────────────────────────────────────
# The path is resolved relative to the notebook's working directory.
plot_ma_regimes("../prices.txt", short_window=5, long_window=70)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def plot_sma_bull_bear(prices: pd.Series,
                       short_w: int = 5,
                       long_w:  int = 70,
                       use_log: bool = True,
                       show_price: bool = True,
                       title: str | None = None,
                       ax: plt.Axes | None = None) -> plt.Axes:
    """
    Plot short/long simple-moving-average crossover regimes.

    Colours:
        • green background = bull  (SMA_short > SMA_long)
        • red   background = bear  (SMA_short ≤ SMA_long)

    Parameters
    ----------
    prices : pd.Series
        Raw price series (index = time; values = float).
    short_w, long_w : int
        SMA windows.  Must satisfy long_w > short_w.
    use_log : bool
        Run the SMAs on log-prices (default) or on raw prices.
    show_price : bool
        Overlay the raw-price curve (grey, secondary-y) for context.
    title : str | None
        Axes title; defaults to "SMA <short_w>/<long_w> Bull–Bear Regimes".
    ax : plt.Axes | None
        Optionally pass an existing Axes to draw on; a new Figure/Axes
        is created otherwise.

    Returns
    -------
    ax : plt.Axes
        The Axes that received the SMA lines and the shading.

    Raises
    ------
    ValueError
        If ``long_w <= short_w``.
    """
    if long_w <= short_w:
        raise ValueError("long_w must be strictly greater than short_w")

    series = np.log(prices) if use_log else prices
    # min_periods=1 keeps both averages defined from the very first sample
    sma_s  = series.rolling(short_w, min_periods=1).mean()
    sma_l  = series.rolling(long_w,  min_periods=1).mean()
    regime = (sma_s > sma_l).astype(int)      # 1 = bull, 0 = bear

    # ─── set up axes ────────────────────────────────────────────────────
    if ax is None:
        _, ax = plt.subplots(figsize=(12, 4))
    ax2 = ax.twinx() if show_price else None

    # ─── background shading ────────────────────────────────────────────
    flags = regime.to_numpy()
    if flags.size:  # an empty series has nothing to shade
        # A segment starts at position 0 and wherever the regime differs
        # from the previous sample; it ends where the next one starts.
        starts = np.concatenate(([0], np.flatnonzero(flags[1:] != flags[:-1]) + 1))
        ends   = np.concatenate((starts[1:], [flags.size - 1]))

        for s, e in zip(starts, ends):
            color = "green" if flags[s] == 1 else "red"
            ax.axvspan(regime.index[s], regime.index[e],
                       alpha=0.12, color=color, linewidth=0)

    # ─── lines: SMAs (+ raw price) ──────────────────────────────────────
    ax.plot(sma_s.index, sma_s,  label=f"SMA {short_w}", lw=1.5)
    ax.plot(sma_l.index, sma_l,  label=f"SMA {long_w}",  lw=1.5, ls="--")
    ax.set_ylabel("log-price (SMA)" if use_log else "price (SMA)")
    ax.legend(loc="upper left")

    if show_price:
        # raw prices live on the twin axis so their scale does not squash
        # the (possibly log-scale) SMA curves
        ax2.plot(prices.index, prices, color="grey", alpha=0.6, lw=1,
                 label="Price")
        ax2.set_ylabel("price")
        ax2.legend(loc="upper right")

    ax.set_title(title or f"SMA {short_w}/{long_w} Bull–Bear Regimes")
    ax.grid(ls=":", alpha=0.4)

    return ax


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Demo: read the whitespace-separated price table and draw the SMA 5/70
# regimes for a single instrument (column 0).
price_df = pd.read_csv("../prices.txt", sep=r"\s+", header=None)
price_series = price_df.iloc[:, 0]

plot_sma_bull_bear(price_series, short_w=5, long_w=70, use_log=True)
plt.show()


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long  # your autolabeller

def compute_kalman_regimes(prices: pd.Series,
                           process_var:     float,
                           measurement_var: float,
                           drop_last:       int = 10) -> tuple[np.ndarray, np.ndarray]:
    """
    Label bull/bear regimes with a 1D Kalman-filter trend on log-prices.

    State x_t = trend estimate; measurement z_t = log-price.
    The state is modelled as constant between steps (the prediction is the
    previous estimate), with process_var = Q and measurement_var = R.
    Regime = 2 (bull) if logP_t > trend_t, else 0 (bear).

    Parameters
    ----------
    prices : pd.Series
        Price series; logs are taken internally.
    process_var : float
        Variance added to the estimate variance at each predict step (Q).
    measurement_var : float
        Measurement-noise variance used in the gain (R).
    drop_last : int, default 10
        Number of trailing points removed from the result. Values larger
        than ``len(prices)`` yield empty outputs.

    Returns
    -------
    (regs, valid) : tuple of np.ndarray
        ``regs`` holds the 0/2 labels for the first ``len(prices) - drop_last``
        points; ``valid`` is an all-True boolean mask of the same length.
    """
    logp = np.log(np.asarray(prices, dtype=float))
    n    = logp.size

    x_est = np.zeros(n)  # filtered trend

    if n:
        # initialise on the first observation with unit estimate variance
        x_est[0] = logp[0]
        p_var    = 1.0

        for t in range(1, n):
            # predict: the trend carries over, uncertainty grows by Q
            x_pred = x_est[t - 1]
            p_pred = p_var + process_var

            # update: blend prediction and measurement by the Kalman gain
            gain     = p_pred / (p_pred + measurement_var)
            x_est[t] = x_pred + gain * (logp[t] - x_pred)
            p_var    = (1 - gain) * p_pred

    # regime: bull if price above trend, else bear
    regs_full = np.where(logp > x_est, 2, 0)

    # Trim the tail. Clamp so an oversized drop_last cannot turn into a
    # negative slice bound, which would keep points instead of dropping all.
    n_keep    = min(max(n - drop_last, 0), n)
    regs_trim = regs_full[:n_keep]

    # every retained point carries a label, so the mask is all True
    valid     = np.ones_like(regs_trim, dtype=bool)
    return regs_trim, valid

In [None]:
# ─── Kalman-regime visualiser ────────────────────────────────────────────
import matplotlib.pyplot as plt
import pandas as pd

def plot_kalman_regimes(price_file:      str,
                        inst:            int   = 0,
                        process_var:     float = 0.01,
                        measurement_var: float = 10.0,
                        drop_last:       int   = 10,
                        ax=None) -> None:
    """
    Plot price history coloured by Kalman regimes
    (green = bull, red = bear).

    Parameters
    ----------
    price_file : str
        Path to the whitespace-separated prices.txt file.
    inst : int, default 0
        Instrument column to plot.
    process_var : float, default 0.01    (Q)
    measurement_var : float, default 10  (R)
    drop_last : int, default 10
        Number of trailing points passed to ``compute_kalman_regimes``
        for trimming; the price series is cut to the same length.
    ax : matplotlib axis, optional
        Pass an axis to embed in an existing figure; otherwise
        a new figure is created and shown. When an axis is supplied the
        caller decides when to display the figure.
    """
    # 1) load prices
    df      = pd.read_csv(price_file, sep=r"\s+", header=None)
    prices  = df.iloc[:, inst]

    # 2) Kalman regimes
    regs, _ = compute_kalman_regimes(prices,
                                     process_var,
                                     measurement_var,
                                     drop_last)
    prices  = prices.iloc[:len(regs)]      # align lengths

    # 3) build the plot
    owns_figure = ax is None
    if owns_figure:
        _, ax = plt.subplots(figsize=(12, 4))

    # iterate over contiguous regime segments
    n_points = len(regs)
    start = 0
    for i in range(1, n_points + 1):
        if i == n_points or regs[i] != regs[i - 1]:
            # Include the first point of the next segment so consecutive
            # segments join up and a one-step regime is still drawn.
            stop    = min(i + 1, n_points)
            segment = prices.iloc[start:stop]
            colour  = "green" if regs[start] == 2 else "red"
            ax.plot(segment.index, segment.values, color=colour, linewidth=1.2)
            start = i

    ax.set(
        title = f"Kalman regimes (inst={inst}, Q={process_var}, R={measurement_var})",
        xlabel = "Time index",
        ylabel = "Price"
    )
    ax.grid(True)

    # only display figures this function created itself
    if owns_figure:
        plt.show()

# ─── Quick demo ──────────────────────────────────────────────────────────
# plot_kalman_regimes("prices.txt", inst=0)


In [None]:
# Run in its own cell: draw the Kalman-regime chart for one instrument.
# Change `inst` to look at a different column of the price file.
plot_kalman_regimes("../prices.txt", inst=0, process_var=0.01,
                    measurement_var=10.0, drop_last=10)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from precision_labeller import plot_all_regimes_long

def compute_zscore_regimes(prices: pd.Series, window: int, threshold: float, drop_last: int = 10):
    """
    Classify each time step by the rolling z-score of its price.

    The z-score is ``(price - rolling mean) / rolling std`` over the trailing
    ``window`` observations. Labels: 2 where z > threshold, 0 where
    z < -threshold, 1 otherwise.

    Parameters
    ----------
    prices : pd.Series
        Price series.
    window : int
        Rolling window length; no z-score exists until ``window``
        observations are available.
    threshold : float
        Symmetric z-score cut-off.
    drop_last : int, default 10
        Number of trailing points removed before masking. Values larger
        than ``len(prices)`` yield empty outputs.

    Returns
    -------
    (regimes, valid)
        ``valid`` is a boolean mask over the first ``len(prices) - drop_last``
        steps, False wherever the z-score is NaN; ``regimes`` is an array
        holding the labels of the valid steps only.
    """
    rolling = prices.rolling(window=window, min_periods=window)
    z = (prices - rolling.mean()) / rolling.std()

    # Clamp so an oversized drop_last cannot become a negative slice bound,
    # which would keep points instead of dropping everything.
    n_keep = min(max(len(prices) - drop_last, 0), len(prices))
    z_trim = z.iloc[:n_keep]

    valid = ~np.isnan(z_trim)
    regimes = np.where(z_trim > threshold, 2,
                       np.where(z_trim < -threshold, 0, 1))
    return regimes[valid.to_numpy()], valid


def plot_regime_comparison(price_file: str,
                           inst: int,
                           best_window: int,
                           best_threshold: float,
                           drop_last: int = 10):
    """
    Plot two stacked colour bars comparing:
    - the regimes returned by ``plot_all_regimes_long`` (autolabeller)
    - the Z-score predicted regimes

    Green marks regime 2, red marks regime 0 and grey marks any other
    value (e.g. the neutral Z-score label 1).

    Parameters:
        - price_file: path to the whitespace-separated price file
        - inst: instrument (column) index
        - best_window: Z-score rolling window
        - best_threshold: Z-score threshold
        - drop_last: number of trailing days removed from both label series
    """
    df = pd.read_csv(price_file, sep=r"\s+", header=None)
    prices = df.iloc[:, inst]
    T = len(prices)

    # Get true regimes (autolabeller), trimmed to the first T - drop_last steps
    true_regs = plot_all_regimes_long(end_point=T, plot_graph=False, inst=inst)
    true_regs = np.array(true_regs[:T - drop_last])

    # Compute predicted regimes
    pred_regs, valid_mask = compute_zscore_regimes(prices, best_window, best_threshold, drop_last)

    # Align both to valid_mask
    true_trimmed = true_regs[np.asarray(valid_mask)]
    pred_trimmed = pred_regs  # already aligned to valid

    # ————— Plotting —————
    fig, axes = plt.subplots(2, 1, figsize=(15, 5), sharex=True)

    def regime_colour(value):
        if value == 2:
            return 'green'
        if value == 0:
            return 'red'
        return 'gray'

    def plot_regime(ax, regime_array, title):
        regime_array = np.asarray(regime_array)
        n_steps = len(regime_array)
        # draw one span per run of equal labels instead of one per sample
        run_start = 0
        for i in range(1, n_steps + 1):
            if i == n_steps or regime_array[i] != regime_array[run_start]:
                ax.axvspan(run_start, i,
                           color=regime_colour(regime_array[run_start]),
                           alpha=0.5)
                run_start = i
        ax.set_title(title)
        ax.set_yticks([])
        ax.set_xlim([0, n_steps])

    plot_regime(axes[0], true_trimmed, f"Autolabelled Regimes (Instrument {inst})")
    plot_regime(axes[1], pred_trimmed, f"Z-score Regimes → window={best_window}, threshold={best_threshold}")

    plt.xlabel("Time")
    plt.tight_layout()
    plt.show()


In [None]:
# Compare autolabelled and Z-score regimes for one instrument.
# `inst` picks the column; fill in the best window/threshold you found.
plot_regime_comparison(price_file="prices.txt", inst=24,
                       best_window=80, best_threshold=0.5,
                       drop_last=10)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from precision_labeller import plot_all_regimes_long


def compute_zscore_regimes(prices: pd.Series, window: int, threshold: float, drop_last: int = 10):
    """
    Classify each time step by the rolling z-score of its price.

    The z-score is ``(price - rolling mean) / rolling std`` over the trailing
    ``window`` observations. Labels: 2 where z > threshold, 0 where
    z < -threshold, 1 otherwise.

    Parameters
    ----------
    prices : pd.Series
        Price series.
    window : int
        Rolling window length; no z-score exists until ``window``
        observations are available.
    threshold : float
        Symmetric z-score cut-off.
    drop_last : int, default 10
        Number of trailing points removed before masking. Values larger
        than ``len(prices)`` yield empty outputs.

    Returns
    -------
    (regimes, valid)
        ``valid`` is a boolean mask over the first ``len(prices) - drop_last``
        steps, False wherever the z-score is NaN; ``regimes`` is an array
        holding the labels of the valid steps only.
    """
    rolling = prices.rolling(window=window, min_periods=window)
    z = (prices - rolling.mean()) / rolling.std()

    # Clamp so an oversized drop_last cannot become a negative slice bound,
    # which would keep points instead of dropping everything.
    n_keep = min(max(len(prices) - drop_last, 0), len(prices))
    z_trim = z.iloc[:n_keep]

    valid = ~np.isnan(z_trim)
    regimes = np.where(z_trim > threshold, 2,
                       np.where(z_trim < -threshold, 0, 1))
    return regimes[valid.to_numpy()], valid


def plot_regime_comparison_with_price(price_file: str,
                                      inst: int,
                                      best_window: int,
                                      best_threshold: float,
                                      drop_last: int = 10):
    """
    Plots three stacked panels for one instrument:
    - price curve with colored Z-score regime background
    - autolabelled regimes (color blocks)
    - Z-score predicted regimes (color blocks)

    Green marks regime 2, red marks regime 0, grey marks anything else.

    Parameters:
        - price_file: path to the whitespace-separated price file
        - inst: instrument (column) index
        - best_window: Z-score rolling window
        - best_threshold: Z-score threshold
        - drop_last: number of trailing steps removed from prices and labels
    """

    df = pd.read_csv(price_file, sep=r"\s+", header=None)
    prices_full = df.iloc[:, inst]
    T = len(prices_full)

    # Labelled regimes from precision_labeller, trimmed to T - drop_last steps
    true_regs = plot_all_regimes_long(end_point=T, plot_graph=False, inst=inst)
    true_regs = np.array(true_regs[:T - drop_last])

    # Predicted z-score regimes and valid mask
    pred_regs, valid_mask = compute_zscore_regimes(prices_full, best_window, best_threshold, drop_last)
    keep = np.asarray(valid_mask)

    # Final aligned data: only steps with a defined z-score
    price_trimmed = prices_full.iloc[:T - drop_last].to_numpy()[keep]
    true_trimmed = true_regs[keep]
    pred_trimmed = pred_regs
    time = np.arange(len(price_trimmed))

    # ——— Plot setup ———
    fig, axes = plt.subplots(3, 1, figsize=(15, 8), sharex=True)

    def shade_regions(ax, regimes, label="", hide_yticks=True):
        regimes = np.asarray(regimes)
        n_steps = len(regimes)
        # draw one span per run of equal labels instead of one per sample
        run_start = 0
        for i in range(1, n_steps + 1):
            if i == n_steps or regimes[i] != regimes[run_start]:
                regime = regimes[run_start]
                color = 'green' if regime == 2 else 'red' if regime == 0 else 'gray'
                ax.axvspan(run_start, i, color=color, alpha=0.3)
                run_start = i
        if hide_yticks:
            ax.set_yticks([])
        if label:
            ax.set_title(label)

    # — Price Plot with Z-score background —
    # keep the y ticks here: this panel carries the price scale
    shade_regions(axes[0], pred_trimmed, hide_yticks=False)
    axes[0].plot(time, price_trimmed, color='black', linewidth=1.5)
    axes[0].set_title(f"Price (Instrument {inst}) with Z-score Regimes")

    # — Autolabelled —
    shade_regions(axes[1], true_trimmed, label="Autolabelled Regimes")

    # — Z-score prediction —
    shade_regions(axes[2], pred_trimmed, label=f"Z-score Predicted Regimes (win={best_window}, thresh={best_threshold})")

    axes[2].set_xlabel("Time Step")
    plt.tight_layout()
    plt.show()


In [None]:
# Three-panel comparison for a single instrument; replace the window and
# threshold with the best values you found.
plot_regime_comparison_with_price(price_file="prices.txt", inst=5,
                                  best_window=84, best_threshold=0.75,
                                  drop_last=10)


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long


def compute_zscore_regimes(prices: pd.Series, window: int, threshold: float, drop_last: int = 10):
    """
    Classify each time step by the rolling z-score of its price.

    The z-score is ``(price - rolling mean) / rolling std`` over the trailing
    ``window`` observations. Labels: 2 where z > threshold, 0 where
    z < -threshold, 1 otherwise.

    Parameters
    ----------
    prices : pd.Series
        Price series.
    window : int
        Rolling window length; no z-score exists until ``window``
        observations are available.
    threshold : float
        Symmetric z-score cut-off.
    drop_last : int, default 10
        Number of trailing points removed before masking. Values larger
        than ``len(prices)`` yield empty outputs.

    Returns
    -------
    (regimes, valid)
        ``valid`` is a boolean mask over the first ``len(prices) - drop_last``
        steps, False wherever the z-score is NaN; ``regimes`` is an array
        holding the labels of the valid steps only.
    """
    rolling = prices.rolling(window=window, min_periods=window)
    z = (prices - rolling.mean()) / rolling.std()

    # Clamp so an oversized drop_last cannot become a negative slice bound,
    # which would keep points instead of dropping everything.
    n_keep = min(max(len(prices) - drop_last, 0), len(prices))
    z_trim = z.iloc[:n_keep]

    valid = ~np.isnan(z_trim)
    regimes = np.where(z_trim > threshold, 2,
                       np.where(z_trim < -threshold, 0, 1))
    return regimes[valid.to_numpy()], valid


def per_inst_grid_search(df_prices, inst: int, endpoint: int,
                         window_range: range, threshold_range: list,
                         drop_last: int = 10) -> dict:
    """
    Grid-search the Z-score window/threshold for one instrument.

    For every (window, threshold) pair the Z-score regimes are compared
    with the labels returned by ``plot_all_regimes_long``. Only steps where
    the Z-score prediction is not the neutral label 1 are scored.

    Parameters
    ----------
    df_prices : DataFrame
        Price table (rows = time steps, columns = instruments).
    inst : int
        Column index of the instrument.
    endpoint : int
        Number of leading rows to use; also passed to the labeller as
        ``end_point``.
    window_range, threshold_range : iterable
        Candidate rolling windows and Z-score thresholds.
    drop_last : int, default 10
        Number of trailing steps removed from labels and predictions.

    Returns
    -------
    dict
        ``{"window", "threshold", "accuracy"}`` of the first combination
        reaching the highest accuracy; all three are None when no
        combination produced a non-neutral prediction.
    """
    prices = df_prices.iloc[:endpoint, inst]
    # The reference labels do not depend on the grid point, so build the
    # array once instead of on every iteration.
    true_regs = np.array(
        plot_all_regimes_long(
            end_point=endpoint,
            plot_graph=False,
            inst=inst
        )[:endpoint - drop_last]
    )

    best_score = -1
    best_params = {"window": None, "threshold": None, "accuracy": None}

    for w in window_range:
        for th in threshold_range:
            pred, valid = compute_zscore_regimes(prices, w, th, drop_last)
            true_trimmed = true_regs[np.asarray(valid)]

            # score only the steps where the model commits to bull or bear
            mask = pred != 1
            pred_bin = pred[mask]
            true_bin = true_trimmed[mask]

            if len(pred_bin) == 0:
                continue

            acc = accuracy_score(true_bin, pred_bin)

            # strict '>' keeps the earliest combination on ties
            if acc > best_score:
                best_score = acc
                best_params = {
                    "window": w,
                    "threshold": th,
                    "accuracy": acc
                }

    return best_params


def compare_grid_search_two_endpoints(price_file: str,
                                      endpoint1: int,
                                      endpoint2: int,
                                      window_range: range,
                                      threshold_range: list,
                                      drop_last: int = 10,
                                      n_inst: int = 50) -> pd.DataFrame:
    """
    Run the per-instrument grid search at two history lengths and tabulate
    how the best parameters differ.

    Parameters
    ----------
    price_file : str
        Path to the whitespace-separated, header-less price file.
    endpoint1, endpoint2 : int
        Row counts handed to ``per_inst_grid_search`` as ``endpoint``.
    window_range, threshold_range : iterable
        Candidate windows and thresholds for the search.
    drop_last : int, default 10
        Forwarded to ``per_inst_grid_search``.
    n_inst : int, default 50
        Number of instruments to process, capped at the number of columns
        in the file.

    Returns
    -------
    pd.DataFrame
        One row per instrument with the best window/threshold/accuracy at
        each endpoint and their differences (endpoint2 minus endpoint1).
        A difference is None when either search found no scorable
        combination.
    """
    df_prices = pd.read_csv(price_file, sep=r"\s+", header=None)
    records = []

    def fmt_acc(acc):
        # a search with no scorable combination reports accuracy=None
        return "n/a" if acc is None else f"{acc:.4f}"

    def delta(first, second):
        return None if first is None or second is None else second - first

    # never index past the last column of the file
    for inst in range(min(n_inst, df_prices.shape[1])):
        print(f"\n🔍 Instrument {inst}")

        best1 = per_inst_grid_search(df_prices, inst, endpoint1, window_range, threshold_range, drop_last)
        best2 = per_inst_grid_search(df_prices, inst, endpoint2, window_range, threshold_range, drop_last)

        print(f"  ▶ endpoint {endpoint1}: w={best1['window']} | th={best1['threshold']} | acc={fmt_acc(best1['accuracy'])}")
        print(f"  ▶ endpoint {endpoint2}: w={best2['window']} | th={best2['threshold']} | acc={fmt_acc(best2['accuracy'])}")

        records.append({
            "instrument": inst,
            "w_ep1": best1['window'],
            "th_ep1": best1['threshold'],
            "acc_ep1": best1['accuracy'],
            "w_ep2": best2['window'],
            "th_ep2": best2['threshold'],
            "acc_ep2": best2['accuracy'],
            "Δ_window": delta(best1['window'], best2['window']),
            "Δ_thresh": delta(best1['threshold'], best2['threshold']),
            "Δ_acc": delta(best1['accuracy'], best2['accuracy'])
        })

    df_compare = pd.DataFrame(records)
    return df_compare


In [None]:
# Search windows 10..200 (step 10) and four thresholds at two history
# lengths (500 and 740 rows) for 50 instruments, then print the table.
df_summary = compare_grid_search_two_endpoints(
    price_file="prices.txt", endpoint1=500, endpoint2=740,
    window_range=range(10, 201, 10),
    threshold_range=[0.5, 1.0, 1.5, 2.0],
    drop_last=10, n_inst=50,
)

print("\n📊 Summary:")
print(df_summary.to_string(index=False))


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from precision_labeller import plot_all_regimes_long

def compute_zscore_regimes(prices: pd.Series, window: int, threshold: float, drop_last: int = 10):
    """
    Classify each time step by the rolling z-score of its price.

    The z-score is ``(price - rolling mean) / rolling std`` over the trailing
    ``window`` observations. Labels: 2 where z > threshold, 0 where
    z < -threshold, 1 otherwise.

    Parameters
    ----------
    prices : pd.Series
        Price series.
    window : int
        Rolling window length; no z-score exists until ``window``
        observations are available.
    threshold : float
        Symmetric z-score cut-off.
    drop_last : int, default 10
        Number of trailing points removed before masking. Values larger
        than ``len(prices)`` yield empty outputs.

    Returns
    -------
    (regimes, valid)
        ``valid`` is a boolean mask over the first ``len(prices) - drop_last``
        steps, False wherever the z-score is NaN; ``regimes`` is an array
        holding the labels of the valid steps only.
    """
    rolling = prices.rolling(window=window, min_periods=window)
    z = (prices - rolling.mean()) / rolling.std()

    # Clamp so an oversized drop_last cannot become a negative slice bound,
    # which would keep points instead of dropping everything.
    n_keep = min(max(len(prices) - drop_last, 0), len(prices))
    z_trim = z.iloc[:n_keep]

    valid = ~np.isnan(z_trim)
    regimes = np.where(z_trim > threshold, 2,
                       np.where(z_trim < -threshold, 0, 1))
    return regimes[valid.to_numpy()], valid

def plot_regime_comparison_with_price(price_file: str,
                                      inst: int,
                                      best_window: int,
                                      best_threshold: float,
                                      drop_last: int = 10):
    """
    Plots three stacked panels for one instrument:
    - price curve with colored Z-score regime background
    - autolabelled regimes (color blocks)
    - Z-score predicted regimes (color blocks)

    Green marks regime 2, red marks regime 0, grey marks anything else.

    Parameters:
        - price_file: path to the whitespace-separated price file
        - inst: instrument (column) index
        - best_window: Z-score rolling window
        - best_threshold: Z-score threshold
        - drop_last: number of trailing steps removed from prices and labels
    """
    df = pd.read_csv(price_file, sep=r"\s+", header=None)
    prices_full = df.iloc[:, inst]
    T = len(prices_full)

    # reference labels, trimmed to the first T - drop_last steps
    true_regs = plot_all_regimes_long(end_point=T, plot_graph=False, inst=inst)
    true_regs = np.array(true_regs[:T - drop_last])

    pred_regs, valid_mask = compute_zscore_regimes(prices_full, best_window, best_threshold, drop_last)
    keep = np.asarray(valid_mask)

    # keep only the steps with a defined z-score
    price_trimmed = prices_full.iloc[:T - drop_last].to_numpy()[keep]
    true_trimmed = true_regs[keep]
    pred_trimmed = pred_regs
    time = np.arange(len(price_trimmed))

    fig, axes = plt.subplots(3, 1, figsize=(15, 8), sharex=True)

    def shade_regions(ax, regimes, label="", hide_yticks=True):
        regimes = np.asarray(regimes)
        n_steps = len(regimes)
        # draw one span per run of equal labels instead of one per sample
        run_start = 0
        for i in range(1, n_steps + 1):
            if i == n_steps or regimes[i] != regimes[run_start]:
                regime = regimes[run_start]
                color = 'green' if regime == 2 else 'red' if regime == 0 else 'gray'
                ax.axvspan(run_start, i, color=color, alpha=0.3)
                run_start = i
        if hide_yticks:
            ax.set_yticks([])
        if label:
            ax.set_title(label)

    # keep the y ticks on the first panel: it carries the price scale
    shade_regions(axes[0], pred_trimmed, hide_yticks=False)
    axes[0].plot(time, price_trimmed, color='black', linewidth=1.5)
    axes[0].set_title(f"Price (Instrument {inst}) with Z-score Regimes")

    shade_regions(axes[1], true_trimmed, label="Autolabelled Regimes")
    shade_regions(axes[2], pred_trimmed, label=f"Z-score Predicted Regimes (win={best_window}, thresh={best_threshold})")

    axes[2].set_xlabel("Time Step")
    plt.tight_layout()
    plt.show()

def plot_all_instruments(price_file: str,
                         best_window: int,
                         best_threshold: float,
                         drop_last: int = 10):
    """
    Draw the three-panel regime comparison for every instrument.

    The price file is read once here to count its columns; each column
    index is then handed to ``plot_regime_comparison_with_price`` together
    with the shared window, threshold and ``drop_last`` settings.
    """
    column_count = pd.read_csv(price_file, sep=r"\s+", header=None).shape[1]
    for column in range(column_count):
        plot_regime_comparison_with_price(price_file, column, best_window, best_threshold, drop_last)

# Example usage in Jupyter:
# plot_all_instruments("prices.txt", best_window=90, best_threshold=1.0, drop_last=10)


In [None]:
# Plot every instrument with the chosen optimal window and threshold.
plot_all_instruments(price_file="prices.txt", best_window=84,
                     best_threshold=0.75, drop_last=10)
