In [None]:
# ─── Notebook Setup ────────────────────────────────────────────────────────────
%matplotlib inline
import numpy  as np
import pandas as pd
import matplotlib.pyplot as plt
from pre_proc_labelling__long_eval import plot_all_regimes_long

plt.rcParams["figure.figsize"] = (10, 6)   # a bit larger by default


# ─── PARAMETERS ────────────────────────────────────────────────────────────────
N_INST    = 50      # number of instruments
T_FULL    = 750     # full history length passed to plot_all_regimes_long
START     = 160     # first timestep both true & preds overlap
END       = 739     # last  timestep both true & preds overlap (inclusive)
SHIFT     = 0       # ← set this to how many days you want to left-shift your preds
THRESHOLD = 0.72    # score cut-off used to binarize the predictions below


# ─── LOAD & THRESHOLD PREDICTIONS ───────────────────────────────────────────────
# The first CSV column is used as the row index; every remaining column is a score.
df_pred      = pd.read_csv("predictions.csv", index_col=0)
pred_matrix  = df_pred.to_numpy()               # expected shape (N_INST, nt_pred) — TODO confirm

# Scores strictly above THRESHOLD become 0 (Bear); everything else becomes 2 (Bull).
# NOTE(review): the grid-search cell further down maps scores above its threshold
# to 2 (Bull) instead. One of the two polarities is presumably inverted — confirm
# which one matches the model's output before comparing results across cells.
pred_regimes = np.where(pred_matrix > THRESHOLD, 0, 2)  # 0=Bear, 2=Bull


# ─── LOAD “TRUE” LABELS via your helper ─────────────────────────────────────────
# One helper call per instrument; the results are stacked row-wise.
# NOTE(review): assumes each call returns a length-T_FULL label vector and that
# the False argument only disables plotting — verify against the helper.
true_list = [
    plot_all_regimes_long(T_FULL, False, inst_idx)
    for inst_idx in range(N_INST)
]
true2d = np.vstack(true_list)   # expected shape (N_INST, T_FULL)


# ─── ALIGN & APPLY SHIFT ───────────────────────────────────────────────────────
# True labels are cut to the inclusive window [START, END]; predictions are taken
# from their first `length` columns, i.e. prediction column 0 is paired with
# timestep START.
length        = END - START + 1
true_aligned  = true2d[:,   START:END+1]    # (N_INST, length)
pred_aligned  = pred_regimes[:, :length]    # (N_INST, length)

# Fail early with a readable message: the plots and the accuracy computation both
# pair these two arrays cell by cell, so differing shapes would be meaningless.
if true_aligned.shape != pred_aligned.shape:
    raise ValueError(
        f"true labels {true_aligned.shape} and predictions {pred_aligned.shape} "
        f"do not cover the same window; check START/END and predictions.csv"
    )

if SHIFT:
    # The roll-and-fill below only makes sense for 0 < SHIFT < length; anything
    # else previously surfaced as an opaque IndexError / np.repeat error.
    if not 0 < SHIFT < length:
        raise ValueError(f"SHIFT must satisfy 0 <= SHIFT < {length}, got {SHIFT}")
    # Roll left by SHIFT (np.roll returns a new array, pred_regimes is untouched).
    # The wrapped-around right-most SHIFT columns are then overwritten with the
    # last genuine prediction, which sits at column -SHIFT-1 after the roll.
    pred_aligned = np.roll(pred_aligned, -SHIFT, axis=1)
    fill_vals    = pred_aligned[:, [-SHIFT - 1]]          # (N_INST, 1) copy
    pred_aligned[:, -SHIFT:] = fill_vals                  # broadcast across SHIFT columns


# x-axis values shared by every plot: the absolute timestep of each window column
dates = np.arange(START, END+1)


# ─── LOAD PRICES ────────────────────────────────────────────────────────────────
# Whitespace-separated, no header row; plot_inst reads rows as timesteps and
# columns as instruments.
price_df = pd.read_csv("prices.txt", sep=r"\s+", header=None)


# ─── PLOTTING FUNCTION ─────────────────────────────────────────────────────────
def plot_inst(inst_idx: int):
    """Show one instrument's price twice: over its true and its predicted regimes.

    Reads the notebook-level ``price_df``, ``dates``, ``true_aligned``,
    ``pred_aligned``, ``START``, ``END`` and ``SHIFT``. The figure has two
    stacked panels sharing the x-axis; both draw the same price line, the top
    one shaded by the true labels and the bottom one by the predicted labels.

    Parameters
    ----------
    inst_idx : int
        Zero-based instrument index (column of ``price_df``, row of the label
        arrays). Titles display it one-based.
    """
    window_prices = price_df.iloc[START:END+1, inst_idx].values

    def shade(ax, regs):
        # Positions where the label differs from the previous one start a new run.
        flips = np.flatnonzero(regs[1:] != regs[:-1]) + 1
        run_starts = [0, *flips]
        # Every run ends where the next begins; the last one ends at the final date.
        run_ends = [*dates[flips], dates[-1]]
        for first, right_edge in zip(run_starts, run_ends):
            # Label 2 is drawn green; any other label is drawn red.
            # NOTE(review): a label other than 0/2 would therefore show as red — confirm
            # the true labels only contain 0 and 2.
            ax.axvspan(dates[first], right_edge,
                       facecolor=("green" if regs[first]==2 else "red"),
                       alpha=0.3)

    fig, panels = plt.subplots(2,1, sharex=True)

    predicted_title = f"Inst {inst_idx+1}: PREDICTED regimes"
    if SHIFT:
        predicted_title += f"  (shifted ← {SHIFT} days)"

    panel_specs = (
        (true_aligned[inst_idx], f"Inst {inst_idx+1}: TRUE regimes"),
        (pred_aligned[inst_idx], predicted_title),
    )
    for ax, (labels, heading) in zip(panels, panel_specs):
        ax.plot(dates, window_prices, color="black", lw=1)
        shade(ax, labels)
        ax.set_ylabel("Price")
        ax.set_title(heading)

    # Only the bottom panel carries the x-axis label (the axis is shared).
    panels[1].set_xlabel("Time (Days)")

    plt.tight_layout()
    plt.show()


# ─── LOOP & DISPLAY ALL N_INST INSTRUMENTS ─────────────────────────────────────
# One two-panel figure per instrument; indices are zero-based here, while
# plot_inst shows them one-based in the figure titles.
for inst in range(N_INST):
    plot_inst(inst)


In [None]:
import pandas as pd
import numpy as np

# 1) Load your CSV (assuming the first column is an index and the rest are floats)
df = pd.read_csv("predictions.csv", index_col=0)

# 2) Flatten to a 1-D array of all values
all_vals = df.values.flatten()

# 3) Round to the nearest 0.05 (i.e. 1 / BINS_PER_UNIT).
#    Scaling up, rounding to an integer and dividing back keeps the bin labels
#    as clean decimals (0.05, 0.10, 0.15, …).
BINS_PER_UNIT = 20
rounded = np.round(all_vals * BINS_PER_UNIT) / BINS_PER_UNIT

# 4) Compute frequencies: how many scores fall on each 0.05 step, in ascending order
freq = pd.Series(rounded).value_counts().sort_index()

print(freq)


In [None]:
# ─── GRID-SEARCH BEST THRESHOLD ────────────────────────────────────────────────
# (assumes pred_matrix, true_aligned, length, and SHIFT are already defined)
#
# NOTE(review): scores above the threshold are labelled 2 (Bull) here, whereas the
# first cell labels scores above its threshold 0 (Bear). The accuracy reported
# below is only meaningful for the polarity used in this cell — confirm which
# mapping is intended before reusing the winning threshold elsewhere.

# 1) define the candidate thresholds
thresholds = np.linspace(0.0, 1.0, 101)   # 0.00, 0.01, 0.02, …, 1.00

# 2) align the raw scores to the window once, outside the loop.
#    Thresholding is element-wise, so shifting the scores and then binarizing
#    yields exactly the labels obtained by binarizing first and shifting after;
#    doing it here avoids repeating the roll for every candidate threshold.
scores_al = pred_matrix[:, :length]
if SHIFT:
    n_cols = scores_al.shape[1]
    # The roll-and-fill only makes sense for 0 < SHIFT < n_cols; anything else
    # previously surfaced as an opaque IndexError / np.repeat error.
    if not 0 < SHIFT < n_cols:
        raise ValueError(f"SHIFT must satisfy 0 <= SHIFT < {n_cols}, got {SHIFT}")
    # np.roll returns a new array, so pred_matrix itself is never modified.
    scores_al = np.roll(scores_al, -SHIFT, axis=1)
    # Overwrite the wrapped-around tail with the last genuine column.
    scores_al[:, -SHIFT:] = scores_al[:, [-SHIFT - 1]]

# 3) collect (thr, accuracy) pairs
results = []
for thr in thresholds:
    # binarize at this threshold: above → 2, otherwise → 0
    pred_al = np.where(scores_al > thr, 2, 0)
    # overall accuracy = fraction of (instrument, timestep) cells that match
    acc = (pred_al == true_aligned).mean()
    results.append((thr, acc))

# 4) build a DataFrame and pick best (on ties idxmax keeps the first, i.e. lowest, threshold)
res_df = pd.DataFrame(results, columns=["threshold", "accuracy"])
best = res_df.loc[res_df.accuracy.idxmax()]

print(f"👉 Best threshold = {best.threshold:.2f}  with accuracy ≈ {best.accuracy:.4f}")

# 5) optional: display the table without its last 30 rows
#    (head(-30) keeps thresholds 0.00–0.70 of the 101 candidates)
res_df.head(-30)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def plot_ma_regimes(price_file: str,
                    short_window: int = 20,
                    long_window: int  = 50):
    """
    Plot a moving-average crossover regime chart for every instrument.

    The file is read as whitespace-separated values without a header; each
    column is one instrument and each row one timestep. For every column:
      - two rolling means are computed (windows ``short_window`` and
        ``long_window``, both with ``min_periods=1`` so early rows are defined)
      - the regime is 0 (bear) where the long mean exceeds the short mean,
        otherwise 2 (bull)
      - price and both means are drawn over a red (0) / green (otherwise)
        background, and the figure is shown immediately

    Nothing is returned.
    """
    # 1) load prices
    price_table = pd.read_csv(price_file, sep=r"\s+", header=None)
    n_rows, n_cols = price_table.shape
    dates = np.arange(n_rows)

    def shade_background(ax, regs):
        """Shade red where regs==0, green elsewhere, one span per run of equal labels."""
        # Positions where the label differs from its predecessor open a new run.
        flips = np.flatnonzero(regs[1:] != regs[:-1]) + 1
        run_starts = [0, *flips]
        # A run ends where the next one starts; the last run ends at the final date.
        run_ends = [*dates[flips], dates[-1]]
        for first, right_edge in zip(run_starts, run_ends):
            tint = "red" if regs[first] == 0 else "green"
            ax.axvspan(dates[first], right_edge, facecolor=tint, alpha=0.3)

    # 2) one figure per instrument
    for col in range(n_cols):
        series = price_table.iloc[:, col]
        fast = series.rolling(window=short_window, min_periods=1).mean()
        slow = series.rolling(window= long_window, min_periods=1).mean()

        # 3) regime: bear=0 when long>short, else bull=2
        regs = np.where(slow > fast, 0, 2)

        # 4) plot the three curves in a fixed order so the legend order is stable
        fig, ax = plt.subplots(figsize=(10,4))
        curves = (
            (series, "black",  "Price"),
            (fast,   "blue",   f"MA{short_window}"),
            (slow,   "orange", f"MA{long_window}"),
        )
        for values, colour, tag in curves:
            ax.plot(dates, values, color=colour, lw=1, label=tag)
        shade_background(ax, regs)

        ax.set_title(f"Instrument {col+1} — MA{long_window} vs MA{short_window} regimes")
        ax.set_xlabel("Time")
        ax.set_ylabel("Price")
        ax.legend(loc="upper left")
        plt.tight_layout()
        plt.show()

# ─── Example usage in a notebook ───────────────────────────────────────────────
# Draws one figure per column of prices.txt, using 20- and 50-row rolling means.
plot_ma_regimes("prices.txt", short_window=20, long_window=50)
