In [None]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from pathlib import Path
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# ─── CONFIG ────────────────────────────────────────────────────────────────
# Module-level settings read by calc_features, train_and_save and
# plot_test_predictions below.
NUM_INST      = 50      # number of instruments (leading columns of prices.txt) to use
EMA_ALPHA     = 0.0095  # smoothing factor passed to Series.ewm(alpha=...)
HORIZON       = 5      # days ahead to predict (label compares t + HORIZON with t)
SHORT_WIN     = 5       # days in the fast slope window
LONG_WIN      = 30      # days in the slow slope window
VOL_WIN       = 30      # days for rolling volatility (std of returns)
TRAIN_START   = 0       # first day to train on
TRAIN_END     = 600     # last day (exclusive) to train on
TEST_START    = 600     # first day to test (inclusive)
TEST_END      = 1000     # last day (exclusive) to test on
N_ESTIMATORS  = 100     # trees in the forest
MAX_FEAT      = "sqrt" # sqrt(m) features per split
RS            = 42      # random seed
MODEL_DIR     = Path("models")
# Side effect at import/run time: creates ./models relative to the current
# working directory if it does not exist yet.
MODEL_DIR.mkdir(exist_ok=True)

# ─── locate prices.txt ─────────────────────────────────────────────────────
# Search the current working directory first, then each parent in turn;
# the nearest directory containing prices.txt wins.
cwd = Path.cwd()
for folder in (cwd, *cwd.parents):
    if (folder / "prices.txt").exists():
        PRICES_PATH = folder / "prices.txt"
        break
else:
    # for/else: reached only when the loop finished without `break`
    raise FileNotFoundError("prices.txt not found")

# ─── HELPERS ────────────────────────────────────────────────────────────────
def norm_slope(arr: np.ndarray) -> float:
    """Return the least-squares slope of *arr* after min-max scaling both axes.

    The sample index and the values are each rescaled to [0, 1] (a constant
    input maps to all zeros), then the ordinary-least-squares slope of the
    scaled values against the scaled index is returned.

    The slope is computed in closed form, cov(x, y) / var(x), which is the
    coefficient of a one-variable linear fit with intercept; no estimator
    object is built per window.

    Args:
        arr: 1-D array holding one window of the series.

    Returns:
        The slope as a plain Python float. A single-element window yields
        0.0. NaN values in *arr* propagate to a NaN result.

    Raises:
        ValueError: if *arr* is empty.
    """
    y = np.asarray(arr, dtype=float)
    m = y.size
    if m == 0:
        raise ValueError("norm_slope requires a non-empty array")
    if m == 1:
        # a single point has no direction; avoids 0/0 in the formula below
        return 0.0

    # indices 0..m-1 scaled to [0, 1]
    x = np.arange(m, dtype=float) / (m - 1)
    # `or 1` keeps a flat window at zero instead of dividing by zero
    y = (y - y.min()) / (np.ptp(y) or 1)

    x_centered = x - x.mean()
    return float(x_centered @ (y - y.mean()) / (x_centered @ x_centered))


def calc_features(smooth: pd.Series) -> pd.DataFrame:
    """Build the per-day feature/label table for one EMA-smoothed series.

    For every day ``t`` that has a full look-back window and ``HORIZON``
    future days available, one row is produced with:

    * ``t``          - positional day index into *smooth*
    * ``slope_fast`` - norm_slope over the last ``SHORT_WIN`` values
    * ``slope_slow`` - norm_slope over the last ``LONG_WIN`` values
    * ``slope_diff`` - ``slope_slow - slope_fast``
    * ``vol30``      - std of the last ``VOL_WIN`` one-day returns
    * ``ret1``       - one-day return at ``t``
    * ``label``      - 1 if ``smooth[t + HORIZON] > smooth[t]`` else 0

    Args:
        smooth: the smoothed price series, one value per day.

    Returns:
        DataFrame with the columns above, rows containing NaN dropped and
        the index reset. The columns are present even when no day
        qualifies (series too short).
    """
    columns = ["t", "slope_fast", "slope_slow", "slope_diff",
               "vol30", "ret1", "label"]
    rets = smooth.pct_change().fillna(0)
    n = len(smooth)

    # Every look-back window (including the short one) must fit inside the
    # series, otherwise a negative slice start would select the wrong data.
    first_t = max(SHORT_WIN, LONG_WIN, VOL_WIN, HORIZON)

    records = []
    for t in range(first_t, n - HORIZON):
        # each slope is computed once and reused for slope_diff
        fast = norm_slope(smooth.iloc[t - SHORT_WIN + 1 : t + 1].values)
        slow = norm_slope(smooth.iloc[t - LONG_WIN + 1 : t + 1].values)
        records.append({
            "t":          t,
            "slope_fast": fast,
            "slope_slow": slow,
            "slope_diff": slow - fast,
            "vol30":      rets.iloc[t - VOL_WIN + 1 : t + 1].std(),
            "ret1":       rets.iloc[t],
            "label":      int(smooth.iloc[t + HORIZON] > smooth.iloc[t])
        })
    return pd.DataFrame(records, columns=columns).dropna().reset_index(drop=True)

# ─── TRAIN/TEST PIPELINE ───────────────────────────────────────────────────
def train_and_save():
    """Train, evaluate and persist one random forest per instrument.

    Reads the first ``NUM_INST`` columns of the prices file, smooths each
    with an EMA, derives features via ``calc_features``, splits rows by
    their ``t`` value into the train and test windows, fits a
    ``RandomForestClassifier``, prints train/test accuracy (plus a
    classification report when the test labels contain more than one
    class) and dumps the model to ``MODEL_DIR/rf_inst<inst>.joblib``.

    NOTE(review): training rows close to ``TRAIN_END`` carry labels that
    look ``HORIZON`` days ahead, i.e. into the test window - confirm this
    overlap is acceptable.
    """
    feature_cols = ["slope_fast", "slope_slow", "slope_diff", "vol30", "ret1"]
    prices = pd.read_csv(PRICES_PATH, sep=r"\s+", header=None).iloc[:, :NUM_INST]

    for inst in prices.columns:
        ema = prices[inst].ewm(alpha=EMA_ALPHA, adjust=False).mean()
        feats = calc_features(ema)

        # split rows by day index
        day = feats['t']
        train_rows = feats[(day >= TRAIN_START) & (day < TRAIN_END)]
        test_rows = feats[(day >= TEST_START) & (day < TEST_END)]

        X_train, y_train = train_rows[feature_cols], train_rows["label"]
        X_test, y_test = test_rows[feature_cols], test_rows["label"]

        model = RandomForestClassifier(
            n_estimators=N_ESTIMATORS,
            max_features=MAX_FEAT,
            random_state=RS
        )
        model.fit(X_train, y_train)

        pred_train = model.predict(X_train)
        pred_test = model.predict(X_test)
        train_acc = accuracy_score(y_train, pred_train)
        test_acc = accuracy_score(y_test, pred_test)
        print(f"Inst {inst}: Train acc={train_acc:.3f}, Test acc={test_acc:.3f}")

        # the report needs at least two classes in the test labels
        classes = sorted(y_test.unique())
        if len(classes) <= 1:
            print(f"Only one class present in Test for Inst {inst} (class {classes[0]}), skipping classification_report.")
        else:
            class_names = ["Down" if c == 0 else "Up" for c in classes]
            print(classification_report(y_test, pred_test,
                                        labels=classes,
                                        target_names=class_names))

        # persist the fitted model for plot_test_predictions
        joblib.dump(model, MODEL_DIR / f"rf_inst{inst}.joblib")

# ─── PLOTTING ON TEST WINDOW ────────────────────────────────────────────────
def plot_test_predictions():
    """Plot each instrument's saved-model predictions over the test window.

    For every instrument the features are recomputed, the model written by
    ``train_and_save`` is loaded, and two stacked panels are shown:

    1. the EMA over the test days, shaded green where the model predicts
       "up" and red where it predicts "down";
    2. the raw price over the whole series, shaded by the held position
       (+1 / -1), where the last prediction is carried forward over days
       that have no prediction of their own.

    One figure is shown per instrument via ``plt.show()``.
    """
    feature_cols = ["slope_fast", "slope_slow", "slope_diff", "vol30", "ret1"]
    df = pd.read_csv(PRICES_PATH, sep=r"\s+", header=None)
    df = df.iloc[:, :NUM_INST]

    for inst in df.columns:
        series = df[inst]
        smooth = series.ewm(alpha=EMA_ALPHA, adjust=False).mean()
        df_feat = calc_features(smooth)

        # NOTE: joblib.load unpickles the file - only load models that were
        # written by train_and_save, never files from an untrusted source.
        rf = joblib.load(MODEL_DIR / f"rf_inst{inst}.joblib")

        test_df = df_feat[(df_feat['t'] >= TEST_START) & (df_feat['t'] < TEST_END)]
        X_test  = test_df[feature_cols]
        preds   = rf.predict(X_test)
        days    = test_df['t'].values

        # Position per day of the full series. Days without a prediction
        # start as NaN so that ffill() really carries the last position
        # forward (forward-filling an array of zeros would do nothing);
        # days before the first prediction end up flat (0).
        prices = series.values
        pos = np.full(len(prices), np.nan)
        pos[days] = np.where(preds == 1, 1.0, -1.0)
        pos = pd.Series(pos).ffill().fillna(0).astype(int).values

        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True)

        # panel 1: EMA with per-day prediction shading
        ax1.plot(days, smooth.iloc[days], color="black", label="EMA Price")
        ax1.fill_between(days, smooth.min(), smooth.max(), where=preds == 1,
                         facecolor="green", alpha=0.3)
        ax1.fill_between(days, smooth.min(), smooth.max(), where=preds == 0,
                         facecolor="red", alpha=0.3)
        ax1.set_ylabel("EMA Price")
        ax1.set_title(f"Inst {inst}: Predictions t={TEST_START}-{TEST_END}")
        ax1.legend(loc="upper left")

        # panel 2: raw price with held-position shading
        all_days = np.arange(len(prices))
        ax2.plot(all_days, prices, color="black", label="Raw Price")
        ax2.fill_between(all_days, prices.min(), prices.max(), where=pos == 1,
                         facecolor="green", alpha=0.3)
        ax2.fill_between(all_days, prices.min(), prices.max(), where=pos == -1,
                         facecolor="red", alpha=0.3)
        ax2.set_ylabel("Raw Price")
        ax2.set_xlabel("Day")
        ax2.legend(loc="upper left")

        plt.tight_layout()
        plt.show()

# ─── MAIN ──────────────────────────────────────────────────────────────────
# Script entry point: train and save one model per instrument, then plot the
# test-window predictions, and report the total wall-clock time.
if __name__ == "__main__":
    start = time.time()
    train_and_save()
    plot_test_predictions()
    print(f"Done in {time.time()-start:.1f}s")


In [None]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from pathlib import Path
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# ─── CONFIG ────────────────────────────────────────────────────────────────
# Module-level settings read by calc_features, train_and_save and
# plot_test_predictions below.
NUM_INST       = 10      # number of instruments (leading columns of prices.txt) to use
EMA_ALPHA      = 0.0095  # smoothing factor passed to Series.ewm(alpha=...)
HORIZON        = 5     # days ahead to predict (label compares t + HORIZON with t)
SHORT_WIN      = 5       # days in the fast slope window
LONG_WIN       = 30      # days in the slow slope window
VOL_WIN        = 30      # days for rolling volatility (std of returns)
TRAIN_START    = 0       # first day to train on
TRAIN_END      = 600     # last day (exclusive) to train on
TEST_START     = 600     # first day to test (inclusive)
TEST_END       = 1000     # last day (exclusive) to test on
N_ESTIMATORS   = 100     # trees in the forest
MAX_FEAT       = "sqrt" # sqrt(m) features per split
RS             = 42      # random seed


PROBA_WIN      = 10      # window for regression on predicted probabilities
SLOPE_THRESH   = 0.01   # threshold below which slope is considered flat
MODEL_DIR      = Path("models")
# Side effect at import/run time: creates ./models relative to the current
# working directory if it does not exist yet.
MODEL_DIR.mkdir(exist_ok=True)

# ─── locate prices.txt ─────────────────────────────────────────────────────
# Search the current working directory first, then each parent in turn;
# the nearest directory containing prices.txt wins.
cwd = Path.cwd()
for folder in (cwd, *cwd.parents):
    if (folder / "prices.txt").exists():
        PRICES_PATH = folder / "prices.txt"
        break
else:
    # for/else: reached only when the loop finished without `break`
    raise FileNotFoundError("prices.txt not found")

# ─── HELPERS ────────────────────────────────────────────────────────────────
def norm_slope(arr: np.ndarray) -> float:
    """Return the least-squares slope of *arr* after min-max scaling both axes.

    The sample index and the values are each rescaled to [0, 1] (a constant
    input maps to all zeros), then the ordinary-least-squares slope of the
    scaled values against the scaled index is returned.

    The slope is computed in closed form, cov(x, y) / var(x), which is the
    coefficient of a one-variable linear fit with intercept; no estimator
    object is built per window.

    Args:
        arr: 1-D array holding one window of the series.

    Returns:
        The slope as a plain Python float. A single-element window yields
        0.0. NaN values in *arr* propagate to a NaN result.

    Raises:
        ValueError: if *arr* is empty.
    """
    values = np.asarray(arr, dtype=float)
    count = values.size
    if count == 0:
        raise ValueError("norm_slope requires a non-empty array")
    if count == 1:
        # a single point has no direction; avoids 0/0 in the formula below
        return 0.0

    # indices 0..count-1 scaled to [0, 1]
    x = np.arange(count, dtype=float) / (count - 1)
    # `or 1` keeps a flat window at zero instead of dividing by zero
    y = (values - values.min()) / (np.ptp(values) or 1)

    x_centered = x - x.mean()
    return float(x_centered @ (y - y.mean()) / (x_centered @ x_centered))


def calc_features(smooth: pd.Series) -> pd.DataFrame:
    """Build the per-day feature/label table for one EMA-smoothed series.

    For every day ``t`` that has a full look-back window and ``HORIZON``
    future days available, one row is produced with:

    * ``t``          - positional day index into *smooth*
    * ``slope_fast`` - norm_slope over the last ``SHORT_WIN`` values
    * ``slope_slow`` - norm_slope over the last ``LONG_WIN`` values
    * ``slope_diff`` - ``slope_slow - slope_fast``
    * ``vol30``      - std of the last ``VOL_WIN`` one-day returns
    * ``ret1``       - one-day return at ``t``
    * ``label``      - 1 if ``smooth[t + HORIZON] > smooth[t]`` else 0

    Args:
        smooth: the smoothed price series, one value per day.

    Returns:
        DataFrame with the columns above, rows containing NaN dropped and
        the index reset. The columns are present even when no day
        qualifies (series too short).
    """
    columns = ["t", "slope_fast", "slope_slow", "slope_diff",
               "vol30", "ret1", "label"]
    rets = smooth.pct_change().fillna(0)
    n = len(smooth)

    # Every look-back window (including the short one) must fit inside the
    # series, otherwise a negative slice start would select the wrong data.
    first_t = max(SHORT_WIN, LONG_WIN, VOL_WIN, HORIZON)

    records = []
    for t in range(first_t, n - HORIZON):
        # each slope is computed once and reused for slope_diff
        fast = norm_slope(smooth.iloc[t - SHORT_WIN + 1 : t + 1].values)
        slow = norm_slope(smooth.iloc[t - LONG_WIN + 1 : t + 1].values)
        records.append({
            "t":          t,
            "slope_fast": fast,
            "slope_slow": slow,
            "slope_diff": slow - fast,
            "vol30":      rets.iloc[t - VOL_WIN + 1 : t + 1].std(),
            "ret1":       rets.iloc[t],
            "label":      int(smooth.iloc[t + HORIZON] > smooth.iloc[t])
        })
    return pd.DataFrame(records, columns=columns).dropna().reset_index(drop=True)

# ─── TRAIN/TEST PIPELINE ───────────────────────────────────────────────────
def train_and_save():
    """Train, evaluate and persist one random forest per instrument.

    Each of the first ``NUM_INST`` price columns is EMA-smoothed and turned
    into features by ``calc_features``; rows are split into train and test
    sets by their ``t`` value. A ``RandomForestClassifier`` is fitted on
    the train rows, accuracies are printed (with a classification report
    when the test labels hold more than one class), and the model is
    written to ``MODEL_DIR/rf_inst<inst>.joblib``.
    """
    feature_cols = ["slope_fast", "slope_slow", "slope_diff", "vol30", "ret1"]
    prices = pd.read_csv(PRICES_PATH, sep=r"\s+", header=None).iloc[:, :NUM_INST]

    for inst in prices.columns:
        ema = prices[inst].ewm(alpha=EMA_ALPHA, adjust=False).mean()
        feats = calc_features(ema)

        day = feats['t']
        in_train = (day >= TRAIN_START) & (day < TRAIN_END)
        in_test = (day >= TEST_START) & (day < TEST_END)

        X_train = feats.loc[in_train, feature_cols]
        y_train = feats.loc[in_train, "label"]
        X_test = feats.loc[in_test, feature_cols]
        y_test = feats.loc[in_test, "label"]

        model = RandomForestClassifier(n_estimators=N_ESTIMATORS,
                                       max_features=MAX_FEAT,
                                       random_state=RS)
        model.fit(X_train, y_train)

        pred_train = model.predict(X_train)
        pred_test = model.predict(X_test)
        train_acc = accuracy_score(y_train, pred_train)
        test_acc = accuracy_score(y_test, pred_test)
        print(f"Inst {inst}: Train acc={train_acc:.3f}, "
              f"Test acc={test_acc:.3f}")

        # the report is only produced for a two-class test set
        if y_test.nunique() <= 1:
            print(f"Inst {inst}: single-class test set, skipping report.")
        else:
            print(classification_report(y_test, pred_test,
                                        target_names=["Down", "Up"]))

        joblib.dump(model, MODEL_DIR / f"rf_inst{inst}.joblib")

# ─── PLOTTING WITH PROBABILITY SLOPE DETECTION ──────────────────────────────
def plot_test_predictions():
    """Plot predictions, held positions and the P(up) trend per instrument.

    For every instrument the features are recomputed, the model written by
    ``train_and_save`` is loaded, and three stacked panels are shown:

    1. the EMA over the test days, shaded by the binary prediction
       (P(up) > 0.5 is "up");
    2. the raw price over the whole series, shaded by the held position
       (+1 / -1), the last prediction being carried forward over days
       without a prediction of their own;
    3. P(up), its rolling regression slope over ``PROBA_WIN`` days, and
       grey shading where the absolute slope is below ``SLOPE_THRESH``
       ("flat regime").

    One figure is shown per instrument via ``plt.show()``.
    """
    feature_cols = ["slope_fast", "slope_slow", "slope_diff", "vol30", "ret1"]
    df = pd.read_csv(PRICES_PATH, sep=r"\s+", header=None).iloc[:, :NUM_INST]

    for inst in df.columns:
        series = df[inst]
        smooth = series.ewm(alpha=EMA_ALPHA, adjust=False).mean()
        df_feat = calc_features(smooth)

        # NOTE: joblib.load unpickles the file - only load models that were
        # written by train_and_save, never files from an untrusted source.
        rf = joblib.load(MODEL_DIR / f"rf_inst{inst}.joblib")

        test_df = df_feat[(df_feat['t'] >= TEST_START) & (df_feat['t'] < TEST_END)]
        X_test  = test_df[feature_cols]
        days    = test_df['t'].values

        # A model fitted on a single class has only one probability column,
        # so look the "up" column up by class label instead of position.
        proba   = rf.predict_proba(X_test)
        classes = list(rf.classes_)
        if 1 in classes:
            proba_up = proba[:, classes.index(1)]
        else:
            proba_up = np.zeros(len(X_test))

        # Rolling slope of P(up): the regression coefficient over each
        # window (not the fitted value at the window's end).
        proba_slope = np.full_like(proba_up, np.nan)
        Xw = np.arange(PROBA_WIN).reshape(-1, 1)
        lr = LinearRegression()
        for i in range(PROBA_WIN - 1, len(proba_up)):
            lr.fit(Xw, proba_up[i - PROBA_WIN + 1 : i + 1])
            proba_slope[i] = lr.coef_[0]

        # Flat means a small slope in either direction; warm-up entries are
        # NaN and compare False, so they are never marked flat.
        with np.errstate(invalid="ignore"):
            flat_mask = np.abs(proba_slope) < SLOPE_THRESH

        # Position per day of the full series. Days without a prediction
        # start as NaN so that ffill() really carries the last position
        # forward (forward-filling an array of zeros would do nothing).
        preds = (proba_up > 0.5).astype(int)
        prices = series.values
        pos = np.full(len(prices), np.nan)
        pos[days] = np.where(preds == 1, 1.0, -1.0)
        pos = pd.Series(pos).ffill().fillna(0).astype(int).values

        fig, axs = plt.subplots(3, 1, figsize=(12, 10), sharex=True)

        # panel 1: EMA & predictions
        axs[0].plot(days, smooth.iloc[days], color='black', label='EMA Price')
        axs[0].fill_between(days, smooth.min(), smooth.max(), where=preds == 1,
                            facecolor='green', alpha=0.3)
        axs[0].fill_between(days, smooth.min(), smooth.max(), where=preds == 0,
                            facecolor='red', alpha=0.3)
        axs[0].set_ylabel('EMA Price')
        axs[0].set_title(f'Inst {inst}: t={TEST_START}-{TEST_END}')
        axs[0].legend(loc='upper left')

        # panel 2: raw price & positions
        all_days = np.arange(len(prices))
        axs[1].plot(all_days, prices, color='black', label='Raw Price')
        axs[1].fill_between(all_days, prices.min(), prices.max(), where=pos == 1,
                            facecolor='green', alpha=0.3)
        axs[1].fill_between(all_days, prices.min(), prices.max(), where=pos == -1,
                            facecolor='red', alpha=0.3)
        axs[1].set_ylabel('Raw Price')
        axs[1].legend(loc='upper left')

        # panel 3: probability & slope
        axs[2].plot(days, proba_up, color='blue', label='P(up)')
        axs[2].plot(days, proba_slope, color='orange', label='Proba Slope')
        axs[2].fill_between(days, 0, 1, where=flat_mask,
                            facecolor='grey', alpha=0.2, label='Flat Regime')
        axs[2].set_ylabel('Prob / Slope')
        axs[2].set_xlabel('Day')
        axs[2].legend(loc='upper left')

        plt.tight_layout()
        plt.show()

# ─── MAIN ──────────────────────────────────────────────────────────────────
# Script entry point: train and save one model per instrument, then plot the
# test-window predictions, and report the total wall-clock time.
if __name__ == '__main__':
    start = time.time()
    train_and_save()
    plot_test_predictions()
    print(f'Done in {time.time()-start:.1f}s')


In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ─── Auto-discover prices.txt ──────────────────────────────────────────────────
# Search the current working directory first, then each parent in turn;
# PRICES_PATH is bound to the nearest prices.txt found.
cwd = Path.cwd()
for folder in (cwd, *cwd.parents):
    if (folder / "prices.txt").exists():
        PRICES_PATH = folder / "prices.txt"
        break
else:
    # for/else: reached only when the loop finished without `break`
    raise FileNotFoundError("prices.txt not found in cwd or any parent directory")

# ─── Core Functions ────────────────────────────────────────────────────────────
def ma_gradient_signals_from_file(prices_file: Path,
                                  ma_window: int = 50,
                                  regress_window: int = 50,
                                  long_threshold: float = 0.0,
                                  short_threshold: float = 0.0):
    """Compute a per-instrument SMA and trend signals from its gradient.

    For each column of the price matrix a simple moving average is built
    (averaging over the bars available so far during the first
    ``ma_window`` bars). From bar ``regress_window`` onwards a straight
    line is least-squares fitted to the last ``regress_window`` SMA values;
    its slope divided by the window mean gives a normalized slope which is
    thresholded into a signal.

    Args:
        prices_file: path to a whitespace-separated price matrix, one row
            per timestep and one column per instrument.
        ma_window: number of bars in the moving average (>= 1).
        regress_window: number of SMA values in each regression (>= 1).
        long_threshold: normalized slope above which the signal is +1.
        short_threshold: normalized slope below which the signal is -1.

    Returns:
        ``(ma, signals)``: two DataFrames shaped like the price matrix,
        holding the SMA and the integer signals (+1, -1 or 0). Bars with
        ``t < regress_window`` always carry signal 0.

    Raises:
        ValueError: if ``ma_window`` or ``regress_window`` is below 1.
    """
    if ma_window < 1 or regress_window < 1:
        raise ValueError("ma_window and regress_window must be >= 1")

    prices = pd.read_csv(prices_file, sep=r'\s+', header=None)
    T = len(prices)
    ma      = pd.DataFrame(index=prices.index, columns=prices.columns, dtype=float)
    # filled with 0 up front: an empty frame cannot hold NaN as int
    signals = pd.DataFrame(0, index=prices.index, columns=prices.columns, dtype=int)

    # The regression design matrix [x, 1] is the same for every window.
    A = np.vstack([np.arange(regress_window), np.ones(regress_window)]).T

    for inst in prices.columns:
        p = prices[inst].values
        m = np.empty(T, dtype=float)
        s = np.zeros(T, dtype=int)
        cumsum = 0.0

        for t in range(T):
            # running sum over the last ma_window prices
            cumsum += p[t]
            if t >= ma_window:
                cumsum -= p[t - ma_window]
                m[t] = cumsum / ma_window
            else:
                m[t] = cumsum / (t + 1)

            if t < regress_window:
                continue  # warm-up: signal stays 0

            y = m[t - regress_window + 1 : t + 1]
            slope = np.linalg.lstsq(A, y, rcond=None)[0][0]

            # normalize by the window's mean level; a zero mean would
            # otherwise produce a division by zero
            mean_y = np.mean(y)
            rel_slope = slope / mean_y if mean_y != 0 else 0.0

            if rel_slope > long_threshold:
                s[t] = 1
            elif rel_slope < short_threshold:
                s[t] = -1

        ma[inst] = m
        signals[inst] = s

    return ma, signals


def plot_instrument(prices_file: Path,
                    ma: pd.DataFrame,
                    signals: pd.DataFrame,
                    inst: int,
                    ma_window: int,
                    t0: int,
                    t1: int):
    """
    Plot price, MA, and green/red shading for a single instrument index
    over the time window [t0, t1).

    The window is clipped to the available data when ``t1`` runs past the
    end of the series.

    Args:
        prices_file : Path to prices.txt
        ma          : DataFrame of MAs
        signals     : DataFrame of signals
        inst        : instrument column index
        ma_window   : window length for SMA (for label)
        t0          : start timestep (inclusive)
        t1          : end   timestep (exclusive)

    Raises:
        ValueError: if the window selects no rows.
    """
    df = pd.read_csv(prices_file, sep=r'\s+', header=None)
    window = slice(t0, t1)
    p = df[inst].values[window]
    m = ma[inst].values[window]
    s = signals[inst].values[window]
    if p.size == 0:
        raise ValueError(f"empty plot window: t0={t0}, t1={t1}, rows={len(df)}")

    # Slicing silently truncates past the end of the data; trim the x axis
    # to the same length so the plotted arrays always match.
    idx = np.arange(t0, t1)[:p.size]

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(idx, p, label="Price")
    ax.plot(idx, m, label=f"SMA (window={ma_window})", linestyle="--")

    # shade long = green, short = red
    lo, hi = p.min(), p.max()
    ax.fill_between(idx, lo, hi,
                    where=(s == 1), color="green", alpha=0.1, step="pre")
    ax.fill_between(idx, lo, hi,
                    where=(s == -1), color="red",   alpha=0.1, step="pre")

    ax.set_title(f"Instrument {inst}: Price + MA Signals (t={t0} to {t1})")
    ax.set_xlabel("Time")
    ax.set_ylabel("Price")
    ax.legend()
    plt.show()


# ─── Example Usage ───────────────────────────────────────────────────────────
if __name__ == "__main__":
    # SMA/regression settings and the +/- normalized-slope thresholds
    ma_window       = 4
    regress_window  = 3
    long_threshold  = 0.0001
    short_threshold = -0.0001

    ma_df, signals_df = ma_gradient_signals_from_file(
        PRICES_PATH,
        ma_window=ma_window,
        regress_window=regress_window,
        long_threshold=long_threshold,
        short_threshold=short_threshold
    )

    # Plot instrument 1 over the window t=400 (inclusive) to t=550 (exclusive)
    plot_instrument(
        PRICES_PATH,
        ma_df,
        signals_df,
        inst=1,
        ma_window=ma_window,
        t0=400,
        t1=550
    )


In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ─── Auto-discover prices.txt ──────────────────────────────────────────────────
# Search the current working directory first, then each parent in turn;
# PRICES_PATH is bound to the nearest prices.txt found.
cwd = Path.cwd()
for folder in (cwd, *cwd.parents):
    if (folder / "prices.txt").exists():
        PRICES_PATH = folder / "prices.txt"
        break
else:
    # for/else: reached only when the loop finished without `break`
    raise FileNotFoundError("prices.txt not found in cwd or any parent directory")

# ─── Core Functions ────────────────────────────────────────────────────────────
def ma_gradient_signals_from_file(prices_file: Path,
                                  ma_window: int = 50,
                                  regress_window: int = 50,
                                  long_threshold: float = 0.0,
                                  short_threshold: float = 0.0):
    """Compute a per-instrument SMA and trend signals from its gradient.

    For each column of the price matrix a simple moving average is built
    (averaging over the bars available so far during the first
    ``ma_window`` bars). From bar ``regress_window`` onwards a straight
    line is least-squares fitted to the last ``regress_window`` SMA values;
    its slope divided by the window mean gives a normalized slope which is
    thresholded into a signal.

    Args:
        prices_file: path to a whitespace-separated price matrix, one row
            per timestep and one column per instrument.
        ma_window: number of bars in the moving average (>= 1).
        regress_window: number of SMA values in each regression (>= 1).
        long_threshold: normalized slope above which the signal is +1.
        short_threshold: normalized slope below which the signal is -1.

    Returns:
        ``(ma, signals)``: two DataFrames shaped like the price matrix,
        holding the SMA and the integer signals (+1, -1 or 0). Bars with
        ``t < regress_window`` always carry signal 0.

    Raises:
        ValueError: if ``ma_window`` or ``regress_window`` is below 1.
    """
    if ma_window < 1 or regress_window < 1:
        raise ValueError("ma_window and regress_window must be >= 1")

    prices = pd.read_csv(prices_file, sep=r'\s+', header=None)
    n_steps = len(prices)
    ma      = pd.DataFrame(index=prices.index, columns=prices.columns, dtype=float)
    # filled with 0 up front: an empty frame cannot hold NaN as int
    signals = pd.DataFrame(0, index=prices.index, columns=prices.columns, dtype=int)

    # The regression design matrix [x, 1] is the same for every window.
    design = np.vstack([np.arange(regress_window), np.ones(regress_window)]).T

    for inst in prices.columns:
        p = prices[inst].values
        m = np.empty(n_steps, dtype=float)
        s = np.zeros(n_steps, dtype=int)
        running = 0.0

        for t in range(n_steps):
            # running sum over the last ma_window prices
            running += p[t]
            if t >= ma_window:
                running -= p[t - ma_window]
                m[t] = running / ma_window
            else:
                m[t] = running / (t + 1)

            if t < regress_window:
                continue  # warm-up: signal stays 0

            y = m[t - regress_window + 1 : t + 1]
            slope = np.linalg.lstsq(design, y, rcond=None)[0][0]

            # normalize by the window's mean level; a zero mean would
            # otherwise produce a division by zero
            mean_y = np.mean(y)
            rel_slope = slope / mean_y if mean_y != 0 else 0.0

            if rel_slope > long_threshold:
                s[t] = 1
            elif rel_slope < short_threshold:
                s[t] = -1

        ma[inst] = m
        signals[inst] = s

    return ma, signals


def plot_instrument(prices_file: Path,
                    ma: pd.DataFrame,
                    signals: pd.DataFrame,
                    inst: int,
                    ma_window: int,
                    t0: int,
                    t1: int):
    """
    Plot price, MA, and green/red shading for a single instrument index
    over the time window [t0, t1).

    The window is clipped to the available data when ``t1`` runs past the
    end of the series.

    Args:
        prices_file : Path to prices.txt
        ma          : DataFrame of MAs
        signals     : DataFrame of signals
        inst        : instrument column index
        ma_window   : window length for SMA (for label)
        t0          : start timestep (inclusive)
        t1          : end   timestep (exclusive)

    Raises:
        ValueError: if the window selects no rows.
    """
    df = pd.read_csv(prices_file, sep=r'\s+', header=None)
    span = slice(t0, t1)
    price = df[inst].values[span]
    avg = ma[inst].values[span]
    sig = signals[inst].values[span]
    if price.size == 0:
        raise ValueError(f"empty plot window: t0={t0}, t1={t1}, rows={len(df)}")

    # Slicing silently truncates past the end of the data; trim the x axis
    # to the same length so the plotted arrays always match.
    idx = np.arange(t0, t1)[:price.size]

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(idx, price, label="Price")
    ax.plot(idx, avg, label=f"SMA (window={ma_window})", linestyle="--")

    # shade long = green, short = red
    lo, hi = price.min(), price.max()
    ax.fill_between(idx, lo, hi,
                    where=(sig == 1), color="green", alpha=0.1, step="pre")
    ax.fill_between(idx, lo, hi,
                    where=(sig == -1), color="red",   alpha=0.1, step="pre")

    ax.set_title(f"Instrument {inst}: Price + MA Signals (t={t0} to {t1})")
    ax.set_xlabel("Time")
    ax.set_ylabel("Price")
    ax.legend()
    plt.show()


# ─── Example Usage ───────────────────────────────────────────────────────────
if __name__ == "__main__":
    # SMA/regression settings and the +/- normalized-slope thresholds
    ma_window       = 4
    regress_window  = 10
    long_threshold  = 0.0001
    short_threshold = -0.0001

    ma_df, signals_df = ma_gradient_signals_from_file(
        PRICES_PATH,
        ma_window=ma_window,
        regress_window=regress_window,
        long_threshold=long_threshold,
        short_threshold=short_threshold
    )

    # Plot instrument 1 over the window t=300 (inclusive) to t=350 (exclusive)
    plot_instrument(
        PRICES_PATH,
        ma_df,
        signals_df,
        inst=1,
        ma_window=ma_window,
        t0=300,
        t1=350
    )


In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ─── Auto-discover prices.txt ─────────────────────────────────────────────────
# Search the current working directory first, then each parent in turn;
# PRICES_PATH is bound to the nearest prices.txt found.
cwd = Path.cwd()
for folder in (cwd, *cwd.parents):
    if (folder / "prices.txt").exists():
        PRICES_PATH = folder / "prices.txt"
        break
else:
    # for/else: reached only when the loop finished without `break`
    raise FileNotFoundError("prices.txt not found in cwd or any parent directory")

# ─── Core SMA-gradient function (unchanged) ──────────────────────────────────
def ma_gradient_signals_from_file(prices_file: Path,
                                  ma_window: int = 50,
                                  regress_window: int = 50,
                                  long_threshold: float = 0.0,
                                  short_threshold: float = 0.0):
    """Compute a per-instrument SMA and trend signals from its gradient.

    For each column of the price matrix a simple moving average is built
    (averaging over the bars available so far during the first
    ``ma_window`` bars). From bar ``regress_window`` onwards a straight
    line is least-squares fitted to the last ``regress_window`` SMA values;
    its slope divided by the window mean gives a normalized slope which is
    thresholded into a signal.

    Args:
        prices_file: path to a whitespace-separated price matrix, one row
            per timestep and one column per instrument.
        ma_window: number of bars in the moving average (>= 1).
        regress_window: number of SMA values in each regression (>= 1).
        long_threshold: normalized slope above which the signal is +1.
        short_threshold: normalized slope below which the signal is -1.

    Returns:
        ``(ma, signals)``: two DataFrames shaped like the price matrix,
        holding the SMA and the integer signals (+1, -1 or 0). Bars with
        ``t < regress_window`` always carry signal 0.

    Raises:
        ValueError: if ``ma_window`` or ``regress_window`` is below 1.
    """
    if ma_window < 1 or regress_window < 1:
        raise ValueError("ma_window and regress_window must be >= 1")

    prices = pd.read_csv(prices_file, sep=r'\s+', header=None)
    T = len(prices)
    ma      = pd.DataFrame(index=prices.index, columns=prices.columns, dtype=float)
    signals = pd.DataFrame(0,    index=prices.index, columns=prices.columns, dtype=int)

    # The regression design matrix [x, 1] is the same for every window.
    A = np.vstack([np.arange(regress_window), np.ones(regress_window)]).T

    for inst in prices.columns:
        p = prices[inst].values
        m = np.empty(T, dtype=float)
        s = np.zeros(T, dtype=int)
        cumsum = 0.0

        for t in range(T):
            # running sum over the last ma_window prices
            cumsum += p[t]
            if t >= ma_window:
                cumsum -= p[t - ma_window]
                m[t] = cumsum / ma_window
            else:
                m[t] = cumsum / (t + 1)

            if t < regress_window:
                continue  # warm-up: signal stays 0

            y = m[t - regress_window + 1 : t + 1]
            slope = np.linalg.lstsq(A, y, rcond=None)[0][0]

            # normalize by the window's mean level; a zero mean would
            # otherwise produce a division by zero
            mean_y = np.mean(y)
            rel_slope = slope / mean_y if mean_y != 0 else 0.0

            if rel_slope > long_threshold:
                s[t] = 1
            elif rel_slope < short_threshold:
                s[t] = -1

        ma[inst] = m
        signals[inst] = s

    return ma, signals


# ─── Combine two signal sets by “AND” agreement ─────────────────────────────
def combined_signals(prices_file: Path,
                     ma_window: int,
                     params1: dict,
                     params2: dict):
    """
    Compute two SMA-gradient signal sets and keep only their consensus.

    ``ma_gradient_signals_from_file`` is run once per parameter dict, both
    times with the given ``ma_window`` (overriding any ``ma_window`` key in
    the dicts). A cell of the result keeps the first run's signal where the
    two runs are equal and is 0 everywhere else.

    Returns:
        ``(ma, combined)``: the moving averages from the first run and the
        consensus signals.
    """
    # both runs share the same moving-average window
    first_kwargs = {**params1, 'ma_window': ma_window}
    second_kwargs = {**params2, 'ma_window': ma_window}

    ma_first, sig_first = ma_gradient_signals_from_file(prices_file, **first_kwargs)
    sig_second = ma_gradient_signals_from_file(prices_file, **second_kwargs)[1]

    # keep the signal where the runs agree, zero it where they differ
    consensus = sig_first.where(sig_first == sig_second, 0)

    return ma_first, consensus


# ─── Plotting (you can reuse your existing one) ──────────────────────────────
def plot_instrument(prices_file: Path,
                    ma: pd.DataFrame,
                    signals: pd.DataFrame,
                    inst: int,
                    ma_window: int,
                    t0: int,
                    t1: int):
    """
    Plot price, MA, and green/red consensus shading for one instrument
    over the time window [t0, t1).

    The window is clipped to the available data when ``t1`` runs past the
    end of the series.

    Args:
        prices_file : Path to prices.txt
        ma          : DataFrame of MAs
        signals     : DataFrame of signals (+1 shaded green, -1 shaded red)
        inst        : instrument column index
        ma_window   : window length for SMA (for label)
        t0          : start timestep (inclusive)
        t1          : end   timestep (exclusive)

    Raises:
        ValueError: if the window selects no rows.
    """
    df = pd.read_csv(prices_file, sep=r'\s+', header=None)
    window = slice(t0, t1)
    p = df[inst].values[window]
    m = ma[inst].values[window]
    s = signals[inst].values[window]
    if p.size == 0:
        raise ValueError(f"empty plot window: t0={t0}, t1={t1}, rows={len(df)}")

    # Slicing silently truncates past the end of the data; trim the x axis
    # to the same length so the plotted arrays always match.
    idx = np.arange(t0, t1)[:p.size]

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(idx, p, label="Price")
    ax.plot(idx, m, label=f"SMA (window={ma_window})", linestyle="--")

    lo, hi = p.min(), p.max()
    ax.fill_between(idx, lo, hi,
                    where=(s == 1), color="green", alpha=0.1, step="pre")
    ax.fill_between(idx, lo, hi,
                    where=(s == -1), color="red",   alpha=0.1, step="pre")

    ax.set_title(f"Instrument {inst}: Price + Consensus Signals (t={t0}–{t1})")
    ax.set_xlabel("Time")
    ax.set_ylabel("Price")
    ax.legend()
    plt.show()


# ─── Example Usage ────────────────────────────────────────────────────────────
if __name__ == "__main__":
    ma_window = 4

    # two parameter sets to “vote”: same thresholds, short vs. long
    # regression look-back
    params_short = {
        'regress_window': 3,
        'long_threshold':  0.0001,
        'short_threshold': -0.0001
    }
    params_long  = {
        'regress_window': 10,
        'long_threshold':  0.0001,
        'short_threshold': -0.0001
    }

    # NOTE: despite its name, ema_df holds the simple moving averages
    # returned by combined_signals.
    ema_df, signals_df = combined_signals(
        PRICES_PATH,
        ma_window,
        params_short,
        params_long
    )

    # Plot instrument 1 over the window t=300 (inclusive) to t=350 (exclusive)
    plot_instrument(
        PRICES_PATH,
        ema_df,
        signals_df,
        inst=1,
        ma_window=ma_window,
        t0=300,
        t1=350
    )


In [None]:
#EMA USAGE

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ─── Auto-discover prices.txt ──────────────────────────────────────────────────
# Search the current working directory first, then each parent in turn;
# PRICES_PATH is bound to the nearest prices.txt found.
cwd = Path.cwd()
for folder in (cwd, *cwd.parents):
    if (folder / "prices.txt").exists():
        PRICES_PATH = folder / "prices.txt"
        break
else:
    # for/else: reached only when the loop finished without `break`
    raise FileNotFoundError("prices.txt not found in cwd or any parent directory")

# ─── Core Functions ────────────────────────────────────────────────────────────
def ema_gradient_signals_from_file(prices_file: Path,
                                   ema_span: int = 30,
                                   regress_window: int = 30,
                                   long_threshold: float = 0.0,
                                   short_threshold: float = 0.0):
    """
    Derive an EMA per instrument and trend signals from the EMA's gradient.

    From bar ``regress_window`` onwards, a straight line is least-squares
    fitted to the last ``regress_window`` EMA values; the slope divided by
    the window's mean (0.0 when that mean is zero) is compared with the
    thresholds.

    prices_file    : Path to whitespace-sep price matrix (T×N)
    ema_span       : span parameter for pandas’ EWM
    regress_window : look-back length for linear regression on EMA
    long_threshold : normalized slope above which the signal is long (1)
    short_threshold: normalized slope below which the signal is short (-1)

    Returns ``(ema, signals)``, both shaped like the price matrix; signals
    are 1, -1 or 0, and always 0 for the first ``regress_window`` bars.
    """
    prices = pd.read_csv(prices_file, sep=r'\s+', header=None)
    ema = prices.ewm(span=ema_span, adjust=False).mean()
    n_steps = prices.shape[0]
    signals = pd.DataFrame(0, index=prices.index, columns=prices.columns, dtype=int)

    # regression design matrix [x, 1]; identical for every window
    steps = np.arange(regress_window)
    design = np.vstack([steps, np.ones(regress_window)]).T
    first_bar = max(regress_window, 0)

    for inst in prices.columns:
        levels = ema[inst].values
        inst_signal = np.zeros(n_steps, dtype=int)

        # bars before first_bar keep the initial 0
        for t in range(first_bar, n_steps):
            window = levels[t - regress_window + 1 : t + 1]
            coeffs = np.linalg.lstsq(design, window, rcond=None)[0]

            # scale the slope by the window's mean level
            level = np.mean(window)
            rel_slope = coeffs[0] / level if level != 0 else 0.0

            if rel_slope > long_threshold:
                inst_signal[t] = 1
            elif rel_slope < short_threshold:
                inst_signal[t] = -1

        signals[inst] = inst_signal

    return ema, signals


def plot_instrument(prices_file: Path,
                    ema: pd.DataFrame,
                    signals: pd.DataFrame,
                    inst: int,
                    ema_span: int,
                    t0: int,
                    t1: int):
    """
    Plot price, EMA, and green/red shading for a single instrument over
    the time window [t0, t1).

    The window is clipped to the available data when ``t1`` runs past the
    end of the series.

    Args:
        prices_file : Path to prices.txt
        ema         : DataFrame of EMAs
        signals     : DataFrame of signals (+1 shaded green, -1 shaded red)
        inst        : instrument column index
        ema_span    : EMA span (used for the legend label only)
        t0          : start timestep (inclusive)
        t1          : end   timestep (exclusive)

    Raises:
        ValueError: if the window selects no rows.
    """
    df = pd.read_csv(prices_file, sep=r'\s+', header=None)
    window = slice(t0, t1)
    p = df[inst].values[window]
    m = ema[inst].values[window]
    s = signals[inst].values[window]
    if p.size == 0:
        raise ValueError(f"empty plot window: t0={t0}, t1={t1}, rows={len(df)}")

    # Slicing silently truncates past the end of the data; trim the x axis
    # to the same length so the plotted arrays always match.
    idx = np.arange(t0, t1)[:p.size]

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(idx, p, label="Price")
    ax.plot(idx, m, label=f"EMA (span={ema_span})", linestyle="--")

    lo, hi = p.min(), p.max()
    ax.fill_between(idx, lo, hi,
                    where=(s == 1), color="green", alpha=0.1, step="pre")
    ax.fill_between(idx, lo, hi,
                    where=(s == -1), color="red",   alpha=0.1, step="pre")

    ax.set_title(f"Instrument {inst}: Price + EMA Signals (t={t0} to {t1})")
    ax.set_xlabel("Time")
    ax.set_ylabel("Price")
    ax.legend()
    plt.show()


# ─── Example Usage ───────────────────────────────────────────────────────────
if __name__ == "__main__":
    # EMA/regression settings and the +/- normalized-slope thresholds
    ema_span        = 6
    regress_window  = 6
    long_threshold  = 0.0001
    short_threshold = -0.0001

    ema_df, signals_df = ema_gradient_signals_from_file(
        PRICES_PATH,
        ema_span=ema_span,
        regress_window=regress_window,
        long_threshold=long_threshold,
        short_threshold=short_threshold
    )

    # Plot instrument 1 over the window t=300 (inclusive) to t=350 (exclusive)
    plot_instrument(
        PRICES_PATH,
        ema_df,
        signals_df,
        inst=1,
        ema_span=ema_span,
        t0=300,
        t1=350
    )


In [None]:
# Could we apply supertrend-line agreement when combining the signals from the code above?

# MA : REG
# 8 : 10
# 4 : 10
# 4 : 3

In [None]:
# Debounce for tiebreak

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ─── Auto-discover prices.txt ──────────────────────────────────────────────────
# Search the current working directory first, then each parent in turn;
# PRICES_PATH is bound to the nearest prices.txt found.
cwd = Path.cwd()
for folder in (cwd, *cwd.parents):
    if (folder / "prices.txt").exists():
        PRICES_PATH = folder / "prices.txt"
        break
else:
    # for/else: reached only when the loop finished without `break`
    raise FileNotFoundError("prices.txt not found in cwd or any parent directory")

# ─── Core Functions with Debounce ──────────────────────────────────────────────
def ema_gradient_signals_from_file(prices_file: Path,
                                   ema_span: int = 50,
                                   regress_window: int = 50,
                                   long_threshold: float = 0.0,
                                   short_threshold: float = 0.0,
                                   buffer: int = 10):
    """
    Compute per-instrument EMA + normalized-gradient signals with a debounce.

    For every bar t >= regress_window a straight line is least-squares fitted
    to the last `regress_window` EMA values; the slope divided by the window
    mean is the normalized slope.  Earlier bars have a raw signal of 0.

    Once the signal flips (e.g. 0→1 or 1→-1), no further flips are allowed
    until at least `buffer` timesteps have elapsed.

    Args:
      prices_file    : Path to whitespace-sep price matrix (T×N)
      ema_span       : span parameter for pandas’ EWM
      regress_window : look-back length for linear regression on EMA
      long_threshold : normalized-slope above which we go long (+1)
      short_threshold: normalized-slope below which we go short (−1)
      buffer         : minimum bars between regime changes

    Returns:
      (ema, signals): two DataFrames shaped like the price matrix.  `signals`
      holds the debounced regime (+1, 0 or -1) for each bar and instrument.
    """
    prices = pd.read_csv(prices_file, sep=r'\s+', header=None)
    ema    = prices.ewm(span=ema_span, adjust=False).mean()
    T      = len(prices)

    # The design matrix depends only on the window length, so build it once
    # instead of once per bar and per instrument.
    x = np.arange(regress_window)
    A = np.vstack([x, np.ones(regress_window)]).T

    signals = pd.DataFrame(0, index=prices.index, columns=prices.columns, dtype=int)

    for inst in prices.columns:
        m = ema[inst].values
        s = np.zeros(T, dtype=int)

        prev_sig    = 0        # last emitted regime
        last_switch = -buffer  # so the first switch can happen immediately

        for t in range(T):
            # —— compute raw signal for this bar ——
            raw = 0
            if t >= regress_window:
                y = m[t - regress_window + 1 : t + 1]
                slope = np.linalg.lstsq(A, y, rcond=None)[0][0]

                # Normalize by the window mean so thresholds are price-scale free.
                mean_y = np.mean(y)
                norm_slope = slope / mean_y if mean_y != 0 else 0.0

                if norm_slope > long_threshold:
                    raw = 1
                elif norm_slope < short_threshold:
                    raw = -1

            # —— apply debounce: only switch if buffer bars passed ——
            if raw != prev_sig and (t - last_switch) >= buffer:
                prev_sig    = raw
                last_switch = t

            s[t] = prev_sig

        signals[inst] = s

    return ema, signals


def plot_instrument(prices_file: Path,
                    ema: pd.DataFrame,
                    signals: pd.DataFrame,
                    inst: int,
                    ema_span: int,
                    t0: int,
                    t1: int):
    """
    Draw one instrument's price and EMA over bars [t0, t1) and shade the
    bars whose signal is +1 (green) or -1 (red).

    The price column is re-read from `prices_file`; `ema` and `signals` are
    indexed by the same instrument label.  `ema_span` is only used for the
    legend text.
    """
    window = slice(t0, t1)
    raw_prices = pd.read_csv(prices_file, sep=r'\s+', header=None)
    price_win  = raw_prices[inst].values[window]
    ema_win    = ema[inst].values[window]
    signal_win = signals[inst].values[window]
    bars = np.arange(t0, t1)

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(bars, price_win, label="Price")
    ax.plot(bars, ema_win, label=f"EMA (span={ema_span})", linestyle="--")

    # Shade the full price range of the window wherever a regime is active.
    for regime, shade in ((1, "green"), (-1, "red")):
        ax.fill_between(bars, price_win.min(), price_win.max(),
                        where=(signal_win == regime),
                        color=shade, alpha=0.1, step="pre")

    ax.set_title(f"Instrument {inst}: Price + Debounced EMA Signals (t={t0}–{t1})")
    ax.set_xlabel("Time")
    ax.set_ylabel("Price")
    ax.legend()
    plt.show()


# ─── Example Usage ────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Parameters for the debounced signal computation; ema_span is also shown
    # in the plot legend.
    ema_span        = 6
    regress_window  = 6
    long_threshold  = 0.0001
    short_threshold = -0.0001
    buffer          = 10  # bars to wait between regime switches

    # Compute the EMA and the debounced signals for every column of the file.
    ema_df, signals_df = ema_gradient_signals_from_file(
        PRICES_PATH,
        ema_span=ema_span,
        regress_window=regress_window,
        long_threshold=long_threshold,
        short_threshold=short_threshold,
        buffer=buffer
    )

    # Plot instrument 33 between t=600 and t=700
    plot_instrument(
        PRICES_PATH,
        ema_df,
        signals_df,
        inst=33,
        ema_span=ema_span,
        t0=600,
        t1=700
    )


In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def kalman_trend_smoother(prices: np.ndarray,
                          R: float = 1.0,
                          Q_level: float = 1e-3,
                          Q_trend: float = 1e-5):
    """
    Causal Kalman filter for level+trend smoothing.

    The state is [level, trend]; each step predicts level += trend and then
    corrects the prediction with the observed price.  Only the current and
    earlier prices influence the estimate at a given step.

    prices    : 1D sequence of observed prices (converted to a float array)
    R         : measurement noise variance
    Q_level   : process noise variance for level
    Q_trend   : process noise variance for trend

    Returns
    -------
    smoothed  : 1D array of the same length, the filtered level estimate
                (an empty array when `prices` is empty)
    """
    # Positional float array: lists work, and a pandas Series is no longer
    # indexed by label.
    prices = np.asarray(prices, dtype=float)
    n = len(prices)
    if n == 0:
        # Nothing to filter; reading prices[0] below would raise IndexError.
        return np.zeros(0)

    # State: [level; trend]
    x = np.array([prices[0], 0.0])       # initial state estimate
    P = np.eye(2)                        # initial covariance

    # State transition and observation matrices
    F = np.array([[1.0, 1.0],
                  [0.0, 1.0]])
    H = np.array([[1.0, 0.0]])
    Q = np.diag([Q_level, Q_trend])
    identity = np.eye(2)

    smoothed = np.zeros(n)

    for t in range(n):
        # 1) Predict
        x_prior = F @ x
        P_prior = F @ P @ F.T + Q

        # 2) Update with measurement prices[t]
        y = prices[t] - (H @ x_prior)[0]       # innovation
        S = (H @ P_prior @ H.T)[0, 0] + R      # innovation covariance
        K = (P_prior @ H.T) / S                # Kalman gain (2×1)

        x = x_prior + (K.flatten() * y)        # posterior state
        P = (identity - K @ H) @ P_prior       # posterior covariance

        smoothed[t] = x[0]

    return smoothed

def plot_kalman_smoothing(
    instrument: int,
    prices_file: str = 'prices.txt',
    R: float = 1.0,
    Q_level: float = 1e-3,
    Q_trend: float = 1e-5
) -> None:
    """
    Plot one instrument's raw prices against their Kalman-filtered level.

    `prices_file` is looked up in the current working directory and then in
    each parent directory.  Raises FileNotFoundError when it is found nowhere
    and IndexError when `instrument` is not a valid column index.
    """
    # ─── Locate the file: first hit walking upwards from cwd ──────────────────
    start = Path.cwd()
    located = next(
        (d / prices_file for d in (start, *start.parents) if (d / prices_file).exists()),
        None,
    )
    if located is None:
        raise FileNotFoundError(f"'{prices_file}' not found in cwd or any parent directory")

    # ─── Load the requested column ────────────────────────────────────────────
    table = pd.read_csv(located, sep=r'\s+', header=None)
    column_count = table.shape[1]
    if not (0 <= instrument < column_count):
        raise IndexError(f"Instrument index must be between 0 and {column_count-1}")

    raw = table.iloc[:, instrument].values
    trend = kalman_trend_smoother(raw, R=R, Q_level=Q_level, Q_trend=Q_trend)

    # ─── Draw both series over the full history ───────────────────────────────
    steps = np.arange(len(raw))
    plt.figure(figsize=(12, 5))
    plt.plot(steps, raw,  label='Raw Price')
    plt.plot(steps, trend, label='Kalman Smoothed Trend', linewidth=2)
    plt.xlabel('Time Step')
    plt.ylabel('Price')
    plt.title(f'Instrument {instrument}: Raw vs Kalman-Filtered Trend')
    plt.legend()
    plt.tight_layout()
    plt.show()

# Example usage
if __name__ == "__main__":
    # You can tune R, Q_level, Q_trend to balance smoothness vs responsiveness
    # Plots the full history of instrument 0 (column 0 of prices.txt).
    plot_kalman_smoothing(
        instrument=0,
        prices_file='prices.txt',
        R=0.075,
        Q_level=1e-5,
        Q_trend=1e-4
    )


In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def kalman_trend_smoother(prices: np.ndarray,
                          R: float = 1.0,
                          Q_level: float = 1e-3,
                          Q_trend: float = 1e-5) -> np.ndarray:
    """
    Causal Kalman filter for level+trend smoothing.

    Args:
      prices  : 1D sequence of observed prices (converted to a float array)
      R       : measurement noise variance
      Q_level : process noise variance of the level state
      Q_trend : process noise variance of the trend state

    Returns the filtered level estimate, one value per input price; an empty
    input yields an empty array.
    """
    observations = np.asarray(prices, dtype=float)
    n = observations.shape[0]
    if n == 0:
        # The initial state is seeded from the first price, so there is
        # nothing to do (and nothing to index) for an empty series.
        return np.zeros(0)

    x = np.array([observations[0], 0.0])     # [level, trend]
    P = np.eye(2)
    F = np.array([[1.0, 1.0], [0.0, 1.0]])   # level += trend, trend unchanged
    H = np.array([[1.0, 0.0]])               # only the level is observed
    Q = np.diag([Q_level, Q_trend])
    eye2 = np.eye(2)

    smoothed = np.zeros(n)
    for t, z in enumerate(observations):
        # predict
        x_prior = F @ x
        P_prior = F @ P @ F.T + Q
        # update
        y = z - (H @ x_prior)[0]
        S = (H @ P_prior @ H.T)[0, 0] + R
        K = (P_prior @ H.T) / S
        x = x_prior + (K.flatten() * y)
        P = (eye2 - K @ H) @ P_prior
        smoothed[t] = x[0]

    return smoothed

def plot_kalman_signals_window(
    instrument: int,
    prices_file: str = 'prices.txt',
    R: float = 1.0,
    Q_level: float = 1e-3,
    Q_trend: float = 1e-5,
    T1: int = 200,
    T2: int = 400,
    pos_thres: float = 0.001,
    neg_thres: float = -0.001
) -> None:
    """
    Plot price and Kalman level for bars [T1, T2) and shade long/short bars.

    A bar is long (green) when the bar-over-bar fractional change of the
    Kalman level exceeds `pos_thres`, short (red) when it is below
    `neg_thres`, and unshaded otherwise.  `T2` is clamped to the series
    length (or set to it when None).

    Raises FileNotFoundError when `prices_file` is not found in cwd or any
    parent, and IndexError when `instrument` is not a valid column index.
    """
    # ─── Locate the file: first hit walking upwards from cwd ────────────────
    start = Path.cwd()
    located = next(
        (d / prices_file for d in (start, *start.parents) if (d / prices_file).exists()),
        None,
    )
    if located is None:
        raise FileNotFoundError(f"'{prices_file}' not found")

    # ─── Load the column and filter it ──────────────────────────────────────
    table = pd.read_csv(located, sep=r'\s+', header=None)
    column_count = table.shape[1]
    if not (0 <= instrument < column_count):
        raise IndexError(f"Instrument must be between 0 and {column_count-1}")

    raw = table.iloc[:, instrument].values
    level = kalman_trend_smoother(raw, R, Q_level, Q_trend)

    # ─── Fractional change per bar: Δlevel / previous level (0 at bar 0) ────
    rel_change = np.zeros_like(level)
    rel_change[1:] = np.diff(level) / level[:-1]

    # ─── Window selection ───────────────────────────────────────────────────
    if T2 is None or T2 > len(raw):
        T2 = len(raw)
    window = slice(T1, T2)
    bars = np.arange(T1, T2)
    price_win = raw[window]
    change_win = rel_change[window]

    # ─── Regime per bar; short takes precedence if both thresholds match ────
    regime = np.where(change_win < neg_thres, -1,
                      np.where(change_win > pos_thres, 1, 0))

    # ─── Plot ───────────────────────────────────────────────────────────────
    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(bars, price_win, label='Raw Price')
    ax.plot(bars, level[window], label='Kalman Smoothed', linestyle='--')

    lo, hi = price_win.min(), price_win.max()
    for value, shade in ((1, 'green'), (-1, 'red')):
        ax.fill_between(bars, lo, hi,
                        where=(regime == value), color=shade, alpha=0.1, step='pre')

    ax.set_title(f'Instrument {instrument}: Price & Kalman Signals (t={T1}–{T2})')
    ax.set_xlabel('Time Step')
    ax.set_ylabel('Price')
    ax.legend()
    plt.tight_layout()
    plt.show()


# Example usage
if __name__ == "__main__":
    # Shade long/short bars for instrument 1 over bars 300..349 using a
    # ±0.1% per-bar change threshold on the Kalman level.
    plot_kalman_signals_window(
        instrument=1,
        prices_file='prices.txt',
        R=0.05,
        Q_level=4e-03,
        Q_trend=1e-5  ,
        T1=300,
        T2=350,
        pos_thres=0.001,
        neg_thres=-0.001
    )


In [None]:
#TEMA approach

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def tema_smoother(prices: np.ndarray,
                  span: int = 51) -> np.ndarray:
    """
    Triple Exponential Moving Average (TEMA), causal.

    Three EMAs with the same `span` are chained (each one smooths the
    previous) and combined as 3*EMA1 - 3*EMA2 + EMA3.

    span : EWM span shared by all three stages
    """
    stages = []
    current = pd.Series(prices)
    for _ in range(3):
        # Each stage smooths the output of the previous one.
        current = current.ewm(span=span, adjust=False).mean()
        stages.append(current)

    single, double, triple = stages
    combined = 3 * single - 3 * double + triple
    return combined.values

def load_prices(prices_file: str, instrument: int) -> np.ndarray:
    """
    Return column `instrument` of the whitespace-separated price table.

    `prices_file` is searched in the current working directory first and then
    in each parent directory; the first existing file is read.  Raises
    FileNotFoundError when no such file exists and IndexError when
    `instrument` is outside the table's column range.
    """
    start = Path.cwd()
    located = next(
        (d / prices_file for d in (start, *start.parents) if (d / prices_file).exists()),
        None,
    )
    if located is None:
        raise FileNotFoundError(f"{prices_file} not found")

    table = pd.read_csv(located, sep=r'\s+', header=None)
    column_count = table.shape[1]
    if 0 <= instrument < column_count:
        return table.iloc[:, instrument].values
    raise IndexError(f"Instrument must be 0–{column_count-1}")

def plot_tema_vs_price(
    instrument: int,
    prices_file: str = 'prices.txt',
    span: int = 51,
    t0: int = 200,
    t1: int = 400
):
    """
    Plot one instrument's raw price against its TEMA over bars [t0, t1).

    The TEMA is computed on the full series and then sliced, so values inside
    the window still reflect prices before `t0`.
    """
    series = load_prices(prices_file, instrument)
    smoothed = tema_smoother(series, span=span)

    window = slice(t0, t1)
    bars = np.arange(t0, t1)

    plt.figure(figsize=(12, 5))
    plt.plot(bars, series[window], label='Raw Price')
    plt.plot(bars, smoothed[window], '--', label=f'TEMA (span={span})')
    plt.title(f'Instrument {instrument}: Price vs TEMA (t={t0}–{t1})')
    plt.xlabel('Time')
    plt.ylabel('Price')
    plt.legend()
    plt.tight_layout()
    plt.show()

# Example usage
if __name__ == "__main__":
    # Compare raw price and TEMA(51) for instrument 33 over bars 200..399.
    plot_tema_vs_price(
        instrument=33,
        prices_file='prices.txt',
        span=51,
        t0=200,
        t1=400
    )


In [None]:
#ZLEMA AND HMA testing for lag

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def load_prices(prices_file: str, instrument: int) -> np.ndarray:
    """
    Load one instrument's price column from a whitespace-separated table.

    Starting at the current working directory, each directory on the way up
    to the filesystem root is checked for `prices_file`; the first match is
    read.  Raises FileNotFoundError if the root is reached without a match
    and IndexError if `instrument` is not a valid column index.
    """
    folder = Path.cwd()
    table = None
    while table is None:
        target = folder / prices_file
        if target.exists():
            table = pd.read_csv(target, sep=r'\s+', header=None)
        elif folder.parent == folder:
            # The root is its own parent: nowhere left to look.
            raise FileNotFoundError(f"'{prices_file}' not found in cwd or parents")
        else:
            folder = folder.parent

    last_column = table.shape[1] - 1
    if 0 <= instrument < table.shape[1]:
        return table.iloc[:, instrument].values
    raise IndexError(f"Instrument must be 0–{last_column}")

def zlema(prices: np.ndarray, span: int) -> np.ndarray:
    """
    Zero-Lag EMA: EMA of (price + (price - price_lag)).

    price_lag is the price `lag` bars earlier with lag = (span - 1) // 2.
    For the first `lag` bars no lagged price exists, so the correction term
    is 0 and the plain price is fed to the EMA.
    """
    series = pd.Series(prices)
    lag = int((span - 1) // 2)
    momentum = series - series.shift(lag)
    compensated = series + momentum.fillna(0)
    smoothed = compensated.ewm(span=span, adjust=False).mean()
    return smoothed.values

def wma(prices: np.ndarray, period: int) -> np.ndarray:
    """
    Linearly weighted moving average, causal.

    Within each window of `period` prices the weights are 1,2,…,period, with
    the largest weight on the most recent price.  The first `period - 1`
    outputs, where no full window exists yet, are the raw prices themselves
    (the whole output equals the input when the series is shorter than
    `period`).

    Raises ValueError when `period` is smaller than 1.
    """
    if period < 1:
        raise ValueError("period must be at least 1")

    values = np.asarray(prices, dtype=float)
    out = values.copy()  # warm-up region keeps the raw prices
    if len(values) >= period:
        w = np.arange(1, period + 1)
        # Convolving with the reversed weights is the sliding dot product of
        # each window with w, computed in one vectorized call.
        out[period - 1:] = np.convolve(values, w[::-1], mode="valid") / w.sum()
    return out

def hma(prices: np.ndarray, period: int) -> np.ndarray:
    """
    Hull MA: HMA(n) = WMA( 2*WMA(n/2) - WMA(n) , sqrt(n) )

    n/2 uses integer division and sqrt(n) is truncated to an integer; both
    are clamped to at least 1.
    """
    fast_len = max(1, period // 2)
    final_len = max(1, int(np.sqrt(period)))
    # Twice the fast average minus the slow one, then smoothed once more.
    raw_hull = 2 * wma(prices, fast_len) - wma(prices, period)
    return wma(raw_hull, final_len)

if __name__ == "__main__":
    # Compare ZLEMA and HMA (same span) against the raw price of one
    # instrument over bars t0..t1-1.
    prices_file = 'prices.txt'
    instrument  = 41
    span         = 11
    t0, t1       = 200, 400

    prices = load_prices(prices_file, instrument)
    x = np.arange(t0, t1)
    p_slice = prices[t0:t1]

    plt.figure(figsize=(12, 5))
    plt.plot(x, p_slice, label='Raw Price')

    # plot ZLEMA(span): computed on the full series, then sliced to the window
    z = zlema(prices, span)[t0:t1]
    plt.plot(x, z, '--', label=f'ZLEMA({span})')

    # plot HMA(span): computed on the full series, then sliced to the window
    h = hma(prices, span)[t0:t1]
    plt.plot(x, h, '-.', label=f'HMA({span})')

    plt.title(f'Instrument {instrument}: Raw vs ZLEMA / HMA (span={span}, t={t0}-{t1})')
    plt.xlabel('Time')
    plt.ylabel('Price')
    plt.legend()
    plt.tight_layout()
    plt.show()


In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter

# ─── Auto-discover prices.txt ──────────────────────────────────────────────────
# Walk from the current working directory up through its parents and keep the
# first prices.txt that exists.
cwd = Path.cwd()
for folder in (cwd, *cwd.parents):
    candidate = folder / "prices.txt"
    if candidate.exists():
        PRICES_PATH = candidate
        break
else:
    # for/else: only reached when the loop ended without hitting `break`.
    raise FileNotFoundError("'prices.txt' not found in cwd or any parent directory")

# ─── Core Functions ────────────────────────────────────────────────────────────
def sg_gradient_signals_from_file(
    prices_file: Path,
    smoothing_window: int = 51,
    polyorder: int = 3,
    regress_window: int = 50,
    long_threshold: float = 0.0,
    short_threshold: float = 0.0
):
    """
    Compute per-instrument Savitzky–Golay smoothed curve and gradient signals.

    For every bar t >= regress_window a straight line is least-squares fitted
    to the last `regress_window` smoothed values; the slope divided by the
    window mean is compared with the thresholds.  Earlier bars get signal 0.

    NOTE: the filter is run over the whole series in one call, so the
    smoothed value at bar t (and the signal derived from it) can depend on
    prices after t.  Use this as a reference curve, not as a causal signal.

    prices_file     : Path to whitespace-separated price matrix (T×N)
    smoothing_window: window length for Savitzky–Golay filter (should be odd;
                      reduced to fit when longer than the series)
    polyorder       : polynomial order for SG filter
    regress_window  : look-back length for linear regression on smoothed curve
    long_threshold  : normalized-slope above which we go long (+1)
    short_threshold : normalized-slope below which we go short (-1)

    Returns
    -------
    (smoothed, signals) : two DataFrames shaped like the price matrix;
                          `signals` is integer-typed with values +1, 0, -1.
    """
    prices = pd.read_csv(prices_file, sep=r'\s+', header=None)
    T = len(prices)
    smoothed = pd.DataFrame(index=prices.index, columns=prices.columns, dtype=float)
    # Start from an explicit all-zero integer frame rather than an empty
    # frame with dtype=int, which has no values to cast.
    signals  = pd.DataFrame(0, index=prices.index, columns=prices.columns, dtype=int)

    # Shrink an over-long window to the largest odd length that fits, and
    # make sure the window is longer than the polynomial order.
    if smoothing_window > T:
        smoothing_window = T if T % 2 == 1 else T - 1
    if smoothing_window <= polyorder:
        smoothing_window = polyorder + 2
        if smoothing_window % 2 == 0:
            smoothing_window += 1

    # The design matrix depends only on the window length: build it once.
    x = np.arange(regress_window)
    A = np.vstack([x, np.ones(regress_window)]).T

    for inst in prices.columns:
        # apply Savitzky–Golay filter
        m = savgol_filter(prices[inst].values, smoothing_window, polyorder, mode='interp')
        s = np.zeros(T, dtype=int)

        # compute slope-based signals (bars before regress_window stay 0)
        for t in range(regress_window, T):
            y = m[t - regress_window + 1 : t + 1]
            slope = np.linalg.lstsq(A, y, rcond=None)[0][0]

            # Guard the normalization, matching the EMA-based variant.
            mean_y = np.mean(y)
            norm_slope = slope / mean_y if mean_y != 0 else 0.0

            if norm_slope > long_threshold:
                s[t] = 1
            elif norm_slope < short_threshold:
                s[t] = -1

        smoothed[inst] = m
        signals[inst]  = s

    return smoothed, signals


def plot_instrument(
    prices_file: Path,
    smoothed: pd.DataFrame,
    signals: pd.DataFrame,
    inst: int,
    smoothing_window: int,
    t0: int,
    t1: int
):
    """
    Draw one instrument's raw price and Savitzky–Golay curve over [t0, t1)
    and shade the bars whose signal is +1 (green) or -1 (red).

    The price column is re-read from `prices_file`; `smoothing_window` is
    only used for the legend text.
    """
    window = slice(t0, t1)
    raw_prices = pd.read_csv(prices_file, sep=r'\s+', header=None)
    price_win  = raw_prices[inst].values[window]
    smooth_win = smoothed[inst].values[window]
    signal_win = signals[inst].values[window]
    bars = np.arange(t0, t1)

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(bars, price_win, label="Raw Price")
    ax.plot(bars, smooth_win, label=f"SG Smooth (window={smoothing_window})", linestyle="--")

    # shade long = green, short = red, across the window's full price range
    for regime, shade in ((1, "green"), (-1, "red")):
        ax.fill_between(bars, price_win.min(), price_win.max(),
                        where=(signal_win == regime), color=shade, alpha=0.1, step="pre")

    ax.set_title(f"Instrument {inst}: Price + SG Signals (t={t0} to {t1})")
    ax.set_xlabel("Time")
    ax.set_ylabel("Price")
    ax.legend()
    plt.show()


# ─── Example Usage ───────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Filter and signal parameters; smoothing_window is also shown in the
    # plot legend.
    smoothing_window = 51  # must be odd
    polyorder        = 3
    regress_window   = 50
    long_threshold   = 0.00001
    short_threshold  = -0.0000  # i.e. 0: any negative normalized slope is short

    # Smooth every column of the file and derive the slope-based signals.
    sm_df, sig_df = sg_gradient_signals_from_file(
        PRICES_PATH,
        smoothing_window=smoothing_window,
        polyorder=polyorder,
        regress_window=regress_window,
        long_threshold=long_threshold,
        short_threshold=short_threshold
    )

    # Plot instrument 33 between t=200 and t=400
    plot_instrument(
        PRICES_PATH,
        sm_df,
        sig_df,
        inst=33,
        smoothing_window=smoothing_window,
        t0=200,
        t1=400
    )


In [None]:
# New logic with no future peeking, using HMA(11)

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def load_prices(prices_file: str, instrument: int) -> np.ndarray:
    """
    Find `prices_file` in the current working directory or the nearest
    parent that has it, read it as a whitespace-separated table, and return
    the column at position `instrument`.

    Raises FileNotFoundError when the file is found nowhere on that path and
    IndexError when `instrument` is not a valid column index.
    """
    here = Path.cwd()
    found = None
    for folder in [here] + list(here.parents):
        probe = folder / prices_file
        if probe.exists():
            found = probe
            break
    if found is None:
        raise FileNotFoundError(f"'{prices_file}' not found in cwd or any parent")

    frame = pd.read_csv(found, sep=r'\s+', header=None)
    width = frame.shape[1]
    if 0 <= instrument < width:
        return frame.iloc[:, instrument].values
    raise IndexError(f"Instrument must be in [0, {width-1}]")

def wma(prices: np.ndarray, period: int) -> np.ndarray:
    """
    Linearly weighted moving average, causal.

    weights = 1,2,…,period, with the largest weight on the newest price of
    each window.  Until a full window is available (the first `period - 1`
    bars) the output is the raw price.

    Raises ValueError when `period` is smaller than 1.
    """
    if period < 1:
        raise ValueError("period must be at least 1")

    values = np.asarray(prices, dtype=float)
    out = values.copy()  # warm-up bars keep the raw prices
    if len(values) >= period:
        w = np.arange(1, period + 1)
        # One row per complete window; a matrix-vector product replaces the
        # per-bar Python loop.
        windows = np.lib.stride_tricks.sliding_window_view(values, period)
        out[period - 1:] = windows @ w / w.sum()
    return out

def hma(prices: np.ndarray, period: int) -> np.ndarray:
    """
    Hull MA: HMA(n) = WMA( 2*WMA(n/2) - WMA(n) , sqrt(n) ), all causal.

    The half period is period // 2 and the final period is int(sqrt(period));
    each is raised to 1 if it would be smaller.
    """
    lengths = {
        "half": max(1, period // 2),
        "root": max(1, int(np.sqrt(period))),
    }
    fast = wma(prices, lengths["half"])
    slow = wma(prices, period)
    # Over-weight the fast average to offset the slow average's lag.
    return wma(2 * fast - slow, lengths["root"])

def plot_hma_signals_window(
    instrument: int,
    prices_file: str = 'prices.txt',
    period: int = 11,
    T1: int = 200,
    T2: int = 400,
    pos_thres: float = 0.001,
    neg_thres: float = -0.001
) -> None:
    """
    Plot raw price and HMA(period) over bars [T1, T2) with long/short shading.

    A bar is long (green) when the bar-over-bar fractional change of the HMA
    exceeds `pos_thres`, short (red) when it is below `neg_thres`, and
    unshaded otherwise.  `T2` is clamped to the series length (or set to it
    when None).
    """
    series = load_prices(prices_file, instrument)
    total = len(series)
    if T2 is None or T2 > total:
        T2 = total

    # HMA over the full series, then its fractional change per bar
    # (Δsmooth / previous smooth, 0 for the first bar).
    hull = hma(series, period)
    rel_change = np.zeros(total, dtype=float)
    rel_change[1:] = np.diff(hull) / hull[:-1]

    # Restrict everything to the plotting window.
    window = slice(T1, T2)
    bars = np.arange(T1, T2)
    price_win = series[window]
    hull_win = hull[window]
    change_win = rel_change[window]

    # +1 long, -1 short, 0 neutral; short wins if both thresholds match.
    regime = np.where(change_win < neg_thres, -1,
                      np.where(change_win > pos_thres, 1, 0))

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(bars, price_win,   label='Raw Price')
    ax.plot(bars, hull_win,   '--', label=f'HMA({period})', linewidth=2)

    lo, hi = price_win.min(), price_win.max()
    for value, shade in ((1, 'green'), (-1, 'red')):
        ax.fill_between(bars, lo, hi,
                        where=(regime == value), color=shade, alpha=0.2, step='pre')

    ax.set_title(f'Instrument {instrument}: Price + HMA({period}) Signals (t={T1}-{T2})')
    ax.set_xlabel('Time Step')
    ax.set_ylabel('Price')
    ax.legend()
    plt.tight_layout()
    plt.show()

# ─── Example usage ───────────────────────────────────────────────────────────
if __name__ == "__main__":
    # HMA(6) signals for instrument 1 over bars 300..349, with a very tight
    # neutral band of ±0.001% per bar.
    plot_hma_signals_window(
        instrument=1,
        prices_file='prices.txt',
        period=6,
        T1=300,
        T2=350,
        pos_thres=0.00001,
        neg_thres=-0.00001
    )


In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def load_prices(prices_file: str, instrument: int) -> np.ndarray:
    """
    Return the price column for `instrument` from `prices_file`.

    The file is looked up in the current working directory and then in each
    parent directory, nearest first.  Raises FileNotFoundError if none of
    them contains it and IndexError if `instrument` is out of range.
    """
    base = Path.cwd()
    search_path = (base, *base.parents)
    try:
        source = next(d / prices_file for d in search_path if (d / prices_file).exists())
    except StopIteration:
        raise FileNotFoundError(f"'{prices_file}' not found in cwd or any parent") from None

    data = pd.read_csv(source, sep=r'\s+', header=None)
    n_columns = data.shape[1]
    if 0 <= instrument < n_columns:
        return data.iloc[:, instrument].values
    raise IndexError(f"Instrument must be in [0, {n_columns-1}]")

def wma(prices: np.ndarray, period: int) -> np.ndarray:
    """
    Causal weighted moving average with weights 1..period.

    The newest price in a window carries weight `period`.  The first
    `period - 1` outputs (no complete window yet) are the raw prices.

    Raises ValueError when `period` is smaller than 1.
    """
    if period < 1:
        raise ValueError("period must be at least 1")

    values = np.asarray(prices, dtype=float)
    out = values.copy()  # warm-up bars keep the raw prices
    if len(values) >= period:
        w = np.arange(1, period + 1)
        # Cross-correlation slides w along the series: one dot product per
        # complete window, without a Python-level loop.
        out[period - 1:] = np.correlate(values, w, mode="valid") / w.sum()
    return out

def hma(prices: np.ndarray, period: int) -> np.ndarray:
    """
    Causal Hull Moving Average:
      HMA(n) = WMA(2 * WMA(n/2) - WMA(n), sqrt(n))

    n/2 is period // 2 and sqrt(n) is int(sqrt(period)), each at least 1.
    """
    short_len = period // 2
    if short_len < 1:
        short_len = 1
    smooth_len = int(np.sqrt(period))
    if smooth_len < 1:
        smooth_len = 1

    short_avg = wma(prices, short_len)
    long_avg = wma(prices, period)
    return wma(2 * short_avg - long_avg, smooth_len)

def plot_hma_binary_signals(
    instrument: int,
    prices_file: str = 'prices.txt',
    period: int = 11,
    T1: int = 200,
    T2: int = 400
) -> None:
    """
    Plot raw price and HMA(period) over bars [T1, T2) with two-state shading.

    A bar is long (green) when the HMA's bar-over-bar fractional change is
    strictly positive and short (red) otherwise; there is no neutral state.
    `T2` is clamped to the series length.
    """
    series = load_prices(prices_file, instrument)
    total = len(series)
    T2 = min(T2, total)

    # HMA over the full series, then Δsmooth / previous smooth (0 at bar 0).
    hull = hma(series, period)
    rel_change = np.zeros(total)
    rel_change[1:] = np.diff(hull) / hull[:-1]

    # Restrict everything to the plotting window.
    window = slice(T1, T2)
    bars = np.arange(T1, T2)
    price_win = series[window]
    hull_win = hull[window]

    # Every bar is either long or short, so one mask and its complement
    # cover the window.
    is_long = rel_change[window] > 0

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(bars, price_win, label='Raw Price', color='black')
    ax.plot(bars, hull_win, '--', label=f'HMA({period})', color='orange', linewidth=2)

    lo, hi = price_win.min(), price_win.max()
    for mask, shade in ((is_long, 'green'), (~is_long, 'red')):
        ax.fill_between(
            bars, lo, hi,
            where=mask,
            facecolor=shade, alpha=0.2,
            step='pre',
            edgecolor='none',
            interpolate=True
        )

    ax.set_title(f'Instrument {instrument}: Price + HMA({period}) Binary Signals (t={T1}-{T2})')
    ax.set_xlabel('Time Step')
    ax.set_ylabel('Price')
    ax.legend(loc='upper left')
    plt.tight_layout()
    plt.show()

if __name__ == "__main__":
    # Binary long/short shading from HMA(6) for instrument 1, bars 300..349.
    plot_hma_binary_signals(
        instrument=1,
        prices_file='prices.txt',
        period=6,
        T1=300,
        T2=350
    )


In [None]:
# Grid search over Kalman trend-filter parameters, scored against the future-looking Savitzky–Golay smoother as the reference

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
from scipy.signal import savgol_filter
import itertools
import time

def kalman_trend_smoother(prices: np.ndarray,
                          R: float,
                          Q_level: float,
                          Q_trend: float) -> np.ndarray:
    """
    Causal Kalman filter for level+trend smoothing.

    Args:
      prices  : 1D sequence of observed prices (converted to a float array)
      R       : measurement noise variance
      Q_level : process noise variance of the level state
      Q_trend : process noise variance of the trend state

    Returns the filtered level estimate, one value per input price.  An empty
    input returns an empty array instead of failing on the first-price seed.
    """
    series = np.asarray(prices, dtype=float)
    n = series.size
    smoothed = np.zeros(n, dtype=float)
    if n == 0:
        return smoothed

    x = np.array([series[0], 0.0], dtype=float)   # [level, trend]
    P = np.eye(2)
    F = np.array([[1, 1], [0, 1]], dtype=float)   # level += trend
    H = np.array([[1, 0]], dtype=float)           # only the level is observed
    Q = np.diag([Q_level, Q_trend])
    unit = np.eye(2)

    for t in range(n):
        # predict
        x_prior = F @ x
        P_prior = F @ P @ F.T + Q

        # update
        innovation = series[t] - (H @ x_prior)[0]
        S = (H @ P_prior @ H.T)[0, 0] + R
        K = (P_prior @ H.T) / S

        x = x_prior + (K.flatten() * innovation)
        P = (unit - K @ H) @ P_prior

        smoothed[t] = x[0]
    return smoothed

def sg_smoother(prices: np.ndarray,
                window: int = 51,
                polyorder: int = 3) -> np.ndarray:
    """
    Smooth `prices` with a Savitzky–Golay filter of the given window length
    and polynomial order, using the 'interp' edge mode.
    """
    smoothed = savgol_filter(prices, window, polyorder, mode='interp')
    return smoothed

def load_prices(prices_file: str, instrument: int) -> np.ndarray:
    """
    Read the whitespace-separated price table and return one column.

    `prices_file` is resolved against the current working directory and then
    each of its parents; the nearest existing file wins.  Raises
    FileNotFoundError when there is none and IndexError when `instrument` is
    not a valid column index.
    """
    root = Path.cwd()
    candidates = (folder / prices_file for folder in (root, *root.parents))
    match = next(filter(Path.exists, candidates), None)
    if match is None:
        raise FileNotFoundError(f"'{prices_file}' not found in cwd or parents")

    sheet = pd.read_csv(match, sep=r'\s+', header=None)
    n_inst = sheet.shape[1]
    if 0 <= instrument < n_inst:
        return sheet.iloc[:, instrument].values
    raise IndexError(f"Instrument must be 0–{n_inst-1}")

if __name__ == "__main__":
    # Brute-force search for the Kalman parameters whose causal output best
    # matches the Savitzky–Golay reference curve on bars T1..T2-1.
    # ─── Settings ──────────────────────────────────────────────────────────────
    prices_file  = "prices.txt"
    instrument   = 33

    # SG reference
    sg_window    = 51
    sg_polyorder = 3
    T1, T2       = 200, 400

    # ─── Load data & SG reference ──────────────────────────────────────────────
    prices     = load_prices(prices_file, instrument)
    sg_ref     = sg_smoother(prices, window=sg_window, polyorder=sg_polyorder)
    ref_slice  = sg_ref[T1:T2]

    # ─── Fine grid but ~1/6 the size of the original ───────────────────────────
    # original was 41 points each -> ~69k combos
    # now 22 points each -> 22^3 = 10,648 combos ≈ 1/6 of 69k
    R_vals  = np.linspace(0.05, 0.10, 22)    # around your previously best 0.075
    Ql_vals = np.linspace(0.002, 0.004, 22)  # around ~0.003
    Qt_vals = np.logspace(-6, -5, 22)        # 1e-6 → 1e-5

    total = len(R_vals) * len(Ql_vals) * len(Qt_vals)
    print(f"Grid search: {total} combinations (~1/6 of full grid)...")

    results = []  # one (mse, R, Q_level, Q_trend) tuple per combination
    count = 0
    t_start = time.time()

    for R, Ql, Qt in itertools.product(R_vals, Ql_vals, Qt_vals):
        count += 1
        # progress update: first, every 1000th, and last combination
        if count == 1 or count % 1000 == 0 or count == total:
            elapsed = time.time() - t_start
            pct = count / total * 100
            print(f"[{count}/{total}] {pct:5.1f}% done, elapsed {elapsed:.1f}s")

        # The filter runs over the full series; only the window is scored.
        sm = kalman_trend_smoother(prices, R=R, Q_level=Ql, Q_trend=Qt)
        diff = sm[T1:T2] - ref_slice
        mse  = np.mean(diff**2)
        results.append((mse, R, Ql, Qt))

    # ─── Report top 10 ─────────────────────────────────────────────────────────
    # Ascending by MSE, so the best-matching parameter sets come first.
    results.sort(key=lambda x: x[0])
    print("\nTop 10 Kalman parameter sets matching SG(51,3):")
    print("     MSE       R       Q_level      Q_trend")
    for mse, R, Ql, Qt in results[:10]:
        print(f"{mse:12.3e}  {R:<7.5f}  {Ql:<11.1e}  {Qt:<11.1e}")


In [None]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from pathlib import Path
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# ─── CONFIG ────────────────────────────────────────────────────────────────
# "Day" below means a row index of the price table (the `t` used by calc_features).
NUM_INST      = 1      # number of leading instrument columns to process
EMA_ALPHA     = 0.0095  # smoothing factor passed to Series.ewm(alpha=...)
HORIZON       = 5      # days ahead that the up/down label looks
SHORT_WIN     = 5       # window length (days) for the fast slope feature
LONG_WIN      = 30      # window length (days) for the slow slope feature
VOL_WIN       = 30      # window length (days) for rolling volatility of returns
TRAIN_START   = 0       # first day to train on (inclusive)
TRAIN_END     = 600     # end of the training window (exclusive)
TEST_START    = 600     # first day to test (inclusive)
TEST_END      = 1000     # end of the test window (exclusive)
N_ESTIMATORS  = 100     # trees in the forest
MAX_FEAT      = "sqrt" # max_features setting handed to RandomForestClassifier
RS            = 42      # random seed
MODEL_DIR     = Path("models")
# Side effect at cell-run time: make sure the (cwd-relative) output folder exists.
MODEL_DIR.mkdir(exist_ok=True)

# ─── locate prices.txt ─────────────────────────────────────────────────────
# Walk upward from the current working directory; the first folder that
# contains prices.txt wins. The for/else raises only when the loop finishes
# without hitting `break`, i.e. no folder on the path to the root has the file.
cwd = Path.cwd()
for folder in (cwd, *cwd.parents):
    if (folder / "prices.txt").exists():
        PRICES_PATH = folder / "prices.txt"
        break
else:
    raise FileNotFoundError("prices.txt not found")

# ─── HELPERS ────────────────────────────────────────────────────────────────
def norm_slope(arr: np.ndarray) -> float:
    """Return the least-squares slope of `arr` after min-max scaling both axes.

    The sample positions 0..len(arr)-1 and the values are each rescaled to
    [0, 1]; a constant axis (zero range) is divided by 1 instead of 0, so it
    collapses to all zeros rather than producing NaNs.
    """
    positions = np.arange(len(arr), dtype=float)

    # `or 1` swaps a zero peak-to-peak range for 1 to avoid dividing by zero.
    pos_span = np.ptp(positions) or 1
    val_span = np.ptp(arr) or 1

    scaled_pos = (positions - positions.min()) / pos_span
    scaled_val = (arr - arr.min()) / val_span

    model = LinearRegression()
    model.fit(scaled_pos.reshape(-1, 1), scaled_val)
    return model.coef_[0]


def calc_features(smooth: pd.Series) -> pd.DataFrame:
    """Compute per-day features and the up/down label for a smoothed series.

    For every day `t` that has a full look-back window and HORIZON days of
    future data, one row is produced with:

    * ``slope_fast`` / ``slope_slow`` – normalised slopes over the last
      SHORT_WIN / LONG_WIN values (inclusive of `t`),
    * ``slope_diff`` – slow slope minus fast slope,
    * ``vol30``      – standard deviation of the last VOL_WIN returns,
    * ``ret1``       – the one-step return at `t`,
    * ``label``      – 1 if ``smooth[t + HORIZON] > smooth[t]`` else 0.

    The features only read data up to and including `t`; the label is the
    only column that looks ahead. Rows containing NaN are dropped and the
    index is reset. A series too short to yield any row returns an empty
    frame that still carries the expected columns.
    """
    columns = ["t", "slope_fast", "slope_slow", "slope_diff", "vol30", "ret1", "label"]

    rets = smooth.pct_change().fillna(0)
    n = len(smooth)

    # SHORT_WIN is part of the warm-up too: without it a short window longer
    # than the others would make `t - SHORT_WIN + 1` negative and slice from
    # the end of the series.
    first_t = max(SHORT_WIN, LONG_WIN, VOL_WIN, HORIZON)

    records = []
    for t in range(first_t, n - HORIZON):
        # Fit each window once and reuse the result for slope_diff
        # (every norm_slope call is a full regression fit).
        slope_fast = norm_slope(smooth.iloc[t - SHORT_WIN + 1 : t + 1].values)
        slope_slow = norm_slope(smooth.iloc[t - LONG_WIN + 1 : t + 1].values)
        vol30 = rets.iloc[t - VOL_WIN + 1 : t + 1].std()
        records.append({
            "t":          t,
            "slope_fast": slope_fast,
            "slope_slow": slope_slow,
            "slope_diff": slope_slow - slope_fast,
            "vol30":      vol30,
            "ret1":       rets.iloc[t],
            "label":      int(smooth.iloc[t + HORIZON] > smooth.iloc[t]),
        })

    # Passing `columns` keeps the schema stable even when `records` is empty.
    return pd.DataFrame(records, columns=columns).dropna().reset_index(drop=True)

# ─── TRAIN/TEST PIPELINE ───────────────────────────────────────────────────
def train_and_save():
    """Train one random forest per instrument and save it under MODEL_DIR.

    For each of the first NUM_INST columns of the price file the series is
    EMA-smoothed, features are built with calc_features, and rows are split
    by their day index `t` into [TRAIN_START, TRAIN_END) and
    [TEST_START, TEST_END). Train/test accuracy (and a classification report
    when both classes occur in the test labels) is printed, and the fitted
    model is written to ``MODEL_DIR / f"rf_inst{inst}.joblib"``.

    Instruments with no feature rows or no training rows are skipped with a
    message; an empty test window skips only the evaluation.
    """
    feature_cols = ["slope_fast", "slope_slow", "slope_diff", "vol30", "ret1"]

    df = pd.read_csv(PRICES_PATH, sep=r"\s+", header=None)
    df = df.iloc[:, :NUM_INST]

    for inst in df.columns:
        smooth = df[inst].ewm(alpha=EMA_ALPHA, adjust=False).mean()
        df_feat = calc_features(smooth)
        if df_feat.empty:
            print(f"Inst {inst}: series too short to build features, skipping.")
            continue

        # select train/test by 't' value
        # NOTE(review): labels look HORIZON days ahead, so with
        # TRAIN_END == TEST_START the last HORIZON training rows are labelled
        # using prices from the test window. Consider purging those rows.
        day = df_feat["t"]
        train_df = df_feat[(day >= TRAIN_START) & (day < TRAIN_END)]
        test_df = df_feat[(day >= TEST_START) & (day < TEST_END)]

        if train_df.empty:
            print(f"Inst {inst}: no training rows in t=[{TRAIN_START}, {TRAIN_END}), skipping.")
            continue

        X_train, y_train = train_df[feature_cols], train_df["label"]

        rf = RandomForestClassifier(
            n_estimators=N_ESTIMATORS,
            max_features=MAX_FEAT,
            random_state=RS
        )
        rf.fit(X_train, y_train)
        train_acc = accuracy_score(y_train, rf.predict(X_train))

        if test_df.empty:
            # Predicting on zero rows raises inside scikit-learn, so report
            # the training score only and still save the model below.
            print(f"Inst {inst}: Train acc={train_acc:.3f}, no test rows in t=[{TEST_START}, {TEST_END}).")
        else:
            X_test, y_test = test_df[feature_cols], test_df["label"]
            y_te = rf.predict(X_test)
            print(f"Inst {inst}: Train acc={train_acc:.3f}, Test acc={accuracy_score(y_test,y_te):.3f}")

            # safe classification report: target_names must line up with the
            # classes that actually occur in the test labels
            unique = sorted(y_test.unique())
            if len(unique) > 1:
                names = ["Down" if c==0 else "Up" for c in unique]
                print(classification_report(y_test, y_te, labels=unique, target_names=names))
            else:
                print(f"Only one class present in Test for Inst {inst} (class {unique[0]}), skipping classification_report.")

        # save model
        joblib.dump(rf, MODEL_DIR / f"rf_inst{inst}.joblib")

# ─── PLOTTING ON TEST WINDOW ────────────────────────────────────────────────
def plot_test_predictions():
    """Plot each saved model's predictions over the test window.

    For every instrument the model written by the training step is loaded
    from MODEL_DIR, predictions are made for feature rows with
    TEST_START <= t < TEST_END, and two stacked panels are drawn:

    * top    – the EMA-smoothed price over the predicted days, shaded green
               where the model predicts "up" (1) and red where it predicts
               "down" (0);
    * bottom – the raw price over the full series, shaded by the position
               implied by the predictions (+1 long / -1 short / 0 flat).

    Instruments with no feature rows inside the test window are skipped
    with a message.
    """
    feature_cols = ["slope_fast", "slope_slow", "slope_diff", "vol30", "ret1"]

    df = pd.read_csv(PRICES_PATH, sep=r"\s+", header=None)
    df = df.iloc[:, :NUM_INST]

    for inst in df.columns:
        series = df[inst]
        smooth = series.ewm(alpha=EMA_ALPHA, adjust=False).mean()
        df_feat = calc_features(smooth)
        if df_feat.empty:
            print(f"Inst {inst}: series too short to build features, skipping plot.")
            continue

        test_df = df_feat[(df_feat['t'] >= TEST_START) & (df_feat['t'] < TEST_END)]
        if test_df.empty:
            # rf.predict raises on zero rows, and there is nothing to draw.
            print(f"Inst {inst}: no test rows in t=[{TEST_START}, {TEST_END}), skipping plot.")
            continue

        rf = joblib.load(MODEL_DIR / f"rf_inst{inst}.joblib")

        preds = rf.predict(test_df[feature_cols])
        days  = test_df['t'].values

        # build raw pos array for full series
        # Start from NaN so ffill() really carries the last signal across days
        # without a prediction (dropped rows, the final HORIZON days); on a
        # zero-initialised int array ffill() has nothing to fill.
        prices = series.values
        pos = np.full(len(prices), np.nan)
        pos[days] = np.where(preds == 1, 1.0, -1.0)
        pos = pd.Series(pos).ffill().fillna(0).to_numpy()
        pos[TEST_END:] = 0          # do not hold a position past the test window
        pos = pos.astype(int)

        # plot
        fig, (ax1, ax2) = plt.subplots(2,1,figsize=(12,8), sharex=True)

        ax1.plot(days, smooth.iloc[days], color="black", label="EMA Price")
        ax1.fill_between(days, smooth.min(), smooth.max(), where=preds==1,
                         facecolor="green", alpha=0.3)
        ax1.fill_between(days, smooth.min(), smooth.max(), where=preds==0,
                         facecolor="red", alpha=0.3)
        ax1.set_ylabel("EMA Price")
        ax1.set_title(f"Inst {inst}: Predictions t={TEST_START}-{TEST_END}")
        ax1.legend(loc="upper left")

        all_days = np.arange(len(prices))
        ax2.plot(all_days, prices, color="black", label="Raw Price")
        ax2.fill_between(all_days, prices.min(), prices.max(), where=pos==1,
                         facecolor="green", alpha=0.3)
        ax2.fill_between(all_days, prices.min(), prices.max(), where=pos==-1,
                         facecolor="red", alpha=0.3)
        ax2.set_ylabel("Raw Price")
        ax2.set_xlabel("Day")
        ax2.legend(loc="upper left")

        plt.tight_layout()
        plt.show()

# ─── MAIN ──────────────────────────────────────────────────────────────────
# Entry point: train and save one model per instrument, then reload the saved
# models to plot their test-window predictions, and report total wall time.
if __name__ == "__main__":
    start = time.time()
    train_and_save()
    # Reads the .joblib files written by train_and_save(), so it must run second.
    plot_test_predictions()
    print(f"Done in {time.time()-start:.1f}s")

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def load_prices(prices_file: str, instrument: int) -> np.ndarray:
    """
    Find `prices_file` in the current working directory or the nearest
    parent that contains it, parse it as a header-less whitespace-separated
    table, and return column `instrument` (0-based) as an array.

    Raises FileNotFoundError when no folder on the way up holds the file and
    IndexError when `instrument` is not a valid column index.
    """
    here = Path.cwd()
    search_path = (here, *here.parents)

    # First existing candidate wins, nearest folder first.
    found = next(
        (d / prices_file for d in search_path if (d / prices_file).exists()),
        None,
    )
    if found is None:
        raise FileNotFoundError(f"'{prices_file}' not found")

    table = pd.read_csv(found, sep=r'\s+', header=None)

    n_cols = table.shape[1]
    if not (0 <= instrument < n_cols):
        raise IndexError(f"Instrument must be in [0, {n_cols-1}]")

    return table.iloc[:, instrument].values

def wma(prices: np.ndarray, period: int) -> np.ndarray:
    """
    Causal weighted moving average with weights 1,2,...,period.

    Within each window the oldest sample gets weight 1 and the newest gets
    weight `period`. The first period-1 points have no full window and are
    passed through as the raw price, so the output has the same length as
    the input and contains no NaN padding.

    Raises ValueError if `period` is smaller than 1.
    """
    if period < 1:
        # A zero-length window would divide 0 by 0 and fill the output with NaN.
        raise ValueError(f"period must be >= 1, got {period}")

    values = np.asarray(prices, dtype=float)

    # Start from the raw prices: this is the warm-up for the first period-1
    # points, and the whole result when the series is shorter than `period`.
    out = values.copy()

    if len(values) >= period:
        weights = np.arange(1, period + 1, dtype=float)
        # Convolution flips its kernel, so reversing the weights puts the
        # largest weight on the newest sample. 'valid' yields one value per
        # full window, i.e. for output indices period-1 .. n-1.
        out[period - 1:] = np.convolve(values, weights[::-1], mode="valid") / weights.sum()

    return out

def hma(prices: np.ndarray, period: int) -> np.ndarray:
    """
    Causal Hull Moving Average:
      HMA(n) = WMA(2*WMA(n/2) - WMA(n), sqrt(n))

    Both n/2 and sqrt(n) are truncated to integers and floored at 1.
    """
    half_period   = max(1, period // 2)
    smooth_period = max(1, int(np.sqrt(period)))

    # Twice the fast average minus the slow one, then a short final
    # smoothing pass over that combined series.
    raw_hull = 2 * wma(prices, half_period) - wma(prices, period)
    return wma(raw_hull, smooth_period)

def zero_lag_hma(prices: np.ndarray, period: int) -> np.ndarray:
    """
    Approximate zero-lag correction of HMA by leading it by its own lag.

    With lag = (period - 1) // 2, each point from index `lag` onward is
    HMA[t] + (HMA[t] - HMA[t - lag]); the first `lag` points are the plain
    HMA. For period 1 or 2 the lag is 0 and the plain HMA is returned.
    """
    h = hma(prices, period)
    lag = (period - 1) // 2

    if lag <= 0:
        # `h[:-0]` is an empty slice, so the subtraction below would fail to
        # broadcast; with no lag there is nothing to correct anyway.
        return h

    zl = np.empty_like(h)

    # before we have enough data, just copy HMA
    zl[:lag] = h[:lag]
    # lead the HMA by adding the difference with its lagged value
    zl[lag:] = h[lag:] + (h[lag:] - h[:-lag])

    return zl

# Demo script: plot raw price, HMA and zero-lag HMA for one instrument over
# the window [t0, t1).
if __name__ == "__main__":
    # ─── User parameters ───────────────────────────────────────────────────────
    prices_file = "prices.txt"
    instrument  = 33
    period       = 7   # HMA period (also shown in the plot legend)
    t0, t1       = 200, 400

    # ─── Load and slice prices ─────────────────────────────────────────────────
    prices = load_prices(prices_file, instrument)
    n = len(prices)
    # Clamp the window end so x and the slices below have matching lengths.
    if t1 > n:
        t1 = n

    x       = np.arange(t0, t1)
    price_slice = prices[t0:t1]

    # ─── Compute HMA and zero-lag HMA ─────────────────────────────────────────
    # Both are computed on the full series and sliced afterwards, so the
    # plotted window is not affected by the filters' warm-up points.
    h = hma(prices, period)
    zl = zero_lag_hma(prices, period)

    # ─── Plot price vs HMA vs zero-lag HMA ────────────────────────────────────
    plt.figure(figsize=(12, 5))
    plt.plot(x, price_slice, label="Raw Price")
    plt.plot(x, h[t0:t1],   "--", label=f"HMA({period})")
    plt.plot(x, zl[t0:t1],  "-.", label=f"Zero-Lag HMA({period})")
    plt.title(f"Instrument {instrument}: Price vs HMA & Zero-Lag HMA (t={t0}–{t1})")
    plt.xlabel("Time Step")
    plt.ylabel("Price")
    plt.legend()
    plt.tight_layout()
    plt.show()


In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def load_prices(prices_file: str, instrument: int) -> np.ndarray:
    """
    Search the current working directory and then each parent for
    `prices_file`, read the first match as a header-less whitespace-separated
    table, and return the column for `instrument` (0-based).

    Raises FileNotFoundError if the file is nowhere on that path, and
    IndexError if `instrument` is outside the available columns.
    """
    start_dir = Path.cwd()
    located = None
    for directory in [start_dir, *start_dir.parents]:
        path = directory / prices_file
        if path.exists():
            located = path
            break

    if located is None:
        raise FileNotFoundError(f"'{prices_file}' not found")

    frame = pd.read_csv(located, sep=r'\s+', header=None)
    width = frame.shape[1]

    in_range = 0 <= instrument < width
    if not in_range:
        raise IndexError(f"Instrument must be in [0, {width-1}]")

    return frame.iloc[:, instrument].values

def wma(prices: np.ndarray, period: int) -> np.ndarray:
    """
    Causal weighted moving average with weights 1…period.

    The newest sample in each window carries weight `period`, the oldest
    weight 1. Points before the first full window (indices < period-1) are
    returned as the raw price, so the result has the input's length.

    Raises ValueError if `period` is smaller than 1.
    """
    if period < 1:
        # An empty window would compute 0/0 and return NaN for every point.
        raise ValueError(f"period must be >= 1, got {period}")

    series = np.asarray(prices, dtype=float)

    # Raw prices serve as the warm-up values; a series shorter than `period`
    # is therefore returned unchanged.
    out = series.copy()

    if len(series) >= period:
        w = np.arange(1, period + 1, dtype=float)
        # np.convolve reverses its kernel, hence w[::-1] to weight the newest
        # sample most; 'valid' keeps only positions with a full window.
        out[period - 1:] = np.convolve(series, w[::-1], mode="valid") / w.sum()

    return out

def hma(prices: np.ndarray, period: int) -> np.ndarray:
    """
    Causal Hull Moving Average:
      HMA(n) = WMA(2*WMA(n/2) - WMA(n), sqrt(n))

    n/2 and sqrt(n) are truncated to integers and never allowed below 1.
    """
    fast_len  = max(1, period//2)
    final_len = max(1, int(np.sqrt(period)))

    fast = wma(prices, fast_len)
    slow = wma(prices, period)

    # Combine the two averages, then smooth the combination once more.
    return wma(2*fast - slow, final_len)

def zero_lag_hma(prices: np.ndarray, period: int) -> np.ndarray:
    """
    Approximate zero-lag correction of HMA by leading it by its own lag.

    Uses lag = (period - 1)//2. From index `lag` on, the output is
    HMA[t] + (HMA[t] - HMA[t - lag]); earlier points are the plain HMA.
    When the lag is 0 (period 1 or 2) the plain HMA is returned unchanged.
    """
    h = hma(prices, period)
    lag = (period - 1)//2

    if lag <= 0:
        # h[:-0] is empty, so the lagged difference below cannot broadcast
        # against h[0:]; a zero lag needs no correction.
        return h

    zl = np.empty_like(h)
    zl[:lag] = h[:lag]                          # not enough history yet
    zl[lag:] = h[lag:] + (h[lag:] - h[:-lag])   # extrapolate by the lagged change
    return zl

def plot_zlhma_signals(
    instrument: int,
    prices_file: str = 'prices.txt',
    period: int = 11,
    T1: int = 200,
    T2: int = 400,
    pos_thres: float = 0.0005,
    neg_thres: float = -0.0005
) -> None:
    """
    Plot raw price and zero-lag HMA with green/red/neutral shading
    based on normalized gradient at each t (uses only past & present data).

    The gradient at t is (smooth[t] - smooth[t-1]) / smooth[t-1]. A step is
    shaded green (long) when the gradient exceeds `pos_thres`, red (short)
    when it is below `neg_thres`, and left unshaded otherwise.

    `T2` is clamped to the series length. Raises ValueError when the
    resulting window [T1, T2) is empty or `T1` is negative.
    """
    prices = load_prices(prices_file, instrument)
    n = len(prices)
    T2 = min(T2, n)

    # Validate before any figure is created: a negative T1 would make the
    # slices wrap around and mismatch the x axis, and an empty window would
    # fail later inside min()/max() with an unhelpful message.
    if not (0 <= T1 < T2):
        raise ValueError(
            f"Empty or invalid plot window: T1={T1}, T2={T2} (series length {n})"
        )

    # compute zero-lag HMA
    smooth = zero_lag_hma(prices, period)

    # normalized gradient: (smooth[t] - smooth[t-1]) / smooth[t-1]
    grad = np.zeros(n, dtype=float)
    grad[1:] = (smooth[1:] - smooth[:-1]) / smooth[:-1]

    # slice for plotting
    x = np.arange(T1, T2)
    p_slice = prices[T1:T2]
    s_slice = smooth[T1:T2]
    g_slice = grad[T1:T2]

    # signals:  1=long, -1=short, 0=neutral
    signals = np.zeros_like(g_slice, dtype=int)
    signals[g_slice > pos_thres] = 1
    signals[g_slice < neg_thres] = -1

    # plotting
    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(x, p_slice, label='Raw Price', alpha=0.8)
    ax.plot(x, s_slice, '--', label=f'Zero-Lag HMA({period})', linewidth=2)

    # Shade across both curves; the zero-lag line can overshoot the raw price.
    y_min = min(p_slice.min(), s_slice.min())
    y_max = max(p_slice.max(), s_slice.max())
    ax.fill_between(x, y_min, y_max,
                    where=(signals == 1), color='green', alpha=0.2, step='pre')
    ax.fill_between(x, y_min, y_max,
                    where=(signals == -1), color='red', alpha=0.2, step='pre')

    ax.set_title(f'Instrument {instrument}: Price + ZL-HMA({period}) Signals (t={T1}-{T2})')
    ax.set_xlabel('Time Step')
    ax.set_ylabel('Price')
    ax.legend()
    plt.tight_layout()
    plt.show()

# Example usage
# Both thresholds are zero here, so any positive gradient is shaded as long
# and any negative gradient as short; only exactly-flat steps stay neutral.
if __name__ == "__main__":
    plot_zlhma_signals(
        instrument=0,
        prices_file='prices.txt',
        period=70,
        T1=200,
        T2=800,    # clamped inside the function if the series is shorter
        pos_thres=0.0000,
        neg_thres=-0.000
    )
