Create folders & a placeholder package

In [1]:
from pathlib import Path

root = Path.cwd()  # you are in .../Finance Data OS/notebooks
pkg_dir = root / "src" / "fdos"
pkg_dir.mkdir(parents=True, exist_ok=True)

# __init__.py
init_py = pkg_dir / "__init__.py"
if not init_py.exists():
    init_py.write_text("", encoding="utf-8")
    print("created:", init_py)
else:
    print("exists:", init_py)

# paths.py (create only if missing)
paths_py = pkg_dir / "paths.py"
if not paths_py.exists():
    paths_py.write_text(
        """from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path

class LakeDiscoveryError(RuntimeError):
    ...

@dataclass(frozen=True)
class LakePaths:
    lake_root: Path

def discover_lake(start: Path | None = None) -> Path:
    \"\"\"Walk upward to find 'lake' that contains our parquet marts. Guard against ./notebooks/lake.\"\"\"
    here = Path.cwd() if start is None else Path(start)
    # guard: if a lake folder lives directly under notebooks, reject it
    bad = here / "lake"
    if bad.exists() and "notebooks" in str(here).lower():
        raise LakeDiscoveryError(f"Refusing to use {bad} — remove accidental 'notebooks/lake'.")
    for cand in [here, *here.parents]:
        lk = cand / "lake"
        if lk.exists():
            return lk
    raise LakeDiscoveryError(f"Could not find a 'lake' folder walking up from {here}")

def feature_mart(lake_root: Path) -> Path:
    return lake_root / "feature_mart.parquet"

def signals_dir(lake_root: Path, output_version: str) -> Path:
    return lake_root / f"signals_mart_{output_version}.parquet"

def backtest_dir(lake_root: Path, output_version: str) -> Path:
    return lake_root / f"backtest_mart_{output_version}"

def summary_path(backtest_dir: Path) -> Path:
    return backtest_dir / "_summary.parquet"

def tuning_path(lake_root: Path) -> Path:
    return lake_root / "tuning_mart.parquet"
""",
        encoding="utf-8",
    )
    print("created:", paths_py)
else:
    print("exists:", paths_py)

# config.py (create only if missing)
config_py = pkg_dir / "config.py"
if not config_py.exists():
    config_py.write_text(
        r"""from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
import hashlib, json, yaml  # type: ignore

from .paths import discover_lake, feature_mart, signals_dir, backtest_dir, summary_path, tuning_path

@dataclass(frozen=True)
class Config:
    run_id: str
    lake_root: Path
    output_version: str
    sma_fast: int
    sma_slow: int
    vol_window: int
    vol_threshold_pct: float
    costs_bps: int
    tickers: list[str]
    grids: dict

    @property
    def hash(self) -> str:
        h = {
            "run_id": self.run_id,
            "out": self.output_version,
            "sma_fast": self.sma_fast,
            "sma_slow": self.sma_slow,
            "vol_window": self.vol_window,
            "vol_threshold_pct": self.vol_threshold_pct,
            "costs_bps": self.costs_bps,
            "tickers": self.tickers,
        }
        raw = json.dumps(h, sort_keys=True).encode()
        return hashlib.md5(raw).hexdigest()[:8]

def load_config(yaml_path: str | Path) -> Config:
    ypath = Path(yaml_path)
    data = yaml.safe_load(ypath.read_text(encoding="utf-8"))

    # fill lake_root: if yaml says "\\lake", resolve relative to repo
    lake_root = Path(data.get("lake_root", "lake"))
    if str(lake_root) == r"\\lake":
        # repo root is two levels up from src/fdos when imported
        here = Path(__file__).resolve()
        repo = here.parents[2]  # .../notebooks
        lake_root = (repo / "lake").resolve()

    return Config(
        run_id=data["run_id"],
        lake_root=Path(lake_root).resolve(),
        output_version=data.get("output_version", "v3"),
        sma_fast=int(data["sma_fast"]),
        sma_slow=int(data["sma_slow"]),
        vol_window=int(data["vol_window"]),
        vol_threshold_pct=float(data["vol_threshold_pct"]),
        costs_bps=int(data["costs_bps"]),
        tickers=list(map(str, data["tickers"])),
        grids=data.get("grids", {}),
    )
""",
        encoding="utf-8",
    )
    print("created:", config_py)
else:
    print("exists:", config_py)


exists: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\notebooks\src\fdos\__init__.py
exists: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\notebooks\src\fdos\paths.py
exists: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\notebooks\src\fdos\config.py


In [2]:
from pathlib import Path
import sys

# Find repo root by locating the package we just created
def find_repo_root(start: Path | None = None) -> Path:
    here = Path.cwd() if start is None else Path(start)
    for cand in [here, *here.parents]:
        if (cand / "src" / "fdos" / "__init__.py").exists():
            return cand
    raise RuntimeError(f"Could not find repo root from {here} (looking for src/fdos/__init__.py)")

ROOT = find_repo_root()
SRC = ROOT / "src"
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))

print("repo root:", ROOT)
print("sys.path[0]:", sys.path[0])

# Now imports should work
from fdos.config import load_config
from fdos.paths import discover_lake, feature_mart, signals_dir, backtest_dir, summary_path, tuning_path

cfg = load_config(ROOT / "configs" / "base.yaml")
print("RUN:", cfg.run_id, "HASH:", cfg.hash)
print("LAKE (from config):", cfg.lake_root)

bt_dir = backtest_dir(cfg.lake_root, cfg.output_version)
print("feature_mart:", feature_mart(cfg.lake_root))
print("signals_dir :", signals_dir(cfg.lake_root, cfg.output_version))
print("backtest_dir:", bt_dir)
print("summary_path:", summary_path(bt_dir))
print("tuning_path :", tuning_path(cfg.lake_root))


repo root: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\notebooks
sys.path[0]: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\notebooks\src
RUN: week6-base HASH: a427440c
LAKE (from config): C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\lake
feature_mart: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\lake\feature_mart.parquet
signals_dir : C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\lake\signals_mart_v3.parquet
backtest_dir: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\lake\backtest_mart_v3
summary_path: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\lake\backtest_mart_v3\_summary.parquet
tuning_path : C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\lake\tuning_mart.parquet


In [3]:
from fdos.paths import discover_lake
print("discover:", discover_lake())

from fdos.config import load_config
from pathlib import Path
print("config:", load_config(Path.cwd() / "configs" / "base.yaml"))


discover: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\lake
config: Config(run_id='week6-base', lake_root=WindowsPath('C:/Users/TJs PC/OneDrive/Desktop/Finance Data OS/lake'), output_version='v3', sma_fast=25, sma_slow=100, vol_window=20, vol_threshold_pct=20.0, costs_bps=5, tickers=['AAPL', 'MSFT', 'NVDA', 'TSLA'], grids={'fast': [10, 15, 20, 25, 30], 'slow': [50, 100, 150, 200], 'vol': [15.0, 20.0, 25.0]})


In [4]:
import importlib, fdos.validate as v
importlib.reload(v)
print(v.SCHEMAS["signals_mart_v3"])


<Schema DataFrameSchema(
    columns={
        'date': <Schema Column(name=date, type=DataType(datetime64[ns]))>
        'ticker': <Schema Column(name=ticker, type=DataType(str))>
        'sma_fast': <Schema Column(name=sma_fast, type=DataType(float64))>
        'sma_slow': <Schema Column(name=sma_slow, type=DataType(float64))>
        'long_rule': <Schema Column(name=long_rule, type=DataType(int64))>
        'exit_rule': <Schema Column(name=exit_rule, type=DataType(int64))>
        'high_vol': <Schema Column(name=high_vol, type=DataType(bool))>
    },
    checks=[],
    parsers=[],
    coerce=False,
    dtype=None,
    index=None,
    strict=False,
    name=None,
    ordered=False,
    unique_column_names=False,
    metadata=None, 
    add_missing_columns=False
)>


In [5]:
# 1) reload the module so your notebook sees the new code
import importlib, fdos.validate as v
importlib.reload(v)

# 3) validate feature + signals (adjust the two paths to your machine if needed)
from fdos.paths import discover_lake, feature_mart, signals_dir
from fdos.io import read_parquet
from fdos.config import load_config
from pathlib import Path

cfg = load_config(Path.cwd() / "configs" / "base.yaml")
lake = discover_lake()

fm_path  = feature_mart(lake)
sig_path = signals_dir(lake, cfg.output_version).with_suffix(".parquet")

fm  = read_parquet(fm_path)
sig = read_parquet(sig_path) if sig_path.exists() else None

# Validate feature mart
v.SCHEMAS["feature_mart_v3"].validate(v.quick_sample(fm))
print("[OK] feature_mart_v3 schema passed")

# If you only have a v2 signals file, normalize first
if sig is not None:
    sig = v.normalize_signals_columns(sig)
    v.SCHEMAS["signals_mart_v3"].validate(v.quick_sample(sig))
    print("[OK] signals_mart_v3 schema passed")
else:
    print("[WARN] no signals file found yet")


[OK] feature_mart_v3 schema passed


SchemaError: expected series 'long_rule' to have type int64, got int32

In [None]:
print("Feature columns:", sorted(fm.columns.tolist())[:20])
print("Signals columns:", sorted(sig.columns.tolist())[:20] if sig is not None else None)


In [6]:
# JUPYTER CELL — reload & rebuild signals v3
import importlib
import pandas as pd

from fdos.config import load_config
from fdos.paths import discover_lake, feature_mart, signals_dir
import fdos.signals as s
importlib.reload(s)            # <- pick up your file edits
from fdos.signals import build_signals_v2

from fdos.validate import SCHEMAS, normalize_signals_columns
from fdos.io import write_parquet_safe
from pathlib import Path

cfg = load_config(Path.cwd() / "configs" / "base.yaml")
lake = discover_lake()

fm_path  = feature_mart(lake)
sig_path = signals_dir(lake, cfg.output_version).with_suffix(".parquet")

fm = pd.read_parquet(fm_path)

sig = build_signals_v2(
    fm,
    sma_fast=cfg.sma_fast,
    sma_slow=cfg.sma_slow,
    vol_window=cfg.vol_window,
    vol_threshold_pct=cfg.vol_threshold_pct,
)

# Optional normalization (ensures dtypes/bool/int exactly match schema)
sig_norm = normalize_signals_columns(sig)

# Sanity prints before writing
print("sig head:")
print(sig_norm.head(5))
print("null counts:\n", sig_norm.isna().sum())

# Validate a small sample (fast) — should PASS now
SCHEMAS["signals_mart_v3"].validate(sig_norm.sample(min(1000, len(sig_norm)), random_state=42))

# Atomic write + manifest
write_parquet_safe(
    df=sig_norm,
    path=sig_path,
    schema=SCHEMAS["signals_mart_v3"],
    manifest={"artifact": "signals_mart_v3", "config_hash": cfg.hash},
)

print("[OK] signals_mart_v3 written ->", sig_path)


sig head:
        date ticker   return1  sma_fast  sma_slow  long_rule  exit_rule  \
0 2019-01-30   AAPL  0.068335       NaN       NaN          0          0   
1 2019-01-31   AAPL  0.007202       NaN       NaN          0          0   
2 2019-02-01   AAPL  0.000480       NaN       NaN          0          0   
3 2019-02-04   AAPL  0.028405       NaN       NaN          0          0   
4 2019-02-05   AAPL  0.017110       NaN       NaN          0          0   

   high_vol  
0     False  
1     False  
2     False  
3     False  
4     False  
null counts:
 date           0
ticker         0
return1        0
sma_fast      96
sma_slow     396
long_rule      0
exit_rule      0
high_vol       0
dtype: int64


SchemaError: non-nullable series 'sma_fast' contains null values:
23     NaN
3100   NaN
14     NaN
17     NaN
3103   NaN
19     NaN
3102   NaN
3101   NaN
1562   NaN
1569   NaN
4644   NaN
3105   NaN
15     NaN
8      NaN
1550   NaN
1559   NaN
1561   NaN
3113   NaN
1557   NaN
1554   NaN
4649   NaN
1566   NaN
Name: sma_fast, dtype: float64

In [7]:
import inspect, fdos.signals as s
print("signals.py at:", inspect.getsourcefile(s))
print("validate.py at:", inspect.getsourcefile(v))

print("Using schema object:", v.SCHEMAS["signals_mart_v3"])
print("sig head:\n", sig_norm.head())

from pathlib import Path
p = Path(sig_path)
print("Parent exists?", p.parent.exists(), " | Will write to:", p)


signals.py at: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\notebooks\src\fdos\signals.py
validate.py at: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\notebooks\src\fdos\validate.py
Using schema object: <Schema DataFrameSchema(
    columns={
        'date': <Schema Column(name=date, type=DataType(datetime64[ns]))>
        'ticker': <Schema Column(name=ticker, type=DataType(str))>
        'sma_fast': <Schema Column(name=sma_fast, type=DataType(float64))>
        'sma_slow': <Schema Column(name=sma_slow, type=DataType(float64))>
        'long_rule': <Schema Column(name=long_rule, type=DataType(int64))>
        'exit_rule': <Schema Column(name=exit_rule, type=DataType(int64))>
        'high_vol': <Schema Column(name=high_vol, type=DataType(bool))>
    },
    checks=[],
    parsers=[],
    coerce=False,
    dtype=None,
    index=None,
    strict=False,
    name=None,
    ordered=False,
    unique_column_names=False,
    metadata=None, 
    add_missing_columns=False
)>
sig head:
   

In [8]:
from pathlib import Path
from fdos.paths import discover_lake, signals_dir
from fdos.validate import SCHEMAS
import pandas as pd
from fdos.config import load_config

cfg = load_config(Path.cwd() / "configs" / "base.yaml")
lake = discover_lake()
sig_path = signals_dir(lake, cfg.output_version).with_suffix(".parquet")
print("Exists?", Path(sig_path).exists(), "->", sig_path)

sig = pd.read_parquet(sig_path)
print(sig.head())
print("NaNs:", sig.isna().sum().to_dict())

# full validate (can be slower)
SCHEMAS["signals_mart_v3"].validate(sig)
print("[OK] full schema validate passed")


Exists? True -> C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\lake\signals_mart_v3.parquet
        date ticker   return1   sma_fast   sma_slow  long_rule  exit_rule  \
0 2019-06-21   AAPL -0.003409  44.875113  44.797405          1          0   
1 2019-06-24   AAPL -0.001006  44.967035  44.880564          1          0   
2 2019-06-25   AAPL -0.015158  45.086783  44.953671          1          0   
3 2019-06-26   AAPL  0.021629  45.213439  45.036734          1          0   
4 2019-06-27   AAPL -0.000300  45.376173  45.108399          1          0   

   high_vol  
0     False  
1     False  
2     False  
3     False  
4     False  
NaNs: {'date': 0, 'ticker': 0, 'return1': 0, 'sma_fast': 0, 'sma_slow': 0, 'long_rule': 0, 'exit_rule': 0, 'high_vol': 0}
[OK] full schema validate passed


Cell A — imports + config/paths

In [9]:
from pathlib import Path
import pandas as pd

from fdos.config import load_config
from fdos.paths import discover_lake, signals_dir, backtest_dir, summary_path
from fdos.backtest import run_backtest_with_costs, kpi
from fdos.validate import SCHEMAS
from fdos.io import read_parquet, write_parquet_safe

cfg = load_config(Path.cwd() / "configs" / "base.yaml")
lake = discover_lake()

sig_path = signals_dir(lake, cfg.output_version).with_suffix(".parquet")
bt_dir   = backtest_dir(lake, cfg.output_version)
sum_path = summary_path(bt_dir)

print("Signals file :", sig_path)
print("Backtest dir :", bt_dir)
print("Summary file :", sum_path)


Signals file : C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\lake\signals_mart_v3.parquet
Backtest dir : C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\lake\backtest_mart_v3
Summary file : C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\lake\backtest_mart_v3\_summary.parquet


In [10]:
import importlib, fdos.validate as v
importlib.reload(v)


<module 'fdos.validate' from 'C:\\Users\\TJs PC\\OneDrive\\Desktop\\Finance Data OS\\notebooks\\src\\fdos\\validate.py'>

In [11]:
from pathlib import Path
from fdos.config import load_config
from fdos.paths import discover_lake, backtest_dir
from fdos.io import read_parquet, write_parquet_safe
import fdos.trades as t
import importlib; importlib.reload(t)  # in case you just edited trades.py

cfg   = load_config(Path.cwd() / "configs" / "base.yaml")
lake  = discover_lake()
btdir = backtest_dir(lake, cfg.output_version)

daily  = read_parquet(btdir / "_daily.parquet")
trades = t.trades_from_position(daily, cfg.costs_bps)

trades_path = Path(lake) / "trades_mart.parquet"

# NOTE: use the new key name here
v.SCHEMAS["trades_mart_v3"].validate(trades.sample(min(1000, len(trades))))

write_parquet_safe(
    df=trades,
    path=trades_path,
    schema=v.SCHEMAS["trades_mart_v3"],
    manifest={"artifact": "trades_mart", "config_hash": cfg.hash},
)

print("[OK] trades mart written:", trades_path)
trades.head()


ValueError: trades_from_position: missing columns: {'exit_rule', 'long_rule'}

In [12]:
# 1) Reload the module so the notebook sees your fix
import importlib, fdos.validate as v
importlib.reload(v)

# 2) Point pytest at the tests dir (relative to your notebook)
from pathlib import Path
import sys, pytest

repo = Path.cwd()
if not (repo / "notebooks").exists():
    repo = repo.parent  # if you're inside notebooks/, this is a no-op

tests_dir = repo / "notebooks" / "src" / "tests"
if str(repo / "notebooks" / "src") not in sys.path:
    sys.path.insert(0, str(repo / "notebooks" / "src"))

print("Using tests_dir:", tests_dir)

# 3) Run just one test first
pytest.main(["-q", str(tests_dir), "-k", "test_sma_alignment"])


Using tests_dir: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\notebooks\notebooks\src\tests

..\.venv\Lib\site-packages\_pytest\config\__init__.py:1290
    self._mark_plugins_for_rewrite(hook, disable_autoload)



<ExitCode.NO_TESTS_COLLECTED: 5>

In [13]:
from pathlib import Path
import sys, pytest

def find_repo_root(start: Path | None = None) -> Path:
    """Walk upward until we find a folder that contains notebooks/src/tests."""
    here = Path.cwd() if start is None else Path(start)
    for cand in (here, *here.parents):
        if (cand / "notebooks" / "src" / "tests").exists():
            return cand
    raise FileNotFoundError(f"Could not find 'notebooks/src/tests' walking up from {here}")

# 1) Locate repo root and tests dir once – no more double 'notebooks'
ROOT = find_repo_root()
TESTS_DIR = ROOT / "notebooks" / "src" / "tests"
SRC_DIR = ROOT / "notebooks" / "src"

# 2) Ensure imports work for fdos/*
if str(SRC_DIR) not in sys.path:
    sys.path.insert(0, str(SRC_DIR))

print("ROOT:", ROOT)
print("TESTS_DIR:", TESTS_DIR)
print("SYS.PATH head:", sys.path[:2])

# 3) (Optional) hot-reload validate after your edit so tests see the fix
import importlib, fdos.validate as v
importlib.reload(v)

# 4) Run one test first (fastest feedback)
pytest.main(["-q", str(TESTS_DIR), "-k", "test_sma_alignment"])


ROOT: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\notebooks
TESTS_DIR: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\notebooks\notebooks\src\tests
SYS.PATH head: ['C:\\Users\\TJs PC\\OneDrive\\Desktop\\Finance Data OS\\notebooks\\notebooks\\src', 'C:\\Users\\TJs PC\\OneDrive\\Desktop\\Finance Data OS\\notebooks\\src']

..\.venv\Lib\site-packages\_pytest\config\__init__.py:1290
    self._mark_plugins_for_rewrite(hook, disable_autoload)



<ExitCode.NO_TESTS_COLLECTED: 5>

In [14]:
import importlib, fdos.signals as s
importlib.reload(s)


<module 'fdos.signals' from 'C:\\Users\\TJs PC\\OneDrive\\Desktop\\Finance Data OS\\notebooks\\src\\fdos\\signals.py'>

In [15]:
import pytest
pytest.main(["-q", str(TESTS_DIR), "-k", "test_sma_alignment"])



..\.venv\Lib\site-packages\_pytest\config\__init__.py:1290
    self._mark_plugins_for_rewrite(hook, disable_autoload)



<ExitCode.NO_TESTS_COLLECTED: 5>

In [16]:
one = sig[sig["ticker"] == sig["ticker"].iloc[0]].head(max(60, cfg.sma_slow+5))
one[["date","ticker","return1","sma_fast","sma_slow"]].head(cfg.sma_slow+2)


Unnamed: 0,date,ticker,return1,sma_fast,sma_slow
0,2019-06-21,AAPL,-0.003409,44.875113,44.797405
1,2019-06-24,AAPL,-0.001006,44.967035,44.880564
2,2019-06-25,AAPL,-0.015158,45.086783,44.953671
3,2019-06-26,AAPL,0.021629,45.213439,45.036734
4,2019-06-27,AAPL,-0.000300,45.376173,45.108399
...,...,...,...,...,...
97,2019-11-07,AAPL,0.011541,58.127937,52.114928
98,2019-11-08,AAPL,0.002737,58.454554,52.268552
99,2019-11-11,AAPL,0.007919,58.800591,52.423337
100,2019-11-12,AAPL,-0.000915,59.169929,52.579174


In [17]:
from pathlib import Path
import pandas as pd
from fdos.config import load_config
from fdos.paths import feature_mart
from fdos.signals import build_signals_v2
from fdos.validate import normalize_signals_columns

cfg = load_config(Path.cwd() / "configs" / "base.yaml")
fm  = pd.read_parquet(feature_mart(cfg.lake_root))

sig = build_signals_v2(
    fm,
    sma_fast=cfg.sma_fast,
    sma_slow=cfg.sma_slow,
    vol_window=cfg.vol_window,
    vol_threshold_pct=cfg.vol_threshold_pct,
)
sig = normalize_signals_columns(sig)

# NaNs expected in the leading rows per ticker:
head_block = sig.groupby("ticker").head(cfg.sma_slow)
print("NaNs in first slow-window rows per ticker:")
print(head_block[["sma_fast","sma_slow"]].isna().sum())


NaNs in first slow-window rows per ticker:
sma_fast     96
sma_slow    396
dtype: int64


In [18]:
import importlib, fdos.signals as s
importlib.reload(s)


<module 'fdos.signals' from 'C:\\Users\\TJs PC\\OneDrive\\Desktop\\Finance Data OS\\notebooks\\src\\fdos\\signals.py'>

In [19]:
import importlib, fdos.signals as s
importlib.reload(s)

from pathlib import Path
import pandas as pd
from fdos.config import load_config
from fdos.paths import feature_mart
from fdos.validate import normalize_signals_columns

cfg = load_config(Path.cwd() / "configs" / "base.yaml")
fm = pd.read_parquet(feature_mart(cfg.lake_root))

sig = s.build_signals_v2(
    fm,
    sma_fast=cfg.sma_fast,
    sma_slow=cfg.sma_slow,
    vol_window=cfg.vol_window,
    vol_threshold_pct=cfg.vol_threshold_pct,
)
sig = normalize_signals_columns(sig)

# quick sanity: first slow-window rows per ticker should have NaNs in SMAs
head_block = sig.groupby("ticker").head(cfg.sma_slow)
print(head_block[["sma_fast","sma_slow"]].isna().sum())


sma_fast     96
sma_slow    396
dtype: int64


In [20]:
# 1) Force-reload your module that defines build_signals_v2
import importlib, pandas as pd
from pathlib import Path

import fdos.signals as s
importlib.reload(s)

from fdos.config import load_config
from fdos.paths import feature_mart
from fdos.validate import normalize_signals_columns

cfg = load_config(Path.cwd() / "configs" / "base.yaml")
fm  = pd.read_parquet(feature_mart(cfg.lake_root))

sig = s.build_signals_v2(
    fm,
    sma_fast=cfg.sma_fast,
    sma_slow=cfg.sma_slow,
    vol_window=cfg.vol_window,
    vol_threshold_pct=cfg.vol_threshold_pct,
)
sig = normalize_signals_columns(sig)

head = sig.groupby("ticker").head(cfg.sma_slow).isna().sum().sum()
print("NaNs in first slow-window block across all columns:", head)

print(sig.columns.tolist())
print(sig.dtypes)


NaNs in first slow-window block across all columns: 492
['date', 'ticker', 'return1', 'sma_fast', 'sma_slow', 'long_rule', 'exit_rule', 'high_vol']
date         datetime64[ns]
ticker               object
return1             float64
sma_fast            float64
sma_slow            float64
long_rule             int32
exit_rule             int32
high_vol               bool
dtype: object


In [21]:
import importlib, fdos.signals as s
importlib.reload(s)


<module 'fdos.signals' from 'C:\\Users\\TJs PC\\OneDrive\\Desktop\\Finance Data OS\\notebooks\\src\\fdos\\signals.py'>

In [22]:
import importlib, fdos.signals as s
importlib.reload(s)

from pathlib import Path
import sys, pytest

repo = Path.cwd() if Path.cwd().name == "notebooks" else Path.cwd() / "notebooks"
tests_dir = repo / "src" / "tests"
src_dir   = repo / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

pytest.main(["-q", str(tests_dir), "-k", "test_sma_alignment"])


[32m.[0m[33m                                                                                                            [100%][0m
..\.venv\Lib\site-packages\_pytest\config\__init__.py:1290
    self._mark_plugins_for_rewrite(hook, disable_autoload)



<ExitCode.OK: 0>

In [23]:
# drawdown/rolling
pytest.main(["-q", str(tests_dir), "-k", "test_drawdown_roll"])

# cost-sign
pytest.main(["-q", str(tests_dir), "-k", "test_cost_application_sign"])

# tiny end-to-end
pytest.main(["-q", str(tests_dir), "-k", "test_e2e_small"])

# KPI parity (reads fixture parquet)
pytest.main(["-q", str(tests_dir), "-k", "test_kpi_parity"])


[32m.[0m[33m                                                                                                            [100%][0m
..\.venv\Lib\site-packages\_pytest\config\__init__.py:1290
    self._mark_plugins_for_rewrite(hook, disable_autoload)

src/tests/test_drawdown_roll.py::test_drawdown_and_rolling
    d = d.groupby("ticker", group_keys=False).apply(per_ticker)

[31mF[0m[31m                                                                                                            [100%][0m
[31m[1m___________________________________________ test_cost_application_sign ____________________________________________[0m

feature_df =            date ticker       close   return1       sma10      vol20
0    2019-01-30   AAPL   39.319290  0.068335   37....290  338.982001   8.553405
6183 2025-09-05   TSLA  350.839996  0.036363  342.055002   8.564378

[6184 rows x 6 columns]
cfg = Config(run_id='week6-base', lake_root=WindowsPath('C:/Users/TJs PC/OneDrive/Desktop/Finance Data OS

<ExitCode.OK: 0>

In [24]:
from pathlib import Path
from fdos.config import load_config
from fdos.paths import discover_lake, backtest_dir, summary_path
from fdos.io import read_parquet, write_parquet_safe

# 1) Load your current summary (the one you already wrote earlier)
cfg   = load_config(Path.cwd() / "configs" / "base.yaml")
lake  = discover_lake()
btdir = backtest_dir(lake, cfg.output_version)
summary_df = read_parquet(summary_path(btdir))   # this is backtest_mart_v3/_summary.parquet

# 2) Save it as the test fixture the parity test expects
fixtures_dir = Path("notebooks/src/tests/fixtures")
fixtures_dir.mkdir(parents=True, exist_ok=True)
fixture_path = fixtures_dir / "week5_summary.parquet"

write_parquet_safe(
    df=summary_df,
    path=fixture_path,
    manifest={"artifact": "test_fixture", "source": "local summary_v3"}
)
print("Fixture written to:", fixture_path)


Fixture written to: notebooks\src\tests\fixtures\week5_summary.parquet


In [25]:
import pytest, sys
from pathlib import Path

# point pytest at repo tests (same pattern you used before)
repo = Path.cwd() / "notebooks" if Path.cwd().name == "notebooks" else Path.cwd() / "notebooks"
tests_dir = repo / "src" / "tests"

pytest.main(["-q", str(tests_dir), "-k", "test_kpi_parity"])



..\.venv\Lib\site-packages\_pytest\config\__init__.py:1290
    self._mark_plugins_for_rewrite(hook, disable_autoload)



<ExitCode.NO_TESTS_COLLECTED: 5>

In [26]:
pytest.main(["-q", str(tests_dir), "-k", "test_drawdown_roll or test_cost_application_sign or test_e2e_small or test_kpi_parity"])



..\.venv\Lib\site-packages\_pytest\config\__init__.py:1290
    self._mark_plugins_for_rewrite(hook, disable_autoload)



<ExitCode.NO_TESTS_COLLECTED: 5>

In [27]:
import pytest, sys
from pathlib import Path

# point to the repo's notebooks dir (no double "notebooks")
repo = Path.cwd() if Path.cwd().name == "notebooks" else Path.cwd() / "notebooks"
tests_dir = repo / "src" / "tests"
src_dir   = repo / "src"

# ensure src is importable (safe no-op if already there)
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# run just the KPI parity test
pytest.main(["-q", str(tests_dir), "-k", "test_kpi_parity"])


[32m.[0m[33m                                                                                                            [100%][0m
..\.venv\Lib\site-packages\_pytest\config\__init__.py:1290
    self._mark_plugins_for_rewrite(hook, disable_autoload)



<ExitCode.OK: 0>

In [28]:
from pathlib import Path
import pandas as pd

# fdos helpers (already in your repo)
from fdos.config import load_config
from fdos.paths import discover_lake, backtest_dir, summary_path
from fdos.io import read_parquet, write_parquet_safe

# --- locate repo paths (handles running inside notebooks/ or repo root) ---
repo_root = Path.cwd() if Path.cwd().name == "notebooks" else Path.cwd() / "notebooks"
src_dir   = repo_root / "src"
tests_dir = src_dir / "tests"
fixtures_dir = tests_dir / "fixtures"

# also drop a copy "in the fdos folder" as requested
fdos_dir = src_dir / "fdos"

# --- read your latest summary (the v3 summary you already wrote earlier) ---
cfg   = load_config(Path.cwd() / "configs" / "base.yaml")
lake  = discover_lake()
btdir = backtest_dir(lake, cfg.output_version)
summary_df = read_parquet(summary_path(btdir))  # this is notebooks/lake/backtest_mart_v3/_summary.parquet

# --- ensure output folders exist ---
fixtures_dir.mkdir(parents=True, exist_ok=True)
fdos_dir.mkdir(parents=True, exist_ok=True)

# --- write the two copies safely (with tiny manifest) ---
fixture_path = fixtures_dir / "week5_summary.parquet"
fdos_copy    = fdos_dir     / "week5_summary.parquet"

manifest = {"artifact": "test_fixture", "source": "local_summary_v3"}

write_parquet_safe(df=summary_df, path=fixture_path, manifest=manifest)
write_parquet_safe(df=summary_df, path=fdos_copy,    manifest=manifest)

print("[OK] Fixture written to:", fixture_path)
print("[OK] Extra copy written to:", fdos_copy)
print("Exists? ->", fixture_path.exists(), fdos_copy.exists())


[OK] Fixture written to: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\notebooks\src\tests\fixtures\week5_summary.parquet
[OK] Extra copy written to: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\notebooks\src\fdos\week5_summary.parquet
Exists? -> True True


In [29]:
import pytest, sys
from pathlib import Path

repo = Path.cwd() if Path.cwd().name == "notebooks" else Path.cwd() / "notebooks"
tests_dir = repo / "src" / "tests"
src_dir   = repo / "src"

# make sure src is importable
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

pytest.main(["-q", str(tests_dir), "-k", "test_kpi_parity"])


[32m.[0m[33m                                                                                                            [100%][0m
..\.venv\Lib\site-packages\_pytest\config\__init__.py:1290
    self._mark_plugins_for_rewrite(hook, disable_autoload)



<ExitCode.OK: 0>

In [30]:
from pathlib import Path
from fdos.config import load_config
from fdos.paths import discover_lake, backtest_dir, summary_path
from fdos.io import read_parquet, write_parquet_safe

# Locate repo (works whether you're in notebooks/ or repo root)
repo = Path.cwd() if Path.cwd().name == "notebooks" else Path.cwd() / "notebooks"

tests_fixtures = repo / "src" / "tests" / "fixtures" / "week5_summary.parquet"   # where you already wrote
root_fixtures  = repo / "fixtures" / "week5_summary.parquet"                     # ALSO write here (fixes relative path)

# Pull your current v3 summary
cfg   = load_config(Path.cwd() / "configs" / "base.yaml")
lake  = discover_lake()
btdir = backtest_dir(lake, cfg.output_version)
summary_df = read_parquet(summary_path(btdir))

# Ensure dirs and write both copies
tests_fixtures.parent.mkdir(parents=True, exist_ok=True)
root_fixtures.parent.mkdir(parents=True, exist_ok=True)
manifest = {"artifact": "test_fixture", "source": "local_summary_v3"}

write_parquet_safe(df=summary_df, path=tests_fixtures, manifest=manifest)
write_parquet_safe(df=summary_df, path=root_fixtures,  manifest=manifest)

print("[OK] wrote:", tests_fixtures)
print("[OK] wrote:", root_fixtures)
print("Exists? ->", tests_fixtures.exists(), root_fixtures.exists())


[OK] wrote: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\notebooks\src\tests\fixtures\week5_summary.parquet
[OK] wrote: C:\Users\TJs PC\OneDrive\Desktop\Finance Data OS\notebooks\fixtures\week5_summary.parquet
Exists? -> True True


In [31]:
import pytest, sys
from pathlib import Path

repo = Path.cwd() if Path.cwd().name == "notebooks" else Path.cwd() / "notebooks"
tests_dir = repo / "src" / "tests"
src_dir   = repo / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

pytest.main(["-q", str(tests_dir), "-k", "test_kpi_parity"])


[32m.[0m[33m                                                                                                            [100%][0m
..\.venv\Lib\site-packages\_pytest\config\__init__.py:1290
    self._mark_plugins_for_rewrite(hook, disable_autoload)



<ExitCode.OK: 0>

In [32]:
import pytest, sys
from pathlib import Path

repo = Path.cwd() if Path.cwd().name == "notebooks" else Path.cwd() / "notebooks"
tests_dir = repo / "src" / "tests"
src_dir   = repo / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# drawdown/rolling
pytest.main(["-q", str(tests_dir), "-k", "test_drawdown_roll"])

# transaction-cost sign
pytest.main(["-q", str(tests_dir), "-k", "test_cost_application_sign"])

# tiny end-to-end smoke (writes v3 marts + validates)
pytest.main(["-q", str(tests_dir), "-k", "test_e2e_small"])


[32m.[0m[33m                                                                                                            [100%][0m
..\.venv\Lib\site-packages\_pytest\config\__init__.py:1290
    self._mark_plugins_for_rewrite(hook, disable_autoload)

src/tests/test_drawdown_roll.py::test_drawdown_and_rolling
    d = d.groupby("ticker", group_keys=False).apply(per_ticker)

[31mF[0m[31m                                                                                                            [100%][0m
[31m[1m___________________________________________ test_cost_application_sign ____________________________________________[0m

feature_df =            date ticker       close   return1       sma10      vol20
0    2019-01-30   AAPL   39.319290  0.068335   37....290  338.982001   8.553405
6183 2025-09-05   TSLA  350.839996  0.036363  342.055002   8.564378

[6184 rows x 6 columns]
cfg = Config(run_id='week6-base', lake_root=WindowsPath('C:/Users/TJs PC/OneDrive/Desktop/Finance Data OS

<ExitCode.NO_TESTS_COLLECTED: 5>

In [34]:
from pathlib import Path
import sys, importlib, pytest

# 1) Make sure we’re importing your local src/
repo = Path.cwd() if Path.cwd().name == "notebooks" else Path.cwd() / "notebooks"
src_dir  = repo / "src"
tests_dir = src_dir / "tests"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# 2) (optional) reload trades/backtest after your edits
import fdos.trades as t, fdos.backtest as bt
importlib.reload(t); importlib.reload(bt)

# 3) Run only the cost-application test
pytest.main(["-q", str(tests_dir), "-k", "test_cost_application_sign"])


[31mF[0m[31m                                                                                                            [100%][0m
[31m[1m___________________________________________ test_cost_application_sign ____________________________________________[0m

feature_df =            date ticker       close   return1       sma10      vol20
0    2019-01-30   AAPL   39.319290  0.068335   37....290  338.982001   8.553405
6183 2025-09-05   TSLA  350.839996  0.036363  342.055002   8.564378

[6184 rows x 6 columns]
cfg = Config(run_id='week6-base', lake_root=WindowsPath('C:/Users/TJs PC/OneDrive/Desktop/Finance Data OS/lake'), output_ver... 'MSFT', 'NVDA', 'TSLA'], grids={'fast': [10, 15, 20, 25, 30], 'slow': [50, 100, 150, 200], 'vol': [15.0, 20.0, 25.0]})

    [0m[94mdef[39;49;00m[90m [39;49;00m[92mtest_cost_application_sign[39;49;00m(feature_df, cfg):[90m[39;49;00m
        sig = build_signals_v2(feature_df, cfg.sma_fast, cfg.sma_slow, cfg.vol_window, cfg.vol_threshold_pct)[90

<ExitCode.TESTS_FAILED: 1>