### Setup

In [2]:
import sys
import os

# Put the parent of the current working directory at the front of sys.path so
# that the `src.idspy` package imported below can be resolved.
# NOTE(review): assumes the notebook is launched from a direct subfolder of the
# project root — confirm if the notebook is ever moved.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

In [3]:
from src.idspy.core.state import State
from src.idspy.core.step import Step, ConditionalStep, FitAwareStep

### Happy path: `provides` present

In [4]:
class MakeSum(Step):
    """Step that totals the values stored under ``"data"``.

    Declares ``"data"`` as its required key and ``"sum"`` as its provided key.
    """

    def __init__(self):
        super().__init__(provides=["sum"], requires=["data"])

    def run(self, state: State) -> None:
        """Write the sum of ``state["data"]`` to ``state["sum"]``."""
        values = state["data"]
        state["sum"] = sum(values)


# Seed the state with the "data" key that MakeSum requires, run the step, then
# display the state: it should now also contain the "sum" key.
s = State({"data": [1, 2, 3]})
MakeSum()(s)
s

State(size=2, data={'data': [1, 2, 3], 'sum': 6})

### Error if `provides` missing

In [5]:
class NoopProvides(Step):
    """Deliberately broken step: declares ``"x"`` as provided but never writes it."""

    def __init__(self):
        # Promise the key "x" while requiring no input keys.
        super().__init__(provides=["x"], requires=[])

    def run(self, state: State) -> None:
        """Do nothing, leaving the promised ``"x"`` key unset."""
        return None


# Run the broken step on an empty state. The KeyError is caught and printed
# instead of propagating, so the notebook keeps running.
# NOTE(review): presumably the Step base class raises it after run() when a
# declared key is absent — confirm against the Step implementation.
s = State()
try:
    NoopProvides()(s)
except KeyError as e:
    print(e)  # -> "NoopProvides: missing ['x']" (str() of a KeyError keeps the quotes)

"NoopProvides: missing ['x']"


### Create a `ConditionalStep` that skips when a flag is false

In [None]:
class MaybeNormalize(ConditionalStep):
    """Mean-center ``state["data"]`` only when the ``"normalize"`` flag is truthy.

    Both requires and provides the ``"data"`` key.
    """

    def __init__(self):
        super().__init__(requires=["data"], provides=["data"])

    def should_run(self, state: State) -> bool:
        """Return True when ``state["normalize"]`` is truthy; a missing flag counts as False."""
        return bool(state.get("normalize", False))

    def run(self, state: State) -> None:
        """Replace ``state["data"]`` with a list of the values minus their mean.

        Empty data yields an empty list instead of raising ZeroDivisionError.
        """
        xs = state["data"]
        n = len(xs)
        # With no values the mean is undefined; any placeholder works because
        # the comprehension below then produces an empty list.
        m = sum(xs) / n if n else 0.0
        state["data"] = [x - m for x in xs]

    def on_skip(self, state: State) -> None:
        """Print a notice that the step was skipped."""
        print(f"[skip] {self.name} because normalize flag is False")


# "normalize" is False, so should_run() returns False: the on_skip message is
# the expected printout and "data" is expected to stay unchanged.
s = State({"data": [1, 2, 3], "normalize": False})
MaybeNormalize()(s)  # skipped → prints message
s.to_dict()
# {'data': [1, 2, 3], 'normalize': False}


[skip] MaybeNormalize because normalize flag is False


{'data': [1, 2, 3], 'normalize': False}

In [7]:
# Same input data, but with "normalize" set to True so should_run() returns
# True and run() subtracts the mean (2.0) from every value.
s = State({"data": [1, 2, 3], "normalize": True})
MaybeNormalize()(s)  # runs
s.to_dict()
# {'data': [-1.0, 0.0, 1.0], 'normalize': True}

{'data': [-1.0, 0.0, 1.0], 'normalize': True}

### Create a `ConditionalStep` based on the state’s content

In [8]:
class TrainIfEnoughData(ConditionalStep):
    """Conditional step that marks the state as trained once enough data is present.

    Args:
        min_len: Minimum number of items ``state["data"]`` must hold for the
            step to run. Defaults to 3.
    """

    def __init__(self, min_len: int = 3):
        super().__init__(provides=["trained"], requires=["data"])
        self.min_len = min_len

    def should_run(self, state: State) -> bool:
        """Return True when ``"data"`` holds at least ``min_len`` items (missing counts as empty)."""
        available = state.get("data", [])
        return len(available) >= self.min_len

    def run(self, state: State) -> None:
        """Stand-in for real training: only records ``state["trained"] = True``."""
        state["trained"] = True


# Case 1: two items is below min_len=3, so should_run() is False and the step
# is skipped, leaving "trained" unset.
s = State({"data": [1, 2]})
TrainIfEnoughData(min_len=3)(s)  # skipped
# A bare expression in the middle of a cell is never displayed, so the
# expectation is asserted instead of relying on output that would not appear.
assert s.get("trained", None) is None

# Case 2: four items meets the threshold, so the step runs and sets "trained".
s["data"] = [1, 2, 3, 4]
TrainIfEnoughData(min_len=3)(s)  # runs
s["trained"]
# True

True

### Build a `FitAwareStep` that learns the mean in `fit_impl` and applies it in `run`

In [9]:
class MeanCenter(FitAwareStep):
    """Fit-aware step that learns the mean of ``"data"`` and later subtracts it.

    The learned mean is kept in the state under the ``"preproc.mean"`` key.
    """

    def __init__(self):
        super().__init__(provides=["data"], requires=["data"])

    def fit_impl(self, state: State) -> None:
        """Compute the mean of ``state["data"]`` and store it as ``state["preproc.mean"]``.

        Raises:
            ZeroDivisionError: If ``state["data"]`` is empty.
        """
        values = state["data"]
        total, count = sum(values), len(values)
        state["preproc.mean"] = total / count

    def run(self, state: State) -> None:
        """Subtract the stored mean from every value and write the list back to ``"data"``."""
        offset = state["preproc.mean"]
        values = state["data"]
        state["data"] = [value - offset for value in values]


#### Error: try running before fitting

In [10]:
# Call a freshly constructed MeanCenter without calling fit() first. The
# resulting RuntimeError is caught and its message printed.
# `s` and `step` are reused by the following cell, so keep these names.
s = State({"data": [1.0, 2.0, 3.0]})
step = MeanCenter()

try:
    step(s)
except RuntimeError as e:
    print(e)  # 'MeanCenter' is not fitted.

'MeanCenter' is not fitted.


#### Fit first, then run

In [11]:
# Relies on `s` and `step` defined in the previous cell.
# Fit first (stores the mean in the state), then call the step to apply it.
step.fit(s)  # computes and stores 'preproc.mean'
print(s.to_dict())  # {'data': [1.0, 2.0, 3.0], 'preproc.mean': 2.0}

step(s)  # apply centering
print(s.to_dict())  # {'data': [-1.0, 0.0, 1.0], 'preproc.mean': 2.0}

{'data': [1.0, 2.0, 3.0], 'preproc.mean': 2.0}
{'data': [-1.0, 0.0, 1.0], 'preproc.mean': 2.0}
