In [None]:
# make_pbn_env.py

import pickle
from pathlib import Path

import numpy as np
import gymnasium as gym
import gym_PBN


# ──────────────────────────────────────────────────────────────────────────────
# 1) Paths to the prepared data
# ──────────────────────────────────────────────────────────────────────────────
# Everything is located relative to a single data directory, which is itself a
# relative path (resolved against the current working directory at run time).
DATA_DIR = Path('./data_prepared')

# Individual input files inside DATA_DIR.
TRAIN_STATES_PATH = DATA_DIR.joinpath("train_states.npy")
LOGIC_PATH = DATA_DIR.joinpath("logic_func_data_safe.pkl")
GENE_NAMES_PATH = DATA_DIR.joinpath("gene_names_safe.txt")

In [3]:
# ──────────────────────────────────────────────────────────────────────────────
# 2) Load node names & logic functions
# ──────────────────────────────────────────────────────────────────────────────
# Gene names: one per line; surrounding whitespace is stripped and lines that
# are empty after stripping are skipped.
with open(GENE_NAMES_PATH, "r") as names_file:
    stripped_lines = (raw_line.strip() for raw_line in names_file)
    node_names = [entry for entry in stripped_lines if entry]

# SECURITY: unpickling can execute arbitrary code — only load a pickle file
# that comes from a trusted source.
with open(LOGIC_PATH, "rb") as logic_file:
    logic_map = pickle.load(logic_file)

# Look up each gene's logic function in the same order as `node_names`
# (a name missing from `logic_map` raises at the lookup).
logic_funcs = []
for gene_name in node_names:
    logic_funcs.append(logic_map[gene_name])

# (names, functions) pair, kept in matching order.
logic_func_data = (node_names, logic_funcs)


In [None]:
# ──────────────────────────────────────────────────────────────────────────────
# 3) Decide the target domain
#    Here "success" means that a chosen subset of genes is all switched off.
#    Swap in whatever tuple(s) of values are needed.
# ──────────────────────────────────────────────────────────────────────────────
# NOTE(review): the `gym.make(...)` call further down passes its own
# `goal_config` literal, so the dict built here is not consumed by that call.

# Example target: genes 0, 1 and 2 must all be 0.
target_nodes = [0, 1, 2]              # indices into node_names
target_node_values = ((0, 0, 0),)     # tuple of tuples, one entry per accepted pattern
undesired_node_values = ()            # empty: no "wrong" attractors
horizon = 20                          # maximum number of steps

goal_config = dict(
    target_nodes=target_nodes,
    target_node_values=target_node_values,
    undesired_node_values=undesired_node_values,
    intervene_on=[*range(len(node_names))],  # every node index is flippable
    horizon=horizon,
)


In [None]:
# ──────────────────────────────────────────────────────────────────────────────
# 4) Instantiate the **base** PBN environment
# ──────────────────────────────────────────────────────────────────────────────
# The goal passed here is an empty attractor list and an empty target set; it
# is kept exactly as before. `logic_func_data` is the (node_names, logic_funcs)
# pair built when the logic functions were loaded.
env = gym.make(
    "gym-PBN/PBN-v0",
    logic_func_data=logic_func_data,
    goal_config={"all_attractors": [], "target": set()},
    reward_config={
        "successful_reward":    5,
        "wrong_attractor_cost": 2,
        "action_cost":          1,
    },
)

# Set the per-episode step limit (the env calls it `horizon`).
# `gym.make` hands back the environment wrapped in gymnasium wrappers, and a
# plain attribute assignment on a wrapper is stored on the wrapper only — the
# wrapped env never sees it. Assign on the underlying env instead, and reuse
# the `horizon` value defined with the goal settings rather than repeating 20.
# NOTE(review): assumes the PBN env reads `self.horizon` during stepping — confirm.
env.unwrapped.horizon = horizon



  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# ──────────────────────────────────────────────────────────────────────────────
# 5) Override reset-pool to use train states
# ──────────────────────────────────────────────────────────────────────────────
# `env` is the wrapped object returned by `gym.make`; assigning an attribute on
# it would only decorate the outer wrapper. Attach the pool to the underlying
# PBN environment so the environment itself can see it.
# NOTE(review): assumes the PBN env consults `train_states` on reset — confirm.
pbn_env = env.unwrapped
pbn_env.train_states = np.load(TRAIN_STATES_PATH)

# ──────────────────────────────────────────────────────────────────────────────
# 6) Smoke test: reset & take a few random steps
# ──────────────────────────────────────────────────────────────────────────────
# `reset(seed=...)` seeds the environment only; the action space has its own
# RNG, so seed it too to make the sampled actions repeatable between runs.
env.action_space.seed(0)

obs, info = env.reset(seed=0)
print("Reset obs:", obs, "info:", info)

for t in range(5):
    a = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(a)
    print(f" step {t:2d}: action={a:3d}, reward={reward:4.1f}, done={terminated or truncated}")
    if terminated or truncated:
        break

print("\nObs space:", env.observation_space)
print("Act space:", env.action_space)
# Horizon is looked up on `env` itself so it is found whether it lives on the
# wrapper or (via the wrapper's attribute fallback, gymnasium < 1.0) on the
# wrapped env.
print("Horizon  :", env.horizon)
# Reward settings live on the underlying env; read them there directly instead
# of relying on the wrapper's deprecated attribute forwarding.
print("Rewards  :", {
    "success" : pbn_env.successful_reward,
    "wrong"   : pbn_env.wrong_attractor_cost,
    "cost"    : pbn_env.action_cost
})

Reset obs: [ True  True  True  True False  True False  True  True False  True  True
  True  True False False False  True  True  True  True  True False  True
 False  True False  True  True False False  True False  True False  True
  True  True  True  True False False  True False  True  True False False
 False False  True False  True False False False  True False False False
 False False  True False False False  True False  True False  True False
  True  True  True False False False False False False False False  True
  True False  True False  True  True False  True False  True False False
  True False  True False] info: {'observation_idx': 1216827158475929188165224607050}
 step  0: action= 90, reward=-1.0, done=False
 step  1: action=100, reward=-1.0, done=False
 step  2: action= 41, reward=-1.0, done=False
 step  3: action=  0, reward= 0.0, done=False
 step  4: action= 70, reward=-1.0, done=False

Obs space: MultiBinary(100)
Act space: Discrete(101)
Horizon  : 20
Rewards  : {'success':

  if not isinstance(terminated, (bool, np.bool8)):
