In [None]:
# ============================================================
# run_universal_constraints.py â€” FULL WORKING SCRIPT
# ============================================================
import os
import sys
module_path = os.path.abspath(os.path.join('..'))

if module_path not in sys.path:
    sys.path.append(module_path)

from gridworld_env_layout import GridWorldMDPFromLayoutEnv
from gridworld_env import NoisyLinearRewardFeaturizedGridWorldEnv
import numpy as np
from agent.q_learning_agent import ValueIteration, PolicyEvaluation
from scipy.optimize import linprog
from utils import generate_random_gridworld_envs

from utils import simulate_all_feedback
from utils import (
    compute_successor_features_family,
    derive_constraints_from_q_family,
    derive_constraints_from_atoms,
)

# 1) Generate envs + solve with Value Iteration
W_TRUE = np.array([-10, -2]) / np.linalg.norm([-10, -2])

envs, meta = generate_random_gridworld_envs(
    n_envs=50,
    rows=3, cols=3,
    color_to_feature_map={"red":[1.0,0.0], "blue":[0.0,1.0]},
    palette=("red","blue"),
    p_color_range={"red":(0.2,0.6), "blue":(0.4,0.8)},
    terminal_policy=dict(kind="random_k", k_min=0, k_max=1, p_no_terminal=0.1),
    gamma_range=(0.98, 0.995),
    noise_prob_range=(0.0, 0.0),
    w_mode="fixed",
    W_fixed=W_TRUE,
    seed=45,
    GridEnvClass=GridWorldMDPFromLayoutEnv,
)

vis = [ValueIteration(e) for e in envs]
for v in vis:
    v.run_value_iteration(epsilon=1e-10)
Q_list = [v.get_q_values() for v in vis]


# 2) Successor features
SFs = compute_successor_features_family(
    envs,
    Q_list,
    convention="entering",
    zero_terminal_features=True,
    tol=1e-10,
    max_iters=10000,
)


# 3) Q-only constraints
## I probably need to make this parallel
U_q_per_env, U_q_global = derive_constraints_from_q_family(
    SFs,
    Q_list,
    envs,
    tie_eps=1e-10,
    skip_terminals=True,
    normalize=True,
    tol=1e-12,
    precision=1e-3,
    lp_epsilon=1e-4,
)



## I probably need to make this parallel
# 4) Simulate feedback atoms (pairwise, estop, improvement, demo)
atoms_per_env = simulate_all_feedback(envs, Q_list, n_base_trajs=1000, n_improvements=1000, n_pairwise=1000, n_estops=1000)


## I probably need to make this parallel
# 5) Atom-based constraints
U_atoms_per_env, U_atoms_global = derive_constraints_from_atoms(
    atoms_per_env,
    SFs,
    envs,
    precision=1e-3,
    lp_epsilon=1e-4,
)


# 6) Final Universal Set = union of Q-only + atom constraints
import numpy as np
from utils import remove_redundant_constraints

all_global = []
if len(U_q_global) > 0:
    all_global.append(U_q_global)
if len(U_atoms_global) > 0:
    all_global.append(U_atoms_global)

if all_global:
    stacked = np.vstack(all_global)
    U_universal = remove_redundant_constraints(stacked, epsilon=1e-4)
else:
    d = SFs[0][0].shape[-1]
    U_universal = np.zeros((0, d))

print("Universal constraint set size:", len(U_universal))

Universal constraint set size: 2


In [None]:
U_atoms_global

array([[-0.70710678,  0.70710678],
       [ 0.        , -1.        ],
       [-0.09053575, -0.99589321],
       [ 0.19611614, -0.98058068],
       [-0.20952909,  0.97780241],
       [-0.4472136 , -0.89442719],
       [-0.64594224, -0.76338629],
       [-0.19611614, -0.98058068],
       [-0.31622777, -0.9486833 ],
       [-0.38074981, -0.9246781 ]])

In [4]:
U_q_global

array([[ 0.        , -1.        ],
       [-0.32017536,  0.9473583 ]])

In [5]:
U_universal

[array([ 0.19611614, -0.98058068]), array([-0.20952909,  0.97780241])]