In [13]:
import numpy as np
import matplotlib.pyplot as plt
from rashomon.hasse import enumerate_policies
from rashomon.aggregate import RAggregate
from first_wave import compute_boundary_probs, allocate_first_wave, assign_first_wave_treatments
from data_gen import get_beta_underlying_causal, generate_outcomes

## First-wave allocation and outcomes

In [14]:
# Enumerate the policy lattice for M features, using the same R for every feature.
M, R = 8, 5
policies = enumerate_policies(M, R)
K = len(policies)  # total number of policies in the lattice
print(f"Found K = {K} policies (each policy is an {M}-tuple).")

Found K = 390625 policies (each policy is an 8-tuple).


**Compute first-wave allocation**: We need $R_i$ for each feature (here $R_i = R$ for $i = 0, \dots, M-1$). We then call `compute_boundary_probs` followed by `allocate_first_wave` to obtain `n1_alloc`: an array of length $K$ whose entries sum to `n1`.

In [15]:
H = 3  # sparsity parameter used inside compute_boundary_probs (TODO: justify this choice)
n1 = 500  # total first-wave sample size

# compute_boundary_probs -> allocate_first_wave yields n1_alloc, which is
# expected to sum to n1 across the policies.
boundary_probs = compute_boundary_probs(policies, R, H)
n1_alloc = allocate_first_wave(boundary_probs, n1)
print(f"First‐wave allocation sums to {int(n1_alloc.sum())} (should be {n1}).")

# Enforce the invariant rather than only printing it: a mis-sized allocation
# would otherwise flow silently into the assignment and outcome simulation.
assert int(n1_alloc.sum()) == n1, (
    f"first-wave allocation sums to {int(n1_alloc.sum())}, expected n1 = {n1}"
)

First‐wave allocation sums to 500 (should be 500).


### Simulating outcomes

We generate a NumPy array `beta` of true effects, one for each node. We pass our lattice `policies` together with `M` and `R`, and then specify the `kind` of underlying causal model.

There is a range of options, all of which are continuous and non-trivial: they exhibit locally correlated effects and avoid brittle cancellations in effects. The options range from simple (polynomial, Gaussian, basic interaction) to complex (radial basis function, or a function that mimics a simple neural network).

In [16]:
# True effects for the lattice nodes; kind="poly" presumably selects the
# polynomial option among the available causal models -- confirm in data_gen.
beta = get_beta_underlying_causal(policies, M, R, kind="poly")

In [17]:
# Not in use: draws a different distribution for each true pool of a random 'true' partition sigma_true. It is not used in this simulation because of our specifications for the underlying causal model (e.g. continuous, locally correlated effects). The way it constructs the true partition would also need to change before it could be used.

# partition_seed = 123
# sigma_true, pi_pools_true, pi_policies_true = generate_true_partition(policies, R,random_seed=partition_seed)
# beta = get_beta_piecewise(policies, sigma_true, pi_pools_true, pi_policies_true, 0.5, 1, 10)

**Get outcomes**: We now record the first-wave assignment and generate the outcomes with added noise.

In [18]:
# Build the first-wave assignment vector D1 from the allocation n1_alloc.
D1 = assign_first_wave_treatments(n_alloc=n1_alloc)
N1 = D1.shape[0]

print("Length of D1:", N1)  # should equal sum n1_alloc == n1

# Fail fast if the assignment vector does not match the planned first-wave
# size; the outcome simulation below takes D1 as its input.
assert N1 == n1, f"D1 has length {N1}, expected first-wave size n1 = {n1}"

Length of D1: 500


In [19]:
# Simulate the first-wave outcomes y1 from the assignment D1 and the true effects beta.
sigma_noise = 10  # forwarded as sigma_noise to generate_outcomes
outcome_seed = 53  # forwarded as random_seed; presumably fixes the noise draw
y1 = generate_outcomes(
    D=D1,
    beta=beta,
    sigma_noise=sigma_noise,
    random_seed=outcome_seed,
)

# Summary statistics of the simulated outcomes.
print("Overall mean outcome:", np.mean(y1))
print("Overall std outcome:", np.std(y1))

Overall mean outcome: 3.250637410378799
Overall std outcome: 10.720606201785039


## Find optimal parameters for the RPS

In [20]:
# H = np.inf # TODO start with no cap on number of pools, rely on loss-threshold to prune partitions?
# theta = 100 # TODO get from find_params.ipynb
# lambda_reg = 1 # TODO get from find_params.ipynb

We then build the RPS with the optimal parameters, selecting for an error tolerance and size.

In [21]:
# R_set, R_profiles = RAggregate(M, R, H, D1, y1, theta, reg=lambda_reg)