In [1]:
import copy
import pickle
from datetime import datetime as dtdt

import attrs
import numpy as np

from hetero.config import DTYPE, AlgoConfig, DataGenConfig, GroupingConfig
from hetero.datagen import generate_data_from_config
from hetero.policies import AlternativePolicy
from hetero.tasks import (
    beta_estimate_from,
    beta_estimate_from_e2e_learning,
    beta_estimate_from_nongrouped,
    compute_UV_truths,
    compute_V_estimate,
)

# --- Reward coefficients ------------------------------------------------------
# Single source of truth for the two coefficient magnitudes. They build the
# coefficient arrays below AND are embedded in the truth/result file names, so
# editing a magnitude automatically points the notebook at a different file
# instead of silently reusing a truth file computed under the old settings.
GROUP_REWARD_MAGNITUDE = 2.68
ACTION_REWARD_MAGNITUDE = 2.89

group_reward_coeff = np.array(
    [
        [-GROUP_REWARD_MAGNITUDE, GROUP_REWARD_MAGNITUDE],
        [GROUP_REWARD_MAGNITUDE, -GROUP_REWARD_MAGNITUDE],
    ],
    dtype=DTYPE,
)

action_reward_coeff = [-ACTION_REWARD_MAGNITUDE, ACTION_REWARD_MAGNITUDE]

FEATURE_TYPE = "NONE"

# Keyword arguments shared by every DataGenConfig built in this notebook.
data_config_init = dict(
    num_trajectories=100,
    num_time_steps=30,
    group_reward_coeff_override=group_reward_coeff,
    action_reward_coeff=action_reward_coeff,
    num_burnin_steps=100,
    basis_expansion_method=FEATURE_TYPE,
    add_intercept_column=True,
)


# --- Output files -------------------------------------------------------------
# Date stamp plus the two magnitudes; shared by the truth and result file names.
SETTINGS_TAG = f"20230528_{GROUP_REWARD_MAGNITUDE}_{ACTION_REWARD_MAGNITUDE}"

# First-time runners: keep COMPUTE_TRUTH = True so the truth file is generated.
# Set the flag to False once the truth file exists to load it instead.
COMPUTE_TRUTH = True
# The file name tracks FEATURE_TYPE and the coefficient magnitudes only; if any
# OTHER setting changes, update SETTINGS_TAG so a stale truth file is not reused.
TRUTH_FILE = f"hetero/data/{FEATURE_TYPE}_truth_{SETTINGS_TAG}.pkl"
print("truth file name =", TRUTH_FILE)

# The result file additionally carries N, T and a wall-clock stamp, so every
# execution of this cell yields a fresh result file name.
time_tag = dtdt.now().strftime("%Y%m%d_%H-%M-%S")
tag = f'N={data_config_init["num_trajectories"]}_T={data_config_init["num_time_steps"]}_{time_tag}'
RESULT_FILE = f"hetero/data/{FEATURE_TYPE}_result_{SETTINGS_TAG}_{tag}.pkl"
print("result file name =", RESULT_FILE)

SAVE_RESULT = True
if not SAVE_RESULT:
    print("Result will NOT be saved, only use this for experimental runs!!!")

# Number of independently seeded datasets generated and estimated below.
NUM_EXPERIMENTS = 100

truth file name = hetero/data/NONE_truth_20230528_2.68_2.89.pkl
result file name = hetero/data/NONE_result_20230528_2.68_2.89_N=100_T=30_20230602_14-26-56.pkl


=====================================================================================================
# Algorithm 

- Set the algorithm configuration below.

In [2]:

# Outlier-handling switches, grouped separately from the remaining settings.
_outlier_settings = {
    "should_remove_outlier": True,
    "outlier_lower_perc": 2,
    "outlier_upper_perc": 98,
}

# Settings object passed to the end-to-end estimator; its `discount` attribute
# is also read by the truth computation and the non-grouped estimator.
algo_config = AlgoConfig(
    max_num_iters=2,
    gam=2.7,
    lam=1.6,
    rho=2.0,
    nu_coeff=1e-5,
    delta_coeff=1e-5,
    use_group_wise_regression_init=True,
    **_outlier_settings,
)

# Policy under evaluation and the (all-default) grouping configuration.
pi_eval = AlternativePolicy(2)
grouping_config = GroupingConfig()

In [3]:
# Obtain u_truth / v_truth: either load them from TRUTH_FILE or recompute them
# and write TRUTH_FILE, depending on COMPUTE_TRUTH.
if not COMPUTE_TRUTH:
    # NOTE: unpickling can execute arbitrary code; only load truth files that
    # were produced by this notebook.
    with open(TRUTH_FILE, "rb") as truth_fh:
        cached_truth = pickle.load(truth_fh)
        u_truth = cached_truth["u_truth"]
        v_truth = cached_truth["v_truth"]
else:
    # Shallow copy without "num_trajectories": that value is passed to
    # compute_UV_truths explicitly below.
    truth_data_config_init = copy.copy(data_config_init)
    del truth_data_config_init["num_trajectories"]

    # For best results, use num_repeats=10
    u_repeats, v_repeats = compute_UV_truths(
        truth_data_config_init,
        algo_config.discount,
        pi_eval,
        num_repeats=10,
        num_trajectories=1000,
    )
    # Average along the first axis of the returned arrays.
    u_truth = u_repeats.mean(axis=0)
    v_truth = v_repeats.mean(axis=0)

    # Store the truths together with the settings that produced them.
    truth_payload = {
        "u_truth": u_truth,
        "v_truth": v_truth,
        "data_config_dict": truth_data_config_init,
        "algo_config_dict": attrs.asdict(algo_config),
    }
    with open(TRUTH_FILE, "wb") as truth_fh:
        pickle.dump(truth_payload, truth_fh)

In [4]:
# One entry per experiment: the non-grouped estimate and the end-to-end
# learned estimate, both fitted on the same freshly generated dataset.
beta_ng_list, beta_learned_list = [], []

for experiment_no in range(1, NUM_EXPERIMENTS + 1):
    # Each experiment gets its own seed, a multiple of 7531.
    data_config = DataGenConfig(seed=7531 * experiment_no, **data_config_init)
    data = generate_data_from_config(data_config)

    nongrouped_beta = beta_estimate_from_nongrouped(
        data, pi_eval, algo_config.discount
    )
    beta_ng_list.append(nongrouped_beta)

    learned_beta = beta_estimate_from_e2e_learning(
        data, algo_config, grouping_config, pi_eval
    )
    beta_learned_list.append(learned_beta)

new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
beta_solver, min eigen of left matrix = 0.79219455
MCPImpl: num_above=10000, num_below=9900
beta_solver, min eigen of left matrix = 0.79219455
MCPImpl: num_above=10000, num_below=9900
kmeans center = [[ 1.55689985 -3.71195488  2.21229497  3.74026552 -1.53753243  5.05142405]
 [-1.49334849  3.81070706 -2.14349852 -3.82565017  1.63590548 -5.05871561]] and inertia = 105.8365931776716
Label mismatch = 0
new_labels.length=200 matches num_unique_labels
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
beta_solver, min eigen of left matrix = (0.79236805-0.0017966615j)
MCPImpl: num_above=10021, num_below=9879
beta_solver, min eigen of left matrix = (0.79236805-0.0017966615j)
MCPImpl: num_above=10001, num_below=9899
kmeans center = [[ 1.58642907 -3.74810798  2.18167425  3.68206888 -1.4524233   5.07754673

kmeans center = [[ 1.58772813 -3.82136147  2.07204416  3.87432207 -1.55620283  5.03096021]
 [-1.48190493  3.7324859  -2.25499056 -3.89884098  1.54967401 -5.09222808]] and inertia = 112.85424137573659
Label mismatch = 0
new_labels.length=200 matches num_unique_labels
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
beta_solver, min eigen of left matrix = (0.7788243+0j)
MCPImpl: num_above=10008, num_below=9892
beta_solver, min eigen of left matrix = (0.7788243+0j)
MCPImpl: num_above=10000, num_below=9900
kmeans center = [[ 1.45236167 -3.82617066  2.21296661  3.96336766 -1.65142214  5.13460562]
 [-1.57018993  3.81104433 -2.17516042 -3.77497972  1.55129032 -5.01096583]] and inertia = 109.30419379698596
Label mismatch = 0
new_labels.length=200 matches num_unique_labels
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
new_labels.length=6000 matche

MCPImpl: num_above=10001, num_below=9899
beta_solver, min eigen of left matrix = (0.78487325+0j)
MCPImpl: num_above=10000, num_below=9900
kmeans center = [[ 1.48441156 -3.68459211  2.19950975  3.77680779 -1.53237222  5.03032232]
 [-1.57614439  3.8366388  -2.14181429 -3.82302727  1.62703572 -5.12079499]] and inertia = 121.26775708709094
Label mismatch = 0
new_labels.length=200 matches num_unique_labels
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
beta_solver, min eigen of left matrix = (0.77221286+0j)
MCPImpl: num_above=10003, num_below=9897
beta_solver, min eigen of left matrix = (0.77221286+0j)
MCPImpl: num_above=10000, num_below=9900
kmeans center = [[ 1.54161017 -3.75452788  2.33788927  3.75432722 -1.60309104  5.15499205]
 [-1.55596819  3.61501395 -2.35839088 -3.84300768  1.54264487 -5.12869448]] and inertia = 114.44531562724184
Label mismatch = 0
new_labels.length=200 matches num_u

new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
beta_solver, min eigen of left matrix = (0.7858209+0j)
MCPImpl: num_above=10001, num_below=9899
beta_solver, min eigen of left matrix = (0.7858209+0j)
MCPImpl: num_above=10000, num_below=9900
kmeans center = [[ 1.58944523 -3.73702181  2.25190745  3.79762095 -1.52308933  5.21218681]
 [-1.53236284  3.70572936 -2.05110421 -3.88816975  1.59569584 -5.03065754]] and inertia = 120.09338519119422
Label mismatch = 0
new_labels.length=200 matches num_unique_labels
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
beta_solver, min eigen of left matrix = (0.7940731+0j)
MCPImpl: num_above=10000, num_below=9900
beta_solver, min eigen of left matrix = (0.7940731+0j)
MCPImpl: num_above=10000, num_below=9900
kmeans center = [[ 1.5951181  -3.79094908  2.10590303  3.69949312 -1.45609361  4.995961  ]
 [-1.56838764

new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
beta_solver, min eigen of left matrix = (0.79105854-0.00018353778j)
MCPImpl: num_above=10009, num_below=9891
beta_solver, min eigen of left matrix = (0.79105854-0.00018353778j)
MCPImpl: num_above=10000, num_below=9900
kmeans center = [[ 1.54146889 -3.73589184  2.36907683  3.87271731 -1.6155946   5.31855177]
 [-1.46660016  3.79204435 -2.25914984 -3.80623971  1.66349449 -5.10934206]] and inertia = 106.23054210018533
Label mismatch = 0
new_labels.length=200 matches num_unique_labels
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
beta_solver, min eigen of left matrix = (0.7900795-0.0032597561j)
MCPImpl: num_above=10000, num_below=9900
beta_solver, min eigen of left matrix = (0.7900795-0.0032597561j)
MCPImpl: num_above=10000, num_below=9900
kmeans center = [[ 1.46459918 -3.73226968  2.14935709  3

MCPImpl: num_above=10000, num_below=9900
kmeans center = [[ 1.4512104  -3.79382117  2.30409489  3.82243325 -1.56855779  5.12043349]
 [-1.4848187   3.77313851 -2.14907283 -3.76111305  1.53483182 -5.00081347]] and inertia = 109.55850238966129
Label mismatch = 0
new_labels.length=200 matches num_unique_labels
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
beta_solver, min eigen of left matrix = (0.78216106+0j)
MCPImpl: num_above=10000, num_below=9900
beta_solver, min eigen of left matrix = (0.78216106+0j)
MCPImpl: num_above=10000, num_below=9900
kmeans center = [[ 1.55814705 -3.74619382  2.08777459  3.82787635 -1.61337488  5.11549206]
 [-1.55741955  3.80270689 -2.1830485  -3.66798153  1.47077002 -5.03204107]] and inertia = 126.36200868015743
Label mismatch = 0
new_labels.length=200 matches num_unique_labels
new_labels.length=6000 matches number of records
new_labels.length=6000 matches numb

beta_solver, min eigen of left matrix = 0.79010934
MCPImpl: num_above=10000, num_below=9900
beta_solver, min eigen of left matrix = 0.79010934
MCPImpl: num_above=10000, num_below=9900
kmeans center = [[ 1.53834222 -3.76429842  2.07616234  3.72509249 -1.49291307  4.96708982]
 [-1.50374936  3.79974116 -2.20973898 -3.82229654  1.60823665 -5.06656985]] and inertia = 116.19276890528829
Label mismatch = 0
new_labels.length=200 matches num_unique_labels
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
beta_solver, min eigen of left matrix = 0.7773216
MCPImpl: num_above=10000, num_below=9900
beta_solver, min eigen of left matrix = 0.7773216
MCPImpl: num_above=10000, num_below=9900
kmeans center = [[ 1.53621062 -3.78951374  2.13044963  3.77483181 -1.55322444  4.98460814]
 [-1.49808526  3.85233479 -2.11119295 -3.76968639  1.59654418 -5.15267802]] and inertia = 103.85046769475235
Label mismatch = 0
n

new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
beta_solver, min eigen of left matrix = (0.79139894+0j)
MCPImpl: num_above=10000, num_below=9900
beta_solver, min eigen of left matrix = (0.79139894+0j)
MCPImpl: num_above=10000, num_below=9900
kmeans center = [[ 1.61468955 -3.74418742  2.04479499  3.80955322 -1.58070828  5.10833682]
 [-1.47704     3.6849829  -2.05534132 -3.92698219  1.55695312 -5.00103051]] and inertia = 126.38180107608012
Label mismatch = 0
new_labels.length=200 matches num_unique_labels
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
new_labels.length=6000 matches number of records
beta_solver, min eigen of left matrix = (0.7899801+0j)
MCPImpl: num_above=10000, num_below=9900
beta_solver, min eigen of left matrix = (0.7899801+0j)
MCPImpl: num_above=10000, num_below=9900
kmeans center = [[ 1.55358237 -3.82964998  2.01789812  3.78461825 -1.53346935  4.88516282]
 [-1.596194

In [5]:
# Sanity check: show the `betas` attribute of the first experiment's
# end-to-end learned estimate.
beta_learned_list[0].betas

[array([-1.446637 ,  3.920033 , -2.1800213, -3.9628756,  1.5962642,
        -5.12349  ], dtype=float32),
 array([ 1.4990822, -3.8002539,  2.2158315,  3.7819986, -1.4555093,
         5.0253725], dtype=float32)]

In [8]:
def _z_scores(mus, sigmas, truths):
    """Return ``[(mu - truth) / sigma, ...]`` as a list.

    ``mus``, ``truths`` and ``sigmas`` are iterated in lock-step, so the result
    has as many entries as the shortest of the three.
    """
    return [(mu - truth) / sigma for mu, truth, sigma in zip(mus, truths, sigmas)]


# Per-experiment value estimates (mu), their sigmas and the z-scores against
# v_truth, for the learned estimator and for the non-grouped ("ng") one.
mu_learned_list = []
sigma_learned_list = []
z_score_learned_list = []
mu_ng_list = []
sigma_ng_list = []
z_score_ng_list = []

num_groups = len(v_truth)

for beta_learned, beta_ng in zip(beta_learned_list, beta_ng_list):
    v_mus, v_sigmas = compute_V_estimate(u_truth, beta_learned)
    mu_learned_list.append(v_mus)
    sigma_learned_list.append(v_sigmas)
    z_score_learned_list.append(_z_scores(v_mus, v_sigmas, v_truth))

    ngv_mus, ngv_sigmas = compute_V_estimate(u_truth, beta_ng)
    mu_ng_list.append(ngv_mus)
    sigma_ng_list.append(ngv_sigmas)
    # The non-grouped result is repeated so that it is compared against every
    # entry of v_truth (presumably it holds one estimate shared by all groups;
    # verify against compute_V_estimate). Going through list(...) guarantees
    # sequence repetition: on a NumPy array, `* num_groups` would instead
    # multiply the values by num_groups.
    z_score_ng_list.append(
        _z_scores(
            list(ngv_mus) * num_groups,
            list(ngv_sigmas) * num_groups,
            v_truth,
        )
    )

# Report: metrics averaged over the two groups

In [9]:
Z_THRESHOLD = 1.96

# Stack the per-experiment z-score lists into arrays.
z_score_learned = np.array(z_score_learned_list)
z_score_ng = np.array(z_score_ng_list)

# Boolean masks: True where |z| is strictly below the threshold.
learned_in_threshold = np.abs(z_score_learned) < Z_THRESHOLD
ng_in_threshold = np.abs(z_score_ng) < Z_THRESHOLD

# Fraction of True entries, pooled over every element of each mask.
learned_in_threshold_perc = learned_in_threshold.mean()
ng_in_threshold_perc = ng_in_threshold.mean()
print(
    "learned_in_threshold_perc=",
    learned_in_threshold_perc,
    ", ng_in_threshold_perc=",
    ng_in_threshold_perc,
)

if SAVE_RESULT:
    # Persist the raw per-experiment quantities (not the summary fractions).
    result_payload = {
        "mu_learned_list": mu_learned_list,
        "sigma_learned_list": sigma_learned_list,
        "z_score_learned": z_score_learned,
        "mu_ng_list": mu_ng_list,
        "sigma_ng_list": sigma_ng_list,
        "z_score_ng": z_score_ng,
        "beta_learned_list": beta_learned_list,
        "beta_ng_list": beta_ng_list,
    }
    with open(RESULT_FILE, "wb") as result_fh:
        pickle.dump(result_payload, result_fh)

learned_in_threshold_perc= 0.94 , ng_in_threshold_perc= 0.0


In [10]:
# Pooled summary over all experiments and groups.
# ACL: 2 * Z_THRESHOLD * mean sigma.  MSE: mean squared (mu - v_truth).
learned_errors = np.asarray(mu_learned_list) - v_truth
ng_errors = np.asarray(mu_ng_list) - v_truth

ac_acl = 2 * Z_THRESHOLD * np.mean(sigma_learned_list)
ac_mse = np.mean(learned_errors ** 2)

mv_acl = 2 * Z_THRESHOLD * np.mean(sigma_ng_list)
mv_mse = np.mean(ng_errors ** 2)

In [11]:
# Pooled report for the learned estimator, one metric per output line.
print(
    "ACPE results: (average over groups)",
    f"ACL: {ac_acl}",
    f"MSE: {ac_mse}",
    f"ECP: {learned_in_threshold_perc}",
    sep="\n",
)

# Same three metrics for the non-grouped estimator.
print(
    "MVPE results: (average over groups)",
    f"ACL: {mv_acl}",
    f"MSE: {mv_mse}",
    f"ECP: {ng_in_threshold_perc}",
    sep="\n",
)

ACPE results: (average over groups)
ACL: 0.2903089975935917
MSE: 0.0051134065724909306
ECP: 0.94
MVPE results: (average over groups)
ACL: 0.44467453582272765
MSE: 12.95175552368164
ECP: 0.0


# Report: metrics for each of the two groups separately

In [12]:
# Per-group coverage: the same |z| < Z_THRESHOLD test as the pooled report, but
# the fraction is taken down axis 0 (one row per experiment), giving one value
# per column of the z-score arrays.
z_score_learned = np.array(z_score_learned_list)
z_score_ng = np.array(z_score_ng_list)
Z_THRESHOLD = 1.96
learned_in_threshold = np.abs(z_score_learned) < Z_THRESHOLD
ng_in_threshold = np.abs(z_score_ng) < Z_THRESHOLD

# NOTE: from here on these two names hold per-group arrays, no longer the
# pooled scalars computed for the averaged report. Re-run the averaged cells
# before re-printing the averaged report.
learned_in_threshold_perc = np.sum(learned_in_threshold, axis=0) / learned_in_threshold.shape[0]
ng_in_threshold_perc = np.sum(ng_in_threshold, axis=0) / ng_in_threshold.shape[0]

print(
    "learned_in_threshold_perc=",
    learned_in_threshold_perc,
    ", ng_in_threshold_perc=",
    ng_in_threshold_perc,
)

# RESULT_FILE is intentionally not rewritten here: the averaged-report cell
# already pickles exactly the same payload (it contains none of the per-group
# fractions computed above), so a second dump would only repeat the I/O.

learned_in_threshold_perc= [0.92 0.96] , ng_in_threshold_perc= [0. 0.]


In [13]:
# Per-group summary: identical formulas to the pooled version, but reduced
# along axis 0 only (across experiments).
learned_sq_err = (np.asarray(mu_learned_list) - v_truth) ** 2
ng_sq_err = (np.asarray(mu_ng_list) - v_truth) ** 2

ac_mse = learned_sq_err.mean(axis=0)
ac_acl = 2 * Z_THRESHOLD * np.mean(sigma_learned_list, axis=0)

mv_mse = ng_sq_err.mean(axis=0)
mv_acl = 2 * Z_THRESHOLD * np.mean(sigma_ng_list, axis=0)

In [14]:
# Per-group report for the learned estimator.
print(
    "ACPE results: Group1, Group 2",
    f"MSE: {ac_mse}",
    f"ACL: {ac_acl}",
    f"ECP: {learned_in_threshold_perc}",
    sep="\n",
)

# Separator, then the same metrics for the non-grouped estimator.
print(
    "===",
    "MVPE results: ",
    f"MSE: {mv_mse}",
    f"ACL: {mv_acl}",
    f"ECP: {ng_in_threshold_perc}",
    sep="\n",
)

ACPE results: Group1, Group 2
MSE: [0.00520017 0.00502664]
ACL: [0.29082336 0.28979463]
ECP: [0.92 0.96]
===
MVPE results: 
MSE: [12.905413 12.9981  ]
ACL: [0.44467454]
ECP: [0. 0.]
