In [1]:
import warnings
warnings.filterwarnings("ignore")
from copy import deepcopy
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import sys

sys.path.append("/code")

from tqdm import tqdm
import torch
# device = torch.device('cpu')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# import gym
# import recogym

import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim

torch.backends.cudnn.benchmark = torch.cuda.is_available()
if torch.cuda.is_available():
    torch.set_float32_matmul_precision("high")  # TF32 = big speedup on Ada


from sklearn.utils import check_random_state

# implementing OPE of the IPWLearner using synthetic bandit data
from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt

from scipy.special import softmax
import optuna
# from memory_profiler import profile


from estimators import (
    DirectMethod as DM
)

from simulation_utils import (
    eval_policy,
    generate_dataset,
    create_simulation_data_from_pi,
    get_train_data,
    get_opl_results_dict,
    CustomCFDataset,
    calc_reward,
    get_weights_info
)

from models import (    
    CFModel,
    NeighborhoodModel,
    BPRModel, 
    RegressionModel
)

from training_utils import (
    train,
    validation_loop, 
    cv_score_model
 )

from custom_losses import (
    SNDRPolicyLoss
    )

random_state=12345
random_ = check_random_state(random_state)

pd.options.display.float_format = '{:,.8f}'.format

Using device: cpu
Using device: cpu
Using device: cpu


In [2]:
def get_trial_results(
    our_x, 
    our_a, 
    emb_x, 
    emb_a, 
    original_x, 
    original_a, 
    dataset, 
    val_data, 
    original_policy_prob, 
    neighberhoodmodel, 
    regression_model, 
    dm
):
    policy = np.expand_dims(softmax(our_x @ our_a.T, axis=1), -1)
    policy_reward = calc_reward(dataset, policy)
    eval_metrics = eval_policy(neighberhoodmodel, val_data, original_policy_prob, policy)
    action_diff_to_real = np.sqrt(np.mean((emb_a - our_a) ** 2))
    action_delta = np.sqrt(np.mean((original_a - our_a) ** 2))
    context_diff_to_real = np.sqrt(np.mean((emb_x - our_x) ** 2))
    context_delta = np.sqrt(np.mean((original_x - our_x) ** 2))

    row = np.concatenate([
        np.atleast_1d(policy_reward),
        np.atleast_1d(eval_metrics),
        np.atleast_1d(action_diff_to_real),
        np.atleast_1d(action_delta),
        np.atleast_1d(context_diff_to_real),
        np.atleast_1d(context_delta)
    ])
    reg_dm = dm.estimate_policy_value(policy[val_data['x_idx']], regression_model.predict(val_data['x']))
    reg_results = np.array([reg_dm])
    conv_results = np.array([row])
    return get_opl_results_dict(reg_results, conv_results)

## `trainer_trial` Function

This function runs policy learning experiments using offline bandit data and evaluates various estimators.

### Parameters
- **num_runs** (int): Number of experimental runs per training size
- **num_neighbors** (int): Number of neighbors to consider in the neighborhood model
- **num_rounds_list** (list): List of training set sizes to evaluate
- **dataset** (dict): Contains dataset information including embeddings, action probabilities, and reward probabilities
- **batch_size** (int): Batch size for training the policy model
- **num_epochs** (int): Number of training epochs for each experiment
- **lr** (float, default=0.001): Learning rate for the optimizer

### Process Flow
1. Initializes result structures and retrieval models
2. For each training size in `num_rounds_list`:
   - Creates a uniform logging policy and simulates data
   - Generates training data for offline learning
   - Fits regression and neighborhood models for reward estimation
   - Initializes and trains a counterfactual policy model
   - Evaluates policy performance using various estimators
   - Collects metrics on policy reward and embedding quality

### Returns
- **DataFrame**: Results table with rows indexed by training size and columns for various metrics:
  - `policy_rewards`: True expected reward of the learned policy
  - Various estimator errors (`ipw`, `reg_dm`, `conv_dm`, `conv_dr`, `conv_sndr`)
  - Variance metrics for each estimator
  - Embedding quality metrics comparing learned representations to ground truth

### Implementation Notes
- Uses uniform random logging policy for collecting offline data
- Employs Self-Normalized Doubly Robust (SNDR) policy learning
- Measures embedding quality via RMSE to original/ground truth embeddings

In [3]:
def trainer_trial(
    num_runs,
    num_neighbors,
    num_rounds_list,
    dataset,
    batch_size,
    val_size=2000,
    n_trials=10,    
    prev_best_params=None
):

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    torch.backends.cudnn.benchmark = torch.cuda.is_available()
    if torch.cuda.is_available():
        torch.set_float32_matmul_precision("high")

    dm = DM()
    results = {}

    our_x, our_a = dataset["our_x"], dataset["our_a"]
    emb_x, emb_a = dataset["emb_x"], dataset["emb_a"]

    original_x, original_a = dataset["original_x"], dataset["original_a"]
    n_users, n_actions, emb_dim = dataset["n_users"], dataset["n_actions"], dataset["emb_dim"]

    all_user_indices = np.arange(n_users, dtype=np.int64)

    def T(x):
        return torch.as_tensor(x, device=device, dtype=torch.float32)

    def _mean_dict(dicts):
        """
        Robust mean over a list of dicts with numeric/scalar/1D-array values.
        Returns a single dict with elementwise means.
        """
        if not dicts:
            return {}
        keys = dicts[0].keys()
        out = {}
        for k in keys:
            vals = [d[k] for d in dicts if k in d]
            # try to convert each to np.array and average
            arrs = [np.asarray(v) for v in vals]
            # broadcast to same shape if scalars/1D
            stacked = np.stack(arrs, axis=0)
            out[k] = np.mean(stacked, axis=0)
        return out

    # ===== unpack dataset (keep originals safe) =====
    our_x_orig, our_a_orig = our_x, our_a
    emb_x, emb_a = emb_x, emb_a
    original_x, original_a = original_x, original_a
    n_users, n_actions, emb_dim = n_users, n_actions, emb_dim
    all_user_indices = np.arange(n_users, dtype=np.int64)

    dm = DM()
    results = {}
    best_hyperparams_by_size = {}
    last_best_params = prev_best_params if prev_best_params is not None else None

    # ===== baseline (sample size = 0) using get_trial_results =====
    pi_0 = softmax(our_x_orig @ our_a_orig.T, axis=1)
    original_policy_prob = np.expand_dims(pi_0, -1)

    simulation_data = create_simulation_data_from_pi(
        dataset, pi_0, val_size, random_state=0
    )

    # use same data for train/val just to generate the baseline row
    train_data = get_train_data(n_actions, val_size, simulation_data, np.arange(val_size), our_x_orig)
    val_data   = get_train_data(n_actions, val_size, simulation_data, np.arange(val_size), our_x_orig)

    regression_model = RegressionModel(
        n_actions=n_actions, action_context=our_x_orig,
        base_model=LogisticRegression(random_state=12345)
    )

    regression_model.fit(train_data['x'], train_data['a'], train_data['r'])

    neighberhoodmodel = NeighborhoodModel(
        train_data['x_idx'], train_data['a'],
        our_a_orig, our_x_orig, train_data['r'],
        num_neighbors=num_neighbors
    )

    # baseline row produced via get_trial_results
    results[0] = get_trial_results(
        our_x_orig, our_a_orig, emb_x, emb_a, original_x, original_a,
        dataset, val_data, original_policy_prob,
        neighberhoodmodel, regression_model, dm
    )

    # ===== main loop over training sizes =====
    for train_size in num_rounds_list:

        # we’ll collect per-run trial dicts generated by get_trial_results
        trial_dicts_this_size = []
        best_hyperparams_by_size[train_size] = {}

        # --- prepare a resampling for Optuna’s objective (shared loaders built per-run inside objective) ---
        # We’ll do Optuna per-run (fresh resample + search), then final fit with best params, then get_trial_results.

        for run in range(num_runs):

            # --- resample for this run ---
            pi_0 = softmax(our_x_orig @ our_a_orig.T, axis=1)
            original_policy_prob = np.expand_dims(pi_0, -1)

            simulation_data = create_simulation_data_from_pi(
                dataset, pi_0, train_size + val_size,
                random_state=(run + 1) * (train_size + 17)
            )

            idx_train = np.arange(train_size)
            train_data = get_train_data(n_actions, train_size, simulation_data, idx_train, our_x_orig)
            val_idx   = np.arange(val_size) + train_size
            val_data  = get_train_data(n_actions, val_size, simulation_data, val_idx, our_x_orig)

            num_workers = 4 if torch.cuda.is_available() else 0

            cf_dataset = CustomCFDataset(
                train_data['x_idx'], train_data['a'], train_data['r'], original_policy_prob
            )

            val_dataset = CustomCFDataset(
                val_data['x_idx'], val_data['a'], val_data['r'], original_policy_prob
            )

            # val_loader = DataLoader(
            #     val_dataset, batch_size=val_size, shuffle=False,
            #     pin_memory=torch.cuda.is_available(),
            #     num_workers=num_workers, persistent_workers=bool(num_workers)
            # )


            # --- Optuna objective bound to this run's data ---
            def objective(trial):
                lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
                epochs = trial.suggest_int("num_epochs", 1, 10)
                trial_batch_size = trial.suggest_categorical("batch_size", [64, 128, 256, 512])
                trial_num_neighbors = trial.suggest_int("num_neighbors", 3, 15)
                lr_decay = trial.suggest_float("lr_decay", 0.8, 1.0)

                trial_neigh_model = NeighborhoodModel(
                    train_data['x_idx'], train_data['a'],
                    our_a_orig, our_x_orig, train_data['r'],
                    num_neighbors=trial_num_neighbors
                )

                trial_scores_all = torch.as_tensor(
                    trial_neigh_model.predict(all_user_indices),
                    device=device, dtype=torch.float32
                )

                trial_model = CFModel(
                    n_users, n_actions, emb_dim,
                    initial_user_embeddings=T(our_x_orig),
                    initial_actions_embeddings=T(our_a_orig)
                ).to(device)

                assert (not torch.cuda.is_available()) or next(trial_model.parameters()).is_cuda

                final_train_loader = DataLoader(
                    cf_dataset, batch_size=trial_batch_size, shuffle=True,
                    pin_memory=torch.cuda.is_available(),
                    num_workers=num_workers, persistent_workers=bool(num_workers)
                )

                current_lr = lr
                for epoch in range(epochs):
                    if epoch > 0:
                        current_lr *= lr_decay
                        
                    train(
                        trial_model, final_train_loader, trial_scores_all,
                        criterion=SNDRPolicyLoss(), num_epochs=1, lr=current_lr, device=str(device)
                    )

                trial_x, trial_a = trial_model.get_params()
                trial_x = trial_x.detach().cpu().numpy()
                trial_a = trial_a.detach().cpu().numpy()

                pi_i = softmax(trial_x @ trial_a.T, axis=1)

                # print(get_weights_info(pi_i, original_policy_prob))
                # validation reward for selection
                return cv_score_model(val_data, trial_scores_all, pi_i)


            # --- run Optuna for this run ---
            study = optuna.create_study(direction="maximize")
            
            if last_best_params is not None:
                study.enqueue_trial(last_best_params)

            study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

            best_params = study.best_params
            last_best_params = best_params  # optional warm-start to next run
            best_hyperparams_by_size[train_size][run] = {
                "params": best_params,
                "reward": study.best_value
            }


            # --- final training with best params on this run’s data ---
            regression_model = RegressionModel(
                n_actions=n_actions, action_context=our_x_orig,
                base_model=LogisticRegression(random_state=12345)
            )
            regression_model.fit(
                train_data['x'], train_data['a'], train_data['r'],
                original_policy_prob[train_data['x_idx'], train_data['a']].squeeze()
            )

            neighberhoodmodel = NeighborhoodModel(
                train_data['x_idx'], train_data['a'],
                our_a_orig, our_x_orig, train_data['r'],
                num_neighbors=best_params['num_neighbors']
            )
            scores_all = torch.as_tensor(
                neighberhoodmodel.predict(all_user_indices),
                device=device, dtype=torch.float32
            )

            model = CFModel(
                n_users, n_actions, emb_dim,
                initial_user_embeddings=T(our_x_orig),
                initial_actions_embeddings=T(our_a_orig)
            ).to(device)
            assert (not torch.cuda.is_available()) or next(model.parameters()).is_cuda

            train_loader = DataLoader(
                cf_dataset, batch_size=batch_size, shuffle=True,
                pin_memory=torch.cuda.is_available(),
                num_workers=num_workers, persistent_workers=bool(num_workers)
            )

            current_lr = best_params['lr']
            for epoch in range(best_params['num_epochs']):
                if epoch > 0:
                    current_lr *= best_params['lr_decay']
                train(
                    model, train_loader, scores_all,
                    criterion=SNDRPolicyLoss(), num_epochs=1, lr=current_lr, device=str(device)
                )

            # learned embeddings (do NOT overwrite originals)
            learned_x_t, learned_a_t = model.get_params()
            learned_x = learned_x_t.detach().cpu().numpy()
            learned_a = learned_a_t.detach().cpu().numpy()

            # --- produce the per-run result via get_trial_results ---
            trial_res = get_trial_results(
                learned_x, learned_a,          # learned (policy) embeddings
                emb_x, emb_a,                  # ground-truth embedding refs
                original_x, original_a,        # original clean refs
                dataset,
                val_data,                      # use this run's val split
                original_policy_prob,
                neighberhoodmodel,
                regression_model,
                dm
            )

            trial_dicts_this_size.append(trial_res)

            # memory hygiene
            torch.cuda.empty_cache()

        # === aggregate per-run results (mean) and store under this train_size ===
        results[train_size] = _mean_dict(trial_dicts_this_size)

    return pd.DataFrame.from_dict(results, orient='index'), best_hyperparams_by_size

## Learning

We will run several simulations on a generated dataset, the dataset is generated like this:
$$ \text{We have users U and actions A } u_i \sim N(0, I_{emb_dim}) \ a_i \sim N(0, I_{emb_dim})$$
$$ p_{ij} = 1 / (5 + e^{-(u_i.T a_j)}) $$
$$r_{ij} \sim Bin(p_{ij})$$

We have a policy $\pi$
and it's ground truth reward is calculated by
$$R_{gt} = \sum_{i}{\sum_{j}{\pi_{ij} * p_{ij}}} $$

Our parameters for the dataset will be
$$EmbDim = 5$$
$$NumActions= 150$$
$$NumUsers = 150$$
$$NeighborhoodSize = 6$$

to learn a new policy from $\pi$ we will sample from:
$$\pi_{start} = (1-\epsilon)*\pi + \epsilon * \pi_{random}$$

In [4]:
dataset_params = dict(
                    n_actions= 500,
                    n_users = 500,
                    emb_dim = 16,
                    # sigma = 0.1,
                    eps = 0.6, # this is the epsilon for the noise in the ground truth policy representation
                    ctr = 0.1
                    )

train_dataset = generate_dataset(dataset_params)

Random Item CTR: 0.07066414727263938
Optimal greedy CTR: 0.09999926940951757
Optimal Stochastic CTR: 0.09995326955796031
Our Initial CTR: 0.08610747363354625


In [5]:
num_runs = 1
batch_size = 200
num_neighbors = 6
n_trials_for_optuna = 10
num_rounds_list = [500, 1000, 2000, 10000, 20000]
# num_rounds_list = [20000]


# Manually define your best parameters
best_params_to_use = {
    "lr": 0.0095,  # Learning rate
    "num_epochs": 5,  # Number of training epochs
    "batch_size": 64,  # Batch size for training
    "num_neighbors": 8,  # Number of neighbors for neighborhood model
    "lr_decay": 0.85  # Learning rate decay factor
}

### 1

$$emb = 0.7 * gt + 0.3 * noise$$
$$lr = 0.005$$
$$n_{epochs} = 1$$
$$BatchSize=50$$

In [6]:
print("Value of num_rounds_list:", num_rounds_list)

# Run the optimization
df4, best_hyperparams_by_size = trainer_trial(num_runs, num_neighbors, num_rounds_list, train_dataset, batch_size, val_size=10000, n_trials=n_trials_for_optuna, prev_best_params=best_params_to_use)

# Print best hyperparameters for each training size
print("\n=== BEST HYPERPARAMETERS BY TRAINING SIZE ===")
for train_size, params in best_hyperparams_by_size.items():
    print(f"\nTraining Size: {train_size}")
    # print(f"Best Reward: {params['reward']:.6f}")
    print("Parameters:")
    for param_name, value in params['params'].items():
        print(f"  {param_name}: {value}")
print("===========================\n")

# Show the performance metrics
df4[['policy_rewards', 'ipw', 'reg_dm', 'conv_dm', 'conv_dr', 'conv_sndr', 'action_diff_to_real', 'action_delta', 'context_diff_to_real', 'context_delta']]

Value of num_rounds_list: [500, 1000, 2000, 10000, 20000]


[I 2025-10-03 09:04:17,516] A new study created in memory with name: no-name-169f4baa-7250-48a8-9ce6-557d5afac78f
Best trial: 0. Best value: 0.0973868:  10%|█         | 1/10 [00:02<00:20,  2.29s/it]

{'gini': np.float64(0.5408763652844771), 'ess': np.float64(4882.082356225308), 'max_wi': np.float64(4.974309042682669), 'min_wi': np.float64(0.013609885142103548)}
Cross-validated error: 0.011980501583473658
[I 2025-10-03 09:04:19,808] Trial 0 finished with value: 0.09738678028043174 and parameters: {'lr': 0.0095, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 8, 'lr_decay': 0.85}. Best is trial 0 with value: 0.09738678028043174.


Best trial: 1. Best value: 0.105499:  20%|██        | 2/10 [00:03<00:15,  1.91s/it] 

{'gini': np.float64(0.01797197100637786), 'ess': np.float64(9990.19152789681), 'max_wi': np.float64(1.0791301635348256), 'min_wi': np.float64(0.9243344014957211)}
Cross-validated error: 0.014381907371354133
[I 2025-10-03 09:04:21,451] Trial 1 finished with value: 0.10549875215643725 and parameters: {'lr': 0.0001680352115528969, 'num_epochs': 8, 'batch_size': 64, 'num_neighbors': 11, 'lr_decay': 0.9308811113151718}. Best is trial 1 with value: 0.10549875215643725.


Best trial: 1. Best value: 0.105499:  30%|███       | 3/10 [00:05<00:12,  1.80s/it]

{'gini': np.float64(0.0201928040463493), 'ess': np.float64(9987.701644253675), 'max_wi': np.float64(1.0818307274434076), 'min_wi': np.float64(0.9003839504892713)}
Cross-validated error: 0.01194431988393677
[I 2025-10-03 09:04:23,109] Trial 2 finished with value: 0.09735736027960769 and parameters: {'lr': 0.0005485072991226246, 'num_epochs': 7, 'batch_size': 256, 'num_neighbors': 3, 'lr_decay': 0.9662284722279784}. Best is trial 1 with value: 0.10549875215643725.


Best trial: 3. Best value: 0.106241:  40%|████      | 4/10 [00:07<00:10,  1.77s/it]

{'gini': np.float64(0.010559841214505115), 'ess': np.float64(9996.61196735597), 'max_wi': np.float64(1.0447450974118473), 'min_wi': np.float64(0.9524222284415562)}
Cross-validated error: 0.014595625731384428
[I 2025-10-03 09:04:24,847] Trial 3 finished with value: 0.10624061845361474 and parameters: {'lr': 0.00014018682938261441, 'num_epochs': 8, 'batch_size': 128, 'num_neighbors': 13, 'lr_decay': 0.9901095476172761}. Best is trial 3 with value: 0.10624061845361474.


Best trial: 4. Best value: 0.106289:  50%|█████     | 5/10 [00:09<00:08,  1.75s/it]

{'gini': np.float64(0.006949232534461349), 'ess': np.float64(9998.530438444272), 'max_wi': np.float64(1.026580048958152), 'min_wi': np.float64(0.9673608858825848)}
Cross-validated error: 0.014623277272925008
[I 2025-10-03 09:04:26,560] Trial 4 finished with value: 0.10628884565246671 and parameters: {'lr': 0.000331827788386619, 'num_epochs': 9, 'batch_size': 512, 'num_neighbors': 13, 'lr_decay': 0.8715655000146059}. Best is trial 4 with value: 0.10628884565246671.


Best trial: 4. Best value: 0.106289:  60%|██████    | 6/10 [00:10<00:06,  1.72s/it]

{'gini': np.float64(0.05332044370167832), 'ess': np.float64(9914.267456687096), 'max_wi': np.float64(1.2373627480683238), 'min_wi': np.float64(0.7870482997733796)}
Cross-validated error: 0.014217848397240603
[I 2025-10-03 09:04:28,218] Trial 5 finished with value: 0.10494907391059234 and parameters: {'lr': 0.0010066848267542376, 'num_epochs': 9, 'batch_size': 128, 'num_neighbors': 11, 'lr_decay': 0.8668150702751021}. Best is trial 4 with value: 0.10628884565246671.


Best trial: 4. Best value: 0.106289:  70%|███████   | 7/10 [00:12<00:05,  1.72s/it]

{'gini': np.float64(0.006825065815823183), 'ess': np.float64(9998.59068736535), 'max_wi': np.float64(1.02896471423402), 'min_wi': np.float64(0.967890785066115)}
Cross-validated error: 0.012554534846184406
[I 2025-10-03 09:04:29,947] Trial 6 finished with value: 0.09943372124681256 and parameters: {'lr': 0.0001357264339290163, 'num_epochs': 7, 'batch_size': 128, 'num_neighbors': 4, 'lr_decay': 0.9002068748413597}. Best is trial 4 with value: 0.10628884565246671.


Best trial: 4. Best value: 0.106289:  80%|████████  | 8/10 [00:14<00:03,  1.72s/it]

{'gini': np.float64(0.07593168708910926), 'ess': np.float64(9828.842675022173), 'max_wi': np.float64(1.3384013560125647), 'min_wi': np.float64(0.6868544118608172)}
Cross-validated error: 0.012715365936292933
[I 2025-10-03 09:04:31,658] Trial 7 finished with value: 0.1000792221735954 and parameters: {'lr': 0.0019157629581769138, 'num_epochs': 8, 'batch_size': 256, 'num_neighbors': 5, 'lr_decay': 0.9473713801775637}. Best is trial 4 with value: 0.10628884565246671.


Best trial: 8. Best value: 0.106733:  90%|█████████ | 9/10 [00:15<00:01,  1.68s/it]

{'gini': np.float64(0.0018928118497259195), 'ess': np.float64(9999.89176128932), 'max_wi': np.float64(1.0073736460633846), 'min_wi': np.float64(0.9911324970867667)}
Cross-validated error: 0.014752217551892268
[I 2025-10-03 09:04:33,252] Trial 8 finished with value: 0.10673330974576295 and parameters: {'lr': 0.00010345022701272071, 'num_epochs': 1, 'batch_size': 64, 'num_neighbors': 14, 'lr_decay': 0.8881793895906743}. Best is trial 8 with value: 0.10673330974576295.


Best trial: 8. Best value: 0.106733: 100%|██████████| 10/10 [00:17<00:00,  1.75s/it]

{'gini': np.float64(0.28543640009430443), 'ess': np.float64(7974.400590125582), 'max_wi': np.float64(2.5293325590846303), 'min_wi': np.float64(0.19175063846143017)}
Cross-validated error: 0.013476597817346378
[I 2025-10-03 09:04:34,975] Trial 9 finished with value: 0.1025708583967847 and parameters: {'lr': 0.005907569351334126, 'num_epochs': 7, 'batch_size': 128, 'num_neighbors': 14, 'lr_decay': 0.8695297774912722}. Best is trial 8 with value: 0.10673330974576295.



[I 2025-10-03 09:04:37,424] A new study created in memory with name: no-name-701a1824-b821-4631-bbae-b2290acd80c8
Best trial: 0. Best value: 0.0795214:  10%|█         | 1/10 [00:02<00:22,  2.47s/it]

{'gini': np.float64(0.002772831362944684), 'ess': np.float64(9999.76038383962), 'max_wi': np.float64(1.0148871984109997), 'min_wi': np.float64(0.9880227690415693)}
Cross-validated error: 0.007595475893131367
[I 2025-10-03 09:04:39,895] Trial 0 finished with value: 0.07952137913336883 and parameters: {'lr': 0.00010345022701272071, 'num_epochs': 1, 'batch_size': 64, 'num_neighbors': 14, 'lr_decay': 0.8881793895906743}. Best is trial 0 with value: 0.07952137913336883.


Best trial: 1. Best value: 0.0833523:  20%|██        | 2/10 [00:05<00:21,  2.68s/it]

{'gini': np.float64(0.5012463984508964), 'ess': np.float64(5275.617609694113), 'max_wi': np.float64(6.79090547613333), 'min_wi': np.float64(0.030999036488344334)}
Cross-validated error: 0.00843274946194202
[I 2025-10-03 09:04:42,725] Trial 1 finished with value: 0.08335233560141393 and parameters: {'lr': 0.0073321584275482885, 'num_epochs': 8, 'batch_size': 256, 'num_neighbors': 9, 'lr_decay': 0.98230026520774}. Best is trial 1 with value: 0.08335233560141393.


Best trial: 1. Best value: 0.0833523:  30%|███       | 3/10 [00:08<00:19,  2.76s/it]

{'gini': np.float64(0.056131073158623444), 'ess': np.float64(9905.757412637833), 'max_wi': np.float64(1.2910781279506485), 'min_wi': np.float64(0.7543757642766973)}
Cross-validated error: 0.007687618122982198
[I 2025-10-03 09:04:45,578] Trial 2 finished with value: 0.0799538351983148 and parameters: {'lr': 0.0015154769612196809, 'num_epochs': 7, 'batch_size': 512, 'num_neighbors': 14, 'lr_decay': 0.9810747837583759}. Best is trial 1 with value: 0.08335233560141393.


Best trial: 1. Best value: 0.0833523:  40%|████      | 4/10 [00:10<00:16,  2.78s/it]

{'gini': np.float64(0.0754080369523069), 'ess': np.float64(9811.3362479222), 'max_wi': np.float64(1.554697768138436), 'min_wi': np.float64(0.6839322972349889)}
Cross-validated error: 0.007596106861375402
[I 2025-10-03 09:04:48,387] Trial 3 finished with value: 0.07954878221727979 and parameters: {'lr': 0.0006195777575620426, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 12, 'lr_decay': 0.9886195024209409}. Best is trial 1 with value: 0.08335233560141393.


Best trial: 4. Best value: 0.0839697:  50%|█████     | 5/10 [00:13<00:13,  2.80s/it]

{'gini': np.float64(0.5819087511493397), 'ess': np.float64(4027.7198110287563), 'max_wi': np.float64(8.965068074465362), 'min_wi': np.float64(0.015154260531354434)}
Cross-validated error: 0.008571513529906573
[I 2025-10-03 09:04:51,219] Trial 4 finished with value: 0.08396972735897931 and parameters: {'lr': 0.00795480543369898, 'num_epochs': 6, 'batch_size': 128, 'num_neighbors': 8, 'lr_decay': 0.8913347707036113}. Best is trial 4 with value: 0.08396972735897931.


Best trial: 4. Best value: 0.0839697:  60%|██████    | 6/10 [00:16<00:11,  2.83s/it]

{'gini': np.float64(0.018427597558064085), 'ess': np.float64(9989.283196470373), 'max_wi': np.float64(1.1119075026120946), 'min_wi': np.float64(0.9137309545430227)}
Cross-validated error: 0.0071568737159690525
[I 2025-10-03 09:04:54,110] Trial 5 finished with value: 0.07740312503936109 and parameters: {'lr': 0.0003147622027196243, 'num_epochs': 8, 'batch_size': 128, 'num_neighbors': 4, 'lr_decay': 0.8074892044111905}. Best is trial 4 with value: 0.08396972735897931.


Best trial: 4. Best value: 0.0839697:  70%|███████   | 7/10 [00:19<00:08,  2.81s/it]

{'gini': np.float64(0.04449917007831878), 'ess': np.float64(9940.588860548267), 'max_wi': np.float64(1.2171975862036133), 'min_wi': np.float64(0.7880997967561271)}
Cross-validated error: 0.0073065304852222455
[I 2025-10-03 09:04:56,885] Trial 6 finished with value: 0.07808736739948151 and parameters: {'lr': 0.002106320948396899, 'num_epochs': 5, 'batch_size': 512, 'num_neighbors': 3, 'lr_decay': 0.8689080785340113}. Best is trial 4 with value: 0.08396972735897931.


Best trial: 4. Best value: 0.0839697:  80%|████████  | 8/10 [00:22<00:05,  2.80s/it]

{'gini': np.float64(0.13191344186476273), 'ess': np.float64(9470.489501519469), 'max_wi': np.float64(1.9636313836902264), 'min_wi': np.float64(0.5085553412723651)}
Cross-validated error: 0.007478040196128869
[I 2025-10-03 09:04:59,659] Trial 7 finished with value: 0.07899955405917103 and parameters: {'lr': 0.002363136473210058, 'num_epochs': 4, 'batch_size': 128, 'num_neighbors': 7, 'lr_decay': 0.9369094341949561}. Best is trial 4 with value: 0.08396972735897931.


Best trial: 4. Best value: 0.0839697:  90%|█████████ | 9/10 [00:25<00:02,  2.80s/it]

{'gini': np.float64(0.15519323630230425), 'ess': np.float64(9317.429914413362), 'max_wi': np.float64(1.905046089397706), 'min_wi': np.float64(0.47692792422111147)}
Cross-validated error: 0.007454707369446352
[I 2025-10-03 09:05:02,474] Trial 8 finished with value: 0.07882138242172783 and parameters: {'lr': 0.003079982352785121, 'num_epochs': 9, 'batch_size': 512, 'num_neighbors': 7, 'lr_decay': 0.9943979053405076}. Best is trial 4 with value: 0.08396972735897931.


Best trial: 4. Best value: 0.0839697: 100%|██████████| 10/10 [00:27<00:00,  2.77s/it]

{'gini': np.float64(0.029529658558914207), 'ess': np.float64(9973.600365096714), 'max_wi': np.float64(1.16327887020564), 'min_wi': np.float64(0.8635324705313909)}
Cross-validated error: 0.007368511263111573
[I 2025-10-03 09:05:05,118] Trial 9 finished with value: 0.07844439257565412 and parameters: {'lr': 0.0017616242014086862, 'num_epochs': 2, 'batch_size': 256, 'num_neighbors': 9, 'lr_decay': 0.8158195909907242}. Best is trial 4 with value: 0.08396972735897931.



[I 2025-10-03 09:05:08,683] A new study created in memory with name: no-name-d8de4f7b-b79a-44b7-9d16-32df44e76437
Best trial: 0. Best value: 0.0893808:  10%|█         | 1/10 [00:05<00:52,  5.86s/it]

{'gini': np.float64(0.841106933792803), 'ess': np.float64(1461.9620792556957), 'max_wi': np.float64(29.591045165680463), 'min_wi': np.float64(2.619701492959311e-05)}
Cross-validated error: 0.00984201030484691
[I 2025-10-03 09:05:14,545] Trial 0 finished with value: 0.08938079104680804 and parameters: {'lr': 0.00795480543369898, 'num_epochs': 6, 'batch_size': 128, 'num_neighbors': 8, 'lr_decay': 0.8913347707036113}. Best is trial 0 with value: 0.08938079104680804.


Best trial: 0. Best value: 0.0893808:  20%|██        | 2/10 [00:11<00:48,  6.01s/it]

{'gini': np.float64(0.0362589626579962), 'ess': np.float64(9957.14608798555), 'max_wi': np.float64(1.2931966440488933), 'min_wi': np.float64(0.8547468246569041)}
Cross-validated error: 0.00730093711960858
[I 2025-10-03 09:05:20,659] Trial 1 finished with value: 0.07809690865185973 and parameters: {'lr': 0.00022585448695936015, 'num_epochs': 9, 'batch_size': 128, 'num_neighbors': 9, 'lr_decay': 0.938472947477785}. Best is trial 0 with value: 0.08938079104680804.


Best trial: 0. Best value: 0.0893808:  30%|███       | 3/10 [00:17<00:41,  5.88s/it]

{'gini': np.float64(0.009565839588718867), 'ess': np.float64(9997.095752281804), 'max_wi': np.float64(1.0662123361466949), 'min_wi': np.float64(0.9561782969395795)}
Cross-validated error: 0.007331079948580098
[I 2025-10-03 09:05:26,382] Trial 2 finished with value: 0.07825855086636581 and parameters: {'lr': 0.00021333924230114757, 'num_epochs': 2, 'batch_size': 128, 'num_neighbors': 13, 'lr_decay': 0.8882139516630656}. Best is trial 0 with value: 0.08938079104680804.


Best trial: 0. Best value: 0.0893808:  40%|████      | 4/10 [00:23<00:35,  5.90s/it]

{'gini': np.float64(0.3319954915111625), 'ess': np.float64(7400.4250391389805), 'max_wi': np.float64(3.503414721609576), 'min_wi': np.float64(0.1442109577259517)}
Cross-validated error: 0.007226816612123165
[I 2025-10-03 09:05:32,323] Trial 3 finished with value: 0.07776394292309895 and parameters: {'lr': 0.008049213927223568, 'num_epochs': 7, 'batch_size': 512, 'num_neighbors': 9, 'lr_decay': 0.8056911436491063}. Best is trial 0 with value: 0.08938079104680804.


Best trial: 0. Best value: 0.0893808:  50%|█████     | 5/10 [00:29<00:29,  5.91s/it]

{'gini': np.float64(0.20781883291472775), 'ess': np.float64(8699.616312312388), 'max_wi': np.float64(3.1070863003580165), 'min_wi': np.float64(0.32503626273832203)}
Cross-validated error: 0.00732103722799157
[I 2025-10-03 09:05:38,246] Trial 4 finished with value: 0.07818558369216848 and parameters: {'lr': 0.001089495665304142, 'num_epochs': 7, 'batch_size': 128, 'num_neighbors': 14, 'lr_decay': 0.9940761420547026}. Best is trial 0 with value: 0.08938079104680804.


Best trial: 0. Best value: 0.0893808:  60%|██████    | 6/10 [00:35<00:23,  5.90s/it]

{'gini': np.float64(0.17844606117170883), 'ess': np.float64(9106.279250692469), 'max_wi': np.float64(2.161072244802891), 'min_wi': np.float64(0.3563673005079405)}
Cross-validated error: 0.007134744648206994
[I 2025-10-03 09:05:44,117] Trial 5 finished with value: 0.0772885049832056 and parameters: {'lr': 0.004188602780854737, 'num_epochs': 5, 'batch_size': 512, 'num_neighbors': 13, 'lr_decay': 0.9293308165684702}. Best is trial 0 with value: 0.08938079104680804.


Best trial: 0. Best value: 0.0893808:  70%|███████   | 7/10 [00:41<00:17,  5.92s/it]

{'gini': np.float64(0.0385983164143349), 'ess': np.float64(9949.092019053296), 'max_wi': np.float64(1.3647130727162224), 'min_wi': np.float64(0.8426297193463127)}
Cross-validated error: 0.007342303673120169
[I 2025-10-03 09:05:50,095] Trial 6 finished with value: 0.07829908040715065 and parameters: {'lr': 0.00022126102360956916, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 13, 'lr_decay': 0.8769894531392292}. Best is trial 0 with value: 0.08938079104680804.


Best trial: 0. Best value: 0.0893808:  80%|████████  | 8/10 [00:46<00:11,  5.78s/it]

{'gini': np.float64(0.0009703734369004563), 'ess': np.float64(9999.971398785907), 'max_wi': np.float64(1.005130437694238), 'min_wi': np.float64(0.9954927761860746)}
Cross-validated error: 0.007186608468676459
[I 2025-10-03 09:05:55,559] Trial 7 finished with value: 0.07754886725775735 and parameters: {'lr': 0.00010320122395725006, 'num_epochs': 1, 'batch_size': 512, 'num_neighbors': 6, 'lr_decay': 0.8532449669716528}. Best is trial 0 with value: 0.08938079104680804.


Best trial: 0. Best value: 0.0893808:  90%|█████████ | 9/10 [00:52<00:05,  5.86s/it]

{'gini': np.float64(0.02707598863575874), 'ess': np.float64(9977.314785273156), 'max_wi': np.float64(1.1719865219133354), 'min_wi': np.float64(0.879962673491328)}
Cross-validated error: 0.007154471056209168
[I 2025-10-03 09:06:01,594] Trial 8 finished with value: 0.07738605490479913 and parameters: {'lr': 0.00029579503331995894, 'num_epochs': 10, 'batch_size': 256, 'num_neighbors': 7, 'lr_decay': 0.8991193637126067}. Best is trial 0 with value: 0.08938079104680804.


Best trial: 0. Best value: 0.0893808: 100%|██████████| 10/10 [00:58<00:00,  5.89s/it]

{'gini': np.float64(0.24527607773466578), 'ess': np.float64(8035.924173494639), 'max_wi': np.float64(4.634497004641737), 'min_wi': np.float64(0.2851315399468486)}
Cross-validated error: 0.007285909674997118
[I 2025-10-03 09:06:07,606] Trial 9 finished with value: 0.07804426067665131 and parameters: {'lr': 0.0008617702492453986, 'num_epochs': 7, 'batch_size': 64, 'num_neighbors': 14, 'lr_decay': 0.9460045917777865}. Best is trial 0 with value: 0.08938079104680804.



[I 2025-10-03 09:06:14,536] A new study created in memory with name: no-name-b0887ac0-4368-4c1d-934e-fcab8cde66df
 10%|█         | 1/10 [00:28<04:14, 28.29s/it]

{'gini': np.float64(nan), 'ess': np.float64(nan), 'max_wi': np.float64(nan), 'min_wi': np.float64(nan)}
Cross-validated error: nan
[W 2025-10-03 09:06:42,825] Trial 0 failed with parameters: {'lr': 0.00795480543369898, 'num_epochs': 6, 'batch_size': 128, 'num_neighbors': 8, 'lr_decay': 0.8913347707036113} because of the following error: The value nan is not acceptable.
[W 2025-10-03 09:06:42,827] Trial 0 failed with value np.float64(nan).


Best trial: 1. Best value: 0.0781391:  20%|██        | 2/10 [00:55<03:40, 27.56s/it]

{'gini': np.float64(0.535360828029319), 'ess': np.float64(1226.9314123033573), 'max_wi': np.float64(70.8424501327653), 'min_wi': np.float64(0.027271086248446635)}
Cross-validated error: 0.00728706668280044
[I 2025-10-03 09:07:09,872] Trial 1 finished with value: 0.07813910325219577 and parameters: {'lr': 0.0013201428501364593, 'num_epochs': 2, 'batch_size': 64, 'num_neighbors': 3, 'lr_decay': 0.8786894696853078}. Best is trial 1 with value: 0.07813910325219577.


Best trial: 2. Best value: 0.0798523:  30%|███       | 3/10 [01:23<03:14, 27.79s/it]

{'gini': np.float64(0.12562386924181257), 'ess': np.float64(9488.51729319433), 'max_wi': np.float64(2.6562055019526416), 'min_wi': np.float64(0.4719375072087547)}
Cross-validated error: 0.007660459717774641
[I 2025-10-03 09:07:37,927] Trial 2 finished with value: 0.07985226995464018 and parameters: {'lr': 0.0009913379542560517, 'num_epochs': 3, 'batch_size': 256, 'num_neighbors': 4, 'lr_decay': 0.8797665662443057}. Best is trial 2 with value: 0.07985226995464018.


Best trial: 2. Best value: 0.0798523:  40%|████      | 4/10 [01:52<02:49, 28.23s/it]

{'gini': np.float64(0.6510579547637096), 'ess': np.float64(2194.5111535816022), 'max_wi': np.float64(28.553744843491287), 'min_wi': np.float64(0.0069484255745525785)}
Cross-validated error: 0.0071874654677722485
[I 2025-10-03 09:08:06,846] Trial 3 finished with value: 0.07760414989335102 and parameters: {'lr': 0.0008846749056693306, 'num_epochs': 10, 'batch_size': 128, 'num_neighbors': 3, 'lr_decay': 0.8807010813390942}. Best is trial 2 with value: 0.07985226995464018.


Best trial: 2. Best value: 0.0798523:  50%|█████     | 5/10 [02:20<02:21, 28.32s/it]

{'gini': np.float64(0.9453918762223533), 'ess': np.float64(552.035084401987), 'max_wi': np.float64(59.21626808539449), 'min_wi': np.float64(2.3459926472904433e-17)}
Cross-validated error: 0.0045836652969217925
[I 2025-10-03 09:08:35,304] Trial 4 finished with value: 0.06304506418162197 and parameters: {'lr': 0.005980219544379472, 'num_epochs': 9, 'batch_size': 512, 'num_neighbors': 11, 'lr_decay': 0.983311202790802}. Best is trial 2 with value: 0.07985226995464018.


Best trial: 2. Best value: 0.0798523:  60%|██████    | 6/10 [02:47<01:51, 27.93s/it]

{'gini': np.float64(0.006536197341082572), 'ess': np.float64(9998.636921312996), 'max_wi': np.float64(1.0589804899952393), 'min_wi': np.float64(0.963206047454218)}
Cross-validated error: 0.007524693880454322
[I 2025-10-03 09:09:02,492] Trial 5 finished with value: 0.07920276713616475 and parameters: {'lr': 0.00015678956842466558, 'num_epochs': 2, 'batch_size': 512, 'num_neighbors': 3, 'lr_decay': 0.9390826773947627}. Best is trial 2 with value: 0.07985226995464018.


Best trial: 2. Best value: 0.0798523:  70%|███████   | 7/10 [03:15<01:23, 27.77s/it]

{'gini': np.float64(nan), 'ess': np.float64(nan), 'max_wi': np.float64(nan), 'min_wi': np.float64(nan)}
Cross-validated error: nan
[W 2025-10-03 09:09:29,920] Trial 6 failed with parameters: {'lr': 0.007676209125218136, 'num_epochs': 4, 'batch_size': 128, 'num_neighbors': 10, 'lr_decay': 0.8592285471089017} because of the following error: The value nan is not acceptable.
[W 2025-10-03 09:09:29,923] Trial 6 failed with value np.float64(nan).


Best trial: 7. Best value: 0.0799319:  80%|████████  | 8/10 [03:44<00:56, 28.19s/it]

{'gini': np.float64(0.0788785959126813), 'ess': np.float64(9797.205637736415), 'max_wi': np.float64(2.02572265340848), 'min_wi': np.float64(0.6416322910017295)}
Cross-validated error: 0.007673947941696247
[I 2025-10-03 09:09:59,015] Trial 7 finished with value: 0.0799319150009248 and parameters: {'lr': 0.00040798363326324767, 'num_epochs': 7, 'batch_size': 256, 'num_neighbors': 4, 'lr_decay': 0.8622207092390923}. Best is trial 7 with value: 0.0799319150009248.


Best trial: 8. Best value: 0.0808558:  90%|█████████ | 9/10 [04:11<00:27, 27.89s/it]

{'gini': np.float64(0.40714000772837533), 'ess': np.float64(5091.372957985714), 'max_wi': np.float64(15.314586640816756), 'min_wi': np.float64(0.08757745002198282)}
Cross-validated error: 0.007882992605670243
[I 2025-10-03 09:10:26,262] Trial 8 finished with value: 0.08085583957715733 and parameters: {'lr': 0.001057751687141123, 'num_epochs': 4, 'batch_size': 128, 'num_neighbors': 11, 'lr_decay': 0.9292649182060168}. Best is trial 8 with value: 0.08085583957715733.


Best trial: 8. Best value: 0.0808558: 100%|██████████| 10/10 [04:40<00:00, 28.03s/it]

{'gini': np.float64(0.020943648867416242), 'ess': np.float64(9986.07946001195), 'max_wi': np.float64(1.194614630915409), 'min_wi': np.float64(0.8942020637662148)}
Cross-validated error: 0.007760523172595296
[I 2025-10-03 09:10:54,805] Trial 9 finished with value: 0.08033928584504936 and parameters: {'lr': 0.00020385431878927853, 'num_epochs': 9, 'batch_size': 512, 'num_neighbors': 9, 'lr_decay': 0.8218954825453086}. Best is trial 8 with value: 0.08085583957715733.



[I 2025-10-03 09:11:23,170] A new study created in memory with name: no-name-c77a1850-28bb-46c7-b70f-c715a39ae206
Best trial: 0. Best value: 0.0817541:  10%|█         | 1/10 [00:53<08:05, 53.96s/it]

{'gini': np.float64(0.8351326349754516), 'ess': np.float64(764.4131774056045), 'max_wi': np.float64(106.2450102190157), 'min_wi': np.float64(0.0005061625204629458)}
Cross-validated error: 0.00808339079554762
[I 2025-10-03 09:12:17,129] Trial 0 finished with value: 0.08175410476620434 and parameters: {'lr': 0.001057751687141123, 'num_epochs': 4, 'batch_size': 128, 'num_neighbors': 11, 'lr_decay': 0.9292649182060168}. Best is trial 0 with value: 0.08175410476620434.


Best trial: 0. Best value: 0.0817541:  20%|██        | 2/10 [01:49<07:21, 55.14s/it]

{'gini': np.float64(nan), 'ess': np.float64(nan), 'max_wi': np.float64(nan), 'min_wi': np.float64(nan)}
Cross-validated error: nan
[W 2025-10-03 09:13:13,088] Trial 1 failed with parameters: {'lr': 0.0061661661678437075, 'num_epochs': 6, 'batch_size': 256, 'num_neighbors': 10, 'lr_decay': 0.9626892878404031} because of the following error: The value nan is not acceptable.
[W 2025-10-03 09:13:13,089] Trial 1 failed with value np.float64(nan).


Best trial: 0. Best value: 0.0817541:  30%|███       | 3/10 [02:43<06:22, 54.58s/it]

{'gini': np.float64(0.01968092083239589), 'ess': np.float64(9987.115034806928), 'max_wi': np.float64(1.3207703663012333), 'min_wi': np.float64(0.888080271983963)}
Cross-validated error: 0.00796695965062322
[I 2025-10-03 09:14:07,014] Trial 2 finished with value: 0.08125670376573482 and parameters: {'lr': 0.00012367679189982797, 'num_epochs': 4, 'batch_size': 256, 'num_neighbors': 8, 'lr_decay': 0.882107293352424}. Best is trial 0 with value: 0.08175410476620434.


Best trial: 0. Best value: 0.0817541:  40%|████      | 4/10 [03:37<05:24, 54.10s/it]

{'gini': np.float64(0.9120560137664325), 'ess': np.float64(744.3561817022219), 'max_wi': np.float64(39.8967183943896), 'min_wi': np.float64(1.4790333330551482e-05)}
Cross-validated error: 0.006859825674867071
[I 2025-10-03 09:15:00,364] Trial 3 finished with value: 0.07597110207377238 and parameters: {'lr': 0.006328923045314836, 'num_epochs': 1, 'batch_size': 256, 'num_neighbors': 7, 'lr_decay': 0.8940341041961013}. Best is trial 0 with value: 0.08175410476620434.


Best trial: 0. Best value: 0.0817541:  50%|█████     | 5/10 [04:33<04:34, 54.94s/it]

{'gini': np.float64(nan), 'ess': np.float64(nan), 'max_wi': np.float64(nan), 'min_wi': np.float64(nan)}
Cross-validated error: nan
[W 2025-10-03 09:15:56,803] Trial 4 failed with parameters: {'lr': 0.007576345780520827, 'num_epochs': 5, 'batch_size': 256, 'num_neighbors': 15, 'lr_decay': 0.8234595841193371} because of the following error: The value nan is not acceptable.
[W 2025-10-03 09:15:56,805] Trial 4 failed with value np.float64(nan).


Best trial: 0. Best value: 0.0817541:  60%|██████    | 6/10 [05:31<03:44, 56.08s/it]

{'gini': np.float64(0.1002654283614938), 'ess': np.float64(9510.438546016274), 'max_wi': np.float64(6.920661326347152), 'min_wi': np.float64(0.49662729217687646)}
Cross-validated error: 0.007758852180265147
[I 2025-10-03 09:16:55,096] Trial 5 finished with value: 0.08031600881136369 and parameters: {'lr': 0.00020382149357573188, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 5, 'lr_decay': 0.8026911032402457}. Best is trial 0 with value: 0.08175410476620434.


Best trial: 0. Best value: 0.0817541:  70%|███████   | 7/10 [06:28<02:48, 56.31s/it]

{'gini': np.float64(0.010500674427019033), 'ess': np.float64(9996.277883025088), 'max_wi': np.float64(1.171213011436049), 'min_wi': np.float64(0.9310729472454724)}
Cross-validated error: 0.007953930698028737
[I 2025-10-03 09:17:51,871] Trial 6 finished with value: 0.08119313127832697 and parameters: {'lr': 0.00012752215572038188, 'num_epochs': 2, 'batch_size': 256, 'num_neighbors': 9, 'lr_decay': 0.8364311284807797}. Best is trial 0 with value: 0.08175410476620434.


Best trial: 0. Best value: 0.0817541:  80%|████████  | 8/10 [07:28<01:54, 57.34s/it]

{'gini': np.float64(0.05046990367033756), 'ess': np.float64(9915.400119613267), 'max_wi': np.float64(1.8631653683862253), 'min_wi': np.float64(0.6918292872884597)}
Cross-validated error: 0.007830978076501276
[I 2025-10-03 09:18:51,421] Trial 7 finished with value: 0.08061945093113865 and parameters: {'lr': 0.00018578800977892202, 'num_epochs': 8, 'batch_size': 256, 'num_neighbors': 7, 'lr_decay': 0.8573844525961813}. Best is trial 0 with value: 0.08175410476620434.


Best trial: 0. Best value: 0.0817541:  90%|█████████ | 9/10 [08:26<00:57, 57.66s/it]

{'gini': np.float64(0.5021395341181711), 'ess': np.float64(1040.8720809691808), 'max_wi': np.float64(139.36428228469367), 'min_wi': np.float64(0.03457216270397974)}
Cross-validated error: 0.0074079715325781155
[I 2025-10-03 09:19:49,798] Trial 8 finished with value: 0.07863817698892177 and parameters: {'lr': 0.00040899236168407907, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 7, 'lr_decay': 0.8595473920713887}. Best is trial 0 with value: 0.08175410476620434.


Best trial: 0. Best value: 0.0817541: 100%|██████████| 10/10 [09:25<00:00, 56.53s/it]

{'gini': np.float64(0.0538048045794857), 'ess': np.float64(9893.110912276441), 'max_wi': np.float64(2.5949051633638556), 'min_wi': np.float64(0.6948755192897506)}
Cross-validated error: 0.007703712748694646
[I 2025-10-03 09:20:48,423] Trial 9 finished with value: 0.08005771635175683 and parameters: {'lr': 0.0003482629812230131, 'num_epochs': 2, 'batch_size': 128, 'num_neighbors': 3, 'lr_decay': 0.9148168178474805}. Best is trial 0 with value: 0.08175410476620434.






=== BEST HYPERPARAMETERS BY TRAINING SIZE ===

Training Size: 500
Parameters:


KeyError: 'params'

In [7]:
df4[['policy_rewards', 'ipw', 'reg_dm', 'conv_dm', 'conv_dr', 'conv_sndr', 'action_diff_to_real', 'action_delta', 'context_diff_to_real', 'context_delta']]

Unnamed: 0,policy_rewards,ipw,reg_dm,conv_dm,conv_dr,conv_sndr,action_diff_to_real,action_delta,context_diff_to_real,context_delta
0,0.08610747,0.08611765,0.08751241,0.09178279,0.09119538,0.08897455,0.7569287,0.0,0.87627132,0.0
500,0.08610803,0.0851855,0.10581378,0.12150808,0.11026441,0.06799217,0.75692486,0.00025455,0.87627243,0.00012422
1000,0.08654208,0.0851555,0.08791228,0.0901107,0.09101994,0.09363095,0.76531032,0.12496686,0.87972439,0.05553527
2000,0.08734578,0.10292059,0.08114754,0.08605725,0.09373308,0.1089554,0.78380759,0.21309293,0.89112363,0.09057986
10000,0.08616457,0.08526466,0.08415045,0.08838887,0.08738445,0.08521387,0.75946979,0.05993718,0.87631002,0.02535974
20000,0.08658768,0.10000923,0.08665215,0.09042937,0.09040613,0.09037048,0.76479957,0.09941219,0.88121865,0.04027442


In [None]:
# Show the performance metrics
df4[['policy_rewards', 'ipw', 'reg_dm', 'conv_dm', 'conv_dr', 'conv_sndr', 'action_diff_to_real', 'action_delta', 'context_diff_to_real', 'context_delta']]


Unnamed: 0,policy_rewards,ipw,reg_dm,conv_dm,conv_dr,conv_sndr,action_diff_to_real,action_delta,context_diff_to_real,context_delta
0,0.08610747,0.1069709,0.09051612,0.09112201,0.09452505,0.10672373,0.7569287,0.0,0.87627132,0.0
500,0.08705604,0.09221834,,0.08299331,0.08198609,0.07599146,0.79170973,0.24615559,0.88427728,0.08661758
1000,0.08939145,0.11301958,,0.08679147,0.09052395,0.10668506,1.01853061,0.76340735,0.91464524,0.19321758
2000,0.09251861,0.10603409,,0.09028676,0.15397776,0.10628439,1.73862067,1.70789298,0.99652312,0.34170287
10000,0.09268524,0.09704712,,0.09829317,0.09539229,0.09257621,2.18938809,2.22344507,1.03555944,0.40151858
20000,0.09264639,0.09493701,,0.09216787,0.09134166,0.09056984,2.21242505,2.24879912,1.03520993,0.40065441
