In [15]:
import warnings
warnings.filterwarnings("ignore")
from copy import deepcopy
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import sys

sys.path.append("/code")

from tqdm import tqdm
import torch
# device = torch.device('cpu')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# import gym
# import recogym

import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim

torch.backends.cudnn.benchmark = torch.cuda.is_available()
if torch.cuda.is_available():
    torch.set_float32_matmul_precision("high")  # TF32 = big speedup on Ada


from sklearn.utils import check_random_state

# implementing OPE of the IPWLearner using synthetic bandit data
from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt

from scipy.special import softmax
import optuna
# from memory_profiler import profile


from estimators import (
    DirectMethod as DM
)

from simulation_utils import (
    eval_policy,
    generate_dataset,
    create_simulation_data_from_pi,
    get_train_data,
    get_opl_results_dict,
    CustomCFDataset,
    calc_reward,
    get_weights_info
)

from models import (    
    LinearCFModel,
    NeighborhoodModel,
    BPRModel, 
    RegressionModel
)

from training_utils import (
    train,
    validation_loop, 
    cv_score_model
 )

from custom_losses import (
    SNDRPolicyLoss,
    IPWPolicyLoss
    )

random_state=12345
random_ = check_random_state(random_state)

pd.options.display.float_format = '{:,.8f}'.format

Using device: cpu


In [16]:
def get_trial_results(
    our_x, 
    our_a, 
    emb_x, 
    emb_a, 
    original_x, 
    original_a, 
    dataset, 
    val_data, 
    original_policy_prob, 
    neighberhoodmodel, 
    regression_model, 
    dm
):
    policy = np.expand_dims(softmax(our_x @ our_a.T, axis=1), -1)
    policy_reward = calc_reward(dataset, policy)
    eval_metrics = eval_policy(neighberhoodmodel, val_data, original_policy_prob, policy)
    action_diff_to_real = np.sqrt(np.mean((emb_a - our_a) ** 2))
    action_delta = np.sqrt(np.mean((original_a - our_a) ** 2))
    context_diff_to_real = np.sqrt(np.mean((emb_x - our_x) ** 2))
    context_delta = np.sqrt(np.mean((original_x - our_x) ** 2))

    row = np.concatenate([
        np.atleast_1d(policy_reward),
        np.atleast_1d(eval_metrics),
        np.atleast_1d(action_diff_to_real),
        np.atleast_1d(action_delta),
        np.atleast_1d(context_diff_to_real),
        np.atleast_1d(context_delta)
    ])
    reg_dm = dm.estimate_policy_value(policy[val_data['x_idx']], regression_model.predict(val_data['x']))
    reg_results = np.array([reg_dm])
    conv_results = np.array([row])
    return get_opl_results_dict(reg_results, conv_results)

## `trainer_trial` Function

This function runs policy learning experiments using offline bandit data and evaluates various estimators.

### Parameters
- **num_runs** (int): Number of experimental runs per training size
- **num_neighbors** (int): Number of neighbors to consider in the neighborhood model
- **num_rounds_list** (list): List of training set sizes to evaluate
- **dataset** (dict): Contains dataset information including embeddings, action probabilities, and reward probabilities
- **batch_size** (int): Batch size for training the policy model
- **num_epochs** (int): Number of training epochs for each experiment
- **lr** (float, default=0.001): Learning rate for the optimizer

### Process Flow
1. Initializes result structures and retrieval models
2. For each training size in `num_rounds_list`:
   - Creates a uniform logging policy and simulates data
   - Generates training data for offline learning
   - Fits regression and neighborhood models for reward estimation
   - Initializes and trains a counterfactual policy model
   - Evaluates policy performance using various estimators
   - Collects metrics on policy reward and embedding quality

### Returns
- **DataFrame**: Results table with rows indexed by training size and columns for various metrics:
  - `policy_rewards`: True expected reward of the learned policy
  - Various estimator errors (`ipw`, `reg_dm`, `conv_dm`, `conv_dr`, `conv_sndr`)
  - Variance metrics for each estimator
  - Embedding quality metrics comparing learned representations to ground truth

### Implementation Notes
- Uses uniform random logging policy for collecting offline data
- Employs Self-Normalized Doubly Robust (SNDR) policy learning
- Measures embedding quality via RMSE to original/ground truth embeddings

In [17]:
def trainer_trial(
    num_runs,
    num_neighbors,
    train_sizes,
    dataset,
    batch_size,
    val_size=2000,
    n_trials=10,    
    prev_best_params=None
):

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    torch.backends.cudnn.benchmark = torch.cuda.is_available()
    if torch.cuda.is_available():
        torch.set_float32_matmul_precision("high")

    dm = DM()
    results = {}

    our_x, our_a = dataset["our_x"], dataset["our_a"]
    emb_x, emb_a = dataset["emb_x"], dataset["emb_a"]

    original_x, original_a = dataset["original_x"], dataset["original_a"]
    n_users, n_actions, emb_dim = dataset["n_users"], dataset["n_actions"], dataset["emb_dim"]

    all_user_indices = np.arange(n_users, dtype=np.int64)

    def T(x):
        return torch.as_tensor(x, device=device, dtype=torch.float32)

    def _mean_dict(dicts):
        """
        Robust mean over a list of dicts with numeric/scalar/1D-array values.
        Returns a single dict with elementwise means.
        """
        if not dicts:
            return {}
        keys = dicts[0].keys()
        out = {}
        for k in keys:
            vals = [d[k] for d in dicts if k in d]
            # try to convert each to np.array and average
            arrs = [np.asarray(v) for v in vals]
            # broadcast to same shape if scalars/1D
            stacked = np.stack(arrs, axis=0)
            out[k] = np.mean(stacked, axis=0)
        return out

    # ===== unpack dataset (keep originals safe) =====
    our_x_orig, our_a_orig = our_x, our_a
    emb_x, emb_a = emb_x, emb_a
    original_x, original_a = original_x, original_a
    n_users, n_actions, emb_dim = n_users, n_actions, emb_dim
    all_user_indices = np.arange(n_users, dtype=np.int64)

    dm = DM()
    results = {}
    best_hyperparams_by_size = {}
    last_best_params = prev_best_params if prev_best_params is not None else None

    # ===== baseline (sample size = 0) using get_trial_results =====
    pi_0 = softmax(our_x_orig @ our_a_orig.T, axis=1)
    original_policy_prob = np.expand_dims(pi_0, -1)

    simulation_data = create_simulation_data_from_pi(
        dataset, pi_0, val_size, random_state=0
    )

    # use same data for train/val just to generate the baseline row
    train_data = get_train_data(n_actions, val_size, simulation_data, np.arange(val_size), our_x_orig)
    val_data   = get_train_data(n_actions, val_size, simulation_data, np.arange(val_size), our_x_orig)

    regression_model = RegressionModel(
        n_actions=n_actions, action_context=our_x_orig,
        base_model=LogisticRegression(random_state=12345)
    )

    regression_model.fit(train_data['x'], train_data['a'], train_data['r'])

    neighberhoodmodel = NeighborhoodModel(
        train_data['x_idx'], train_data['a'],
        our_a_orig, our_x_orig, train_data['r'],
        num_neighbors=num_neighbors
    )

    # baseline row produced via get_trial_results
    results[0] = get_trial_results(
        our_x_orig, our_a_orig, emb_x, emb_a, original_x, original_a,
        dataset, val_data, original_policy_prob,
        neighberhoodmodel, regression_model, dm
    )

    # ===== main loop over training sizes =====
    for train_size in train_sizes:

        # we’ll collect per-run trial dicts generated by get_trial_results
        trial_dicts_this_size = []
        best_hyperparams_by_size[train_size] = {}

        # --- prepare a resampling for Optuna’s objective (shared loaders built per-run inside objective) ---
        # We’ll do Optuna per-run (fresh resample + search), then final fit with best params, then get_trial_results.

        for run in range(num_runs):

            # --- resample for this run ---
            pi_0 = softmax(our_x_orig @ our_a_orig.T, axis=1)
            original_policy_prob = np.expand_dims(pi_0, -1)

            simulation_data = create_simulation_data_from_pi(
                dataset, pi_0, train_size + val_size,
                random_state=(run + 1) * (train_size + 17)
            )

            idx_train = np.arange(train_size)
            train_data = get_train_data(n_actions, train_size, simulation_data, idx_train, our_x_orig)
            val_idx   = np.arange(val_size) + train_size
            val_data  = get_train_data(n_actions, val_size, simulation_data, val_idx, our_x_orig)

            num_workers = 4 if torch.cuda.is_available() else 0

            cf_dataset = CustomCFDataset(
                train_data['x_idx'], train_data['a'], train_data['r'], original_policy_prob
            )

            # val_loader = DataLoader(
            #     val_dataset, batch_size=val_size, shuffle=False,
            #     pin_memory=torch.cuda.is_available(),
            #     num_workers=num_workers, persistent_workers=bool(num_workers)
            # )


            # --- Optuna objective bound to this run's data ---
            def objective(trial):
                lr = trial.suggest_float("lr", 1e-4, 1e-1, log=True)
                epochs = trial.suggest_int("num_epochs", 1, 10)
                trial_batch_size = trial.suggest_categorical("batch_size", [64, 128, 256, 512])
                trial_num_neighbors = trial.suggest_int("num_neighbors", 3, 15)
                lr_decay = trial.suggest_float("lr_decay", 0.8, 1.0)

                trial_neigh_model = NeighborhoodModel(
                    train_data['x_idx'], train_data['a'],
                    our_a_orig, our_x_orig, train_data['r'],
                    num_neighbors=trial_num_neighbors
                )

                trial_scores_all = torch.as_tensor(
                    trial_neigh_model.predict(all_user_indices),
                    device=device, dtype=torch.float32
                )

                trial_model = LinearCFModel(
                    n_users, n_actions, emb_dim,
                    initial_user_embeddings=T(our_x_orig),
                    initial_actions_embeddings=T(our_a_orig)
                ).to(device)

                assert (not torch.cuda.is_available()) or next(trial_model.parameters()).is_cuda

                final_train_loader = DataLoader(
                    cf_dataset, batch_size=trial_batch_size, shuffle=True,
                    pin_memory=torch.cuda.is_available(),
                    num_workers=num_workers, persistent_workers=bool(num_workers)
                )

                current_lr = lr
                for epoch in range(epochs):
                    if epoch > 0:
                        current_lr *= lr_decay
                        
                    train(
                        trial_model, final_train_loader, trial_scores_all,
                        criterion=SNDRPolicyLoss(), num_epochs=1, lr=current_lr, device=str(device)
                    )

                trial_x, trial_a = trial_model.get_params()
                trial_x = trial_x.detach().cpu().numpy()
                trial_a = trial_a.detach().cpu().numpy()

                pi_i = softmax(trial_x @ trial_a.T, axis=1)
                train_actions = train_data['a']
                train_users = train_data['x_idx']

                print("Train wi info: {}".format(get_weights_info(pi_i[train_users, train_actions], original_policy_prob[train_users, train_actions])))
                print(f"actual reward: {calc_reward(dataset, np.expand_dims(pi_i, -1))}")

                # print(get_weights_info(pi_i, original_policy_prob))
                # validation reward for selection
                return cv_score_model(val_data, trial_scores_all, pi_i)


            # --- run Optuna for this run ---
            study = optuna.create_study(direction="maximize")
            
            if last_best_params is not None:
                study.enqueue_trial(last_best_params)

            study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

            best_params = study.best_params
            last_best_params = best_params  # optional warm-start to next run
            best_hyperparams_by_size[train_size][run] = {
                "params": best_params,
                "reward": study.best_value
            }


            # --- final training with best params on this run’s data ---
            regression_model = RegressionModel(
                n_actions=n_actions, action_context=our_x_orig,
                base_model=LogisticRegression(random_state=12345)
            )
            regression_model.fit(
                train_data['x'], train_data['a'], train_data['r'],
                original_policy_prob[train_data['x_idx'], train_data['a']].squeeze()
            )

            neighberhoodmodel = NeighborhoodModel(
                train_data['x_idx'], train_data['a'],
                our_a_orig, our_x_orig, train_data['r'],
                num_neighbors=best_params['num_neighbors']
            )
            scores_all = torch.as_tensor(
                neighberhoodmodel.predict(all_user_indices),
                device=device, dtype=torch.float32
            )

            model = LinearCFModel(
                n_users, n_actions, emb_dim,
                initial_user_embeddings=T(our_x_orig),
                initial_actions_embeddings=T(our_a_orig)
            ).to(device)
            assert (not torch.cuda.is_available()) or next(model.parameters()).is_cuda

            train_loader = DataLoader(
                cf_dataset, batch_size=batch_size, shuffle=True,
                pin_memory=torch.cuda.is_available(),
                num_workers=num_workers, persistent_workers=bool(num_workers)
            )

            current_lr = best_params['lr']
            for epoch in range(best_params['num_epochs']):
                if epoch > 0:
                    current_lr *= best_params['lr_decay']
                train(
                    model, train_loader, scores_all,
                    criterion=SNDRPolicyLoss(), num_epochs=1, lr=current_lr, device=str(device)
                )

            # learned embeddings (do NOT overwrite originals)
            learned_x_t, learned_a_t = model.get_params()
            learned_x = learned_x_t.detach().cpu().numpy()
            learned_a = learned_a_t.detach().cpu().numpy()

            # --- produce the per-run result via get_trial_results ---
            trial_res = get_trial_results(
                learned_x, learned_a,          # learned (policy) embeddings
                emb_x, emb_a,                  # ground-truth embedding refs
                original_x, original_a,        # original clean refs
                dataset,
                val_data,                      # use this run's val split
                original_policy_prob,
                neighberhoodmodel,
                regression_model,
                dm
            )

            trial_dicts_this_size.append(trial_res)

            # memory hygiene
            torch.cuda.empty_cache()

        # === aggregate per-run results (mean) and store under this train_size ===
        results[train_size] = _mean_dict(trial_dicts_this_size)

    return pd.DataFrame.from_dict(results, orient='index'), best_hyperparams_by_size

## Learning

We will run several simulations on a generated dataset, the dataset is generated like this:
$$ \text{We have users U and actions A } u_i \sim N(0, I_{emb_dim}) \ a_i \sim N(0, I_{emb_dim})$$
$$ p_{ij} = 1 / (5 + e^{-(u_i.T a_j)}) $$
$$r_{ij} \sim Bin(p_{ij})$$

We have a policy $\pi$
and it's ground truth reward is calculated by
$$R_{gt} = \sum_{i}{\sum_{j}{\pi_{ij} * p_{ij}}} $$

Our parameters for the dataset will be
$$EmbDim = 5$$
$$NumActions= 150$$
$$NumUsers = 150$$
$$NeighborhoodSize = 6$$

to learn a new policy from $\pi$ we will sample from:
$$\pi_{start} = (1-\epsilon)*\pi + \epsilon * \pi_{random}$$

In [18]:
dataset_params = dict(
                    n_actions= 500,
                    n_users = 500,
                    emb_dim = 16,
                    # sigma = 0.1,
                    eps = 0.6, # this is the epsilon for the noise in the ground truth policy representation
                    ctr = 0.1
                    )

train_dataset = generate_dataset(dataset_params)

Random Item CTR: 0.07066414727263938
Optimal greedy CTR: 0.09999926940951757
Optimal Stochastic CTR: 0.09995326955796031
Our Initial CTR: 0.08610747363354625


In [19]:
num_runs = 1
batch_size = 200
num_neighbors = 6
n_trials_for_optuna = 130
# num_rounds_list = [500, 1000, 2000, 10000, 20000]
# num_rounds_list = [500, 1000, 2000]
num_rounds_list = [10000]


# Manually define your best parameters
best_params_to_use = {
    "lr": 0.096,  # Learning rate
    "num_epochs": 5,  # Number of training epochs
    "batch_size": 64,  # Batch size for training
    "num_neighbors": 8,  # Number of neighbors for neighborhood model
    "lr_decay": 0.85  # Learning rate decay factor
}

### 1

$$emb = 0.7 * gt + 0.3 * noise$$
$$lr = 0.005$$
$$n_{epochs} = 1$$
$$BatchSize=50$$

In [20]:
print("Value of num_rounds_list:", num_rounds_list)

# Run the optimization
df4, best_hyperparams_by_size = trainer_trial(num_runs, num_neighbors, num_rounds_list, train_dataset, batch_size, val_size=10000, n_trials=n_trials_for_optuna, prev_best_params=best_params_to_use)

# # Print best hyperparameters for each training size
# print("\n=== BEST HYPERPARAMETERS BY TRAINING SIZE ===")
# for train_size, params in best_hyperparams_by_size.items():
#     print(f"\nTraining Size: {train_size}")
#     # print(f"Best Reward: {params['reward']:.6f}")
#     print("Parameters:")
#     for param_name, value in params['params'].items():
#         print(f"  {param_name}: {value}")
# print("===========================\n")

# Show the performance metrics
df4[['policy_rewards', 'ipw', 'reg_dm', 'conv_dm', 'conv_dr', 'conv_sndr', 'action_diff_to_real', 'action_delta', 'context_diff_to_real', 'context_delta']]

Value of num_rounds_list: [10000]
{'gini': np.float64(0.47685131792927526), 'ess': np.float64(4330.110147965239), 'max_wi': np.float64(24.891555218309605), 'min_wi': np.float64(0.01099593205545304)}


[I 2025-10-22 19:25:29,579] A new study created in memory with name: no-name-5a72e571-27a6-4a27-ac37-185bca074dda
Best trial: 0. Best value: 0.155775:   1%|          | 1/130 [00:28<1:01:26, 28.58s/it]

Train wi info: {'gini': np.float64(0.9996856823952184), 'ess': np.float64(3.2630568587040774), 'max_wi': np.float64(1434.2099184445567), 'min_wi': np.float64(0.0)}
actual reward: [0.07809204]
{'gini': np.float64(0.9987791248591928), 'ess': np.float64(13.366944535759918), 'max_wi': np.float64(1521.3761295683719), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.03731290091635575
[I 2025-10-22 19:25:58,157] Trial 0 finished with value: 0.15577512422940984 and parameters: {'lr': 0.096, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 8, 'lr_decay': 0.85}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:   2%|▏         | 2/130 [00:56<1:00:13, 28.23s/it]

Train wi info: {'gini': np.float64(0.06754522384704473), 'ess': np.float64(9839.738849305188), 'max_wi': np.float64(2.1188916939816917), 'min_wi': np.float64(0.623718830576928)}
actual reward: [0.08617363]
{'gini': np.float64(0.07764902593568582), 'ess': np.float64(9727.268948988665), 'max_wi': np.float64(4.487088834408382), 'min_wi': np.float64(0.6431959289309364)}
Cross-validated error: 0.006964275225297747
[I 2025-10-22 19:26:26,137] Trial 1 finished with value: 0.07648213451423591 and parameters: {'lr': 0.00029666197068718686, 'num_epochs': 3, 'batch_size': 64, 'num_neighbors': 15, 'lr_decay': 0.959404683515283}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:   2%|▏         | 3/130 [01:26<1:01:50, 29.22s/it]

Train wi info: {'gini': np.float64(0.9998999999145446), 'ess': np.float64(1.0000008526009097), 'max_wi': np.float64(260.7208470164333), 'min_wi': np.float64(0.0)}
actual reward: [0.07327086]
{'gini': np.float64(0.9998996303570483), 'ess': np.float64(1.0037019041008046), 'max_wi': np.float64(3016.762385830918), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.011026041138672634
[I 2025-10-22 19:26:56,532] Trial 2 finished with value: 0.09401657028924039 and parameters: {'lr': 0.04681531262931554, 'num_epochs': 8, 'batch_size': 256, 'num_neighbors': 7, 'lr_decay': 0.8283186142530213}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:   3%|▎         | 4/130 [01:55<1:00:30, 28.82s/it]

Train wi info: {'gini': np.float64(0.9999000000000002), 'ess': np.float64(1.000000000000154), 'max_wi': np.float64(126.59656580050371), 'min_wi': np.float64(0.0)}
actual reward: [0.07385806]
{'gini': np.float64(0.9998802578647803), 'ess': np.float64(1.2148380354131636), 'max_wi': np.float64(3016.762385830918), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.009876388102965374
[I 2025-10-22 19:27:24,734] Trial 3 finished with value: 0.08939465582685147 and parameters: {'lr': 0.07771145163872377, 'num_epochs': 4, 'batch_size': 256, 'num_neighbors': 3, 'lr_decay': 0.934593417029115}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:   4%|▍         | 5/130 [02:22<58:58, 28.31s/it]  

Train wi info: {'gini': np.float64(0.7222747494334051), 'ess': np.float64(2294.777286331136), 'max_wi': np.float64(11.559098109759079), 'min_wi': np.float64(0.0012137101508526698)}
actual reward: [0.08734076]
{'gini': np.float64(0.6992609901554078), 'ess': np.float64(2731.318587831988), 'max_wi': np.float64(13.010761831594847), 'min_wi': np.float64(0.0012137101508526698)}
Cross-validated error: 0.007672334824683485
[I 2025-10-22 19:27:52,137] Trial 4 finished with value: 0.07990724666208575 and parameters: {'lr': 0.009362481224345603, 'num_epochs': 2, 'batch_size': 512, 'num_neighbors': 7, 'lr_decay': 0.9050304996631072}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:   5%|▍         | 6/130 [02:53<1:00:37, 29.33s/it]

Train wi info: {'gini': np.float64(0.3390092414721803), 'ess': np.float64(6780.599879311425), 'max_wi': np.float64(4.814206515656831), 'min_wi': np.float64(0.06931725870374461)}
actual reward: [0.08636229]
{'gini': np.float64(0.3521290137236276), 'ess': np.float64(6599.212454840106), 'max_wi': np.float64(11.376317355587672), 'min_wi': np.float64(0.08885742933898988)}
Cross-validated error: 0.007594278837929313
[I 2025-10-22 19:28:23,461] Trial 5 finished with value: 0.07954118440584688 and parameters: {'lr': 0.0009347257326737729, 'num_epochs': 8, 'batch_size': 256, 'num_neighbors': 7, 'lr_decay': 0.9456944425454763}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:   5%|▌         | 7/130 [03:24<1:01:10, 29.84s/it]

Train wi info: {'gini': np.float64(0.032149979476869366), 'ess': np.float64(9967.975148944319), 'max_wi': np.float64(1.2173771878707684), 'min_wi': np.float64(0.8320777215781443)}
actual reward: [0.0861227]
{'gini': np.float64(0.0339149623151697), 'ess': np.float64(9963.320624105128), 'max_wi': np.float64(1.4075894422931294), 'min_wi': np.float64(0.8320777215781443)}
Cross-validated error: 0.006982634565534203
[I 2025-10-22 19:28:54,347] Trial 6 finished with value: 0.07659570282122788 and parameters: {'lr': 0.0001814549987537259, 'num_epochs': 10, 'batch_size': 512, 'num_neighbors': 5, 'lr_decay': 0.9756937471625889}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:   6%|▌         | 8/130 [03:56<1:01:36, 30.30s/it]

Train wi info: {'gini': np.float64(0.9997600358926475), 'ess': np.float64(2.641281883714857), 'max_wi': np.float64(2064.102429750472), 'min_wi': np.float64(5.454082468130782e-38)}
actual reward: [0.07722529]
{'gini': np.float64(0.9995497028336594), 'ess': np.float64(4.83528487947567), 'max_wi': np.float64(712.8785855477456), 'min_wi': np.float64(3.6666230995813864e-37)}
Cross-validated error: 0.0029525411091424108
[I 2025-10-22 19:29:25,621] Trial 7 finished with value: 0.051426900346947226 and parameters: {'lr': 0.021137116629194277, 'num_epochs': 10, 'batch_size': 64, 'num_neighbors': 10, 'lr_decay': 0.8591300678763523}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:   7%|▋         | 9/130 [04:25<1:00:34, 30.04s/it]

Train wi info: {'gini': np.float64(0.38070468629798926), 'ess': np.float64(6570.99140857599), 'max_wi': np.float64(3.7142737337843017), 'min_wi': np.float64(0.06052061892808605)}
actual reward: [0.08641592]
{'gini': np.float64(0.3780785971034834), 'ess': np.float64(6685.437198106068), 'max_wi': np.float64(7.301227579693666), 'min_wi': np.float64(0.07735644468699568)}
Cross-validated error: 0.007275144127960875
[I 2025-10-22 19:29:55,093] Trial 8 finished with value: 0.07803456040908213 and parameters: {'lr': 0.0021939906331934823, 'num_epochs': 9, 'batch_size': 512, 'num_neighbors': 11, 'lr_decay': 0.8394647632786477}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:   8%|▊         | 10/130 [04:54<59:10, 29.58s/it] 

Train wi info: {'gini': np.float64(0.9452351747343198), 'ess': np.float64(543.3801724471361), 'max_wi': np.float64(44.880011701827875), 'min_wi': np.float64(1.6014438317084853e-10)}
actual reward: [0.08447478]
{'gini': np.float64(0.9306986356386232), 'ess': np.float64(671.4776892826493), 'max_wi': np.float64(58.05366260988869), 'min_wi': np.float64(2.1307522758758392e-10)}
Cross-validated error: 0.009509846811695382
[I 2025-10-22 19:30:23,660] Trial 9 finished with value: 0.08805413017962838 and parameters: {'lr': 0.008961124988001409, 'num_epochs': 4, 'batch_size': 512, 'num_neighbors': 7, 'lr_decay': 0.9978867552310002}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:   8%|▊         | 11/130 [05:23<58:17, 29.39s/it]

Train wi info: {'gini': np.float64(0.9956302859971466), 'ess': np.float64(34.34062012006947), 'max_wi': np.float64(237.79155694868712), 'min_wi': np.float64(1.0943111639459285e-14)}
actual reward: [0.0880949]
{'gini': np.float64(0.9904563798775345), 'ess': np.float64(83.47001919202698), 'max_wi': np.float64(403.90602135170104), 'min_wi': np.float64(3.4956056261100856e-14)}
Cross-validated error: 0.010393960372659947
[I 2025-10-22 19:30:52,614] Trial 10 finished with value: 0.09150859865084668 and parameters: {'lr': 0.003589390202659133, 'num_epochs': 6, 'batch_size': 128, 'num_neighbors': 13, 'lr_decay': 0.8739340861067478}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:   9%|▉         | 12/130 [05:52<58:04, 29.53s/it]

Train wi info: {'gini': np.float64(0.9990884759499955), 'ess': np.float64(5.9631468246352215), 'max_wi': np.float64(4122.3729638007335), 'min_wi': np.float64(0.0)}
actual reward: [0.07558322]
{'gini': np.float64(0.9989187527600397), 'ess': np.float64(12.039357449227822), 'max_wi': np.float64(795.4791639443399), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.013460700709983424
[I 2025-10-22 19:31:22,446] Trial 11 finished with value: 0.10249454440467645 and parameters: {'lr': 0.07999167239668471, 'num_epochs': 7, 'batch_size': 64, 'num_neighbors': 9, 'lr_decay': 0.8099931534776409}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  10%|█         | 13/130 [06:22<57:36, 29.54s/it]

Train wi info: {'gini': np.float64(0.9994785742072266), 'ess': np.float64(5.564201104147319), 'max_wi': np.float64(4122.3729638007335), 'min_wi': np.float64(0.0)}
actual reward: [0.07998644]
{'gini': np.float64(0.9996688383846023), 'ess': np.float64(3.3122030843895702), 'max_wi': np.float64(3910.7114110370076), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.004338826962830659
[I 2025-10-22 19:31:52,023] Trial 12 finished with value: 0.06148146329878484 and parameters: {'lr': 0.09859480932033006, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 9, 'lr_decay': 0.8110678368121389}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  11%|█         | 14/130 [06:51<57:01, 29.49s/it]

Train wi info: {'gini': np.float64(0.9994707384556779), 'ess': np.float64(5.025082778407032), 'max_wi': np.float64(3006.7809075401096), 'min_wi': np.float64(2.624108236632245e-35)}
actual reward: [0.07768458]
{'gini': np.float64(0.9993690814268574), 'ess': np.float64(6.824466841430676), 'max_wi': np.float64(1276.0418862234912), 'min_wi': np.float64(1.077541931847414e-36)}
Cross-validated error: 0.008048733794556026
[I 2025-10-22 19:32:21,400] Trial 13 finished with value: 0.0816517363744166 and parameters: {'lr': 0.0257809150935247, 'num_epochs': 7, 'batch_size': 64, 'num_neighbors': 12, 'lr_decay': 0.8005487342102906}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  12%|█▏        | 15/130 [07:21<56:38, 29.55s/it]

Train wi info: {'gini': np.float64(0.9997453750135514), 'ess': np.float64(2.734313696010562), 'max_wi': np.float64(2272.768980697348), 'min_wi': np.float64(0.0)}
actual reward: [0.06759659]
{'gini': np.float64(0.9997541857630575), 'ess': np.float64(2.3727142250709194), 'max_wi': np.float64(3501.2712125636963), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.012254796650292374
[I 2025-10-22 19:32:51,087] Trial 14 finished with value: 0.09843386739569106 and parameters: {'lr': 0.028087068853344853, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 9, 'lr_decay': 0.873115690256264}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  12%|█▏        | 16/130 [07:48<54:50, 28.87s/it]

Train wi info: {'gini': np.float64(0.9813120691096441), 'ess': np.float64(126.20156160022692), 'max_wi': np.float64(183.71791507297903), 'min_wi': np.float64(8.728536344046455e-11)}
actual reward: [0.08830834]
{'gini': np.float64(0.9747908579026764), 'ess': np.float64(204.20392918265063), 'max_wi': np.float64(168.2607098540004), 'min_wi': np.float64(2.1917774757917944e-10)}
Cross-validated error: 0.0130819924454561
[I 2025-10-22 19:33:18,365] Trial 15 finished with value: 0.10133979748589354 and parameters: {'lr': 0.009884806863927424, 'num_epochs': 1, 'batch_size': 128, 'num_neighbors': 5, 'lr_decay': 0.8415037081448169}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  13%|█▎        | 17/130 [08:17<54:29, 28.93s/it]

Train wi info: {'gini': np.float64(0.9988686427138215), 'ess': np.float64(11.41490016088526), 'max_wi': np.float64(1025.6118785076742), 'min_wi': np.float64(0.0)}
actual reward: [0.07225123]
{'gini': np.float64(0.9991346050960437), 'ess': np.float64(9.53953426265318), 'max_wi': np.float64(966.72623461283), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.007877028432900467
[I 2025-10-22 19:33:47,446] Trial 16 finished with value: 0.08077226577365437 and parameters: {'lr': 0.09720374040181927, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 9, 'lr_decay': 0.9019620311624309}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  14%|█▍        | 18/130 [08:47<54:38, 29.27s/it]

Train wi info: {'gini': np.float64(0.9991794953018187), 'ess': np.float64(8.256996840703842), 'max_wi': np.float64(4122.371489525277), 'min_wi': np.float64(1.6995330790642836e-41)}
actual reward: [0.07998218]
{'gini': np.float64(0.9995980266364531), 'ess': np.float64(3.8232088379764377), 'max_wi': np.float64(3908.2594682753065), 'min_wi': np.float64(2.8260791888621683e-42)}
Cross-validated error: 0.003555895272439787
[I 2025-10-22 19:34:17,521] Trial 17 finished with value: 0.05605985881083216 and parameters: {'lr': 0.03576986328509208, 'num_epochs': 7, 'batch_size': 64, 'num_neighbors': 14, 'lr_decay': 0.8201180365353083}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  15%|█▍        | 19/130 [09:16<53:55, 29.15s/it]

Train wi info: {'gini': np.float64(0.9995349988913298), 'ess': np.float64(4.812393883702236), 'max_wi': np.float64(915.9327923223857), 'min_wi': np.float64(6.108955806963592e-21)}
actual reward: [0.09009965]
{'gini': np.float64(0.9981915395095462), 'ess': np.float64(15.824653050402667), 'max_wi': np.float64(1831.7517391496754), 'min_wi': np.float64(2.2742003986907965e-20)}
Cross-validated error: 0.009203319479865467
[I 2025-10-22 19:34:46,384] Trial 18 finished with value: 0.08669347311022127 and parameters: {'lr': 0.0036672655523955064, 'num_epochs': 7, 'batch_size': 64, 'num_neighbors': 11, 'lr_decay': 0.8536612862245909}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  15%|█▌        | 20/130 [09:44<52:26, 28.61s/it]

Train wi info: {'gini': np.float64(0.9998548694298413), 'ess': np.float64(1.312487080935756), 'max_wi': np.float64(0.7841237204973945), 'min_wi': np.float64(7.446346385897233e-32)}
actual reward: [0.07570145]
{'gini': np.float64(0.999777672401083), 'ess': np.float64(1.547110471027507), 'max_wi': np.float64(2978.9009499430713), 'min_wi': np.float64(7.446346385897233e-32)}
Cross-validated error: 0.009449469491928375
[I 2025-10-22 19:35:13,727] Trial 19 finished with value: 0.08771105314362827 and parameters: {'lr': 0.015334272953273588, 'num_epochs': 4, 'batch_size': 128, 'num_neighbors': 5, 'lr_decay': 0.874998531775642}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  16%|█▌        | 21/130 [10:14<52:54, 29.13s/it]

Train wi info: {'gini': np.float64(0.633258444433714), 'ess': np.float64(1297.5839261239487), 'max_wi': np.float64(65.24080176208601), 'min_wi': np.float64(0.0012702980304176983)}
actual reward: [0.08665622]
{'gini': np.float64(0.7570781506736145), 'ess': np.float64(229.2640215353896), 'max_wi': np.float64(510.523493456957), 'min_wi': np.float64(0.0014274851401665802)}
Cross-validated error: 0.007763680224249441
[I 2025-10-22 19:35:44,061] Trial 20 finished with value: 0.0803332988058531 and parameters: {'lr': 0.0009369834889080311, 'num_epochs': 8, 'batch_size': 64, 'num_neighbors': 8, 'lr_decay': 0.8013355099128316}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  17%|█▋        | 22/130 [10:41<51:06, 28.39s/it]

Train wi info: {'gini': np.float64(0.9994865663703955), 'ess': np.float64(5.253819403175266), 'max_wi': np.float64(650.0701462037279), 'min_wi': np.float64(8.606155233557306e-43)}
actual reward: [0.0795977]
{'gini': np.float64(0.9985618410155064), 'ess': np.float64(13.308465559331546), 'max_wi': np.float64(2565.0891297742687), 'min_wi': np.float64(8.606155233557306e-43)}
Cross-validated error: 0.0019098947160422464
[I 2025-10-22 19:36:10,742] Trial 21 finished with value: 0.041762140505416594 and parameters: {'lr': 0.047631374370212, 'num_epochs': 1, 'batch_size': 128, 'num_neighbors': 5, 'lr_decay': 0.8366915605767126}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  18%|█▊        | 23/130 [11:08<50:03, 28.07s/it]

Train wi info: {'gini': np.float64(0.9849474431453586), 'ess': np.float64(139.23795363405267), 'max_wi': np.float64(118.98506854811993), 'min_wi': np.float64(3.6548972037090236e-11)}
actual reward: [0.0892136]
{'gini': np.float64(0.9780278049777394), 'ess': np.float64(205.81730293378646), 'max_wi': np.float64(144.88465945023222), 'min_wi': np.float64(3.6548972037090236e-11)}
Cross-validated error: 0.006492126462716295
[I 2025-10-22 19:36:38,054] Trial 22 finished with value: 0.07408372712874418 and parameters: {'lr': 0.010629418866352214, 'num_epochs': 1, 'batch_size': 128, 'num_neighbors': 4, 'lr_decay': 0.8500623638469073}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  18%|█▊        | 24/130 [11:35<49:03, 27.77s/it]

Train wi info: {'gini': np.float64(0.9996829555792236), 'ess': np.float64(3.4102821443816635), 'max_wi': np.float64(1834.539594300567), 'min_wi': np.float64(0.0)}
actual reward: [0.08255541]
{'gini': np.float64(0.998803240894307), 'ess': np.float64(12.631684331723022), 'max_wi': np.float64(1443.140627019243), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.008061663937121204
[I 2025-10-22 19:37:05,127] Trial 23 finished with value: 0.08168678242662766 and parameters: {'lr': 0.0583788367517951, 'num_epochs': 2, 'batch_size': 128, 'num_neighbors': 6, 'lr_decay': 0.8274966530286337}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  19%|█▉        | 25/130 [12:02<48:21, 27.63s/it]

Train wi info: {'gini': np.float64(0.983795484727462), 'ess': np.float64(165.654811049106), 'max_wi': np.float64(101.48750461045229), 'min_wi': np.float64(7.319259183238384e-13)}
actual reward: [0.08475853]
{'gini': np.float64(0.9787305391545067), 'ess': np.float64(209.50114091838725), 'max_wi': np.float64(207.5361091027889), 'min_wi': np.float64(1.8481390825609737e-12)}
Cross-validated error: 0.007595039023258631
[I 2025-10-22 19:37:32,428] Trial 24 finished with value: 0.07949294959667873 and parameters: {'lr': 0.005221703784944987, 'num_epochs': 3, 'batch_size': 128, 'num_neighbors': 10, 'lr_decay': 0.8857149778307266}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  20%|██        | 26/130 [12:31<48:20, 27.89s/it]

Train wi info: {'gini': np.float64(0.9228354406043626), 'ess': np.float64(149.79236849056886), 'max_wi': np.float64(130.58244809679636), 'min_wi': np.float64(1.3207278195148365e-06)}
actual reward: [0.08772829]
{'gini': np.float64(0.9547898817023207), 'ess': np.float64(90.44046445717207), 'max_wi': np.float64(476.4440041057234), 'min_wi': np.float64(1.7990594435137882e-06)}
Cross-validated error: 0.00859975653877389
[I 2025-10-22 19:38:00,918] Trial 25 finished with value: 0.084117789269057 and parameters: {'lr': 0.001411198863776249, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 3, 'lr_decay': 0.8169147954660473}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  21%|██        | 27/130 [13:01<48:48, 28.43s/it]

Train wi info: {'gini': np.float64(0.9997932755603903), 'ess': np.float64(2.2611127052970295), 'max_wi': np.float64(1790.3482541623032), 'min_wi': np.float64(3.9425866744119574e-36)}
actual reward: [0.07820558]
{'gini': np.float64(0.9995152871035758), 'ess': np.float64(5.069261926522589), 'max_wi': np.float64(4605.340038243471), 'min_wi': np.float64(3.9425866744119574e-36)}
Cross-validated error: 0.004991780319608895
[I 2025-10-22 19:38:30,629] Trial 26 finished with value: 0.06566958306189728 and parameters: {'lr': 0.015724585451800454, 'num_epochs': 9, 'batch_size': 128, 'num_neighbors': 8, 'lr_decay': 0.9194094071784392}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  22%|██▏       | 28/130 [13:29<48:19, 28.43s/it]

Train wi info: {'gini': np.float64(0.9788572356033243), 'ess': np.float64(194.9115277892955), 'max_wi': np.float64(105.33171270904796), 'min_wi': np.float64(6.888425933084372e-14)}
actual reward: [0.08640991]
{'gini': np.float64(0.9667139752094631), 'ess': np.float64(328.7055109180435), 'max_wi': np.float64(106.72795615708279), 'min_wi': np.float64(1.227012391790804e-13)}
Cross-validated error: 0.009560492232529319
[I 2025-10-22 19:38:59,048] Trial 27 finished with value: 0.08818893220911726 and parameters: {'lr': 0.00634111880122032, 'num_epochs': 6, 'batch_size': 256, 'num_neighbors': 6, 'lr_decay': 0.8464927260876766}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  22%|██▏       | 29/130 [13:57<47:25, 28.17s/it]

Train wi info: {'gini': np.float64(0.9992533338089788), 'ess': np.float64(8.19793506318637), 'max_wi': np.float64(1285.3647195390197), 'min_wi': np.float64(0.0)}
actual reward: [0.07729643]
{'gini': np.float64(0.9990950167133003), 'ess': np.float64(9.948583216120175), 'max_wi': np.float64(2007.0073415947902), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.004087808391151147
[I 2025-10-22 19:39:26,617] Trial 28 finished with value: 0.05983029879723827 and parameters: {'lr': 0.06070988797718032, 'num_epochs': 2, 'batch_size': 64, 'num_neighbors': 8, 'lr_decay': 0.8611893174732134}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  23%|██▎       | 30/130 [14:25<47:00, 28.20s/it]

Train wi info: {'gini': np.float64(0.09714505205508002), 'ess': np.float64(9678.065890052403), 'max_wi': np.float64(2.602718860186639), 'min_wi': np.float64(0.5205781778189743)}
actual reward: [0.08620561]
{'gini': np.float64(0.11265646076420144), 'ess': np.float64(9404.498872272943), 'max_wi': np.float64(7.310218558043534), 'min_wi': np.float64(0.5205781778189743)}
Cross-validated error: 0.007006375841574209
[I 2025-10-22 19:39:54,893] Trial 29 finished with value: 0.07670561104796478 and parameters: {'lr': 0.0003371503226286603, 'num_epochs': 4, 'batch_size': 64, 'num_neighbors': 15, 'lr_decay': 0.8859866010112527}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  24%|██▍       | 31/130 [14:52<46:12, 28.01s/it]

Train wi info: {'gini': np.float64(0.999899959650073), 'ess': np.float64(1.0002774662904188), 'max_wi': np.float64(52.68025959915734), 'min_wi': np.float64(3.098743740325863e-42)}
actual reward: [0.07887239]
{'gini': np.float64(0.999774285522977), 'ess': np.float64(2.3717787159616743), 'max_wi': np.float64(2481.478894100597), 'min_wi': np.float64(3.098743740325863e-42)}
Cross-validated error: 0.013884307833198637
[I 2025-10-22 19:40:22,437] Trial 30 finished with value: 0.10388896820647626 and parameters: {'lr': 0.03581700718714128, 'num_epochs': 3, 'batch_size': 128, 'num_neighbors': 4, 'lr_decay': 0.8339241950398497}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  25%|██▍       | 32/130 [15:20<45:23, 27.79s/it]

Train wi info: {'gini': np.float64(0.9984408448576988), 'ess': np.float64(16.889626644571138), 'max_wi': np.float64(620.5998575197523), 'min_wi': np.float64(4.818179296666422e-42)}
actual reward: [0.0692893]
{'gini': np.float64(0.9985668157297201), 'ess': np.float64(13.371012641859274), 'max_wi': np.float64(2155.6958405477), 'min_wi': np.float64(3.0195172171243273e-42)}
Cross-validated error: 0.00901846093818111
[I 2025-10-22 19:40:49,725] Trial 31 finished with value: 0.08600720115968469 and parameters: {'lr': 0.03419360654358727, 'num_epochs': 3, 'batch_size': 128, 'num_neighbors': 4, 'lr_decay': 0.8362421860872884}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  25%|██▌       | 33/130 [15:46<44:24, 27.47s/it]

Train wi info: {'gini': np.float64(0.9993441818847844), 'ess': np.float64(6.650007143862649), 'max_wi': np.float64(1456.9714745103097), 'min_wi': np.float64(0.0)}
actual reward: [0.07989675]
{'gini': np.float64(0.9991849212773412), 'ess': np.float64(9.010836926769048), 'max_wi': np.float64(673.749030467124), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.004414060885299662
[I 2025-10-22 19:41:16,446] Trial 32 finished with value: 0.06190770462135746 and parameters: {'lr': 0.048851102510398384, 'num_epochs': 1, 'batch_size': 128, 'num_neighbors': 4, 'lr_decay': 0.8276353220050989}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  26%|██▌       | 34/130 [16:14<44:01, 27.51s/it]

Train wi info: {'gini': np.float64(0.9997349156442843), 'ess': np.float64(2.1373746102782047), 'max_wi': np.float64(79.15658261277964), 'min_wi': np.float64(3.4281306220885774e-30)}
actual reward: [0.08309701]
{'gini': np.float64(0.999573714190393), 'ess': np.float64(3.928524493991329), 'max_wi': np.float64(1971.0133793966645), 'min_wi': np.float64(3.4281306220885774e-30)}
Cross-validated error: 0.01068252034531123
[I 2025-10-22 19:41:44,057] Trial 33 finished with value: 0.09255897142816506 and parameters: {'lr': 0.01903128139321826, 'num_epochs': 3, 'batch_size': 128, 'num_neighbors': 6, 'lr_decay': 0.8110368154302406}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  27%|██▋       | 35/130 [16:41<43:23, 27.40s/it]

Train wi info: {'gini': np.float64(0.9998977694424162), 'ess': np.float64(1.014544846794373), 'max_wi': np.float64(846.4032436952374), 'min_wi': np.float64(0.0)}
actual reward: [0.07982487]
{'gini': np.float64(0.9998298240831242), 'ess': np.float64(1.8327696566278944), 'max_wi': np.float64(3016.72066920323), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.010313507605704902
[I 2025-10-22 19:42:11,199] Trial 34 finished with value: 0.09107970773890829 and parameters: {'lr': 0.06917031150552432, 'num_epochs': 2, 'batch_size': 256, 'num_neighbors': 3, 'lr_decay': 0.8247074000774953}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  28%|██▊       | 36/130 [17:09<43:04, 27.49s/it]

Train wi info: {'gini': np.float64(0.9998297982123214), 'ess': np.float64(1.8371895515343986), 'max_wi': np.float64(72.86193459176296), 'min_wi': np.float64(1.6181443467151095e-37)}
actual reward: [0.08227707]
{'gini': np.float64(0.9996679033238045), 'ess': np.float64(2.9912767631876016), 'max_wi': np.float64(2948.50571153307), 'min_wi': np.float64(1.6181443467151095e-37)}
Cross-validated error: 0.0069714780463738055
[I 2025-10-22 19:42:38,914] Trial 35 finished with value: 0.07641271222051217 and parameters: {'lr': 0.032687500723870264, 'num_epochs': 3, 'batch_size': 128, 'num_neighbors': 6, 'lr_decay': 0.8413262573401156}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  28%|██▊       | 37/130 [17:37<42:47, 27.61s/it]

Train wi info: {'gini': np.float64(0.9908306645275659), 'ess': np.float64(92.81048277837276), 'max_wi': np.float64(173.34605143174213), 'min_wi': np.float64(2.3450984281035663e-22)}
actual reward: [0.08351167]
{'gini': np.float64(0.9889890388581583), 'ess': np.float64(105.58700055156736), 'max_wi': np.float64(253.876386604732), 'min_wi': np.float64(1.496557085226705e-21)}
Cross-validated error: 0.009287400323347014
[I 2025-10-22 19:43:06,793] Trial 36 finished with value: 0.08705639162446181 and parameters: {'lr': 0.01335474248249655, 'num_epochs': 4, 'batch_size': 256, 'num_neighbors': 10, 'lr_decay': 0.8113820363658812}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  29%|██▉       | 38/130 [18:03<41:53, 27.32s/it]

Train wi info: {'gini': np.float64(0.9831964505808898), 'ess': np.float64(123.31611205486364), 'max_wi': np.float64(239.43982691054828), 'min_wi': np.float64(1.7351966813748207e-24)}
actual reward: [0.08672223]
{'gini': np.float64(0.9767409061439016), 'ess': np.float64(169.28570990530298), 'max_wi': np.float64(230.89012246328193), 'min_wi': np.float64(2.445631611591707e-24)}
Cross-validated error: 0.005800604360935621
[I 2025-10-22 19:43:33,452] Trial 37 finished with value: 0.07036621595289103 and parameters: {'lr': 0.07378225606242049, 'num_epochs': 1, 'batch_size': 512, 'num_neighbors': 7, 'lr_decay': 0.8635299072294464}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  30%|███       | 39/130 [18:34<42:45, 28.19s/it]

Train wi info: {'gini': np.float64(0.9993190271833785), 'ess': np.float64(7.30314653467283), 'max_wi': np.float64(3331.15891077032), 'min_wi': np.float64(0.0)}
actual reward: [0.08596835]
{'gini': np.float64(0.9994975642157325), 'ess': np.float64(5.205469477988927), 'max_wi': np.float64(4144.005883522078), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.002983566490968875
[I 2025-10-22 19:44:03,661] Trial 38 finished with value: 0.05155465397076364 and parameters: {'lr': 0.04075440115423081, 'num_epochs': 8, 'batch_size': 64, 'num_neighbors': 5, 'lr_decay': 0.8330536535054988}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  31%|███       | 40/130 [19:02<42:14, 28.16s/it]

Train wi info: {'gini': np.float64(0.9997597113715012), 'ess': np.float64(2.2019119814265538), 'max_wi': np.float64(1798.5127768656896), 'min_wi': np.float64(1.784216133361127e-29)}
actual reward: [0.08067488]
{'gini': np.float64(0.999370856201846), 'ess': np.float64(6.681768052363867), 'max_wi': np.float64(2087.6944752282893), 'min_wi': np.float64(6.40377645292421e-29)}
Cross-validated error: 0.00509813134012527
[I 2025-10-22 19:44:31,763] Trial 39 finished with value: 0.06630969427854207 and parameters: {'lr': 0.021205467974733525, 'num_epochs': 2, 'batch_size': 128, 'num_neighbors': 8, 'lr_decay': 0.887066479315707}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  32%|███▏      | 41/130 [19:31<42:09, 28.42s/it]

Train wi info: {'gini': np.float64(0.999768231463194), 'ess': np.float64(2.45801878772299), 'max_wi': np.float64(312.08050553870123), 'min_wi': np.float64(0.0)}
actual reward: [0.07713258]
{'gini': np.float64(0.9995040853192508), 'ess': np.float64(5.40374313099339), 'max_wi': np.float64(3681.204551267216), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.0037490499411232292
[I 2025-10-22 19:45:00,786] Trial 40 finished with value: 0.057411275634101575 and parameters: {'lr': 0.09620116634568182, 'num_epochs': 5, 'batch_size': 512, 'num_neighbors': 3, 'lr_decay': 0.9194862213637561}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  32%|███▏      | 42/130 [20:00<41:55, 28.59s/it]

Train wi info: {'gini': np.float64(0.9993371332774342), 'ess': np.float64(7.32833454197056), 'max_wi': np.float64(2206.060975600709), 'min_wi': np.float64(1.6775689002546805e-39)}
actual reward: [0.07489336]
{'gini': np.float64(0.9992044814318746), 'ess': np.float64(8.836170656007713), 'max_wi': np.float64(1407.5788247261537), 'min_wi': np.float64(3.3029479366621355e-39)}
Cross-validated error: 0.006149671016732529
[I 2025-10-22 19:45:29,766] Trial 41 finished with value: 0.07222010936188245 and parameters: {'lr': 0.026450679211348763, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 9, 'lr_decay': 0.8695840742612658}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  33%|███▎      | 43/130 [20:30<42:09, 29.08s/it]

Train wi info: {'gini': np.float64(0.9995331154216011), 'ess': np.float64(5.051845757051079), 'max_wi': np.float64(798.7214938584954), 'min_wi': np.float64(0.0)}
actual reward: [0.07585534]
{'gini': np.float64(0.9987852683539641), 'ess': np.float64(13.358349922174394), 'max_wi': np.float64(1468.6401255608382), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.008869963092751546
[I 2025-10-22 19:45:59,980] Trial 42 finished with value: 0.08537999056338613 and parameters: {'lr': 0.050345090257295236, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 11, 'lr_decay': 0.8528730156851958}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  34%|███▍      | 44/130 [21:00<42:11, 29.44s/it]

Train wi info: {'gini': np.float64(0.9996568966337634), 'ess': np.float64(3.315546944257059), 'max_wi': np.float64(1776.7593909279735), 'min_wi': np.float64(4.320380701753405e-24)}
actual reward: [0.084796]
{'gini': np.float64(0.9993870941349374), 'ess': np.float64(6.158886012387804), 'max_wi': np.float64(3594.940216712588), 'min_wi': np.float64(6.090360621176172e-24)}
Cross-validated error: 0.005222065690879785
[I 2025-10-22 19:46:30,259] Trial 43 finished with value: 0.06705714004088921 and parameters: {'lr': 0.00802741045190781, 'num_epochs': 7, 'batch_size': 64, 'num_neighbors': 9, 'lr_decay': 0.8443914955118337}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  35%|███▍      | 45/130 [21:30<41:52, 29.56s/it]

Train wi info: {'gini': np.float64(0.9995207691525964), 'ess': np.float64(5.117021151356233), 'max_wi': np.float64(3265.2079663172003), 'min_wi': np.float64(0.0)}
actual reward: [0.07814761]
{'gini': np.float64(0.9997324016670102), 'ess': np.float64(2.7680901384405376), 'max_wi': np.float64(4574.367853940445), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.004820771534617209
[I 2025-10-22 19:47:00,091] Trial 44 finished with value: 0.06455335917338316 and parameters: {'lr': 0.026526395921031103, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 10, 'lr_decay': 0.8683760282835177}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  35%|███▌      | 46/130 [22:00<41:34, 29.69s/it]

Train wi info: {'gini': np.float64(0.9992847244525302), 'ess': np.float64(7.0185602013089285), 'max_wi': np.float64(3288.1667109898035), 'min_wi': np.float64(0.0)}
actual reward: [0.07365948]
{'gini': np.float64(0.9990256591589726), 'ess': np.float64(10.784731776195365), 'max_wi': np.float64(1277.6374807105783), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.0027335271728638625
[I 2025-10-22 19:47:30,106] Trial 45 finished with value: 0.04956537613118054 and parameters: {'lr': 0.07928387091243126, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 8, 'lr_decay': 0.8580754329591711}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  36%|███▌      | 47/130 [22:30<41:03, 29.68s/it]

Train wi info: {'gini': np.float64(0.9995171356502386), 'ess': np.float64(4.773375678701112), 'max_wi': np.float64(1236.1331364535122), 'min_wi': np.float64(3.8915760855704455e-32)}
actual reward: [0.08166156]
{'gini': np.float64(0.9990387114603575), 'ess': np.float64(9.262992975370674), 'max_wi': np.float64(2138.1934841084344), 'min_wi': np.float64(3.8915760855704455e-32)}
Cross-validated error: 0.02380321456919314
[I 2025-10-22 19:47:59,761] Trial 46 finished with value: 0.13047097017677972 and parameters: {'lr': 0.012968322354597064, 'num_epochs': 4, 'batch_size': 64, 'num_neighbors': 12, 'lr_decay': 0.880321891155562}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  37%|███▋      | 48/130 [22:58<40:01, 29.29s/it]

Train wi info: {'gini': np.float64(0.9399564550425046), 'ess': np.float64(578.4810215035944), 'max_wi': np.float64(39.01669903989691), 'min_wi': np.float64(1.7566370862239506e-09)}
actual reward: [0.08445865]
{'gini': np.float64(0.9203498909313961), 'ess': np.float64(788.0295735959261), 'max_wi': np.float64(39.68274674926736), 'min_wi': np.float64(2.2203075319302013e-09)}
Cross-validated error: 0.008638284196255558
[I 2025-10-22 19:48:28,140] Trial 47 finished with value: 0.08431830866156666 and parameters: {'lr': 0.011237544621660794, 'num_epochs': 4, 'batch_size': 512, 'num_neighbors': 13, 'lr_decay': 0.8125725699873486}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  38%|███▊      | 49/130 [23:26<39:05, 28.95s/it]

Train wi info: {'gini': np.float64(0.9714993373326831), 'ess': np.float64(261.52004763784737), 'max_wi': np.float64(94.57400553002086), 'min_wi': np.float64(6.76888601631595e-12)}
actual reward: [0.08144373]
{'gini': np.float64(0.9638770757785834), 'ess': np.float64(330.73416830973514), 'max_wi': np.float64(94.23333566841522), 'min_wi': np.float64(1.4562574761282693e-11)}
Cross-validated error: 0.007691469366048453
[I 2025-10-22 19:48:56,303] Trial 48 finished with value: 0.0799830850423486 and parameters: {'lr': 0.005678567075686701, 'num_epochs': 4, 'batch_size': 256, 'num_neighbors': 12, 'lr_decay': 0.9505533797543788}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  38%|███▊      | 50/130 [23:54<38:06, 28.59s/it]

Train wi info: {'gini': np.float64(0.9958576410890108), 'ess': np.float64(22.137924049100334), 'max_wi': np.float64(478.14691539357716), 'min_wi': np.float64(3.994118582952461e-09)}
actual reward: [0.08826244]
{'gini': np.float64(0.9922723969729448), 'ess': np.float64(61.10467338403041), 'max_wi': np.float64(598.5342487348823), 'min_wi': np.float64(3.994118582952461e-09)}
Cross-validated error: 0.008673619659082556
[I 2025-10-22 19:49:24,032] Trial 49 finished with value: 0.08445728478183513 and parameters: {'lr': 0.0029319087762291218, 'num_epochs': 2, 'batch_size': 64, 'num_neighbors': 14, 'lr_decay': 0.8922210574247631}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  39%|███▉      | 51/130 [24:22<37:18, 28.34s/it]

Train wi info: {'gini': np.float64(0.9998149181863518), 'ess': np.float64(1.9756333456232062), 'max_wi': np.float64(1792.3169981694587), 'min_wi': np.float64(7.976390349431436e-31)}
actual reward: [0.08462309]
{'gini': np.float64(0.9993046836630443), 'ess': np.float64(6.769532468119693), 'max_wi': np.float64(2125.3577000722944), 'min_wi': np.float64(7.976390349431436e-31)}
Cross-validated error: 0.009372768965421758
[I 2025-10-22 19:49:51,789] Trial 50 finished with value: 0.08744800975417323 and parameters: {'lr': 0.018059298265789357, 'num_epochs': 3, 'batch_size': 128, 'num_neighbors': 12, 'lr_decay': 0.802492852428843}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  40%|████      | 52/130 [24:50<36:52, 28.36s/it]

Train wi info: {'gini': np.float64(0.9992931680840864), 'ess': np.float64(6.100726547645515), 'max_wi': np.float64(3233.2571113616186), 'min_wi': np.float64(3.4364688931239753e-41)}
actual reward: [0.07530821]
{'gini': np.float64(0.9991333974903969), 'ess': np.float64(9.410027384401248), 'max_wi': np.float64(660.0295533591985), 'min_wi': np.float64(3.4364688931239753e-41)}
Cross-validated error: 0.004552021020493923
[I 2025-10-22 19:50:20,203] Trial 51 finished with value: 0.06286982337235188 and parameters: {'lr': 0.02864130405971994, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 9, 'lr_decay': 0.881575357974743}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  41%|████      | 53/130 [25:19<36:35, 28.51s/it]

Train wi info: {'gini': np.float64(0.03260221545538308), 'ess': np.float64(9964.74535001487), 'max_wi': np.float64(1.3770917120576642), 'min_wi': np.float64(0.8316219153704474)}
actual reward: [0.08614312]
{'gini': np.float64(0.03746371214034085), 'ess': np.float64(9942.845741427349), 'max_wi': np.float64(2.248001953747847), 'min_wi': np.float64(0.8389717964121541)}
Cross-validated error: 0.0069485430519727635
[I 2025-10-22 19:50:49,063] Trial 52 finished with value: 0.07639065120054389 and parameters: {'lr': 0.00010983871854693082, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 10, 'lr_decay': 0.8741558105497914}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  42%|████▏     | 54/130 [25:47<36:00, 28.43s/it]

Train wi info: {'gini': np.float64(0.9989115116809837), 'ess': np.float64(11.69422834473009), 'max_wi': np.float64(1103.4377618226636), 'min_wi': np.float64(0.0)}
actual reward: [0.0737998]
{'gini': np.float64(0.9988628882844331), 'ess': np.float64(12.603430910980844), 'max_wi': np.float64(795.4791639443399), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.014703176052109744
[I 2025-10-22 19:51:17,315] Trial 53 finished with value: 0.10642587168302531 and parameters: {'lr': 0.03896991411300133, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 7, 'lr_decay': 0.8997858989315274}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  42%|████▏     | 55/130 [26:15<35:17, 28.23s/it]

Train wi info: {'gini': np.float64(0.9991708921018617), 'ess': np.float64(9.238713503210496), 'max_wi': np.float64(1526.7229279073283), 'min_wi': np.float64(0.0)}
actual reward: [0.07366493]
{'gini': np.float64(0.9995231549625798), 'ess': np.float64(5.10797201966885), 'max_wi': np.float64(2471.34100175749), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.003654478750694346
[I 2025-10-22 19:51:45,085] Trial 54 finished with value: 0.0567546913725535 and parameters: {'lr': 0.04077916929601258, 'num_epochs': 4, 'batch_size': 64, 'num_neighbors': 7, 'lr_decay': 0.9162481180143767}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  43%|████▎     | 56/130 [26:45<35:20, 28.66s/it]

Train wi info: {'gini': np.float64(0.9998681820197334), 'ess': np.float64(1.2573707291070155), 'max_wi': np.float64(8056.690339310283), 'min_wi': np.float64(0.0)}
actual reward: [0.07092759]
{'gini': np.float64(0.9995596414418835), 'ess': np.float64(4.355862114588272), 'max_wi': np.float64(7245.257107434461), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.008463580714168964
[I 2025-10-22 19:52:14,729] Trial 55 finished with value: 0.0834896632627098 and parameters: {'lr': 0.05962823886637274, 'num_epochs': 7, 'batch_size': 64, 'num_neighbors': 5, 'lr_decay': 0.9056996942402008}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  44%|████▍     | 57/130 [27:14<34:56, 28.72s/it]

Train wi info: {'gini': np.float64(0.9994480071346798), 'ess': np.float64(6.099674339025308), 'max_wi': np.float64(1074.2730069291667), 'min_wi': np.float64(0.0)}
actual reward: [0.07412725]
{'gini': np.float64(0.9990651001603632), 'ess': np.float64(10.366137494610037), 'max_wi': np.float64(1523.8576432164225), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.02284695462128169
[I 2025-10-22 19:52:43,590] Trial 56 finished with value: 0.12819146116068778 and parameters: {'lr': 0.08362339350096963, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 4, 'lr_decay': 0.9305648494047466}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  45%|████▍     | 58/130 [27:43<34:42, 28.92s/it]

Train wi info: {'gini': np.float64(0.9996974720812138), 'ess': np.float64(2.6232626813936273), 'max_wi': np.float64(1776.7304070303326), 'min_wi': np.float64(0.0)}
actual reward: [0.07771811]
{'gini': np.float64(0.999263130225086), 'ess': np.float64(4.848919229584874), 'max_wi': np.float64(2822.5739076189207), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.006335121532847111
[I 2025-10-22 19:53:12,988] Trial 57 finished with value: 0.07316179156958352 and parameters: {'lr': 0.09883428236595576, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 4, 'lr_decay': 0.9666815874802903}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  45%|████▌     | 59/130 [28:11<33:59, 28.73s/it]

Train wi info: {'gini': np.float64(0.9997453750135513), 'ess': np.float64(2.7343136960102794), 'max_wi': np.float64(2272.768980697348), 'min_wi': np.float64(0.0)}
actual reward: [0.06808191]
{'gini': np.float64(0.9997541857598908), 'ess': np.float64(2.3727142754980464), 'max_wi': np.float64(3501.2712125636963), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.012768658175492756
[I 2025-10-22 19:53:41,260] Trial 58 finished with value: 0.10022925041182393 and parameters: {'lr': 0.07737828515984298, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 11, 'lr_decay': 0.9414346142843403}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  46%|████▌     | 60/130 [28:39<33:15, 28.51s/it]

Train wi info: {'gini': np.float64(0.9997453750135514), 'ess': np.float64(2.7343136960103047), 'max_wi': np.float64(2272.768980697348), 'min_wi': np.float64(0.0)}
actual reward: [0.06780192]
{'gini': np.float64(0.999754185788276), 'ess': np.float64(2.3727140606459094), 'max_wi': np.float64(3501.2712125636963), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.01188644447582928
[I 2025-10-22 19:54:09,266] Trial 59 finished with value: 0.09711853698805145 and parameters: {'lr': 0.042862099893925115, 'num_epochs': 4, 'batch_size': 64, 'num_neighbors': 7, 'lr_decay': 0.8968832760084218}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  47%|████▋     | 61/130 [29:08<32:45, 28.49s/it]

Train wi info: {'gini': np.float64(0.9992339621040617), 'ess': np.float64(8.415468336535383), 'max_wi': np.float64(956.2721611616859), 'min_wi': np.float64(0.0)}
actual reward: [0.07568327]
{'gini': np.float64(0.9983172225046979), 'ess': np.float64(18.21517417514163), 'max_wi': np.float64(1478.9100979216043), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.004357700689067
[I 2025-10-22 19:54:37,691] Trial 60 finished with value: 0.06161565753170748 and parameters: {'lr': 0.053574019056722745, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 14, 'lr_decay': 0.9122357086606913}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  48%|████▊     | 62/130 [29:35<31:52, 28.13s/it]

Train wi info: {'gini': np.float64(0.9997913429226156), 'ess': np.float64(2.1337667547954986), 'max_wi': np.float64(1464.4294008710146), 'min_wi': np.float64(1.7967959688524816e-24)}
actual reward: [0.08352792]
{'gini': np.float64(0.9990397762895744), 'ess': np.float64(10.153103764671428), 'max_wi': np.float64(779.3798817855658), 'min_wi': np.float64(1.6788684679760316e-23)}
Cross-validated error: 0.004294444028343922
[I 2025-10-22 19:55:04,995] Trial 61 finished with value: 0.06125090434829358 and parameters: {'lr': 0.008055482664864033, 'num_epochs': 4, 'batch_size': 128, 'num_neighbors': 5, 'lr_decay': 0.9293032647973626}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  48%|████▊     | 63/130 [30:04<31:35, 28.30s/it]

Train wi info: {'gini': np.float64(0.9994077470524222), 'ess': np.float64(5.711193926919189), 'max_wi': np.float64(3605.7115869624954), 'min_wi': np.float64(0.0)}
actual reward: [0.07474804]
{'gini': np.float64(0.9989917312874339), 'ess': np.float64(11.106596373274485), 'max_wi': np.float64(1069.7392836715808), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.01944246600588576
[I 2025-10-22 19:55:33,683] Trial 62 finished with value: 0.11994213509793647 and parameters: {'lr': 0.07728870409123675, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 4, 'lr_decay': 0.9950287525535533}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  49%|████▉     | 64/130 [30:34<31:54, 29.00s/it]

Train wi info: {'gini': np.float64(0.9995185443095411), 'ess': np.float64(5.321759084288324), 'max_wi': np.float64(1009.9466304938172), 'min_wi': np.float64(0.0)}
actual reward: [0.07852362]
{'gini': np.float64(0.9995924474949419), 'ess': np.float64(4.291465692709426), 'max_wi': np.float64(4605.346626244665), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.006549880839035092
[I 2025-10-22 19:56:04,326] Trial 63 finished with value: 0.07430974790625128 and parameters: {'lr': 0.07948709099405124, 'num_epochs': 7, 'batch_size': 64, 'num_neighbors': 4, 'lr_decay': 0.9804492356542198}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  50%|█████     | 65/130 [31:03<31:29, 29.06s/it]

Train wi info: {'gini': np.float64(0.9999000000000002), 'ess': np.float64(1.0000000000001468), 'max_wi': np.float64(727.9042243452496), 'min_wi': np.float64(0.0)}
actual reward: [0.07342169]
{'gini': np.float64(0.9998994883047367), 'ess': np.float64(1.0044631916830222), 'max_wi': np.float64(3015.7629848969104), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.00950398296645948
[I 2025-10-22 19:56:33,539] Trial 64 finished with value: 0.08800882022319192 and parameters: {'lr': 0.06031073616646584, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 3, 'lr_decay': 0.990227891917967}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  51%|█████     | 66/130 [31:34<31:20, 29.38s/it]

Train wi info: {'gini': np.float64(0.9997664190317563), 'ess': np.float64(2.5664341115398344), 'max_wi': np.float64(2061.660492154221), 'min_wi': np.float64(0.0)}
actual reward: [0.08542251]
{'gini': np.float64(0.9991729572426503), 'ess': np.float64(7.279836616606336), 'max_wi': np.float64(3264.1010872570573), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.0092307691386607
[I 2025-10-22 19:57:03,649] Trial 65 finished with value: 0.08680092400093883 and parameters: {'lr': 0.03579662658237353, 'num_epochs': 8, 'batch_size': 64, 'num_neighbors': 4, 'lr_decay': 0.9295061448194658}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  52%|█████▏    | 67/130 [32:03<30:48, 29.35s/it]

Train wi info: {'gini': np.float64(0.9998071361706259), 'ess': np.float64(2.0640709316491472), 'max_wi': np.float64(5021.124747795004), 'min_wi': np.float64(0.0)}
actual reward: [0.07755895]
{'gini': np.float64(0.999504745647802), 'ess': np.float64(5.436122298348391), 'max_wi': np.float64(712.8785855477456), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.0025751587531023363
[I 2025-10-22 19:57:32,924] Trial 66 finished with value: 0.048059568471949164 and parameters: {'lr': 0.0988771656236644, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 7, 'lr_decay': 0.953757605650273}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  52%|█████▏    | 68/130 [32:32<30:11, 29.21s/it]

Train wi info: {'gini': np.float64(0.9999), 'ess': np.float64(1.0000000000000466), 'max_wi': np.float64(1226.6237706565523), 'min_wi': np.float64(0.0)}
actual reward: [0.077729]
{'gini': np.float64(0.9994992417995753), 'ess': np.float64(4.669569895074242), 'max_wi': np.float64(8309.841474671257), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.006807958885868689
[I 2025-10-22 19:58:01,818] Trial 67 finished with value: 0.07569074564540197 and parameters: {'lr': 0.07393697288267478, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 6, 'lr_decay': 0.9651095900216304}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  53%|█████▎    | 69/130 [32:59<29:10, 28.69s/it]

Train wi info: {'gini': np.float64(0.9998997554021005), 'ess': np.float64(1.0016850248962301), 'max_wi': np.float64(1286.6178025923468), 'min_wi': np.float64(3.288976727869711e-36)}
actual reward: [0.08210775]
{'gini': np.float64(0.9993059536931987), 'ess': np.float64(7.711682943932343), 'max_wi': np.float64(3040.9498673968005), 'min_wi': np.float64(3.288976727869711e-36)}
Cross-validated error: 0.006229281388940603
[I 2025-10-22 19:58:29,298] Trial 68 finished with value: 0.0726211184554639 and parameters: {'lr': 0.021870033213058325, 'num_epochs': 3, 'batch_size': 64, 'num_neighbors': 8, 'lr_decay': 0.995815187807308}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  54%|█████▍    | 70/130 [33:29<28:55, 28.93s/it]

Train wi info: {'gini': np.float64(0.9992523861414185), 'ess': np.float64(8.32852361778665), 'max_wi': np.float64(1464.2496398920464), 'min_wi': np.float64(0.0)}
actual reward: [0.07459184]
{'gini': np.float64(0.9992216592287518), 'ess': np.float64(8.64960028478858), 'max_wi': np.float64(2051.2910867885917), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.0017370945696914312
[I 2025-10-22 19:58:58,769] Trial 69 finished with value: 0.039834978082447986 and parameters: {'lr': 0.048041278236790545, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 3, 'lr_decay': 0.8188075840142162}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  55%|█████▍    | 71/130 [33:58<28:27, 28.93s/it]

Train wi info: {'gini': np.float64(0.04734242578627221), 'ess': np.float64(9930.87911086456), 'max_wi': np.float64(1.3137038137044545), 'min_wi': np.float64(0.7519540206807444)}
actual reward: [0.08612721]
{'gini': np.float64(0.04962975818823484), 'ess': np.float64(9922.097946428261), 'max_wi': np.float64(1.608776103496318), 'min_wi': np.float64(0.7519540206807444)}
Cross-validated error: 0.006958752984446403
[I 2025-10-22 19:59:27,721] Trial 70 finished with value: 0.07646538575942094 and parameters: {'lr': 0.0004667995980193441, 'num_epochs': 9, 'batch_size': 512, 'num_neighbors': 9, 'lr_decay': 0.8318935088970927}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  55%|█████▌    | 72/130 [34:26<27:46, 28.73s/it]

Train wi info: {'gini': np.float64(0.9851768681273503), 'ess': np.float64(128.9663168735777), 'max_wi': np.float64(370.86816158172684), 'min_wi': np.float64(1.1727711367556582e-15)}
actual reward: [0.07882981]
{'gini': np.float64(0.9820796284474786), 'ess': np.float64(186.01290027525107), 'max_wi': np.float64(157.97293181791866), 'min_wi': np.float64(6.4868780767623024e-15)}
Cross-validated error: 0.009198757432415262
[I 2025-10-22 19:59:55,965] Trial 71 finished with value: 0.08658063045681276 and parameters: {'lr': 0.00438868468968211, 'num_epochs': 5, 'batch_size': 128, 'num_neighbors': 5, 'lr_decay': 0.8471888554401579}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  56%|█████▌    | 73/130 [34:54<27:09, 28.58s/it]

Train wi info: {'gini': np.float64(0.9996225821165562), 'ess': np.float64(4.088437065370961), 'max_wi': np.float64(2272.7115424401413), 'min_wi': np.float64(1.203672346438674e-32)}
actual reward: [0.06847864]
{'gini': np.float64(0.9997524782228829), 'ess': np.float64(2.3797063696425256), 'max_wi': np.float64(3499.8406287193848), 'min_wi': np.float64(1.203672346438674e-32)}
Cross-validated error: 0.011119254453306955
[I 2025-10-22 20:00:24,220] Trial 72 finished with value: 0.09419158057791634 and parameters: {'lr': 0.013671726520592753, 'num_epochs': 7, 'batch_size': 128, 'num_neighbors': 4, 'lr_decay': 0.9047852495720031}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  57%|█████▋    | 74/130 [35:21<26:14, 28.12s/it]

Train wi info: {'gini': np.float64(0.9998080649555902), 'ess': np.float64(1.9870755556855402), 'max_wi': np.float64(476.3394904012501), 'min_wi': np.float64(0.0)}
actual reward: [0.07599091]
{'gini': np.float64(0.9996582584168974), 'ess': np.float64(3.4511943705711694), 'max_wi': np.float64(770.5265736735045), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.011125201301648945
[I 2025-10-22 20:00:51,255] Trial 73 finished with value: 0.09429927344947148 and parameters: {'lr': 0.06455019036382424, 'num_epochs': 4, 'batch_size': 256, 'num_neighbors': 6, 'lr_decay': 0.8788065371500018}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  58%|█████▊    | 75/130 [35:49<25:36, 27.94s/it]

Train wi info: {'gini': np.float64(0.9996707232588888), 'ess': np.float64(3.577668906292234), 'max_wi': np.float64(255.77166951399235), 'min_wi': np.float64(0.0)}
actual reward: [0.07397093]
{'gini': np.float64(0.9998750508386179), 'ess': np.float64(1.1665317810491684), 'max_wi': np.float64(3016.477921585218), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.01035092543661361
[I 2025-10-22 20:01:18,782] Trial 74 finished with value: 0.0911601177358071 and parameters: {'lr': 0.034623972270594064, 'num_epochs': 5, 'batch_size': 128, 'num_neighbors': 4, 'lr_decay': 0.8242620112166423}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  58%|█████▊    | 76/130 [36:15<24:45, 27.51s/it]

Train wi info: {'gini': np.float64(0.9997057729281027), 'ess': np.float64(3.009740564049204), 'max_wi': np.float64(3222.9277304505317), 'min_wi': np.float64(0.0)}
actual reward: [0.06898605]
{'gini': np.float64(0.9994528549307512), 'ess': np.float64(5.476216704980599), 'max_wi': np.float64(6726.5432809561735), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.007825135504694403
[I 2025-10-22 20:01:45,269] Trial 75 finished with value: 0.08061976534869991 and parameters: {'lr': 0.08329627015996133, 'num_epochs': 1, 'batch_size': 64, 'num_neighbors': 3, 'lr_decay': 0.8544340297872695}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  59%|█████▉    | 77/130 [36:43<24:21, 27.57s/it]

Train wi info: {'gini': np.float64(0.9937627242745767), 'ess': np.float64(34.29688684049847), 'max_wi': np.float64(173.9270019479785), 'min_wi': np.float64(1.1263916322928741e-10)}
actual reward: [0.08783091]
{'gini': np.float64(0.9944412434770481), 'ess': np.float64(26.539989593959607), 'max_wi': np.float64(1394.8791905225767), 'min_wi': np.float64(2.5298870660562244e-10)}
Cross-validated error: 0.008634246584659425
[I 2025-10-22 20:02:12,983] Trial 76 finished with value: 0.08425900542206977 and parameters: {'lr': 0.0020482965342314865, 'num_epochs': 4, 'batch_size': 64, 'num_neighbors': 5, 'lr_decay': 0.8661408116588092}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  60%|██████    | 78/130 [37:10<23:42, 27.36s/it]

Train wi info: {'gini': np.float64(0.999690344380416), 'ess': np.float64(3.3871536687125197), 'max_wi': np.float64(2162.350908728218), 'min_wi': np.float64(0.0)}
actual reward: [0.08421694]
{'gini': np.float64(0.999549462588947), 'ess': np.float64(4.911039315033657), 'max_wi': np.float64(1837.0247568362788), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.012770486045346556
[I 2025-10-22 20:02:39,862] Trial 77 finished with value: 0.10014179242127012 and parameters: {'lr': 0.05486940310116366, 'num_epochs': 6, 'batch_size': 128, 'num_neighbors': 8, 'lr_decay': 0.8356861944415633}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  61%|██████    | 79/130 [37:37<23:19, 27.44s/it]

Train wi info: {'gini': np.float64(0.9996784602586305), 'ess': np.float64(3.490326413877644), 'max_wi': np.float64(2272.768980697348), 'min_wi': np.float64(0.0)}
actual reward: [0.06759666]
{'gini': np.float64(0.9997541857725205), 'ess': np.float64(2.372714181712416), 'max_wi': np.float64(3501.2712125636963), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.011963019250708265
[I 2025-10-22 20:03:07,473] Trial 78 finished with value: 0.09743868818864113 and parameters: {'lr': 0.03951412116469914, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 6, 'lr_decay': 0.840913049113507}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  62%|██████▏   | 80/130 [38:04<22:39, 27.20s/it]

Train wi info: {'gini': np.float64(0.9904828698557959), 'ess': np.float64(95.6978774772356), 'max_wi': np.float64(197.98048244125118), 'min_wi': np.float64(1.1132107194383043e-23)}
actual reward: [0.08330413]
{'gini': np.float64(0.9863483172253872), 'ess': np.float64(143.05694184115384), 'max_wi': np.float64(166.5604456804309), 'min_wi': np.float64(1.1132107194383043e-23)}
Cross-validated error: 0.006597203299551942
[I 2025-10-22 20:03:34,121] Trial 79 finished with value: 0.07460627330798458 and parameters: {'lr': 0.03157702196239562, 'num_epochs': 3, 'batch_size': 512, 'num_neighbors': 5, 'lr_decay': 0.8055391480688928}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  62%|██████▏   | 81/130 [38:32<22:30, 27.56s/it]

Train wi info: {'gini': np.float64(0.999638029922738), 'ess': np.float64(3.9522817763646683), 'max_wi': np.float64(565.214577887915), 'min_wi': np.float64(1.942214227695492e-38)}
actual reward: [0.07736343]
{'gini': np.float64(0.9992765337832467), 'ess': np.float64(6.474131915063271), 'max_wi': np.float64(7984.804999482179), 'min_wi': np.float64(1.0929223531796294e-37)}
Cross-validated error: 0.005398864314899121
[I 2025-10-22 20:04:02,510] Trial 80 finished with value: 0.06788978127157193 and parameters: {'lr': 0.023315982010029587, 'num_epochs': 7, 'batch_size': 256, 'num_neighbors': 4, 'lr_decay': 0.8995000180227457}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  63%|██████▎   | 82/130 [39:01<22:11, 27.75s/it]

Train wi info: {'gini': np.float64(0.9998402482980305), 'ess': np.float64(1.7211814318170673), 'max_wi': np.float64(872.7925302501958), 'min_wi': np.float64(0.0)}
actual reward: [0.08145661]
{'gini': np.float64(0.9994405240280074), 'ess': np.float64(5.476869439134833), 'max_wi': np.float64(4592.512376419779), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.0045051667731354755
[I 2025-10-22 20:04:30,696] Trial 81 finished with value: 0.06258727874890141 and parameters: {'lr': 0.08159907239607861, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 11, 'lr_decay': 0.9390595889872245}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  64%|██████▍   | 83/130 [39:28<21:42, 27.70s/it]

Train wi info: {'gini': np.float64(0.9996516261337509), 'ess': np.float64(3.77619423577553), 'max_wi': np.float64(1818.0340960343947), 'min_wi': np.float64(0.0)}
actual reward: [0.07938177]
{'gini': np.float64(0.999510250662945), 'ess': np.float64(5.116047212653138), 'max_wi': np.float64(4605.346626244665), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.006067113702969118
[I 2025-10-22 20:04:58,299] Trial 82 finished with value: 0.07171697926020051 and parameters: {'lr': 0.06553966723720386, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 12, 'lr_decay': 0.9391088069250166}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  65%|██████▍   | 84/130 [39:55<21:07, 27.56s/it]

Train wi info: {'gini': np.float64(0.9996121696883152), 'ess': np.float64(4.218383105953505), 'max_wi': np.float64(4654.754204975497), 'min_wi': np.float64(0.0)}
actual reward: [0.07582226]
{'gini': np.float64(0.9997427192393317), 'ess': np.float64(2.797304818496254), 'max_wi': np.float64(1465.1734745961119), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.009774295965651013
[I 2025-10-22 20:05:25,542] Trial 83 finished with value: 0.08913835351045751 and parameters: {'lr': 0.08530362981270787, 'num_epochs': 4, 'batch_size': 64, 'num_neighbors': 13, 'lr_decay': 0.9217511035578838}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  65%|██████▌   | 85/130 [40:24<20:50, 27.78s/it]

Train wi info: {'gini': np.float64(0.998961213913405), 'ess': np.float64(11.527775580565931), 'max_wi': np.float64(1468.8840516984178), 'min_wi': np.float64(0.0)}
actual reward: [0.0739851]
{'gini': np.float64(0.9989049727160282), 'ess': np.float64(12.080651203713561), 'max_wi': np.float64(1357.0155567004863), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.009783245252551248
[I 2025-10-22 20:05:53,840] Trial 84 finished with value: 0.0890548783910309 and parameters: {'lr': 0.06949238226497619, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 11, 'lr_decay': 0.9783517212280355}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  66%|██████▌   | 86/130 [40:53<20:40, 28.19s/it]

Train wi info: {'gini': np.float64(0.9996446717178671), 'ess': np.float64(3.886836886873247), 'max_wi': np.float64(4122.3729638007335), 'min_wi': np.float64(0.0)}
actual reward: [0.07904934]
{'gini': np.float64(0.9996156552829255), 'ess': np.float64(4.164527713224454), 'max_wi': np.float64(1042.2425082516224), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.0026867856288003713
[I 2025-10-22 20:06:22,984] Trial 85 finished with value: 0.04911120431835747 and parameters: {'lr': 0.0472679798444869, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 11, 'lr_decay': 0.8923239390809926}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  67%|██████▋   | 87/130 [41:21<20:10, 28.15s/it]

Train wi info: {'gini': np.float64(0.9998438420609723), 'ess': np.float64(1.495762304546938), 'max_wi': np.float64(8056.690339310283), 'min_wi': np.float64(0.0)}
actual reward: [0.07347737]
{'gini': np.float64(0.9995676750457826), 'ess': np.float64(3.8485675298734536), 'max_wi': np.float64(7245.256243732509), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.009900422899872228
[I 2025-10-22 20:06:51,044] Trial 86 finished with value: 0.08955313579871427 and parameters: {'lr': 0.05656439832293826, 'num_epochs': 4, 'batch_size': 128, 'num_neighbors': 13, 'lr_decay': 0.9103035706575647}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 0. Best value: 0.155775:  68%|██████▊   | 88/130 [41:48<19:31, 27.89s/it]

Train wi info: {'gini': np.float64(0.9985438126669667), 'ess': np.float64(7.7632758773990895), 'max_wi': np.float64(1254.9380660873376), 'min_wi': np.float64(1.9930740971483718e-17)}
actual reward: [0.08613023]
{'gini': np.float64(0.9976385581437027), 'ess': np.float64(13.142714770943245), 'max_wi': np.float64(764.8551286626703), 'min_wi': np.float64(7.83196154274541e-17)}
Cross-validated error: 0.007869005148325817
[I 2025-10-22 20:07:18,333] Trial 87 finished with value: 0.08084700693562334 and parameters: {'lr': 0.00949100872430532, 'num_epochs': 1, 'batch_size': 64, 'num_neighbors': 12, 'lr_decay': 0.8574073629117762}. Best is trial 0 with value: 0.15577512422940984.


Best trial: 88. Best value: 0.19427:  68%|██████▊   | 89/130 [42:15<18:53, 27.66s/it]

Train wi info: {'gini': np.float64(0.9998657483061417), 'ess': np.float64(1.3081797383106377), 'max_wi': np.float64(1998.859522091196), 'min_wi': np.float64(7.513899190479272e-37)}
actual reward: [0.07574636]
{'gini': np.float64(0.9998717228011703), 'ess': np.float64(1.3206225236467792), 'max_wi': np.float64(6880.151170678275), 'min_wi': np.float64(1.1517919569661754e-37)}
Cross-validated error: 0.06984810285258232
[I 2025-10-22 20:07:45,433] Trial 88 finished with value: 0.19427010652923055 and parameters: {'lr': 0.01785408525307851, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 3, 'lr_decay': 0.9458300903920962}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  69%|██████▉   | 90/130 [42:43<18:24, 27.61s/it]

Train wi info: {'gini': np.float64(0.9990354144343614), 'ess': np.float64(9.12050723728238), 'max_wi': np.float64(1812.703944964624), 'min_wi': np.float64(1.1291087789090486e-34)}
actual reward: [0.07837526]
{'gini': np.float64(0.9993122590512911), 'ess': np.float64(7.216599761360115), 'max_wi': np.float64(3150.088025551755), 'min_wi': np.float64(6.855152725250724e-35)}
Cross-validated error: 0.004438483766437001
[I 2025-10-22 20:08:12,936] Trial 89 finished with value: 0.06209661319980496 and parameters: {'lr': 0.016295044332301216, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 3, 'lr_decay': 0.9280410926484507}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  70%|███████   | 91/130 [43:10<17:54, 27.55s/it]

Train wi info: {'gini': np.float64(0.9905483209415), 'ess': np.float64(96.48650040628338), 'max_wi': np.float64(268.2915040171787), 'min_wi': np.float64(1.379465108145596e-23)}
actual reward: [0.07643422]
{'gini': np.float64(0.9885693646727742), 'ess': np.float64(124.33958804621602), 'max_wi': np.float64(154.8835105207217), 'min_wi': np.float64(4.5602343390064215e-22)}
Cross-validated error: 0.006713511926212854
[I 2025-10-22 20:08:40,356] Trial 90 finished with value: 0.07503042923391932 and parameters: {'lr': 0.012208137505658611, 'num_epochs': 2, 'batch_size': 128, 'num_neighbors': 3, 'lr_decay': 0.9636542804044963}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  71%|███████   | 92/130 [43:38<17:25, 27.52s/it]

Train wi info: {'gini': np.float64(0.9991468108289909), 'ess': np.float64(8.348129129706482), 'max_wi': np.float64(2507.0222508464217), 'min_wi': np.float64(2.2042780960121493e-36)}
actual reward: [0.07793314]
{'gini': np.float64(0.9990913889709645), 'ess': np.float64(9.918962257487781), 'max_wi': np.float64(2675.1865701745214), 'min_wi': np.float64(2.2042780960121493e-36)}
Cross-validated error: 0.003915261328796157
[I 2025-10-22 20:09:07,807] Trial 91 finished with value: 0.05857952737595819 and parameters: {'lr': 0.019182530673519733, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 4, 'lr_decay': 0.9484595114622821}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  72%|███████▏  | 93/130 [44:06<17:02, 27.65s/it]

Train wi info: {'gini': np.float64(0.9994303545854318), 'ess': np.float64(6.2614639426814644), 'max_wi': np.float64(1813.1114764273893), 'min_wi': np.float64(0.0)}
actual reward: [0.07863956]
{'gini': np.float64(0.9995088900794931), 'ess': np.float64(5.255364308485569), 'max_wi': np.float64(3150.4913854116903), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.0029345723777728667
[I 2025-10-22 20:09:35,750] Trial 92 finished with value: 0.05119859370968734 and parameters: {'lr': 0.08825681389490161, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 10, 'lr_decay': 0.9245685754143684}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  72%|███████▏  | 94/130 [44:33<16:37, 27.70s/it]

Train wi info: {'gini': np.float64(0.9998005600544787), 'ess': np.float64(2.092499826501561), 'max_wi': np.float64(1624.150880374967), 'min_wi': np.float64(6.838639108686538e-25)}
actual reward: [0.08375047]
{'gini': np.float64(0.9992570726050227), 'ess': np.float64(8.020311762073), 'max_wi': np.float64(706.1365159429049), 'min_wi': np.float64(6.838639108686538e-25)}
Cross-validated error: 0.0037074912525478358
[I 2025-10-22 20:10:03,576] Trial 93 finished with value: 0.057140796301614984 and parameters: {'lr': 0.006934496280369256, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 4, 'lr_decay': 0.9434843545985536}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  73%|███████▎  | 95/130 [45:02<16:17, 27.93s/it]

Train wi info: {'gini': np.float64(0.9998285747161553), 'ess': np.float64(1.8490244873864783), 'max_wi': np.float64(1818.0340960343947), 'min_wi': np.float64(0.0)}
actual reward: [0.07809402]
{'gini': np.float64(0.9995153325609419), 'ess': np.float64(5.068854685427682), 'max_wi': np.float64(4605.346626244665), 'min_wi': np.float64(9.705151019463231e-44)}
Cross-validated error: 0.004475482457439317
[I 2025-10-22 20:10:32,023] Trial 94 finished with value: 0.06229693650386476 and parameters: {'lr': 0.02887022159828722, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 3, 'lr_decay': 0.9329735312806511}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  74%|███████▍  | 96/130 [45:30<15:49, 27.92s/it]

Train wi info: {'gini': np.float64(0.9994575152393674), 'ess': np.float64(5.981114717636652), 'max_wi': np.float64(840.7131954060806), 'min_wi': np.float64(0.0)}
actual reward: [0.08144852]
{'gini': np.float64(0.9985895412671165), 'ess': np.float64(15.701717168021476), 'max_wi': np.float64(776.2265153625536), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.018384063328031885
[I 2025-10-22 20:10:59,937] Trial 95 finished with value: 0.11709064388786128 and parameters: {'lr': 0.07086304552441101, 'num_epochs': 4, 'batch_size': 64, 'num_neighbors': 8, 'lr_decay': 0.956592104684764}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  75%|███████▍  | 97/130 [45:57<15:14, 27.72s/it]

Train wi info: {'gini': np.float64(0.9994141242340295), 'ess': np.float64(5.626131223264813), 'max_wi': np.float64(2177.671554172112), 'min_wi': np.float64(0.0)}
actual reward: [0.0806462]
{'gini': np.float64(0.9994693943263833), 'ess': np.float64(5.406607730056506), 'max_wi': np.float64(1327.7649724341456), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.008893156973084893
[I 2025-10-22 20:11:27,177] Trial 96 finished with value: 0.08536700743650706 and parameters: {'lr': 0.04608707602227695, 'num_epochs': 3, 'batch_size': 64, 'num_neighbors': 8, 'lr_decay': 0.9561115759711183}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  75%|███████▌  | 98/130 [46:25<14:48, 27.78s/it]

Train wi info: {'gini': np.float64(0.9997277057308467), 'ess': np.float64(2.9159236168702045), 'max_wi': np.float64(1009.7258862240873), 'min_wi': np.float64(0.0)}
actual reward: [0.07493645]
{'gini': np.float64(0.999897495152613), 'ess': np.float64(1.0190846638195123), 'max_wi': np.float64(2986.512795991724), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.011907827718217997
[I 2025-10-22 20:11:55,097] Trial 97 finished with value: 0.09722464453719962 and parameters: {'lr': 0.03802167106073175, 'num_epochs': 4, 'batch_size': 64, 'num_neighbors': 7, 'lr_decay': 0.8303206816693242}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  76%|███████▌  | 99/130 [46:53<14:18, 27.70s/it]

Train wi info: {'gini': np.float64(0.9997166859021448), 'ess': np.float64(3.0077462019942964), 'max_wi': np.float64(1039.565622000908), 'min_wi': np.float64(4.5872109436644276e-29)}
actual reward: [0.08166734]
{'gini': np.float64(0.9988057058112729), 'ess': np.float64(12.49186407670971), 'max_wi': np.float64(505.8726398646212), 'min_wi': np.float64(4.5872109436644276e-29)}
Cross-validated error: 0.0120694193101777
[I 2025-10-22 20:12:22,608] Trial 98 finished with value: 0.09775486666707646 and parameters: {'lr': 0.01014667609442005, 'num_epochs': 4, 'batch_size': 128, 'num_neighbors': 8, 'lr_decay': 0.9864014715593218}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  77%|███████▋  | 100/130 [47:20<13:45, 27.53s/it]

Train wi info: {'gini': np.float64(0.9992739490582098), 'ess': np.float64(7.957168221396392), 'max_wi': np.float64(1836.1379387258048), 'min_wi': np.float64(0.0)}
actual reward: [0.08285714]
{'gini': np.float64(0.9992208099914381), 'ess': np.float64(8.335457490310247), 'max_wi': np.float64(3043.8091720159864), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.004070181046968951
[I 2025-10-22 20:12:49,742] Trial 99 finished with value: 0.05966484264156581 and parameters: {'lr': 0.06748803451505021, 'num_epochs': 2, 'batch_size': 64, 'num_neighbors': 9, 'lr_decay': 0.9700863332982022}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  78%|███████▊  | 101/130 [47:48<13:22, 27.69s/it]

Train wi info: {'gini': np.float64(0.9995911259471361), 'ess': np.float64(4.3507139931280525), 'max_wi': np.float64(157.3868348459891), 'min_wi': np.float64(3.0859319810172214e-31)}
actual reward: [0.08280899]
{'gini': np.float64(0.9991803439403553), 'ess': np.float64(7.576099262611928), 'max_wi': np.float64(2715.9691372130187), 'min_wi': np.float64(4.2833125547529766e-31)}
Cross-validated error: 0.008007412725552434
[I 2025-10-22 20:13:17,791] Trial 100 finished with value: 0.0814117543420775 and parameters: {'lr': 0.02344109703967819, 'num_epochs': 7, 'batch_size': 512, 'num_neighbors': 5, 'lr_decay': 0.9598013330713089}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  78%|███████▊  | 102/130 [48:16<13:01, 27.90s/it]

Train wi info: {'gini': np.float64(0.9991748808761666), 'ess': np.float64(8.488375536311578), 'max_wi': np.float64(3305.535287853624), 'min_wi': np.float64(0.0)}
actual reward: [0.08308456]
{'gini': np.float64(0.9993516291181663), 'ess': np.float64(6.700036896756395), 'max_wi': np.float64(3713.24041337772), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.004329538738355516
[I 2025-10-22 20:13:46,183] Trial 101 finished with value: 0.061375779190873885 and parameters: {'lr': 0.07431813660287177, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 9, 'lr_decay': 0.9373326024428705}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  79%|███████▉  | 103/130 [48:44<12:34, 27.94s/it]

Train wi info: {'gini': np.float64(0.999204946615226), 'ess': np.float64(8.519148690756431), 'max_wi': np.float64(2013.8010030055095), 'min_wi': np.float64(0.0)}
actual reward: [0.07067024]
{'gini': np.float64(0.9993003114414986), 'ess': np.float64(7.714053580970892), 'max_wi': np.float64(1277.6374807105783), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.002851672546154269
[I 2025-10-22 20:14:14,229] Trial 102 finished with value: 0.05048770555406342 and parameters: {'lr': 0.09986520407684868, 'num_epochs': 4, 'batch_size': 64, 'num_neighbors': 9, 'lr_decay': 0.9434571100284423}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  80%|████████  | 104/130 [49:12<12:06, 27.95s/it]

Train wi info: {'gini': np.float64(0.9996451434298215), 'ess': np.float64(3.8783833714447997), 'max_wi': np.float64(2272.766813215944), 'min_wi': np.float64(0.0)}
actual reward: [0.06925623]
{'gini': np.float64(0.9997541534361407), 'ess': np.float64(2.3731588990799195), 'max_wi': np.float64(3500.082502778547), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.01106223602783044
[I 2025-10-22 20:14:42,202] Trial 103 finished with value: 0.09402964368490412 and parameters: {'lr': 0.05577724512639102, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 8, 'lr_decay': 0.9487225866397434}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  81%|████████  | 105/130 [49:43<11:59, 28.80s/it]

Train wi info: {'gini': np.float64(0.9997453750135515), 'ess': np.float64(2.734313696010348), 'max_wi': np.float64(2272.768980697348), 'min_wi': np.float64(0.0)}
actual reward: [0.06853958]
{'gini': np.float64(0.9997541857823827), 'ess': np.float64(2.3727140861820706), 'max_wi': np.float64(3501.2712125636963), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.011830289626071959
[I 2025-10-22 20:15:12,968] Trial 104 finished with value: 0.09700987988841125 and parameters: {'lr': 0.04280749426613613, 'num_epochs': 10, 'batch_size': 64, 'num_neighbors': 7, 'lr_decay': 0.972359444486688}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  82%|████████▏ | 106/130 [50:13<11:41, 29.22s/it]

Train wi info: {'gini': np.float64(0.9993867160987508), 'ess': np.float64(6.14030162917289), 'max_wi': np.float64(3009.752410018205), 'min_wi': np.float64(0.0)}
actual reward: [0.07139729]
{'gini': np.float64(0.9994476878750155), 'ess': np.float64(5.91590990935587), 'max_wi': np.float64(2178.47114975529), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.0028439056928145873
[I 2025-10-22 20:15:43,183] Trial 105 finished with value: 0.050357654275687065 and parameters: {'lr': 0.08793005209596433, 'num_epochs': 8, 'batch_size': 64, 'num_neighbors': 4, 'lr_decay': 0.8489178346883219}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  82%|████████▏ | 107/130 [50:42<11:12, 29.22s/it]

Train wi info: {'gini': np.float64(0.999465094887591), 'ess': np.float64(5.7566984501429195), 'max_wi': np.float64(2079.3612447292876), 'min_wi': np.float64(0.0)}
actual reward: [0.07841659]
{'gini': np.float64(0.9990075924045817), 'ess': np.float64(10.352756624684972), 'max_wi': np.float64(2144.0167970345365), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.044825879537544897
[I 2025-10-22 20:16:12,393] Trial 106 finished with value: 0.16663700570295298 and parameters: {'lr': 0.06319915132720946, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 3, 'lr_decay': 0.9344926382138404}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  83%|████████▎ | 108/130 [51:11<10:36, 28.93s/it]

Train wi info: {'gini': np.float64(0.986104394423616), 'ess': np.float64(150.6476514608382), 'max_wi': np.float64(158.45664191921506), 'min_wi': np.float64(2.0024673321371635e-26)}
actual reward: [0.06983157]
{'gini': np.float64(0.9856016707142202), 'ess': np.float64(153.03303930430664), 'max_wi': np.float64(181.47833414268982), 'min_wi': np.float64(2.537138608512132e-25)}
Cross-validated error: 0.008488055214574323
[I 2025-10-22 20:16:40,667] Trial 107 finished with value: 0.08353300146106839 and parameters: {'lr': 0.014174466778339681, 'num_epochs': 6, 'batch_size': 256, 'num_neighbors': 3, 'lr_decay': 0.8217487193188329}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  84%|████████▍ | 109/130 [51:39<10:04, 28.77s/it]

Train wi info: {'gini': np.float64(0.9995679414773655), 'ess': np.float64(4.6730111542876065), 'max_wi': np.float64(1818.0340960343947), 'min_wi': np.float64(0.0)}
actual reward: [0.0782719]
{'gini': np.float64(0.9995153128481323), 'ess': np.float64(5.069048119708661), 'max_wi': np.float64(4605.346626244665), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.00577107116767869
[I 2025-10-22 20:17:09,039] Trial 108 finished with value: 0.07008611864029875 and parameters: {'lr': 0.06232086119211849, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 3, 'lr_decay': 0.9336100898417485}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  85%|████████▍ | 110/130 [52:06<09:23, 28.17s/it]

Train wi info: {'gini': np.float64(0.9992675647201837), 'ess': np.float64(4.831151964573434), 'max_wi': np.float64(1.063675042141546e-11), 'min_wi': np.float64(0.0)}
actual reward: [0.0730586]
{'gini': np.float64(0.9998999999543468), 'ess': np.float64(1.000000377447564), 'max_wi': np.float64(3016.7620262048176), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.010055857377937627
[I 2025-10-22 20:17:35,832] Trial 109 finished with value: 0.09011598962294537 and parameters: {'lr': 0.053219623238839144, 'num_epochs': 3, 'batch_size': 128, 'num_neighbors': 4, 'lr_decay': 0.9166013819290736}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  85%|████████▌ | 111/130 [52:34<08:56, 28.26s/it]

Train wi info: {'gini': np.float64(0.9998285747216288), 'ess': np.float64(1.8490244339110493), 'max_wi': np.float64(1818.0340960343947), 'min_wi': np.float64(0.0)}
actual reward: [0.07809501]
{'gini': np.float64(0.9995153324599064), 'ess': np.float64(5.068855582052944), 'max_wi': np.float64(4605.346626244665), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.004461695197788598
[I 2025-10-22 20:18:04,296] Trial 110 finished with value: 0.06231053286276638 and parameters: {'lr': 0.030994948877122144, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 3, 'lr_decay': 0.8375689979196624}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  86%|████████▌ | 112/130 [53:02<08:25, 28.10s/it]

Train wi info: {'gini': np.float64(0.9993305929264688), 'ess': np.float64(7.157774196078074), 'max_wi': np.float64(1357.3242573426414), 'min_wi': np.float64(0.0)}
actual reward: [0.07321178]
{'gini': np.float64(0.9986644499305228), 'ess': np.float64(14.663046973740052), 'max_wi': np.float64(1172.8908654552486), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.017529974688003488
[I 2025-10-22 20:18:32,008] Trial 111 finished with value: 0.1147587197881898 and parameters: {'lr': 0.07526959415300837, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 4, 'lr_decay': 0.843310344747523}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  87%|████████▋ | 113/130 [53:30<07:56, 28.00s/it]

Train wi info: {'gini': np.float64(0.9998999999999998), 'ess': np.float64(1.0000000000001081), 'max_wi': np.float64(8056.690339310283), 'min_wi': np.float64(0.0)}
actual reward: [0.08185972]
{'gini': np.float64(0.999317198524417), 'ess': np.float64(6.026109688898699), 'max_wi': np.float64(8309.855343253459), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.010343763736346458
[I 2025-10-22 20:18:59,791] Trial 112 finished with value: 0.09131181780222992 and parameters: {'lr': 0.06637334703210498, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 15, 'lr_decay': 0.8446938316171325}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  88%|████████▊ | 114/130 [53:58<07:30, 28.14s/it]

Train wi info: {'gini': np.float64(0.9996041534181836), 'ess': np.float64(4.344080559526903), 'max_wi': np.float64(3288.1623992096106), 'min_wi': np.float64(0.0)}
actual reward: [0.07832822]
{'gini': np.float64(0.9995061134424883), 'ess': np.float64(5.3549687526208025), 'max_wi': np.float64(1156.2371547716039), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.0018522995337984008
[I 2025-10-22 20:19:28,263] Trial 113 finished with value: 0.04126619400921203 and parameters: {'lr': 0.04902939965542347, 'num_epochs': 7, 'batch_size': 64, 'num_neighbors': 4, 'lr_decay': 0.851446044398164}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  88%|████████▊ | 115/130 [54:26<07:00, 28.05s/it]

Train wi info: {'gini': np.float64(0.999224821496011), 'ess': np.float64(8.503935016264709), 'max_wi': np.float64(2064.5324083273076), 'min_wi': np.float64(0.0)}
actual reward: [0.08116518]
{'gini': np.float64(0.999589769594856), 'ess': np.float64(3.741401958099528), 'max_wi': np.float64(4236.29570339164), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.002653172072428143
[I 2025-10-22 20:19:56,085] Trial 114 finished with value: 0.04885078405819396 and parameters: {'lr': 0.0902576432356713, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 5, 'lr_decay': 0.8264247483702721}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  89%|████████▉ | 116/130 [54:54<06:31, 27.99s/it]

Train wi info: {'gini': np.float64(0.9996155097183499), 'ess': np.float64(4.1733487209777005), 'max_wi': np.float64(1488.7948118695997), 'min_wi': np.float64(0.0)}
actual reward: [0.07450952]
{'gini': np.float64(0.9994356680152064), 'ess': np.float64(5.829798984196155), 'max_wi': np.float64(3329.3489021497653), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.0330722862738184
[I 2025-10-22 20:20:23,928] Trial 115 finished with value: 0.1486530013529943 and parameters: {'lr': 0.0686956924257819, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 4, 'lr_decay': 0.807517686144081}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  90%|█████████ | 117/130 [55:22<06:02, 27.90s/it]

Train wi info: {'gini': np.float64(0.9989823969851506), 'ess': np.float64(9.235523330337555), 'max_wi': np.float64(3009.752410018205), 'min_wi': np.float64(0.0)}
actual reward: [0.07540418]
{'gini': np.float64(0.9994204736794008), 'ess': np.float64(6.370796319861206), 'max_wi': np.float64(1279.4384608309458), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.006229613143327593
[I 2025-10-22 20:20:51,618] Trial 116 finished with value: 0.072712164539987 and parameters: {'lr': 0.07389665760205893, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 4, 'lr_decay': 0.806713392363816}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  91%|█████████ | 118/130 [55:49<05:34, 27.85s/it]

Train wi info: {'gini': np.float64(0.9997325959101373), 'ess': np.float64(2.079801880072504), 'max_wi': np.float64(5865.475384650972), 'min_wi': np.float64(0.0)}
actual reward: [0.07828413]
{'gini': np.float64(0.9996290765390463), 'ess': np.float64(4.029106640827533), 'max_wi': np.float64(6122.269642590993), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.004164967279111991
[I 2025-10-22 20:21:19,345] Trial 117 finished with value: 0.06027017464681114 and parameters: {'lr': 0.05774180048458236, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 3, 'lr_decay': 0.8153826767572528}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  92%|█████████▏| 119/130 [56:18<05:09, 28.10s/it]

Train wi info: {'gini': np.float64(0.9997695650395098), 'ess': np.float64(2.4525632334908627), 'max_wi': np.float64(3331.161293402831), 'min_wi': np.float64(0.0)}
actual reward: [0.08061254]
{'gini': np.float64(0.9998953700995424), 'ess': np.float64(1.0473620371535062), 'max_wi': np.float64(3016.762385830918), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.010815358631029557
[I 2025-10-22 20:21:48,029] Trial 118 finished with value: 0.09305747862537114 and parameters: {'lr': 0.042469312117006865, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 4, 'lr_decay': 0.8069772127110405}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  92%|█████████▏| 120/130 [56:45<04:36, 27.69s/it]

Train wi info: {'gini': np.float64(0.999749984353002), 'ess': np.float64(2.6831264691291725), 'max_wi': np.float64(4122.3729638007335), 'min_wi': np.float64(0.0)}
actual reward: [0.07826597]
{'gini': np.float64(0.9997729529068772), 'ess': np.float64(2.4963035235016497), 'max_wi': np.float64(3844.026446582613), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.004712809421233079
[I 2025-10-22 20:22:14,775] Trial 119 finished with value: 0.06378724259570942 and parameters: {'lr': 0.08023320894751973, 'num_epochs': 4, 'batch_size': 64, 'num_neighbors': 3, 'lr_decay': 0.998582127544607}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  93%|█████████▎| 121/130 [57:12<04:07, 27.53s/it]

Train wi info: {'gini': np.float64(0.9996520227554829), 'ess': np.float64(3.0186136058685786), 'max_wi': np.float64(8056.687458013289), 'min_wi': np.float64(0.0)}
actual reward: [0.08115034]
{'gini': np.float64(0.9994478107448064), 'ess': np.float64(5.456744337610714), 'max_wi': np.float64(5464.476555490011), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.004343466375197289
[I 2025-10-22 20:22:41,940] Trial 120 finished with value: 0.06145116192648074 and parameters: {'lr': 0.07304234007090853, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 5, 'lr_decay': 0.8012575754648379}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  94%|█████████▍| 122/130 [57:39<03:39, 27.44s/it]

Train wi info: {'gini': np.float64(0.9993741857039641), 'ess': np.float64(6.410246780510202), 'max_wi': np.float64(2011.9167331080403), 'min_wi': np.float64(2.0700721347699637e-33)}
actual reward: [0.08079079]
{'gini': np.float64(0.9996392992150441), 'ess': np.float64(2.9126317818358607), 'max_wi': np.float64(3014.153837909067), 'min_wi': np.float64(2.0700721347699637e-33)}
Cross-validated error: 0.011890494561719141
[I 2025-10-22 20:23:09,155] Trial 121 finished with value: 0.0971390182373567 and parameters: {'lr': 0.017247989313088997, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 4, 'lr_decay': 0.8641588979169469}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  95%|█████████▍| 123/130 [58:07<03:12, 27.49s/it]

Train wi info: {'gini': np.float64(0.9998999999999999), 'ess': np.float64(1.0000000000005849), 'max_wi': np.float64(198.70733212342915), 'min_wi': np.float64(0.0)}
actual reward: [0.07312664]
{'gini': np.float64(0.9998999999998658), 'ess': np.float64(1.0000000010316288), 'max_wi': np.float64(3016.762385830918), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.010253578613859223
[I 2025-10-22 20:23:36,763] Trial 122 finished with value: 0.09070588006596482 and parameters: {'lr': 0.09942917349768003, 'num_epochs': 6, 'batch_size': 128, 'num_neighbors': 4, 'lr_decay': 0.8413748336611557}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  95%|█████████▌| 124/130 [58:34<02:44, 27.36s/it]

Train wi info: {'gini': np.float64(0.9987952490610165), 'ess': np.float64(12.754450275344634), 'max_wi': np.float64(1627.6584480887163), 'min_wi': np.float64(0.0)}
actual reward: [0.08495504]
{'gini': np.float64(0.9984822678992552), 'ess': np.float64(16.382236704714202), 'max_wi': np.float64(904.2482455997801), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.012431423807644447
[I 2025-10-22 20:24:03,837] Trial 123 finished with value: 0.09900400998404317 and parameters: {'lr': 0.06196389222816645, 'num_epochs': 4, 'batch_size': 64, 'num_neighbors': 6, 'lr_decay': 0.8171909488708048}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  96%|█████████▌| 125/130 [59:01<02:17, 27.46s/it]

Train wi info: {'gini': np.float64(0.9997526328471202), 'ess': np.float64(2.6799736536608925), 'max_wi': np.float64(1818.0340960343947), 'min_wi': np.float64(0.0)}
actual reward: [0.07809547]
{'gini': np.float64(0.9995153325616422), 'ess': np.float64(5.068854679350763), 'max_wi': np.float64(4605.346626244665), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.00446169550299341
[I 2025-10-22 20:24:31,507] Trial 124 finished with value: 0.06234392963152813 and parameters: {'lr': 0.03616913143344492, 'num_epochs': 5, 'batch_size': 64, 'num_neighbors': 3, 'lr_decay': 0.8334887132889506}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  97%|█████████▋| 126/130 [59:30<01:51, 27.92s/it]

Train wi info: {'gini': np.float64(0.999277238632157), 'ess': np.float64(7.84642746119884), 'max_wi': np.float64(1818.880482692778), 'min_wi': np.float64(0.0)}
actual reward: [0.07449773]
{'gini': np.float64(0.9991526368504393), 'ess': np.float64(9.437291604316965), 'max_wi': np.float64(1415.8243753921233), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.005186691296733899
[I 2025-10-22 20:25:00,502] Trial 125 finished with value: 0.06683146085786083 and parameters: {'lr': 0.05092454621959934, 'num_epochs': 6, 'batch_size': 64, 'num_neighbors': 5, 'lr_decay': 0.8826801272207728}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  98%|█████████▊| 127/130 [59:57<01:22, 27.64s/it]

Train wi info: {'gini': np.float64(0.9998999999999811), 'ess': np.float64(1.0000000001297864), 'max_wi': np.float64(128.70520555022105), 'min_wi': np.float64(0.0)}
actual reward: [0.08078926]
{'gini': np.float64(0.9995724390659848), 'ess': np.float64(4.091585592771627), 'max_wi': np.float64(8294.435461068952), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.012048776921215268
[I 2025-10-22 20:25:27,493] Trial 126 finished with value: 0.0976918133723319 and parameters: {'lr': 0.08058108944192745, 'num_epochs': 5, 'batch_size': 512, 'num_neighbors': 7, 'lr_decay': 0.894833602895337}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  98%|█████████▊| 128/130 [1:00:26<00:55, 27.83s/it]

Train wi info: {'gini': np.float64(0.9990976216500944), 'ess': np.float64(6.507452601832343), 'max_wi': np.float64(963.0443273132896), 'min_wi': np.float64(9.107055072463448e-19)}
actual reward: [0.08800806]
{'gini': np.float64(0.9975179378492618), 'ess': np.float64(25.83397653389644), 'max_wi': np.float64(690.279131673495), 'min_wi': np.float64(3.1868481286630083e-18)}
Cross-validated error: 0.006206348250818469
[I 2025-10-22 20:25:55,779] Trial 127 finished with value: 0.0725862246718896 and parameters: {'lr': 0.007881543832904079, 'num_epochs': 4, 'batch_size': 128, 'num_neighbors': 4, 'lr_decay': 0.8129239598813308}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427:  99%|█████████▉| 129/130 [1:00:54<00:27, 27.99s/it]

Train wi info: {'gini': np.float64(0.9997014399509085), 'ess': np.float64(3.128837170620676), 'max_wi': np.float64(156.2614383324621), 'min_wi': np.float64(0.0)}
actual reward: [0.07298514]
{'gini': np.float64(0.9998734566498121), 'ess': np.float64(1.177731767069403), 'max_wi': np.float64(3016.762385830918), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.010703116463228662
[I 2025-10-22 20:26:24,126] Trial 128 finished with value: 0.09264067847339756 and parameters: {'lr': 0.06978689345214155, 'num_epochs': 5, 'batch_size': 256, 'num_neighbors': 4, 'lr_decay': 0.8221085987090847}. Best is trial 88 with value: 0.19427010652923055.


Best trial: 88. Best value: 0.19427: 100%|██████████| 130/130 [1:01:21<00:00, 28.32s/it]

Train wi info: {'gini': np.float64(0.9997180011340263), 'ess': np.float64(3.0474725411231103), 'max_wi': np.float64(4441.403071902222), 'min_wi': np.float64(0.0)}
actual reward: [0.07145454]
{'gini': np.float64(0.9996038404270671), 'ess': np.float64(4.251836618120109), 'max_wi': np.float64(1932.1679123161323), 'min_wi': np.float64(0.0)}
Cross-validated error: 0.021929676857756427
[I 2025-10-22 20:26:50,672] Trial 129 finished with value: 0.1261702860964136 and parameters: {'lr': 0.08759063621134215, 'num_epochs': 2, 'batch_size': 64, 'num_neighbors': 8, 'lr_decay': 0.9083805447559369}. Best is trial 88 with value: 0.19427010652923055.





{'gini': np.float64(0.9991224556655617), 'ess': np.float64(9.339304013040678), 'max_wi': np.float64(881.3421724791704), 'min_wi': np.float64(6.091837433869274e-35)}


Unnamed: 0,policy_rewards,ipw,reg_dm,conv_dm,conv_dr,conv_sndr,action_diff_to_real,action_delta,context_diff_to_real,context_delta
0,0.08610747,0.0877,0.08770321,0.09205449,0.08835112,0.08835112,0.7569287,0.0,0.87627132,0.0
10000,0.0860149,0.00153491,0.08484747,0.07907227,-0.00303158,-0.03899329,0.99592933,0.60111132,1.18163272,0.48287483


### Policy with delta function

In [21]:
df4[['policy_rewards', 'ipw', 'reg_dm', 'conv_dm', 'conv_dr', 'conv_sndr', 'action_diff_to_real', 'action_delta', 'context_diff_to_real', 'context_delta']]

Unnamed: 0,policy_rewards,ipw,reg_dm,conv_dm,conv_dr,conv_sndr,action_diff_to_real,action_delta,context_diff_to_real,context_delta
0,0.08610747,0.0877,0.08770321,0.09205449,0.08835112,0.08835112,0.7569287,0.0,0.87627132,0.0
10000,0.0860149,0.00153491,0.08484747,0.07907227,-0.00303158,-0.03899329,0.99592933,0.60111132,1.18163272,0.48287483


### Poicy Via argmax(r_hat - error_hat) through cross validation

In [22]:
df4[['policy_rewards', 'ipw', 'reg_dm', 'conv_dm', 'conv_dr', 'conv_sndr', 'action_diff_to_real', 'action_delta', 'context_diff_to_real', 'context_delta']]

Unnamed: 0,policy_rewards,ipw,reg_dm,conv_dm,conv_dr,conv_sndr,action_diff_to_real,action_delta,context_diff_to_real,context_delta
0,0.08610747,0.0877,0.08770321,0.09205449,0.08835112,0.08835112,0.7569287,0.0,0.87627132,0.0
10000,0.0860149,0.00153491,0.08484747,0.07907227,-0.00303158,-0.03899329,0.99592933,0.60111132,1.18163272,0.48287483


### Policy Via using actual policy value

In [23]:
# Show the performance metrics
df4[['policy_rewards', 'ipw', 'reg_dm', 'conv_dm', 'conv_dr', 'conv_sndr', 'action_diff_to_real', 'action_delta', 'context_diff_to_real', 'context_delta']]


Unnamed: 0,policy_rewards,ipw,reg_dm,conv_dm,conv_dr,conv_sndr,action_diff_to_real,action_delta,context_diff_to_real,context_delta
0,0.08610747,0.0877,0.08770321,0.09205449,0.08835112,0.08835112,0.7569287,0.0,0.87627132,0.0
10000,0.0860149,0.00153491,0.08484747,0.07907227,-0.00303158,-0.03899329,0.99592933,0.60111132,1.18163272,0.48287483
