In [1]:
import warnings
warnings.filterwarnings("ignore")
from copy import deepcopy
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import sys

sys.path.append("/code")

from tqdm import tqdm
import torch
# device = torch.device('cpu')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# import gym
# import recogym

import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim

from sklearn.utils import check_random_state

# implementing OPE of the IPWLearner using synthetic bandit data
from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt

from scipy.special import softmax
import optuna
# from memory_profiler import profile


from estimators import (
    DirectMethod as DM
)

from simulation_utils import (
    eval_policy,
    generate_dataset,
    create_simulation_data_from_pi,
    get_train_data,
    get_opl_results_dict,
    CustomCFDataset,
    calc_reward
)

from models import (    
    CFModel,
    NeighborhoodModel,
    BPRModel, 
    RegressionModel
)

from training_utils import (
    fit_bpr,
    train,
    validation_loop
 )

from custom_losses import (
    SNDRPolicyLoss,
    BPRLoss
    )

random_state=12345
random_ = check_random_state(random_state)

Using device: cpu
Using device: cpu
Using device: cpu


In [17]:
pd.options.display.float_format = '{:,.8f}'.format

## `trainer_trial` Function

This function runs policy learning experiments using offline bandit data and evaluates various estimators.

### Parameters
- **num_runs** (int): Number of experimental runs per training size
- **num_neighbors** (int): Number of neighbors to consider in the neighborhood model
- **num_rounds_list** (list): List of training set sizes to evaluate
- **dataset** (dict): Contains dataset information including embeddings, action probabilities, and reward probabilities
- **batch_size** (int): Batch size for training the policy model
- **num_epochs** (int): Number of training epochs for each experiment
- **lr** (float, default=0.001): Learning rate for the optimizer

### Process Flow
1. Initializes result structures and retrieval models
2. For each training size in `num_rounds_list`:
   - Creates a uniform logging policy and simulates data
   - Generates training data for offline learning
   - Fits regression and neighborhood models for reward estimation
   - Initializes and trains a counterfactual policy model
   - Evaluates policy performance using various estimators
   - Collects metrics on policy reward and embedding quality

### Returns
- **DataFrame**: Results table with rows indexed by training size and columns for various metrics:
  - `policy_rewards`: True expected reward of the learned policy
  - Various estimator errors (`ipw`, `reg_dm`, `conv_dm`, `conv_dr`, `conv_sndr`)
  - Variance metrics for each estimator
  - Embedding quality metrics comparing learned representations to ground truth

### Implementation Notes
- Uses uniform random logging policy for collecting offline data
- Employs Self-Normalized Doubly Robust (SNDR) policy learning
- Measures embedding quality via RMSE to original/ground truth embeddings

In [3]:
def trainer_trial(
                  num_runs,
                  num_neighbors,
                  num_rounds_list,
                  dataset,
                  batch_size,
                  val_size=2000
                  ):
    
    # Define device at the beginning
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    def objective(trial):
    
        # Optuna objective function
        lr = trial.suggest_loguniform("lr", 1e-4, 1e-3)
        epochs = trial.suggest_int("num_epochs", 1, 10)

        trial_neigh_model = NeighborhoodModel(
                                                train_data['x_idx'],
                                                train_data['a'], 
                                                our_a,
                                                our_x, 
                                                train_data['r'], 
                                                num_neighbors=num_neighbors
                                            )
        

        trial_model = CFModel(
                        n_users, 
                        n_actions, 
                        emb_dim, 
                        initial_user_embeddings=torch.tensor(our_x, device=device), 
                        initial_actions_embeddings=torch.tensor(our_a, device=device)
                        )
        
        # Training
        train(trial_model, train_loader, trial_neigh_model, criterion=SNDRPolicyLoss(), num_epochs=epochs, lr=lr, device=device)
        return validation_loop(trial_model, val_loader, trial_neigh_model)
    
    device = torch.device('cpu')

    dm = DM()
    results = {}

    our_x, our_a = dataset["our_x"], dataset["our_a"]
    emb_x, emb_a = dataset["emb_x"], dataset["emb_a"]
    original_x, original_a = dataset["original_x"], dataset["original_a"]
    n_users, n_actions, emb_dim = dataset["n_users"], dataset["n_actions"], dataset["emb_dim"]
    
    first = True

    for train_size in num_rounds_list:
        reg_results, conv_results = [], []
        
        for run in range(num_runs):

            pi_0 = softmax(our_x @ our_a.T, axis=1)
            original_policy_prob = np.expand_dims(pi_0, -1)

            simulation_data = create_simulation_data_from_pi(
                                                            dataset,
                                                            pi_0,
                                                            train_size + val_size,
                                                            random_state=(run + 1) * train_size
                                                            )
            # test_data = get_test_data(dataset, simulation_data, n_test_data)

            # idx = np.arange(train_size) + n_test_data
            idx = np.arange(train_size)

            train_data = get_train_data(n_actions, train_size, simulation_data, idx, our_x)
            val_data = get_train_data(n_actions, val_size, simulation_data, np.arange(val_size) + train_size, our_x)

            regression_model = RegressionModel(
                                    n_actions=n_actions,
                                    action_context=our_x,
                                    base_model=LogisticRegression(random_state=12345)
                                    )
            
            regression_model.fit(
                                train_data['x'], 
                                train_data['a'],
                                train_data['r'],
                                original_policy_prob[train_data['x_idx'],
                                train_data['a']].squeeze()
                                )
            
            neighberhoodmodel = NeighborhoodModel(
                                                    train_data['x_idx'],
                                                    train_data['a'], 
                                                    our_a,
                                                    our_x, 
                                                    train_data['r'], 
                                                    num_neighbors=num_neighbors
                                                )
            

            model = CFModel(
                            n_users, 
                            n_actions, 
                            emb_dim, 
                            initial_user_embeddings=torch.tensor(our_x, device=device), 
                            initial_actions_embeddings=torch.tensor(our_a, device=device)
                            )
            
            cf_dataset =  CustomCFDataset(
                                       train_data['x_idx'], 
                                       train_data['a'], 
                                       train_data['r'], 
                                       original_policy_prob
                                       )
            
            train_loader = DataLoader(cf_dataset, batch_size=batch_size, shuffle=True)

            val_dataset =  CustomCFDataset(
                            val_data['x_idx'], 
                            val_data['a'], 
                            val_data['r'], 
                            original_policy_prob
                            )
            
            val_loader = DataLoader(val_dataset, batch_size=val_size, shuffle=True)

            if first:
                policy = np.expand_dims(softmax(our_x @ our_a.T, axis=1), -1)
                conv_results.append(eval_policy(neighberhoodmodel, train_data, original_policy_prob, policy))
                conv_results[-1] = np.append(calc_reward(dataset, policy), conv_results[-1])
                conv_results[-1] = np.append(conv_results[-1], [np.sqrt(np.mean((emb_a-our_a)**2)), np.sqrt(np.mean((original_a-our_a)**2))])
                conv_results[-1] = np.append(conv_results[-1], [np.sqrt(np.mean((emb_x-our_x)**2)), np.sqrt(np.mean((original_x-our_x)**2))])
                # bpr_scores = bpr_model.calc_scores(torch.tensor(train_data['x_idx'], device=device, dtype=torch.long)).detach().cpu().numpy()
                reg_dm = dm.estimate_policy_value(policy[train_data['x_idx']], regression_model.predict(train_data['x']))
                reg_results.append(reg_dm)
                first = False
                reg_results = np.array(reg_results)
                conv_results = np.array(conv_results)
                results[0] = get_opl_results_dict(reg_results, conv_results)
                reg_results, conv_results = [], []
                
            # Bloss = BPRLoss()
            # bpr_model = BPRModel(
            #                     n_users,
            #                     n_actions,
            #                     emb_x.shape[1], 
            #                     initial_user_embeddings=torch.tensor(our_x, device=device), 
            #                     initial_actions_embeddings=torch.tensor(our_a, device=device)
            #                     )

            study = optuna.create_study(direction="maximize")
            study.optimize(objective, n_trials=10, show_progress_bar=True)
            best_params = study.best_params

            neighberhoodmodel = NeighborhoodModel(
                                                    train_data['x_idx'],
                                                    train_data['a'], 
                                                    our_a,
                                                    our_x, 
                                                    train_data['r'], 
                                                    num_neighbors=num_neighbors
                                                )
            

            model = CFModel(
                            n_users, 
                            n_actions, 
                            emb_dim, 
                            initial_user_embeddings=torch.tensor(our_x, device=device), 
                            initial_actions_embeddings=torch.tensor(our_a, device=device)
                            )

            train(model, train_loader, neighberhoodmodel, criterion=SNDRPolicyLoss(), num_epochs=best_params['num_epochs'], lr=best_params['lr'], device=device)
            # fit_bpr(bpr_model, Bloss, train_loader, num_epochs=3, lr=0.001, device=device)
            # neighborhood_model.update(model.get_params()[0].detach().numpy(), model.get_params()[1].detach().numpy())'

            our_x, our_a = model.get_params()
            our_a, our_x = our_a.detach().cpu().numpy(), our_x.detach().cpu().numpy()

            policy = np.expand_dims(softmax(our_x @ our_a.T, axis=1), -1)

            # bpr_scores = bpr_model.calc_scores(torch.tensor(train_data['x_idx'], device=device, dtype=torch.long)).detach().cpu().numpy()
            # reg_dm = dm.estimate_policy_value(policy[train_data['x_idx']], bpr_scores)
            reg_dm = dm.estimate_policy_value(policy[train_data['x_idx']], regression_model.predict(train_data['x']))

            reg_results.append(reg_dm)

            # conv_results.append(eval_policy(neighberhoodmodel, test_data, original_policy_prob[test_data['x_idx']], policy))
            conv_results.append(eval_policy(neighberhoodmodel, train_data, original_policy_prob, policy))

            conv_results[-1] = np.append(calc_reward(dataset, policy), conv_results[-1])
            conv_results[-1] = np.append(conv_results[-1], [np.sqrt(np.mean((emb_a-our_a)**2)), np.sqrt(np.mean((original_a-our_a)**2))])

            # temp.append(np.mean((emb_a-our_a)**2, axis=0))

            conv_results[-1] = np.append(conv_results[-1], [np.sqrt(np.mean((emb_x-our_x)**2)), np.sqrt(np.mean((original_x-our_x)**2))])
            
            our_a, our_x = original_a.copy(), original_x.copy()

        reg_results = np.array(reg_results)
        conv_results = np.array(conv_results)

        results[train_size] = get_opl_results_dict(reg_results, conv_results)
    
    return pd.DataFrame.from_dict(results, orient='index')

## Learning

We will run several simulations on a generated dataset, the dataset is generated like this:
$$ \text{We have users U and actions A } u_i \sim N(0, I_{emb_dim}) \ a_i \sim N(0, I_{emb_dim})$$
$$ p_{ij} = 1 / (5 + e^{-(u_i.T a_j)}) $$
$$r_{ij} \sim Bin(p_{ij})$$

We have a policy $\pi$
and it's ground truth reward is calculated by
$$R_{gt} = \sum_{i}{\sum_{j}{\pi_{ij} * p_{ij}}} $$

Our parameters for the dataset will be
$$EmbDim = 5$$
$$NumActions= 150$$
$$NumUsers = 150$$
$$NeighborhoodSize = 6$$

to learn a new policy from $\pi$ we will sample from:
$$\pi_{start} = (1-\epsilon)*\pi + \epsilon * \pi_{random}$$

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Using device: cpu


In [5]:
num_runs = 1

In [6]:
dataset_params = dict(
                    n_actions= 500,
                    n_users = 500,
                    emb_dim = 4,
                    # sigma = 0.1,
                    eps = 0.2, # this is the epsilon for the noise in the ground truth policy representation
                    ctr = 0.02
                    )

train_dataset = generate_dataset(dataset_params)

CTR: 0.01861246403332251


In [7]:
train_dataset.keys()

dict_keys(['emb_a', 'our_a', 'original_a', 'emb_x', 'our_x', 'original_x', 'q_x_a', 'n_actions', 'n_users', 'emb_dim', 'user_prior'])

In [8]:
num_runs = 1
batch_size = 200
num_neighbors = 51
num_rounds_list = [30000, 60000, 80000, 90000]

### 1

$$emb = 0.7 * gt + 0.3 * noise$$
$$lr = 0.005$$
$$n_{epochs} = 1$$
$$BatchSize=50$$

In [9]:
df4 = trainer_trial(num_runs, num_neighbors, num_rounds_list, train_dataset, batch_size, val_size=35000)

[I 2025-08-18 18:19:04,422] A new study created in memory with name: no-name-37065847-3816-4ca5-88de-f8fbac65bbee
Best trial: 0. Best value: 0.0104472:  10%|█         | 1/10 [01:26<13:02, 86.91s/it]

[I 2025-08-18 18:20:31,332] Trial 0 finished with value: 0.010447228179581563 and parameters: {'lr': 0.0007220985987695284, 'num_epochs': 3}. Best is trial 0 with value: 0.010447228179581563.


Best trial: 1. Best value: 0.0105225:  20%|██        | 2/10 [02:54<11:38, 87.31s/it]

[I 2025-08-18 18:21:58,925] Trial 1 finished with value: 0.01052248297019858 and parameters: {'lr': 0.0002005232834880863, 'num_epochs': 3}. Best is trial 1 with value: 0.01052248297019858.


Best trial: 1. Best value: 0.0105225:  30%|███       | 3/10 [04:19<10:02, 86.03s/it]

[I 2025-08-18 18:23:23,427] Trial 2 finished with value: 0.01046908944460727 and parameters: {'lr': 0.0006284204983139526, 'num_epochs': 3}. Best is trial 1 with value: 0.01052248297019858.


Best trial: 1. Best value: 0.0105225:  40%|████      | 4/10 [05:48<08:45, 87.52s/it]

[I 2025-08-18 18:24:53,225] Trial 3 finished with value: 0.010489572087838335 and parameters: {'lr': 0.00019170569649793018, 'num_epochs': 8}. Best is trial 1 with value: 0.01052248297019858.


Best trial: 1. Best value: 0.0105225:  50%|█████     | 5/10 [07:17<07:18, 87.77s/it]

[I 2025-08-18 18:26:21,431] Trial 4 finished with value: 0.01044950500797998 and parameters: {'lr': 0.0005592967658161585, 'num_epochs': 4}. Best is trial 1 with value: 0.01052248297019858.


Best trial: 1. Best value: 0.0105225:  60%|██████    | 6/10 [08:48<05:56, 89.10s/it]

[I 2025-08-18 18:27:53,112] Trial 5 finished with value: 0.008424056386649758 and parameters: {'lr': 0.0009350032312405828, 'num_epochs': 9}. Best is trial 1 with value: 0.01052248297019858.


Best trial: 1. Best value: 0.0105225:  70%|███████   | 7/10 [10:14<04:24, 88.15s/it]

[I 2025-08-18 18:29:19,304] Trial 6 finished with value: 0.010195819613953949 and parameters: {'lr': 0.000978353986973095, 'num_epochs': 5}. Best is trial 1 with value: 0.01052248297019858.


Best trial: 1. Best value: 0.0105225:  80%|████████  | 8/10 [11:51<03:01, 90.84s/it]

[I 2025-08-18 18:30:55,920] Trial 7 finished with value: 0.010492068769488414 and parameters: {'lr': 0.00030431512360881116, 'num_epochs': 7}. Best is trial 1 with value: 0.01052248297019858.


Best trial: 1. Best value: 0.0105225:  90%|█████████ | 9/10 [13:19<01:29, 89.87s/it]

[I 2025-08-18 18:32:23,662] Trial 8 finished with value: 0.010481435209909351 and parameters: {'lr': 0.0003703508499325048, 'num_epochs': 5}. Best is trial 1 with value: 0.01052248297019858.


Best trial: 1. Best value: 0.0105225: 100%|██████████| 10/10 [14:45<00:00, 88.56s/it]


[I 2025-08-18 18:33:50,015] Trial 9 finished with value: 0.010481128900979773 and parameters: {'lr': 0.00030338797328410865, 'num_epochs': 6}. Best is trial 1 with value: 0.01052248297019858.


[I 2025-08-18 18:37:59,863] A new study created in memory with name: no-name-2d008a50-1052-4412-99c6-19d101c2070a
Best trial: 0. Best value: 0.0114684:  10%|█         | 1/10 [02:57<26:37, 177.46s/it]

[I 2025-08-18 18:40:57,323] Trial 0 finished with value: 0.011468363362361357 and parameters: {'lr': 0.00041422691378256536, 'num_epochs': 7}. Best is trial 0 with value: 0.011468363362361357.


Best trial: 1. Best value: 0.0114883:  20%|██        | 2/10 [05:54<23:37, 177.24s/it]

[I 2025-08-18 18:43:54,403] Trial 1 finished with value: 0.01148826953125163 and parameters: {'lr': 0.000261233902162331, 'num_epochs': 7}. Best is trial 1 with value: 0.01148826953125163.


Best trial: 1. Best value: 0.0114883:  30%|███       | 3/10 [09:19<22:09, 189.97s/it]

[I 2025-08-18 18:47:19,529] Trial 2 finished with value: 0.011450951131199646 and parameters: {'lr': 0.00010411675508995945, 'num_epochs': 9}. Best is trial 1 with value: 0.01148826953125163.


Best trial: 1. Best value: 0.0114883:  40%|████      | 4/10 [12:06<18:04, 180.83s/it]

[I 2025-08-18 18:50:06,348] Trial 3 finished with value: 0.011471780197583922 and parameters: {'lr': 0.00037650426765500856, 'num_epochs': 3}. Best is trial 1 with value: 0.01148826953125163.


Best trial: 1. Best value: 0.0114883:  50%|█████     | 5/10 [15:07<15:03, 180.74s/it]

[I 2025-08-18 18:53:06,915] Trial 4 finished with value: 0.011354944085730807 and parameters: {'lr': 0.00037575625764026214, 'num_epochs': 9}. Best is trial 1 with value: 0.01148826953125163.


Best trial: 1. Best value: 0.0114883:  60%|██████    | 6/10 [18:08<12:03, 180.91s/it]

[I 2025-08-18 18:56:08,174] Trial 5 finished with value: -0.004704136137864939 and parameters: {'lr': 0.0008081416235007266, 'num_epochs': 10}. Best is trial 1 with value: 0.01148826953125163.


Best trial: 1. Best value: 0.0114883:  70%|███████   | 7/10 [20:55<08:48, 176.27s/it]

[I 2025-08-18 18:58:54,896] Trial 6 finished with value: 0.011442371495561907 and parameters: {'lr': 0.00017721330317863, 'num_epochs': 2}. Best is trial 1 with value: 0.01148826953125163.


Best trial: 1. Best value: 0.0114883:  80%|████████  | 8/10 [23:52<05:53, 176.68s/it]

[I 2025-08-18 19:01:52,443] Trial 7 finished with value: 0.011446249350653826 and parameters: {'lr': 0.0001074173385752095, 'num_epochs': 7}. Best is trial 1 with value: 0.01148826953125163.


Best trial: 1. Best value: 0.0114883:  90%|█████████ | 9/10 [26:40<02:54, 174.05s/it]

[I 2025-08-18 19:04:40,722] Trial 8 finished with value: 0.01146017494342135 and parameters: {'lr': 0.0004286188819262623, 'num_epochs': 2}. Best is trial 1 with value: 0.01148826953125163.


Best trial: 1. Best value: 0.0114883: 100%|██████████| 10/10 [29:27<00:00, 176.79s/it]


[I 2025-08-18 19:07:27,737] Trial 9 finished with value: 0.011445608897624195 and parameters: {'lr': 0.0004861499689573691, 'num_epochs': 1}. Best is trial 1 with value: 0.01148826953125163.


[I 2025-08-18 19:14:02,812] A new study created in memory with name: no-name-96563dd3-47fd-4544-806b-6c80f4d8f09d
Best trial: 0. Best value: 0.0123436:  10%|█         | 1/10 [03:44<33:36, 224.09s/it]

[I 2025-08-18 19:17:46,905] Trial 0 finished with value: 0.0123436154566217 and parameters: {'lr': 0.0007528868567995917, 'num_epochs': 3}. Best is trial 0 with value: 0.0123436154566217.


Best trial: 1. Best value: 0.0127178:  20%|██        | 2/10 [07:34<30:24, 228.09s/it]

[I 2025-08-18 19:21:37,791] Trial 1 finished with value: 0.012717771399580705 and parameters: {'lr': 0.00017244110731018335, 'num_epochs': 5}. Best is trial 1 with value: 0.012717771399580705.


Best trial: 1. Best value: 0.0127178:  30%|███       | 3/10 [11:32<27:07, 232.47s/it]

[I 2025-08-18 19:25:35,477] Trial 2 finished with value: 0.012711358726446098 and parameters: {'lr': 0.00017561109664022528, 'num_epochs': 5}. Best is trial 1 with value: 0.012717771399580705.


Best trial: 1. Best value: 0.0127178:  40%|████      | 4/10 [15:39<23:49, 238.26s/it]

[I 2025-08-18 19:29:42,621] Trial 3 finished with value: -0.006517469895647814 and parameters: {'lr': 0.0006988112176710386, 'num_epochs': 9}. Best is trial 1 with value: 0.012717771399580705.


Best trial: 1. Best value: 0.0127178:  50%|█████     | 5/10 [19:34<19:44, 237.00s/it]

[I 2025-08-18 19:33:37,370] Trial 4 finished with value: 0.01255373816326426 and parameters: {'lr': 0.0005037550629611359, 'num_epochs': 3}. Best is trial 1 with value: 0.012717771399580705.


Best trial: 1. Best value: 0.0127178:  60%|██████    | 6/10 [23:28<15:44, 236.02s/it]

[I 2025-08-18 19:37:31,493] Trial 5 finished with value: 0.012488410927048468 and parameters: {'lr': 0.00033549721229487713, 'num_epochs': 6}. Best is trial 1 with value: 0.012717771399580705.


Best trial: 1. Best value: 0.0127178:  70%|███████   | 7/10 [27:31<11:54, 238.11s/it]

[I 2025-08-18 19:41:33,898] Trial 6 finished with value: -0.002719002543735024 and parameters: {'lr': 0.0005537814661552874, 'num_epochs': 10}. Best is trial 1 with value: 0.012717771399580705.


Best trial: 1. Best value: 0.0127178:  80%|████████  | 8/10 [31:31<07:57, 238.69s/it]

[I 2025-08-18 19:45:33,837] Trial 7 finished with value: -0.007728679140203469 and parameters: {'lr': 0.000700137749637953, 'num_epochs': 10}. Best is trial 1 with value: 0.012717771399580705.


Best trial: 1. Best value: 0.0127178:  90%|█████████ | 9/10 [35:24<03:57, 237.07s/it]

[I 2025-08-18 19:49:27,348] Trial 8 finished with value: 0.0032897019831387778 and parameters: {'lr': 0.0005463658265822983, 'num_epochs': 8}. Best is trial 1 with value: 0.012717771399580705.


Best trial: 1. Best value: 0.0127178: 100%|██████████| 10/10 [39:04<00:00, 234.44s/it]


[I 2025-08-18 19:53:07,162] Trial 9 finished with value: 0.012647729727042216 and parameters: {'lr': 0.0006985569566805721, 'num_epochs': 2}. Best is trial 1 with value: 0.012717771399580705.


[I 2025-08-18 20:01:18,810] A new study created in memory with name: no-name-3205bdf1-d8a4-4314-83bd-f33ed97f817e
Best trial: 0. Best value: -0.00323314:  10%|█         | 1/10 [04:32<40:49, 272.15s/it]

[I 2025-08-18 20:05:50,962] Trial 0 finished with value: -0.003233143552429093 and parameters: {'lr': 0.00048656051548946023, 'num_epochs': 10}. Best is trial 0 with value: -0.003233143552429093.


Best trial: 1. Best value: 0.012939:  20%|██        | 2/10 [08:43<34:38, 259.87s/it]   

[I 2025-08-18 20:10:02,237] Trial 1 finished with value: 0.012939041251138334 and parameters: {'lr': 0.0005124730757184277, 'num_epochs': 1}. Best is trial 1 with value: 0.012939041251138334.


Best trial: 1. Best value: 0.012939:  30%|███       | 3/10 [13:05<30:26, 260.91s/it]

[I 2025-08-18 20:14:24,389] Trial 2 finished with value: 0.01283812048818964 and parameters: {'lr': 0.00021852006552288848, 'num_epochs': 4}. Best is trial 1 with value: 0.012939041251138334.


Best trial: 1. Best value: 0.012939:  40%|████      | 4/10 [17:41<26:40, 266.82s/it]

[I 2025-08-18 20:19:00,255] Trial 3 finished with value: 7.179661606620744e-05 and parameters: {'lr': 0.0005368508587377182, 'num_epochs': 8}. Best is trial 1 with value: 0.012939041251138334.


Best trial: 1. Best value: 0.012939:  50%|█████     | 5/10 [22:20<22:35, 271.05s/it]

[I 2025-08-18 20:23:38,812] Trial 4 finished with value: 0.0011262813142672128 and parameters: {'lr': 0.0004115804684839092, 'num_epochs': 10}. Best is trial 1 with value: 0.012939041251138334.


Best trial: 1. Best value: 0.012939:  60%|██████    | 6/10 [26:49<18:02, 270.64s/it]

[I 2025-08-18 20:28:08,641] Trial 5 finished with value: 0.012569974486180448 and parameters: {'lr': 0.0001518513971158212, 'num_epochs': 8}. Best is trial 1 with value: 0.012939041251138334.


Best trial: 6. Best value: 0.0129829:  70%|███████   | 7/10 [31:05<13:17, 265.85s/it]

[I 2025-08-18 20:32:24,644] Trial 6 finished with value: 0.012982906051217446 and parameters: {'lr': 0.000117520734228493, 'num_epochs': 1}. Best is trial 6 with value: 0.012982906051217446.


Best trial: 6. Best value: 0.0129829:  80%|████████  | 8/10 [35:28<08:49, 264.76s/it]

[I 2025-08-18 20:36:47,068] Trial 7 finished with value: 0.01297829090891596 and parameters: {'lr': 0.00018171657505336683, 'num_epochs': 1}. Best is trial 6 with value: 0.012982906051217446.


Best trial: 6. Best value: 0.0129829:  90%|█████████ | 9/10 [39:57<04:26, 266.26s/it]

[I 2025-08-18 20:41:16,631] Trial 8 finished with value: 0.012420259294756687 and parameters: {'lr': 0.0001831293610935004, 'num_epochs': 8}. Best is trial 6 with value: 0.012982906051217446.


Best trial: 6. Best value: 0.0129829: 100%|██████████| 10/10 [44:22<00:00, 266.22s/it]


[I 2025-08-18 20:45:41,026] Trial 9 finished with value: 0.011867784048141252 and parameters: {'lr': 0.00024321546575788123, 'num_epochs': 8}. Best is trial 6 with value: 0.012982906051217446.


In [18]:
df4[['policy_rewards', 'ipw', 'reg_dm', 'conv_dm', 'conv_dr', 'conv_sndr', 'action_diff_to_real', 'action_delta', 'context_diff_to_real', 'context_delta']]

Unnamed: 0,policy_rewards,ipw,reg_dm,conv_dm,conv_dr,conv_sndr,action_diff_to_real,action_delta,context_diff_to_real,context_delta
0,0.01978745,0.01935893,0.01978458,0.01945779,0.01965838,0.02030646,0.27840712,0.0,0.23742245,0.0
30000,0.01978824,0.01933884,0.01978477,0.01945753,0.01965032,0.02027674,0.27723678,0.00853207,0.2375129,0.00710262
60000,0.01979021,0.02009584,0.01951256,0.01944882,0.01957111,0.01997048,0.27598104,0.04029597,0.2401879,0.03518278
80000,0.01978966,0.01938873,0.01942306,0.0196211,0.01951601,0.01917503,0.27621442,0.02002988,0.23755858,0.0178121
90000,0.0197877,0.01888049,0.01933169,0.01926089,0.01918405,0.01893465,0.27808682,0.00276144,0.23740673,0.00245908


In [19]:
df4

Unnamed: 0,policy_rewards,ipw,reg_dm,conv_dm,conv_dr,conv_sndr,ipw_var,reg_dm_var,conv_dm_var,conv_dr_var,conv_sndr_var,action_diff_to_real,action_delta,context_diff_to_real,context_delta
0,0.01978745,0.01935893,0.01978458,0.01945779,0.01965838,0.02030646,0.0,0.0,0.0,0.0,0.0,0.27840712,0.0,0.23742245,0.0
30000,0.01978824,0.01933884,0.01978477,0.01945753,0.01965032,0.02027674,0.0,0.0,0.0,0.0,0.0,0.27723678,0.00853207,0.2375129,0.00710262
60000,0.01979021,0.02009584,0.01951256,0.01944882,0.01957111,0.01997048,0.0,0.0,0.0,0.0,0.0,0.27598104,0.04029597,0.2401879,0.03518278
80000,0.01978966,0.01938873,0.01942306,0.0196211,0.01951601,0.01917503,0.0,0.0,0.0,0.0,0.0,0.27621442,0.02002988,0.23755858,0.0178121
90000,0.0197877,0.01888049,0.01933169,0.01926089,0.01918405,0.01893465,0.0,0.0,0.0,0.0,0.0,0.27808682,0.00276144,0.23740673,0.00245908


In [12]:
num_rounds_list = [3000, 6000, 8000, 9000]

### 2

$$emb = 0.7 * gt + 0.3 * noise$$
$$lr = 0.001$$
$$n_{epochs} = 1$$
$$BatchSize=50$$

In [13]:
df5 = trainer_trial(num_runs, num_neighbors, num_rounds_list, train_dataset, batch_size, val_size=3500)

[I 2025-08-18 20:50:01,245] A new study created in memory with name: no-name-5bf582bb-8c04-4208-9d37-1fad2006ccda
Best trial: 0. Best value: 0.00553381:  10%|█         | 1/10 [00:07<01:08,  7.63s/it]

[I 2025-08-18 20:50:08,873] Trial 0 finished with value: 0.00553381045182111 and parameters: {'lr': 0.0002062836169406708, 'num_epochs': 9}. Best is trial 0 with value: 0.00553381045182111.


Best trial: 1. Best value: 0.00554369:  20%|██        | 2/10 [00:15<01:01,  7.64s/it]

[I 2025-08-18 20:50:16,517] Trial 1 finished with value: 0.0055436879852006245 and parameters: {'lr': 0.0006463335099062945, 'num_epochs': 7}. Best is trial 1 with value: 0.0055436879852006245.


Best trial: 1. Best value: 0.00554369:  30%|███       | 3/10 [00:23<00:53,  7.71s/it]

[I 2025-08-18 20:50:24,318] Trial 2 finished with value: 0.005528572625061375 and parameters: {'lr': 0.0002490207592402988, 'num_epochs': 10}. Best is trial 1 with value: 0.0055436879852006245.


Best trial: 1. Best value: 0.00554369:  40%|████      | 4/10 [00:30<00:46,  7.75s/it]

[I 2025-08-18 20:50:32,115] Trial 3 finished with value: 0.005506084919011429 and parameters: {'lr': 0.0006385983102098735, 'num_epochs': 8}. Best is trial 1 with value: 0.0055436879852006245.


Best trial: 1. Best value: 0.00554369:  50%|█████     | 5/10 [00:38<00:38,  7.72s/it]

[I 2025-08-18 20:50:39,788] Trial 4 finished with value: 0.005525160054148085 and parameters: {'lr': 0.00040568477182269545, 'num_epochs': 9}. Best is trial 1 with value: 0.0055436879852006245.


Best trial: 5. Best value: 0.00554638:  60%|██████    | 6/10 [00:45<00:30,  7.62s/it]

[I 2025-08-18 20:50:47,200] Trial 5 finished with value: 0.005546379978768019 and parameters: {'lr': 0.000167728941173376, 'num_epochs': 6}. Best is trial 5 with value: 0.005546379978768019.


Best trial: 6. Best value: 0.00554812:  70%|███████   | 7/10 [00:53<00:22,  7.52s/it]

[I 2025-08-18 20:50:54,527] Trial 6 finished with value: 0.0055481183719909045 and parameters: {'lr': 0.00022734051108966975, 'num_epochs': 2}. Best is trial 6 with value: 0.0055481183719909045.


Best trial: 6. Best value: 0.00554812:  80%|████████  | 8/10 [01:01<00:15,  7.60s/it]

[I 2025-08-18 20:51:02,285] Trial 7 finished with value: 0.005527073363670863 and parameters: {'lr': 0.00031066125033661136, 'num_epochs': 9}. Best is trial 6 with value: 0.0055481183719909045.


Best trial: 6. Best value: 0.00554812:  90%|█████████ | 9/10 [01:08<00:07,  7.54s/it]

[I 2025-08-18 20:51:09,707] Trial 8 finished with value: 0.005547055552696068 and parameters: {'lr': 0.0002233698090405717, 'num_epochs': 1}. Best is trial 6 with value: 0.0055481183719909045.


Best trial: 6. Best value: 0.00554812: 100%|██████████| 10/10 [01:16<00:00,  7.62s/it]


[I 2025-08-18 20:51:17,420] Trial 9 finished with value: 0.005542880214597465 and parameters: {'lr': 0.00019495639362649187, 'num_epochs': 7}. Best is trial 6 with value: 0.0055481183719909045.


[I 2025-08-18 20:51:39,948] A new study created in memory with name: no-name-a577011e-76d6-411d-9181-458f00b03c51
Best trial: 0. Best value: 0.0113263:  10%|█         | 1/10 [00:16<02:27, 16.40s/it]

[I 2025-08-18 20:51:56,343] Trial 0 finished with value: 0.01132632426291666 and parameters: {'lr': 0.0006572479877900696, 'num_epochs': 10}. Best is trial 0 with value: 0.01132632426291666.


Best trial: 1. Best value: 0.0113631:  20%|██        | 2/10 [00:31<02:06, 15.78s/it]

[I 2025-08-18 20:52:11,694] Trial 1 finished with value: 0.011363136649272295 and parameters: {'lr': 0.0008390680445791088, 'num_epochs': 2}. Best is trial 1 with value: 0.011363136649272295.


Best trial: 1. Best value: 0.0113631:  30%|███       | 3/10 [00:48<01:52, 16.09s/it]

[I 2025-08-18 20:52:28,158] Trial 2 finished with value: 0.0113592642549369 and parameters: {'lr': 0.00011199149976968738, 'num_epochs': 10}. Best is trial 1 with value: 0.011363136649272295.


Best trial: 1. Best value: 0.0113631:  40%|████      | 4/10 [01:04<01:36, 16.05s/it]

[I 2025-08-18 20:52:44,149] Trial 3 finished with value: 0.011354149374852586 and parameters: {'lr': 0.0003012144086048595, 'num_epochs': 9}. Best is trial 1 with value: 0.011363136649272295.


Best trial: 1. Best value: 0.0113631:  50%|█████     | 5/10 [01:19<01:19, 15.84s/it]

[I 2025-08-18 20:52:59,619] Trial 4 finished with value: 0.011357472889256552 and parameters: {'lr': 0.000353603549313326, 'num_epochs': 3}. Best is trial 1 with value: 0.011363136649272295.


Best trial: 1. Best value: 0.0113631:  60%|██████    | 6/10 [01:35<01:03, 15.96s/it]

[I 2025-08-18 20:53:15,798] Trial 5 finished with value: 0.011356215782785574 and parameters: {'lr': 0.0001304269258138277, 'num_epochs': 8}. Best is trial 1 with value: 0.011363136649272295.


Best trial: 1. Best value: 0.0113631:  70%|███████   | 7/10 [01:50<00:47, 15.69s/it]

[I 2025-08-18 20:53:30,943] Trial 6 finished with value: 0.011360881802087877 and parameters: {'lr': 0.00030281195941906297, 'num_epochs': 2}. Best is trial 1 with value: 0.011363136649272295.


Best trial: 1. Best value: 0.0113631:  80%|████████  | 8/10 [02:07<00:31, 15.81s/it]

[I 2025-08-18 20:53:47,008] Trial 7 finished with value: 0.011352529697422712 and parameters: {'lr': 0.00021877461172961605, 'num_epochs': 9}. Best is trial 1 with value: 0.011363136649272295.


Best trial: 1. Best value: 0.0113631:  90%|█████████ | 9/10 [02:23<00:15, 15.90s/it]

[I 2025-08-18 20:54:03,120] Trial 8 finished with value: 0.01135861146579972 and parameters: {'lr': 0.0001267846075203842, 'num_epochs': 9}. Best is trial 1 with value: 0.011363136649272295.


Best trial: 1. Best value: 0.0113631: 100%|██████████| 10/10 [02:39<00:00, 15.91s/it]


[I 2025-08-18 20:54:19,017] Trial 9 finished with value: 0.011352289526021875 and parameters: {'lr': 0.0002359924198937373, 'num_epochs': 7}. Best is trial 1 with value: 0.011363136649272295.


[I 2025-08-18 20:54:56,008] A new study created in memory with name: no-name-b74bd860-13f4-458a-a4ff-9777a1fb7aa2
Best trial: 0. Best value: 0.00875649:  10%|█         | 1/10 [00:22<03:21, 22.35s/it]

[I 2025-08-18 20:55:18,357] Trial 0 finished with value: 0.008756485082865587 and parameters: {'lr': 0.0005078098051750882, 'num_epochs': 4}. Best is trial 0 with value: 0.008756485082865587.


Best trial: 1. Best value: 0.00875765:  20%|██        | 2/10 [00:44<02:58, 22.26s/it]

[I 2025-08-18 20:55:40,557] Trial 1 finished with value: 0.008757651597321052 and parameters: {'lr': 0.00020109743750261835, 'num_epochs': 8}. Best is trial 1 with value: 0.008757651597321052.


Best trial: 1. Best value: 0.00875765:  30%|███       | 3/10 [01:06<02:35, 22.20s/it]

[I 2025-08-18 20:56:02,688] Trial 2 finished with value: 0.008748876568047275 and parameters: {'lr': 0.0002276958487096682, 'num_epochs': 8}. Best is trial 1 with value: 0.008757651597321052.


Best trial: 1. Best value: 0.00875765:  40%|████      | 4/10 [01:29<02:15, 22.59s/it]

[I 2025-08-18 20:56:25,877] Trial 3 finished with value: 0.00874055866780808 and parameters: {'lr': 0.00011349493703749322, 'num_epochs': 10}. Best is trial 1 with value: 0.008757651597321052.


Best trial: 1. Best value: 0.00875765:  50%|█████     | 5/10 [01:51<01:51, 22.39s/it]

[I 2025-08-18 20:56:47,915] Trial 4 finished with value: 0.008746627364115092 and parameters: {'lr': 0.00019551409002211445, 'num_epochs': 8}. Best is trial 1 with value: 0.008757651597321052.


Best trial: 1. Best value: 0.00875765:  60%|██████    | 6/10 [02:12<01:27, 21.89s/it]

[I 2025-08-18 20:57:08,829] Trial 5 finished with value: 0.00873897160139768 and parameters: {'lr': 0.0003623258090935903, 'num_epochs': 1}. Best is trial 1 with value: 0.008757651597321052.


Best trial: 1. Best value: 0.00875765:  70%|███████   | 7/10 [02:34<01:05, 21.81s/it]

[I 2025-08-18 20:57:30,463] Trial 6 finished with value: 0.008744785804000088 and parameters: {'lr': 0.0006450379153322616, 'num_epochs': 5}. Best is trial 1 with value: 0.008757651597321052.


Best trial: 7. Best value: 0.00875948:  80%|████████  | 8/10 [02:56<00:43, 21.97s/it]

[I 2025-08-18 20:57:52,795] Trial 7 finished with value: 0.008759475089715188 and parameters: {'lr': 0.000312351861491277, 'num_epochs': 9}. Best is trial 7 with value: 0.008759475089715188.


Best trial: 8. Best value: 0.00876758:  90%|█████████ | 9/10 [03:19<00:22, 22.13s/it]

[I 2025-08-18 20:58:15,257] Trial 8 finished with value: 0.008767584653394398 and parameters: {'lr': 0.0003363799993788356, 'num_epochs': 9}. Best is trial 8 with value: 0.008767584653394398.


Best trial: 8. Best value: 0.00876758: 100%|██████████| 10/10 [03:41<00:00, 22.14s/it]


[I 2025-08-18 20:58:37,384] Trial 9 finished with value: 0.008754409618250776 and parameters: {'lr': 0.00036854602511769116, 'num_epochs': 7}. Best is trial 8 with value: 0.008767584653394398.


[I 2025-08-18 20:59:24,321] A new study created in memory with name: no-name-65cf1841-b9ca-4f1d-917a-5fb66c1b6277
Best trial: 0. Best value: 0.0093453:  10%|█         | 1/10 [00:23<03:33, 23.76s/it]

[I 2025-08-18 20:59:48,085] Trial 0 finished with value: 0.009345297501601743 and parameters: {'lr': 0.00014999221191935785, 'num_epochs': 1}. Best is trial 0 with value: 0.009345297501601743.


Best trial: 1. Best value: 0.00936498:  20%|██        | 2/10 [00:48<03:14, 24.36s/it]

[I 2025-08-18 21:00:12,869] Trial 1 finished with value: 0.00936498114693873 and parameters: {'lr': 0.0001598654511815398, 'num_epochs': 6}. Best is trial 1 with value: 0.00936498114693873.


Best trial: 2. Best value: 0.00937063:  30%|███       | 3/10 [01:13<02:52, 24.63s/it]

[I 2025-08-18 21:00:37,816] Trial 2 finished with value: 0.00937063337656717 and parameters: {'lr': 0.00016074835424644213, 'num_epochs': 7}. Best is trial 2 with value: 0.00937063337656717.


Best trial: 2. Best value: 0.00937063:  40%|████      | 4/10 [01:37<02:25, 24.28s/it]

[I 2025-08-18 21:01:01,555] Trial 3 finished with value: 0.00935276891604691 and parameters: {'lr': 0.0007281874297108259, 'num_epochs': 1}. Best is trial 2 with value: 0.00937063337656717.


Best trial: 2. Best value: 0.00937063:  50%|█████     | 5/10 [02:01<02:01, 24.30s/it]

[I 2025-08-18 21:01:25,899] Trial 4 finished with value: 0.00935765399353709 and parameters: {'lr': 0.00013127960626581095, 'num_epochs': 4}. Best is trial 2 with value: 0.00937063337656717.


Best trial: 2. Best value: 0.00937063:  60%|██████    | 6/10 [02:25<01:36, 24.14s/it]

[I 2025-08-18 21:01:49,725] Trial 5 finished with value: 0.009352653037241886 and parameters: {'lr': 0.0001929606590974913, 'num_epochs': 3}. Best is trial 2 with value: 0.00937063337656717.


Best trial: 2. Best value: 0.00937063:  70%|███████   | 7/10 [02:48<01:11, 23.90s/it]

[I 2025-08-18 21:02:13,118] Trial 6 finished with value: 0.009361040054947723 and parameters: {'lr': 0.0004773569856727589, 'num_epochs': 2}. Best is trial 2 with value: 0.00937063337656717.


Best trial: 2. Best value: 0.00937063:  80%|████████  | 8/10 [03:13<00:48, 24.10s/it]

[I 2025-08-18 21:02:37,657] Trial 7 finished with value: 0.009358228524662995 and parameters: {'lr': 0.00011333756556964095, 'num_epochs': 6}. Best is trial 2 with value: 0.00937063337656717.


Best trial: 8. Best value: 0.00939558:  90%|█████████ | 9/10 [03:37<00:24, 24.18s/it]

[I 2025-08-18 21:03:02,019] Trial 8 finished with value: 0.009395580579119099 and parameters: {'lr': 0.000402505762171222, 'num_epochs': 5}. Best is trial 8 with value: 0.009395580579119099.


Best trial: 8. Best value: 0.00939558: 100%|██████████| 10/10 [04:01<00:00, 24.16s/it]


[I 2025-08-18 21:03:25,939] Trial 9 finished with value: 0.009373266705966847 and parameters: {'lr': 0.0006123613550065155, 'num_epochs': 1}. Best is trial 8 with value: 0.009395580579119099.


In [20]:
df5

Unnamed: 0,policy_rewards,ipw,reg_dm,conv_dm,conv_dr,conv_sndr,ipw_var,reg_dm_var,conv_dm_var,conv_dr_var,conv_sndr_var,action_diff_to_real,action_delta,context_diff_to_real,context_delta
0,0.01978745,0.01964856,0.01969683,0.01992051,0.01994549,0.02002184,0.0,0.0,0.0,0.0,0.0,0.27840712,0.0,0.23742245,0.0
3000,0.01978779,0.01965154,0.01969771,0.01992277,0.01994686,0.02002081,0.0,0.0,0.0,0.0,0.0,0.27785089,0.00248316,0.2373999,0.00153581
6000,0.01978885,0.02199032,0.02274482,0.02273408,0.02284151,0.0231775,0.0,0.0,0.0,0.0,0.0,0.27619045,0.01298967,0.23773052,0.0088275
8000,0.01979069,0.02177713,0.02005721,0.02016991,0.02057855,0.02193945,0.0,0.0,0.0,0.0,0.0,0.27391127,0.02429254,0.23816576,0.01751989
9000,0.01978977,0.02362445,0.02127779,0.02114514,0.02142483,0.02234303,0.0,0.0,0.0,0.0,0.0,0.27558966,0.0166072,0.23741756,0.01266007


### 3

$$emb = 0.7 * gt + 0.3 * noise$$
$$lr = 0.003$$
$$n_{epochs} = 10$$
$$BatchSize=50$$

In [15]:
df6 = trainer_trial(num_runs, num_neighbors, num_rounds_list, train_dataset, batch_size)

[I 2025-08-18 21:03:57,328] A new study created in memory with name: no-name-bfa9bf7d-151c-4705-87f9-e0dc42e00dbf
Best trial: 0. Best value: 0.0125066:  10%|█         | 1/10 [00:07<01:05,  7.24s/it]

[I 2025-08-18 21:04:04,570] Trial 0 finished with value: 0.012506563033600631 and parameters: {'lr': 0.0007602033817158572, 'num_epochs': 6}. Best is trial 0 with value: 0.012506563033600631.


Best trial: 1. Best value: 0.0125324:  20%|██        | 2/10 [00:14<00:58,  7.25s/it]

[I 2025-08-18 21:04:11,832] Trial 1 finished with value: 0.01253241233339062 and parameters: {'lr': 0.00038428445979786947, 'num_epochs': 4}. Best is trial 1 with value: 0.01253241233339062.


Best trial: 1. Best value: 0.0125324:  30%|███       | 3/10 [00:21<00:51,  7.36s/it]

[I 2025-08-18 21:04:19,322] Trial 2 finished with value: 0.012521948863225909 and parameters: {'lr': 0.00012365881440515677, 'num_epochs': 8}. Best is trial 1 with value: 0.01253241233339062.


Best trial: 1. Best value: 0.0125324:  40%|████      | 4/10 [00:29<00:44,  7.39s/it]

[I 2025-08-18 21:04:26,754] Trial 3 finished with value: 0.012517779565449427 and parameters: {'lr': 0.00019563773945936193, 'num_epochs': 7}. Best is trial 1 with value: 0.01253241233339062.


Best trial: 1. Best value: 0.0125324:  50%|█████     | 5/10 [00:36<00:36,  7.39s/it]

[I 2025-08-18 21:04:34,138] Trial 4 finished with value: 0.012518437416244638 and parameters: {'lr': 0.0008124163150796744, 'num_epochs': 4}. Best is trial 1 with value: 0.01253241233339062.


Best trial: 1. Best value: 0.0125324:  60%|██████    | 6/10 [00:44<00:29,  7.46s/it]

[I 2025-08-18 21:04:41,728] Trial 5 finished with value: 0.012518995482609313 and parameters: {'lr': 0.0004190813923448069, 'num_epochs': 10}. Best is trial 1 with value: 0.01253241233339062.


Best trial: 1. Best value: 0.0125324:  70%|███████   | 7/10 [00:52<00:22,  7.50s/it]

[I 2025-08-18 21:04:49,331] Trial 6 finished with value: 0.012513246018250533 and parameters: {'lr': 0.0008583883824643166, 'num_epochs': 6}. Best is trial 1 with value: 0.01253241233339062.


Best trial: 1. Best value: 0.0125324:  80%|████████  | 8/10 [00:59<00:15,  7.58s/it]

[I 2025-08-18 21:04:57,076] Trial 7 finished with value: 0.012522668534016453 and parameters: {'lr': 0.00020459336887364876, 'num_epochs': 7}. Best is trial 1 with value: 0.01253241233339062.


Best trial: 1. Best value: 0.0125324:  90%|█████████ | 9/10 [01:07<00:07,  7.63s/it]

[I 2025-08-18 21:05:04,824] Trial 8 finished with value: 0.012510626236906006 and parameters: {'lr': 0.00030143459928533, 'num_epochs': 9}. Best is trial 1 with value: 0.01253241233339062.


Best trial: 1. Best value: 0.0125324: 100%|██████████| 10/10 [01:15<00:00,  7.51s/it]


[I 2025-08-18 21:05:12,379] Trial 9 finished with value: 0.012522053666054857 and parameters: {'lr': 0.00017175741878183462, 'num_epochs': 6}. Best is trial 1 with value: 0.01253241233339062.


[I 2025-08-18 21:05:34,348] A new study created in memory with name: no-name-ff7f1776-ab1e-474a-81a5-cd81d6981c85
Best trial: 0. Best value: 0.0107771:  10%|█         | 1/10 [00:14<02:13, 14.82s/it]

[I 2025-08-18 21:05:49,171] Trial 0 finished with value: 0.0107770962729577 and parameters: {'lr': 0.0001845277299390864, 'num_epochs': 4}. Best is trial 0 with value: 0.0107770962729577.


Best trial: 0. Best value: 0.0107771:  20%|██        | 2/10 [00:29<01:58, 14.78s/it]

[I 2025-08-18 21:06:03,914] Trial 1 finished with value: 0.010749230519040236 and parameters: {'lr': 0.00073852378847419, 'num_epochs': 3}. Best is trial 0 with value: 0.0107770962729577.


Best trial: 0. Best value: 0.0107771:  30%|███       | 3/10 [00:45<01:45, 15.08s/it]

[I 2025-08-18 21:06:19,365] Trial 2 finished with value: 0.010776018702850559 and parameters: {'lr': 0.0001926688913125828, 'num_epochs': 8}. Best is trial 0 with value: 0.0107770962729577.


Best trial: 0. Best value: 0.0107771:  40%|████      | 4/10 [01:00<01:31, 15.32s/it]

[I 2025-08-18 21:06:35,049] Trial 3 finished with value: 0.010729713420659034 and parameters: {'lr': 0.00037020190252131985, 'num_epochs': 9}. Best is trial 0 with value: 0.0107770962729577.


Best trial: 0. Best value: 0.0107771:  50%|█████     | 5/10 [01:16<01:16, 15.34s/it]

[I 2025-08-18 21:06:50,425] Trial 4 finished with value: 0.01074921129519001 and parameters: {'lr': 0.00021688901265387012, 'num_epochs': 8}. Best is trial 0 with value: 0.0107770962729577.


Best trial: 0. Best value: 0.0107771:  60%|██████    | 6/10 [01:30<01:00, 15.19s/it]

[I 2025-08-18 21:07:05,310] Trial 5 finished with value: 0.010767799228280401 and parameters: {'lr': 0.00031607768635259537, 'num_epochs': 3}. Best is trial 0 with value: 0.0107770962729577.


Best trial: 6. Best value: 0.0107853:  70%|███████   | 7/10 [01:46<00:45, 15.19s/it]

[I 2025-08-18 21:07:20,495] Trial 6 finished with value: 0.010785301780983922 and parameters: {'lr': 0.0001348398488367947, 'num_epochs': 2}. Best is trial 6 with value: 0.010785301780983922.


Best trial: 6. Best value: 0.0107853:  80%|████████  | 8/10 [02:01<00:30, 15.37s/it]

[I 2025-08-18 21:07:36,255] Trial 7 finished with value: 0.010730674369667985 and parameters: {'lr': 0.0002939472558506658, 'num_epochs': 10}. Best is trial 6 with value: 0.010785301780983922.


Best trial: 6. Best value: 0.0107853:  90%|█████████ | 9/10 [02:16<00:15, 15.04s/it]

[I 2025-08-18 21:07:50,589] Trial 8 finished with value: 0.0107781880715802 and parameters: {'lr': 0.0004169089721665731, 'num_epochs': 2}. Best is trial 6 with value: 0.010785301780983922.


Best trial: 6. Best value: 0.0107853: 100%|██████████| 10/10 [02:30<00:00, 15.08s/it]


[I 2025-08-18 21:08:05,170] Trial 9 finished with value: 0.010763588868587375 and parameters: {'lr': 0.0004715341215474761, 'num_epochs': 3}. Best is trial 6 with value: 0.010785301780983922.


[I 2025-08-18 21:08:40,054] A new study created in memory with name: no-name-0152e0f2-0401-4822-a965-069d8eaf58fc
Best trial: 0. Best value: 0.00804945:  10%|█         | 1/10 [00:21<03:15, 21.69s/it]

[I 2025-08-18 21:09:01,747] Trial 0 finished with value: 0.0080494463820595 and parameters: {'lr': 0.0002759588361300536, 'num_epochs': 8}. Best is trial 0 with value: 0.0080494463820595.


Best trial: 0. Best value: 0.00804945:  20%|██        | 2/10 [00:43<02:55, 21.96s/it]

[I 2025-08-18 21:09:23,897] Trial 1 finished with value: 0.008043567428718762 and parameters: {'lr': 0.00014924591813531308, 'num_epochs': 9}. Best is trial 0 with value: 0.0080494463820595.


Best trial: 0. Best value: 0.00804945:  30%|███       | 3/10 [01:04<02:30, 21.47s/it]

[I 2025-08-18 21:09:44,781] Trial 2 finished with value: 0.008041643814109208 and parameters: {'lr': 0.000415382012777479, 'num_epochs': 2}. Best is trial 0 with value: 0.0080494463820595.


Best trial: 0. Best value: 0.00804945:  40%|████      | 4/10 [01:27<02:11, 21.93s/it]

[I 2025-08-18 21:10:07,428] Trial 3 finished with value: 0.00804308485214614 and parameters: {'lr': 0.0001875435670045635, 'num_epochs': 8}. Best is trial 0 with value: 0.0080494463820595.


Best trial: 4. Best value: 0.00805489:  50%|█████     | 5/10 [01:50<01:51, 22.36s/it]

[I 2025-08-18 21:10:30,534] Trial 4 finished with value: 0.008054894676613513 and parameters: {'lr': 0.00026069367267443026, 'num_epochs': 9}. Best is trial 4 with value: 0.008054894676613513.


Best trial: 5. Best value: 0.00806375:  60%|██████    | 6/10 [02:12<01:29, 22.26s/it]

[I 2025-08-18 21:10:52,599] Trial 5 finished with value: 0.0080637467084747 and parameters: {'lr': 0.0009725572622256445, 'num_epochs': 3}. Best is trial 5 with value: 0.0080637467084747.


Best trial: 5. Best value: 0.00806375:  70%|███████   | 7/10 [02:33<01:05, 21.95s/it]

[I 2025-08-18 21:11:13,916] Trial 6 finished with value: 0.008037817721030991 and parameters: {'lr': 0.00013392051455170056, 'num_epochs': 1}. Best is trial 5 with value: 0.0080637467084747.


Best trial: 7. Best value: 0.00807582:  80%|████████  | 8/10 [02:56<00:44, 22.08s/it]

[I 2025-08-18 21:11:36,268] Trial 7 finished with value: 0.008075821067988915 and parameters: {'lr': 0.000872934586982575, 'num_epochs': 7}. Best is trial 7 with value: 0.008075821067988915.


Best trial: 7. Best value: 0.00807582:  90%|█████████ | 9/10 [03:18<00:22, 22.00s/it]

[I 2025-08-18 21:11:58,101] Trial 8 finished with value: 0.008043362855271916 and parameters: {'lr': 0.00020738113687138434, 'num_epochs': 4}. Best is trial 7 with value: 0.008075821067988915.


Best trial: 7. Best value: 0.00807582: 100%|██████████| 10/10 [03:39<00:00, 22.00s/it]


[I 2025-08-18 21:12:20,018] Trial 9 finished with value: 0.00804421696013413 and parameters: {'lr': 0.00026498841632132037, 'num_epochs': 4}. Best is trial 7 with value: 0.008075821067988915.


[I 2025-08-18 21:13:06,867] A new study created in memory with name: no-name-9beff353-f8c0-41c5-87e6-4da9e706f347
Best trial: 0. Best value: 0.0100237:  10%|█         | 1/10 [00:25<03:52, 25.80s/it]

[I 2025-08-18 21:13:32,666] Trial 0 finished with value: 0.010023744991566887 and parameters: {'lr': 0.0002335611182825513, 'num_epochs': 10}. Best is trial 0 with value: 0.010023744991566887.


Best trial: 0. Best value: 0.0100237:  20%|██        | 2/10 [00:50<03:20, 25.00s/it]

[I 2025-08-18 21:13:57,110] Trial 1 finished with value: 0.010022104725823783 and parameters: {'lr': 0.00017969886537341702, 'num_epochs': 6}. Best is trial 0 with value: 0.010023744991566887.


Best trial: 0. Best value: 0.0100237:  30%|███       | 3/10 [01:13<02:50, 24.31s/it]

[I 2025-08-18 21:14:20,590] Trial 2 finished with value: 0.010019843384593075 and parameters: {'lr': 0.000438308081380463, 'num_epochs': 1}. Best is trial 0 with value: 0.010023744991566887.


Best trial: 3. Best value: 0.0100327:  40%|████      | 4/10 [01:40<02:30, 25.12s/it]

[I 2025-08-18 21:14:46,957] Trial 3 finished with value: 0.010032749896563666 and parameters: {'lr': 0.0002791402318171105, 'num_epochs': 10}. Best is trial 3 with value: 0.010032749896563666.


Best trial: 3. Best value: 0.0100327:  50%|█████     | 5/10 [02:06<02:07, 25.49s/it]

[I 2025-08-18 21:15:13,100] Trial 4 finished with value: 0.010018433625815414 and parameters: {'lr': 0.00017768238470758355, 'num_epochs': 3}. Best is trial 3 with value: 0.010032749896563666.


Best trial: 3. Best value: 0.0100327:  50%|█████     | 5/10 [02:19<02:19, 27.90s/it]


[W 2025-08-18 21:15:26,335] Trial 5 failed with parameters: {'lr': 0.0006929989248602841, 'num_epochs': 2} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/roee/Documents/code/OPC/.venv/lib/python3.12/site-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_2093602/3301235813.py", line 18, in objective
    trial_neigh_model = NeighborhoodModel(
                        ^^^^^^^^^^^^^^^^^^
  File "/home/roee/Documents/code/OPC/models.py", line 37, in __init__
    self.fit(action_emb, context_emb, actions, context, rewards)
  File "/home/roee/Documents/code/OPC/models.py", line 44, in fit
    self.calculate_scores()
  File "/home/roee/Documents/code/OPC/models.py", line 63, in calculate_scores
    self.scores = self.context_convolve(context)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/roee/Documents/code/OPC/models.py", 

KeyboardInterrupt: 

In [None]:
df6

Unnamed: 0,policy_rewards,ipw,reg_dm,conv_dm,conv_dr,conv_sndr,ipw_var,reg_dm_var,conv_dm_var,conv_dr_var,conv_sndr_var,action_diff_to_real,action_delta,context_diff_to_real,context_delta
0,0.014,0.0206,0.0158,0.0163,0.017,0.019,0.0,0.0,0.0,0.0,0.0,0.2189,0.0,0.3154,0.0
3000,0.0139,0.0206,0.0158,0.0163,0.017,0.019,0.0,0.0,0.0,0.0,0.0,0.2189,0.0,0.3154,0.0
6000,0.014,0.0156,0.0129,0.0135,0.0135,0.0136,0.0,0.0,0.0,0.0,0.0,0.2079,0.06,0.3195,0.0415
8000,0.0138,0.0194,0.0155,0.0168,0.0167,0.0163,0.0,0.0,0.0,0.0,0.0,0.2189,0.0001,0.3154,0.0001
9000,0.0143,0.0164,0.015,0.0165,0.016,0.0146,0.0,0.0,0.0,0.0,0.0,0.2155,0.0121,0.3156,0.0089


### 4

$$emb = 0.7 * gt + 0.3 * noise$$
$$lr = 0.05$$
$$n_{epochs} = 10$$
$$BatchSize=150$$

In [None]:
df7 = trainer_trial(num_runs, num_neighbors, num_rounds_list[:-3], train_dataset, batch_size+100, num_epochs=10, lr=0.05)

TypeError: trainer_trial() got an unexpected keyword argument 'num_epochs'

In [None]:
df7