In [1]:
import warnings
warnings.filterwarnings("ignore")
from copy import deepcopy
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import sys

sys.path.append("/code")

from tqdm import tqdm
import torch
# device = torch.device('cpu')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# import gym
# import recogym

import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim

from sklearn.utils import check_random_state

# implementing OPE of the IPWLearner using synthetic bandit data
from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt

from scipy.special import softmax
import optuna
# from memory_profiler import profile


from estimators import (
    DirectMethod as DM
)

from simulation_utils import (
    eval_policy,
    generate_dataset,
    create_simulation_data_from_pi,
    get_train_data,
    get_opl_results_dict,
    CustomCFDataset,
    calc_reward
)

from models import (    
    CFModel,
    NeighborhoodModel,
    BPRModel, 
    RegressionModel
)

from training_utils import (
    fit_bpr,
    train,
    validation_loop
 )

from custom_losses import (
    SNDRPolicyLoss,
    BPRLoss
    )

random_state=12345
random_ = check_random_state(random_state)

Using device: cpu
Using device: cpu
Using device: cpu


In [2]:
pd.options.display.float_format = '{:,.4f}'.format

## `trainer_trial` Function

This function runs policy learning experiments using offline bandit data and evaluates various estimators.

### Parameters
- **num_runs** (int): Number of experimental runs per training size
- **num_neighbors** (int): Number of neighbors to consider in the neighborhood model
- **num_rounds_list** (list): List of training set sizes to evaluate
- **dataset** (dict): Contains dataset information including embeddings, action probabilities, and reward probabilities
- **batch_size** (int): Batch size for training the policy model
- **num_epochs** (int): Number of training epochs for each experiment
- **lr** (float, default=0.001): Learning rate for the optimizer

### Process Flow
1. Initializes result structures and retrieval models
2. For each training size in `num_rounds_list`:
   - Creates a uniform logging policy and simulates data
   - Generates training data for offline learning
   - Fits regression and neighborhood models for reward estimation
   - Initializes and trains a counterfactual policy model
   - Evaluates policy performance using various estimators
   - Collects metrics on policy reward and embedding quality

### Returns
- **DataFrame**: Results table with rows indexed by training size and columns for various metrics:
  - `policy_rewards`: True expected reward of the learned policy
  - Various estimator errors (`ipw`, `reg_dm`, `conv_dm`, `conv_dr`, `conv_sndr`)
  - Variance metrics for each estimator
  - Embedding quality metrics comparing learned representations to ground truth

### Implementation Notes
- Uses uniform random logging policy for collecting offline data
- Employs Self-Normalized Doubly Robust (SNDR) policy learning
- Measures embedding quality via RMSE to original/ground truth embeddings

In [None]:
def trainer_trial(
                  num_runs,
                  num_neighbors,
                  num_rounds_list,
                  dataset,
                  batch_size,
                  val_size=2000
                  ):
    
    # Define device at the beginning
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    def objective(trial):
    
        # Optuna objective function
        lr = trial.suggest_loguniform("lr", 1e-4, 1e-3)
        epochs = trial.suggest_int("num_epochs", 1, 10)

        trial_neigh_model = NeighborhoodModel(
                                                train_data['x_idx'],
                                                train_data['a'], 
                                                our_a,
                                                our_x, 
                                                train_data['r'], 
                                                num_neighbors=num_neighbors
                                            )
        

        trial_model = CFModel(
                        n_users, 
                        n_actions, 
                        emb_dim, 
                        initial_user_embeddings=torch.tensor(our_x, device=device), 
                        initial_actions_embeddings=torch.tensor(our_a, device=device)
                        )
        
        # Training
        train(trial_model, train_loader, trial_neigh_model, criterion=SNDRPolicyLoss(), num_epochs=epochs, lr=lr, device=device)
        return validation_loop(trial_model, val_loader, trial_neigh_model)
    
    device = torch.device('cpu')

    dm = DM()
    results = {}

    our_x, our_a = dataset["our_x"], dataset["our_a"]
    emb_x, emb_a = dataset["emb_x"], dataset["emb_a"]
    original_x, original_a = dataset["original_x"], dataset["original_a"]
    n_users, n_actions, emb_dim = dataset["n_users"], dataset["n_actions"], dataset["emb_dim"]
    
    first = True

    for train_size in num_rounds_list:
        reg_results, conv_results = [], []
        
        for run in range(num_runs):

            pi_0 = softmax(our_x @ our_a.T, axis=1)
            original_policy_prob = np.expand_dims(pi_0, -1)

            simulation_data = create_simulation_data_from_pi(
                                                            dataset,
                                                            pi_0,
                                                            train_size + val_size,
                                                            random_state=(run + 1) * train_size
                                                            )
            # test_data = get_test_data(dataset, simulation_data, n_test_data)

            # idx = np.arange(train_size) + n_test_data
            idx = np.arange(train_size)

            train_data = get_train_data(n_actions, train_size, simulation_data, idx, our_x)
            val_data = get_train_data(n_actions, val_size, simulation_data, np.arange(val_size) + train_size, our_x)

            regression_model = RegressionModel(
                                    n_actions=n_actions,
                                    action_context=our_x,
                                    base_model=LogisticRegression(random_state=12345)
                                    )
            
            regression_model.fit(
                                train_data['x'], 
                                train_data['a'],
                                train_data['r'],
                                original_policy_prob[train_data['x_idx'],
                                train_data['a']].squeeze()
                                )
            
            neighberhoodmodel = NeighborhoodModel(
                                                    train_data['x_idx'],
                                                    train_data['a'], 
                                                    our_a,
                                                    our_x, 
                                                    train_data['r'], 
                                                    num_neighbors=num_neighbors
                                                )
            

            model = CFModel(
                            n_users, 
                            n_actions, 
                            emb_dim, 
                            initial_user_embeddings=torch.tensor(our_x, device=device), 
                            initial_actions_embeddings=torch.tensor(our_a, device=device)
                            )
            
            cf_dataset =  CustomCFDataset(
                                       train_data['x_idx'], 
                                       train_data['a'], 
                                       train_data['r'], 
                                       original_policy_prob
                                       )
            
            train_loader = DataLoader(cf_dataset, batch_size=batch_size, shuffle=True)

            val_dataset =  CustomCFDataset(
                            val_data['x_idx'], 
                            val_data['a'], 
                            val_data['r'], 
                            original_policy_prob
                            )
            
            val_loader = DataLoader(val_dataset, batch_size=val_size, shuffle=True)

            if first:
                policy = np.expand_dims(softmax(our_x @ our_a.T, axis=1), -1)
                conv_results.append(eval_policy(neighberhoodmodel, train_data, original_policy_prob, policy))
                conv_results[-1] = np.append(calc_reward(dataset, policy), conv_results[-1])
                conv_results[-1] = np.append(conv_results[-1], [np.sqrt(np.mean((emb_a-our_a)**2)), np.sqrt(np.mean((original_a-our_a)**2))])
                conv_results[-1] = np.append(conv_results[-1], [np.sqrt(np.mean((emb_x-our_x)**2)), np.sqrt(np.mean((original_x-our_x)**2))])
                # bpr_scores = bpr_model.calc_scores(torch.tensor(train_data['x_idx'], device=device, dtype=torch.long)).detach().cpu().numpy()
                reg_dm = dm.estimate_policy_value(policy[train_data['x_idx']], regression_model.predict(train_data['x']))
                reg_results.append(reg_dm)
                first = False
                reg_results = np.array(reg_results)
                conv_results = np.array(conv_results)
                results[0] = get_opl_results_dict(reg_results, conv_results)
                reg_results, conv_results = [], []
                
            # Bloss = BPRLoss()
            # bpr_model = BPRModel(
            #                     n_users,
            #                     n_actions,
            #                     emb_x.shape[1], 
            #                     initial_user_embeddings=torch.tensor(our_x, device=device), 
            #                     initial_actions_embeddings=torch.tensor(our_a, device=device)
            #                     )

            study = optuna.create_study(direction="maximize")
            study.optimize(objective, n_trials=10, show_progress_bar=True)
            best_params = study.best_params

            neighberhoodmodel = NeighborhoodModel(
                                                    train_data['x_idx'],
                                                    train_data['a'], 
                                                    our_a,
                                                    our_x, 
                                                    train_data['r'], 
                                                    num_neighbors=num_neighbors
                                                )
            

            model = CFModel(
                            n_users, 
                            n_actions, 
                            emb_dim, 
                            initial_user_embeddings=torch.tensor(our_x, device=device), 
                            initial_actions_embeddings=torch.tensor(our_a, device=device)
                            )

            train(model, train_loader, neighberhoodmodel, criterion=SNDRPolicyLoss(), num_epochs=best_params['num_epochs'], lr=best_params['lr'], device=device)
            # fit_bpr(bpr_model, Bloss, train_loader, num_epochs=3, lr=0.001, device=device)
            # neighborhood_model.update(model.get_params()[0].detach().numpy(), model.get_params()[1].detach().numpy())'

            our_x, our_a = model.get_params()
            our_a, our_x = our_a.detach().cpu().numpy(), our_x.detach().cpu().numpy()

            policy = np.expand_dims(softmax(our_x @ our_a.T, axis=1), -1)

            # bpr_scores = bpr_model.calc_scores(torch.tensor(train_data['x_idx'], device=device, dtype=torch.long)).detach().cpu().numpy()
            # reg_dm = dm.estimate_policy_value(policy[train_data['x_idx']], bpr_scores)
            reg_dm = dm.estimate_policy_value(policy[train_data['x_idx']], regression_model.predict(train_data['x']))

            reg_results.append(reg_dm)

            # conv_results.append(eval_policy(neighberhoodmodel, test_data, original_policy_prob[test_data['x_idx']], policy))
            conv_results.append(eval_policy(neighberhoodmodel, train_data, original_policy_prob, policy))

            conv_results[-1] = np.append(calc_reward(dataset, policy), conv_results[-1])
            conv_results[-1] = np.append(conv_results[-1], [np.sqrt(np.mean((emb_a-our_a)**2)), np.sqrt(np.mean((original_a-our_a)**2))])

            # temp.append(np.mean((emb_a-our_a)**2, axis=0))

            conv_results[-1] = np.append(conv_results[-1], [np.sqrt(np.mean((emb_x-our_x)**2)), np.sqrt(np.mean((original_x-our_x)**2))])
            
            our_a, our_x = original_a.copy(), original_x.copy()

        reg_results = np.array(reg_results)
        conv_results = np.array(conv_results)

        results[train_size] = get_opl_results_dict(reg_results, conv_results)
    
    return pd.DataFrame.from_dict(results, orient='index')

## Learning

We will run several simulations on a generated dataset, the dataset is generated like this:
$$ \text{We have users U and actions A } u_i \sim N(0, I_{emb_dim}) \ a_i \sim N(0, I_{emb_dim})$$
$$ p_{ij} = 1 / (5 + e^{-(u_i.T a_j)}) $$
$$r_{ij} \sim Bin(p_{ij})$$

We have a policy $\pi$
and it's ground truth reward is calculated by
$$R_{gt} = \sum_{i}{\sum_{j}{\pi_{ij} * p_{ij}}} $$

Our parameters for the dataset will be
$$EmbDim = 5$$
$$NumActions= 150$$
$$NumUsers = 150$$
$$NeighborhoodSize = 6$$

to learn a new policy from $\pi$ we will sample from:
$$\pi_{start} = (1-\epsilon)*\pi + \epsilon * \pi_{random}$$

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Using device: cpu


In [5]:
num_runs = 1

In [6]:
dataset_params = dict(
                    n_actions= 500,
                    n_users = 500,
                    emb_dim = 4,
                    # sigma = 0.1,
                    eps = 0.2, # this is the epsilon for the noise in the ground truth policy representation
                    ctr = 0.02
                    )

train_dataset = generate_dataset(dataset_params)

CTR: 0.01861246403332251


In [7]:
train_dataset.keys()

dict_keys(['emb_a', 'our_a', 'original_a', 'emb_x', 'our_x', 'original_x', 'q_x_a', 'n_actions', 'n_users', 'emb_dim', 'user_prior'])

In [8]:
num_runs = 1
batch_size = 200
num_neighbors = 51
num_rounds_list = [30000, 60000, 80000, 90000]

### 1

$$emb = 0.7 * gt + 0.3 * noise$$
$$lr = 0.005$$
$$n_{epochs} = 1$$
$$BatchSize=50$$

In [None]:
df4 = trainer_trial(num_runs, num_neighbors, num_rounds_list, train_dataset, batch_size, val_size=35000)

[I 2025-08-18 00:46:01,982] A new study created in memory with name: no-name-1686133b-49dd-4658-8ffa-e01ba0620026
Best trial: 0. Best value: 0.0103668:  10%|█         | 1/10 [03:28<31:19, 208.79s/it]

[I 2025-08-18 00:49:30,770] Trial 0 finished with value: 0.01036684519920781 and parameters: {'lr': 0.00021998095044840113, 'num_epochs': 1}. Best is trial 0 with value: 0.01036684519920781.


Best trial: 0. Best value: 0.0103668:  20%|██        | 2/10 [03:34<11:55, 89.45s/it] 

[I 2025-08-18 00:49:36,682] Trial 2 finished with value: 0.010365837663681828 and parameters: {'lr': 0.00020944807542242172, 'num_epochs': 3}. Best is trial 0 with value: 0.01036684519920781.
[I 2025-08-18 00:49:36,687] Trial 1 finished with value: 0.010363003040643437 and parameters: {'lr': 0.0006597293469526339, 'num_epochs': 3}. Best is trial 0 with value: 0.01036684519920781.


Best trial: 0. Best value: 0.0103668:  40%|████      | 4/10 [06:47<09:22, 93.71s/it]

[I 2025-08-18 00:52:49,164] Trial 3 finished with value: 0.010362328807094319 and parameters: {'lr': 0.00044462930806082, 'num_epochs': 5}. Best is trial 0 with value: 0.01036684519920781.


Best trial: 4. Best value: 0.0103677:  50%|█████     | 5/10 [06:57<05:40, 68.02s/it]

[I 2025-08-18 00:52:59,830] Trial 4 finished with value: 0.010367662871983234 and parameters: {'lr': 0.00018196890332457217, 'num_epochs': 1}. Best is trial 4 with value: 0.010367662871983234.


Best trial: 4. Best value: 0.0103677:  60%|██████    | 6/10 [07:06<03:19, 49.85s/it]

[I 2025-08-18 00:53:08,565] Trial 5 finished with value: 0.010364870659039634 and parameters: {'lr': 0.00018888020593426397, 'num_epochs': 6}. Best is trial 4 with value: 0.010367662871983234.


Best trial: 6. Best value: 0.0103696:  70%|███████   | 7/10 [10:00<04:22, 87.56s/it]

[I 2025-08-18 00:56:02,255] Trial 6 finished with value: 0.010369596564941274 and parameters: {'lr': 0.0003006411130746048, 'num_epochs': 1}. Best is trial 6 with value: 0.010369596564941274.


Best trial: 6. Best value: 0.0103696:  80%|████████  | 8/10 [10:13<02:09, 64.89s/it]

[I 2025-08-18 00:56:15,020] Trial 7 finished with value: 0.010365924290088964 and parameters: {'lr': 0.0001293617525843229, 'num_epochs': 4}. Best is trial 6 with value: 0.010369596564941274.


Best trial: 8. Best value: 0.0103729:  90%|█████████ | 9/10 [10:32<00:51, 51.16s/it]

[I 2025-08-18 00:56:34,487] Trial 8 finished with value: 0.010372910396167757 and parameters: {'lr': 0.0003474050541067679, 'num_epochs': 4}. Best is trial 8 with value: 0.010372910396167757.


Best trial: 8. Best value: 0.0103729: 100%|██████████| 10/10 [11:41<00:00, 70.18s/it]


[I 2025-08-18 00:57:43,762] Trial 9 finished with value: 0.010368064860685176 and parameters: {'lr': 0.00011305947321108417, 'num_epochs': 7}. Best is trial 8 with value: 0.010372910396167757.


[I 2025-08-18 01:01:55,476] A new study created in memory with name: no-name-94d0b100-42e3-4d04-9eee-9f7a60b81f75
Best trial: 0. Best value: 0.0105601:  10%|█         | 1/10 [07:48<1:10:15, 468.35s/it]

[I 2025-08-18 01:09:43,827] Trial 0 finished with value: 0.01056008354305044 and parameters: {'lr': 0.00015612399899248307, 'num_epochs': 6}. Best is trial 0 with value: 0.01056008354305044.


Best trial: 0. Best value: 0.0105601:  20%|██        | 2/10 [07:55<26:14, 196.87s/it]  

[I 2025-08-18 01:09:50,668] Trial 1 finished with value: 0.0007366250452178237 and parameters: {'lr': 0.0008145137498369256, 'num_epochs': 7}. Best is trial 0 with value: 0.01056008354305044.


Best trial: 2. Best value: 0.0105832:  30%|███       | 3/10 [07:59<12:41, 108.72s/it]

[I 2025-08-18 01:09:54,491] Trial 2 finished with value: 0.010583158933262709 and parameters: {'lr': 0.00010307696077706996, 'num_epochs': 7}. Best is trial 2 with value: 0.010583158933262709.


Best trial: 3. Best value: 0.0105873:  40%|████      | 4/10 [15:02<23:16, 232.82s/it]

[I 2025-08-18 01:16:57,542] Trial 3 finished with value: 0.010587282562991815 and parameters: {'lr': 0.000711824176650503, 'num_epochs': 1}. Best is trial 3 with value: 0.010587282562991815.


Best trial: 3. Best value: 0.0105873:  50%|█████     | 5/10 [15:21<12:58, 155.78s/it]

[I 2025-08-18 01:17:16,741] Trial 4 finished with value: 0.01051865419508811 and parameters: {'lr': 0.00020984050502020698, 'num_epochs': 6}. Best is trial 3 with value: 0.010587282562991815.


Best trial: 3. Best value: 0.0105873:  60%|██████    | 6/10 [15:58<07:41, 115.34s/it]

[I 2025-08-18 01:17:53,560] Trial 5 finished with value: 0.008754240368865598 and parameters: {'lr': 0.0003841863941807458, 'num_epochs': 8}. Best is trial 3 with value: 0.010587282562991815.


Best trial: 3. Best value: 0.0105873:  70%|███████   | 7/10 [22:21<10:09, 203.07s/it]

[I 2025-08-18 01:24:17,271] Trial 6 finished with value: 0.010573955104059247 and parameters: {'lr': 0.00012440977675262847, 'num_epochs': 7}. Best is trial 3 with value: 0.010587282562991815.


Best trial: 3. Best value: 0.0105873:  80%|████████  | 8/10 [22:32<04:43, 141.77s/it]

[I 2025-08-18 01:24:27,783] Trial 7 finished with value: 0.009555218239389904 and parameters: {'lr': 0.0005610960548209259, 'num_epochs': 4}. Best is trial 3 with value: 0.010587282562991815.


Best trial: 3. Best value: 0.0105873:  90%|█████████ | 9/10 [23:20<01:52, 112.65s/it]

[I 2025-08-18 01:25:16,393] Trial 8 finished with value: 0.010488506759005475 and parameters: {'lr': 0.0002499885177096407, 'num_epochs': 5}. Best is trial 3 with value: 0.010587282562991815.


Best trial: 3. Best value: 0.0105873: 100%|██████████| 10/10 [25:38<00:00, 153.86s/it]


[I 2025-08-18 01:27:34,040] Trial 9 finished with value: 0.007912739149983753 and parameters: {'lr': 0.0008247421354429188, 'num_epochs': 4}. Best is trial 3 with value: 0.010587282562991815.


[I 2025-08-18 01:34:06,011] A new study created in memory with name: no-name-94cc3a01-6135-401e-a646-36bfea6a84d3
  0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
df4[['policy_rewards', 'ipw', 'reg_dm', 'conv_dm', 'conv_dr', 'conv_sndr', 'action_diff_to_real', 'action_delta', 'context_diff_to_real', 'context_delta']]

Unnamed: 0,policy_rewards,ipw,reg_dm,conv_dm,conv_dr,conv_sndr,action_diff_to_real,action_delta,context_diff_to_real,context_delta
0,0.0198,0.0202,0.02,0.0198,0.02,0.0205,0.2784,0.0,0.2374,0.0
30000,0.0199,0.0205,0.0198,0.0173,0.0195,0.0199,0.4736,0.379,0.3557,0.3496
60000,0.0199,0.017,0.0197,0.0221,0.0176,0.0166,0.4546,0.3624,0.39,0.3823
80000,0.0198,0.0198,0.0194,0.0189,0.0195,0.0202,0.2979,0.1149,0.246,0.1062
90000,0.0199,0.0205,0.0191,0.0192,0.0199,0.02,0.6269,0.5273,0.6198,0.6548


In [None]:
df4

Unnamed: 0,policy_rewards,ipw,reg_dm,conv_dm,conv_dr,conv_sndr,ipw_var,reg_dm_var,conv_dm_var,conv_dr_var,conv_sndr_var,action_diff_to_real,action_delta,context_diff_to_real,context_delta
0,0.0198,0.0202,0.02,0.0198,0.02,0.0205,0.0,0.0,0.0,0.0,0.0,0.2784,0.0,0.2374,0.0
30000,0.0199,0.0205,0.0198,0.0173,0.0195,0.0199,0.0,0.0,0.0,0.0,0.0,0.4736,0.379,0.3557,0.3496
60000,0.0199,0.017,0.0197,0.0221,0.0176,0.0166,0.0,0.0,0.0,0.0,0.0,0.4546,0.3624,0.39,0.3823
80000,0.0198,0.0198,0.0194,0.0189,0.0195,0.0202,0.0,0.0,0.0,0.0,0.0,0.2979,0.1149,0.246,0.1062
90000,0.0199,0.0205,0.0191,0.0192,0.0199,0.02,0.0,0.0,0.0,0.0,0.0,0.6269,0.5273,0.6198,0.6548


In [None]:
num_rounds_list = [3000, 6000, 8000, 9000]

### 2

$$emb = 0.7 * gt + 0.3 * noise$$
$$lr = 0.001$$
$$n_{epochs} = 1$$
$$BatchSize=50$$

In [None]:
df5 = trainer_trial(num_runs, num_neighbors, num_rounds_list, train_dataset, batch_size, val_size=3500)

[I 2025-08-17 18:59:47,813] A new study created in memory with name: no-name-da278b4b-2e05-4978-b549-0ee7a31d906a
Best trial: 0. Best value: 0.0055823:  10%|█         | 1/10 [00:13<01:58, 13.20s/it]

[I 2025-08-17 19:00:01,010] Trial 0 finished with value: 0.005582304033634449 and parameters: {'lr': 0.00013239533643429304, 'num_epochs': 3}. Best is trial 0 with value: 0.005582304033634449.
[I 2025-08-17 19:00:01,210] Trial 1 finished with value: 0.005557379902794693 and parameters: {'lr': 0.0005547548817794695, 'num_epochs': 5}. Best is trial 0 with value: 0.005582304033634449.


Best trial: 0. Best value: 0.0055823:  30%|███       | 3/10 [00:25<01:01,  8.75s/it]

[I 2025-08-17 19:00:13,770] Trial 2 finished with value: 0.00556623068617111 and parameters: {'lr': 0.000718531569169898, 'num_epochs': 2}. Best is trial 0 with value: 0.005582304033634449.


Best trial: 0. Best value: 0.0055823:  40%|████      | 4/10 [00:26<00:34,  5.68s/it]

[I 2025-08-17 19:00:14,744] Trial 3 finished with value: 0.005569562361360523 and parameters: {'lr': 0.0002723759725933621, 'num_epochs': 8}. Best is trial 0 with value: 0.005582304033634449.


Best trial: 4. Best value: 0.00558643:  50%|█████     | 5/10 [00:38<00:39,  7.87s/it]

[I 2025-08-17 19:00:26,481] Trial 4 finished with value: 0.005586427873099138 and parameters: {'lr': 0.00010945278586730512, 'num_epochs': 1}. Best is trial 4 with value: 0.005586427873099138.


Best trial: 4. Best value: 0.00558643:  60%|██████    | 6/10 [00:40<00:23,  5.77s/it]

[I 2025-08-17 19:00:28,196] Trial 5 finished with value: 0.0055616076295547535 and parameters: {'lr': 0.00030938126605713835, 'num_epochs': 8}. Best is trial 4 with value: 0.005586427873099138.


Best trial: 4. Best value: 0.00558643:  70%|███████   | 7/10 [00:52<00:23,  7.71s/it]

[I 2025-08-17 19:00:39,878] Trial 6 finished with value: 0.005577139448007883 and parameters: {'lr': 0.00010621311979313267, 'num_epochs': 9}. Best is trial 4 with value: 0.005586427873099138.


Best trial: 4. Best value: 0.00558643:  80%|████████  | 8/10 [00:52<00:11,  5.53s/it]

[I 2025-08-17 19:00:40,735] Trial 7 finished with value: 0.005579764558941724 and parameters: {'lr': 0.000667388225032928, 'num_epochs': 2}. Best is trial 4 with value: 0.005586427873099138.


Best trial: 4. Best value: 0.00558643:  90%|█████████ | 9/10 [01:05<00:07,  7.59s/it]

[I 2025-08-17 19:00:52,861] Trial 8 finished with value: 0.005574484286551209 and parameters: {'lr': 0.0005599136329743947, 'num_epochs': 3}. Best is trial 4 with value: 0.005586427873099138.


Best trial: 4. Best value: 0.00558643: 100%|██████████| 10/10 [01:05<00:00,  6.59s/it]


[I 2025-08-17 19:00:53,674] Trial 9 finished with value: 0.0055633257032295345 and parameters: {'lr': 0.0005879020960134675, 'num_epochs': 5}. Best is trial 4 with value: 0.005586427873099138.


[I 2025-08-17 19:01:25,168] A new study created in memory with name: no-name-69c9288b-e37b-4d33-8b5a-10904ae041f8
Best trial: 0. Best value: 0.00907487:  10%|█         | 1/10 [00:25<03:50, 25.56s/it]

[I 2025-08-17 19:01:50,726] Trial 0 finished with value: 0.009074871543366364 and parameters: {'lr': 0.0006491577954358776, 'num_epochs': 1}. Best is trial 0 with value: 0.009074871543366364.


Best trial: 1. Best value: 0.009076:  20%|██        | 2/10 [00:25<01:25, 10.74s/it]  

[I 2025-08-17 19:01:51,087] Trial 1 finished with value: 0.00907600345401617 and parameters: {'lr': 0.00032938005976856097, 'num_epochs': 3}. Best is trial 1 with value: 0.00907600345401617.


Best trial: 1. Best value: 0.009076:  30%|███       | 3/10 [00:52<02:04, 17.81s/it]

[I 2025-08-17 19:02:17,316] Trial 2 finished with value: 0.009058915392535474 and parameters: {'lr': 0.0008016158389025671, 'num_epochs': 2}. Best is trial 1 with value: 0.00907600345401617.


Best trial: 1. Best value: 0.009076:  40%|████      | 4/10 [00:56<01:15, 12.59s/it]

[I 2025-08-17 19:02:21,896] Trial 3 finished with value: 0.009050563696855102 and parameters: {'lr': 0.00025430011172084937, 'num_epochs': 10}. Best is trial 1 with value: 0.00907600345401617.


Best trial: 4. Best value: 0.00908259:  50%|█████     | 5/10 [01:17<01:17, 15.59s/it]

[I 2025-08-17 19:02:42,801] Trial 4 finished with value: 0.009082588342473313 and parameters: {'lr': 0.00045441683264452486, 'num_epochs': 1}. Best is trial 4 with value: 0.009082588342473313.


Best trial: 4. Best value: 0.00908259:  60%|██████    | 6/10 [01:25<00:51, 12.85s/it]

[I 2025-08-17 19:02:50,334] Trial 5 finished with value: 0.00907537603616786 and parameters: {'lr': 0.0007585397701519196, 'num_epochs': 1}. Best is trial 4 with value: 0.009082588342473313.


Best trial: 4. Best value: 0.00908259:  70%|███████   | 7/10 [01:44<00:44, 14.82s/it]

[I 2025-08-17 19:03:09,219] Trial 6 finished with value: 0.009080095613630703 and parameters: {'lr': 0.0002805873454925688, 'num_epochs': 2}. Best is trial 4 with value: 0.009082588342473313.


Best trial: 4. Best value: 0.00908259:  80%|████████  | 8/10 [01:56<00:27, 13.96s/it]

[I 2025-08-17 19:03:21,335] Trial 7 finished with value: 0.009023550013484457 and parameters: {'lr': 0.000500044881828388, 'num_epochs': 8}. Best is trial 4 with value: 0.009082588342473313.


Best trial: 4. Best value: 0.00908259:  90%|█████████ | 9/10 [02:09<00:13, 13.92s/it]

[I 2025-08-17 19:03:35,150] Trial 8 finished with value: 0.009081998781899836 and parameters: {'lr': 0.00011135127157027648, 'num_epochs': 2}. Best is trial 4 with value: 0.009082588342473313.


Best trial: 4. Best value: 0.00908259: 100%|██████████| 10/10 [02:20<00:00, 14.01s/it]


[I 2025-08-17 19:03:45,270] Trial 9 finished with value: 0.009076007019957845 and parameters: {'lr': 0.00033839243696348283, 'num_epochs': 3}. Best is trial 4 with value: 0.009082588342473313.


[I 2025-08-17 19:04:28,060] A new study created in memory with name: no-name-14d65acd-a76b-48fb-ad5c-13b4e62dadc3
Best trial: 0. Best value: 0.00890418:  10%|█         | 1/10 [00:38<05:43, 38.17s/it]

[I 2025-08-17 19:05:06,225] Trial 0 finished with value: 0.008904176321507383 and parameters: {'lr': 0.00025436494695784366, 'num_epochs': 6}. Best is trial 0 with value: 0.008904176321507383.


Best trial: 0. Best value: 0.00890418:  20%|██        | 2/10 [00:38<02:07, 15.96s/it]

[I 2025-08-17 19:05:06,646] Trial 1 finished with value: 0.00877625858881576 and parameters: {'lr': 0.0007061237834580604, 'num_epochs': 8}. Best is trial 0 with value: 0.008904176321507383.


Best trial: 2. Best value: 0.00890443:  30%|███       | 3/10 [01:15<02:58, 25.49s/it]

[I 2025-08-17 19:05:43,473] Trial 2 finished with value: 0.008904432931301996 and parameters: {'lr': 0.00018862467069973078, 'num_epochs': 7}. Best is trial 2 with value: 0.008904432931301996.


Best trial: 2. Best value: 0.00890443:  40%|████      | 4/10 [01:15<01:33, 15.54s/it]

[I 2025-08-17 19:05:43,768] Trial 3 finished with value: 0.008781133009322466 and parameters: {'lr': 0.0008591024496732168, 'num_epochs': 6}. Best is trial 2 with value: 0.008904432931301996.


Best trial: 2. Best value: 0.00890443:  50%|█████     | 5/10 [01:51<01:54, 22.97s/it]

[I 2025-08-17 19:06:19,908] Trial 4 finished with value: 0.008824613514615812 and parameters: {'lr': 0.0007030094378923832, 'num_epochs': 7}. Best is trial 2 with value: 0.008904432931301996.


Best trial: 5. Best value: 0.00890875:  60%|██████    | 6/10 [01:56<01:07, 16.79s/it]

[I 2025-08-17 19:06:24,712] Trial 5 finished with value: 0.008908751776037076 and parameters: {'lr': 0.00034650787033943195, 'num_epochs': 5}. Best is trial 5 with value: 0.008908751776037076.


Best trial: 5. Best value: 0.00890875:  70%|███████   | 7/10 [02:29<01:06, 22.04s/it]

[I 2025-08-17 19:06:57,547] Trial 6 finished with value: 0.008892385311027418 and parameters: {'lr': 0.0002051268705543595, 'num_epochs': 10}. Best is trial 5 with value: 0.008908751776037076.


Best trial: 5. Best value: 0.00890875:  80%|████████  | 8/10 [02:37<00:35, 17.72s/it]

[I 2025-08-17 19:07:06,021] Trial 7 finished with value: 0.008839976575358803 and parameters: {'lr': 0.0005941811370654618, 'num_epochs': 7}. Best is trial 5 with value: 0.008908751776037076.


Best trial: 5. Best value: 0.00890875:  90%|█████████ | 9/10 [03:05<00:20, 20.80s/it]

[I 2025-08-17 19:07:33,593] Trial 8 finished with value: 0.008849741107019958 and parameters: {'lr': 0.0005213344711314894, 'num_epochs': 7}. Best is trial 5 with value: 0.008908751776037076.


Best trial: 9. Best value: 0.0089264: 100%|██████████| 10/10 [03:14<00:00, 19.43s/it]


[I 2025-08-17 19:07:42,320] Trial 9 finished with value: 0.008926396188774643 and parameters: {'lr': 0.00018480937932340735, 'num_epochs': 2}. Best is trial 9 with value: 0.008926396188774643.


[I 2025-08-17 19:08:29,141] A new study created in memory with name: no-name-ffe9a439-9cb8-4b4c-8171-3b3b5d5f0915
Best trial: 1. Best value: 0.00910607:  10%|█         | 1/10 [00:40<06:00, 40.06s/it]

[I 2025-08-17 19:09:09,199] Trial 1 finished with value: 0.009106073182299973 and parameters: {'lr': 0.0007509754319561484, 'num_epochs': 2}. Best is trial 1 with value: 0.009106073182299973.


Best trial: 0. Best value: 0.00911675:  20%|██        | 2/10 [00:41<02:17, 17.21s/it]

[I 2025-08-17 19:09:10,413] Trial 0 finished with value: 0.00911674941153962 and parameters: {'lr': 0.00014688442288042814, 'num_epochs': 6}. Best is trial 0 with value: 0.00911674941153962.


Best trial: 2. Best value: 0.00912185:  30%|███       | 3/10 [01:19<03:09, 27.03s/it]

[I 2025-08-17 19:09:49,119] Trial 2 finished with value: 0.009121849405102554 and parameters: {'lr': 0.0006661976193777732, 'num_epochs': 4}. Best is trial 2 with value: 0.009121849405102554.


Best trial: 2. Best value: 0.00912185:  40%|████      | 4/10 [01:28<01:57, 19.60s/it]

[I 2025-08-17 19:09:57,328] Trial 3 finished with value: 0.009119872026727241 and parameters: {'lr': 0.0002535703990452961, 'num_epochs': 7}. Best is trial 2 with value: 0.009121849405102554.


Best trial: 2. Best value: 0.00912185:  50%|█████     | 5/10 [02:04<02:08, 25.78s/it]

[I 2025-08-17 19:10:34,072] Trial 4 finished with value: 0.009117957720723092 and parameters: {'lr': 0.00015494665977641123, 'num_epochs': 4}. Best is trial 2 with value: 0.009121849405102554.


Best trial: 5. Best value: 0.00914043:  60%|██████    | 6/10 [02:14<01:21, 20.41s/it]

[I 2025-08-17 19:10:44,058] Trial 5 finished with value: 0.009140427565767958 and parameters: {'lr': 0.0009312069592979441, 'num_epochs': 7}. Best is trial 5 with value: 0.009140427565767958.


Best trial: 5. Best value: 0.00914043:  70%|███████   | 7/10 [02:44<01:10, 23.54s/it]

[I 2025-08-17 19:11:14,029] Trial 6 finished with value: 0.009114119790104944 and parameters: {'lr': 0.00014316238181249753, 'num_epochs': 3}. Best is trial 5 with value: 0.009140427565767958.


Best trial: 5. Best value: 0.00914043:  80%|████████  | 8/10 [03:01<00:42, 21.38s/it]

[I 2025-08-17 19:11:30,806] Trial 7 finished with value: 0.009127868076393845 and parameters: {'lr': 0.0006385589919752676, 'num_epochs': 6}. Best is trial 5 with value: 0.009140427565767958.


Best trial: 5. Best value: 0.00914043:  90%|█████████ | 9/10 [03:30<00:23, 23.65s/it]

[I 2025-08-17 19:11:59,451] Trial 8 finished with value: 0.009113317619059215 and parameters: {'lr': 0.00016551277217555124, 'num_epochs': 2}. Best is trial 5 with value: 0.009140427565767958.


Best trial: 5. Best value: 0.00914043: 100%|██████████| 10/10 [03:43<00:00, 22.34s/it]


[I 2025-08-17 19:12:12,589] Trial 9 finished with value: 0.009119895306215898 and parameters: {'lr': 0.0008896592064285964, 'num_epochs': 8}. Best is trial 5 with value: 0.009140427565767958.


In [None]:
df5

Unnamed: 0,policy_rewards,ipw,reg_dm,conv_dm,conv_dr,conv_sndr,ipw_var,reg_dm_var,conv_dm_var,conv_dr_var,conv_sndr_var,action_diff_to_real,action_delta,context_diff_to_real,context_delta
0,0.0198,0.0191,0.0196,0.0197,0.0197,0.0196,0.0,0.0,0.0,0.0,0.0,0.2784,0.0,0.2374,0.0
3000,0.0198,0.0191,0.0196,0.0197,0.0197,0.0196,0.0,0.0,0.0,0.0,0.0,0.2783,0.0006,0.2374,0.0004
6000,0.0198,0.0225,0.0224,0.0229,0.0226,0.0215,0.0,0.0,0.0,0.0,0.0,0.278,0.0037,0.2375,0.0024
8000,0.0198,0.0167,0.0202,0.0197,0.0193,0.0179,0.0,0.0,0.0,0.0,0.0,0.2779,0.0033,0.2374,0.0022
9000,0.0198,0.0248,0.0213,0.021,0.0215,0.023,0.0,0.0,0.0,0.0,0.0,0.2743,0.0588,0.2376,0.042


### 3

$$emb = 0.7 * gt + 0.3 * noise$$
$$lr = 0.003$$
$$n_{epochs} = 10$$
$$BatchSize=50$$

In [None]:
df6 = trainer_trial(num_runs, num_neighbors, num_rounds_list, train_dataset, batch_size)

[I 2025-08-17 18:49:28,153] A new study created in memory with name: no-name-d5061523-9cad-4d43-a177-edb90d613796
Best trial: 1. Best value: 0.114287:  20%|██        | 2/10 [00:13<00:43,  5.42s/it]  

[I 2025-08-17 18:49:41,075] Trial 0 finished with value: -0.04472256198718548 and parameters: {'lr': 0.07127227807002877, 'num_epochs': 5}. Best is trial 0 with value: -0.04472256198718548.
[I 2025-08-17 18:49:41,251] Trial 1 finished with value: 0.1142872512698241 and parameters: {'lr': 0.011825759151589394, 'num_epochs': 7}. Best is trial 1 with value: 0.1142872512698241.


Best trial: 1. Best value: 0.114287:  30%|███       | 3/10 [00:26<01:03,  9.06s/it]

[I 2025-08-17 18:49:54,627] Trial 2 finished with value: 0.045004589209512315 and parameters: {'lr': 0.014394419357596071, 'num_epochs': 10}. Best is trial 1 with value: 0.1142872512698241.


Best trial: 3. Best value: 0.135547:  40%|████      | 4/10 [00:27<00:34,  5.69s/it]

[I 2025-08-17 18:49:55,153] Trial 3 finished with value: 0.1355472436777312 and parameters: {'lr': 0.0013464932403186058, 'num_epochs': 6}. Best is trial 3 with value: 0.1355472436777312.


Best trial: 3. Best value: 0.135547:  50%|█████     | 5/10 [00:39<00:40,  8.03s/it]

[I 2025-08-17 18:50:07,338] Trial 4 finished with value: 0.13150393664579516 and parameters: {'lr': 0.02359040984327493, 'num_epochs': 2}. Best is trial 3 with value: 0.1355472436777312.


Best trial: 3. Best value: 0.135547:  60%|██████    | 6/10 [00:40<00:22,  5.75s/it]

[I 2025-08-17 18:50:08,652] Trial 5 finished with value: -0.04957683777459038 and parameters: {'lr': 0.0337470417117592, 'num_epochs': 9}. Best is trial 3 with value: 0.1355472436777312.


Best trial: 3. Best value: 0.135547:  70%|███████   | 7/10 [00:52<00:23,  7.72s/it]

[I 2025-08-17 18:50:20,425] Trial 6 finished with value: 0.11547923480129461 and parameters: {'lr': 0.008080131035353195, 'num_epochs': 10}. Best is trial 3 with value: 0.1355472436777312.


Best trial: 3. Best value: 0.135547:  80%|████████  | 8/10 [00:54<00:12,  6.04s/it]

[I 2025-08-17 18:50:22,874] Trial 7 finished with value: 0.1267705194945704 and parameters: {'lr': 0.006831979864790386, 'num_epochs': 9}. Best is trial 3 with value: 0.1355472436777312.


Best trial: 3. Best value: 0.135547:  90%|█████████ | 9/10 [01:04<00:07,  7.34s/it]

[I 2025-08-17 18:50:33,078] Trial 8 finished with value: 0.1355434279117786 and parameters: {'lr': 0.004372015093996474, 'num_epochs': 4}. Best is trial 3 with value: 0.1355472436777312.


Best trial: 3. Best value: 0.135547: 100%|██████████| 10/10 [01:07<00:00,  6.78s/it]


[I 2025-08-17 18:50:35,906] Trial 9 finished with value: 0.13546888605663238 and parameters: {'lr': 0.001492647045490235, 'num_epochs': 7}. Best is trial 3 with value: 0.1355472436777312.


[I 2025-08-17 18:50:59,603] A new study created in memory with name: no-name-7646701b-8975-4845-9019-3644c110a3e4
 10%|█         | 1/10 [00:24<03:43, 24.80s/it]

[W 2025-08-17 18:51:24,398] Trial 1 failed with parameters: {'lr': 0.09437849269881586, 'num_epochs': 5} because of the following error: The value nan is not acceptable.
[W 2025-08-17 18:51:24,399] Trial 1 failed with value nan.


Best trial: 0. Best value: 0.0907204:  20%|██        | 2/10 [00:25<01:24, 10.53s/it]

[I 2025-08-17 18:51:24,943] Trial 0 finished with value: 0.09072042998759373 and parameters: {'lr': 0.0010379728733970212, 'num_epochs': 7}. Best is trial 0 with value: 0.09072042998759373.


Best trial: 0. Best value: 0.0907204:  30%|███       | 3/10 [00:51<02:03, 17.64s/it]

[I 2025-08-17 18:51:51,036] Trial 2 finished with value: 0.04690063559993854 and parameters: {'lr': 0.005387872846022235, 'num_epochs': 8}. Best is trial 0 with value: 0.09072042998759373.


Best trial: 0. Best value: 0.0907204:  40%|████      | 4/10 [00:52<01:05, 10.96s/it]

[I 2025-08-17 18:51:51,749] Trial 3 finished with value: 0.090543620574664 and parameters: {'lr': 0.0014846283927581049, 'num_epochs': 8}. Best is trial 0 with value: 0.09072042998759373.


Best trial: 0. Best value: 0.0907204:  50%|█████     | 5/10 [01:16<01:19, 15.96s/it]

[I 2025-08-17 18:52:16,584] Trial 4 finished with value: 0.08984474991916688 and parameters: {'lr': 0.0033925528225261866, 'num_epochs': 4}. Best is trial 0 with value: 0.09072042998759373.


Best trial: 0. Best value: 0.0907204:  60%|██████    | 6/10 [01:18<00:44, 11.21s/it]

[I 2025-08-17 18:52:18,568] Trial 5 finished with value: -0.023466562351199267 and parameters: {'lr': 0.06060954632498749, 'num_epochs': 1}. Best is trial 0 with value: 0.09072042998759373.


Best trial: 0. Best value: 0.0907204:  70%|███████   | 7/10 [01:44<00:47, 15.92s/it]

[I 2025-08-17 18:52:44,184] Trial 6 finished with value: 0.08501539990662584 and parameters: {'lr': 0.005776895640798584, 'num_epochs': 4}. Best is trial 0 with value: 0.09072042998759373.


Best trial: 0. Best value: 0.0907204:  80%|████████  | 8/10 [01:44<00:21, 10.94s/it]

[I 2025-08-17 18:52:44,463] Trial 7 finished with value: 0.08784684172470424 and parameters: {'lr': 0.008920015594635834, 'num_epochs': 2}. Best is trial 0 with value: 0.09072042998759373.


Best trial: 0. Best value: 0.0907204:  90%|█████████ | 9/10 [02:11<00:15, 15.77s/it]

[I 2025-08-17 18:53:10,853] Trial 9 finished with value: 0.05786944371201447 and parameters: {'lr': 0.012302764724565911, 'num_epochs': 3}. Best is trial 0 with value: 0.09072042998759373.


Best trial: 0. Best value: 0.0907204: 100%|██████████| 10/10 [02:11<00:00, 13.15s/it]


[I 2025-08-17 18:53:11,118] Trial 8 finished with value: -0.10155643825261661 and parameters: {'lr': 0.020503730819531513, 'num_epochs': 10}. Best is trial 0 with value: 0.09072042998759373.


[I 2025-08-17 18:53:48,342] A new study created in memory with name: no-name-9847ed0a-6652-42e5-bb23-228394a67fab
Best trial: 1. Best value: 0.0412514:  10%|█         | 1/10 [00:34<05:13, 34.84s/it]

[I 2025-08-17 18:54:23,179] Trial 1 finished with value: 0.04125140694042286 and parameters: {'lr': 0.009370035704828939, 'num_epochs': 3}. Best is trial 1 with value: 0.04125140694042286.


Best trial: 0. Best value: 0.0709456:  20%|██        | 2/10 [00:35<01:57, 14.64s/it]

[I 2025-08-17 18:54:23,677] Trial 0 finished with value: 0.07094556804245737 and parameters: {'lr': 0.0011211348394267325, 'num_epochs': 5}. Best is trial 0 with value: 0.07094556804245737.


Best trial: 2. Best value: 0.0719043:  30%|███       | 3/10 [01:07<02:39, 22.80s/it]

[I 2025-08-17 18:54:56,195] Trial 2 finished with value: 0.07190425393452309 and parameters: {'lr': 0.001053252444532054, 'num_epochs': 1}. Best is trial 2 with value: 0.07190425393452309.


Best trial: 2. Best value: 0.0719043:  40%|████      | 4/10 [01:09<01:25, 14.32s/it]

[I 2025-08-17 18:54:57,516] Trial 3 finished with value: 0.06493458239475818 and parameters: {'lr': 0.004822117781513079, 'num_epochs': 4}. Best is trial 2 with value: 0.07190425393452309.


Best trial: 2. Best value: 0.0719043:  50%|█████     | 5/10 [01:43<01:48, 21.62s/it]

[I 2025-08-17 18:55:32,082] Trial 4 finished with value: 0.07018113747888728 and parameters: {'lr': 0.0021706302038442585, 'num_epochs': 2}. Best is trial 2 with value: 0.07190425393452309.


Best trial: 2. Best value: 0.0719043:  60%|██████    | 6/10 [01:45<00:58, 14.72s/it]

[I 2025-08-17 18:55:33,400] Trial 5 finished with value: 0.032881778544321213 and parameters: {'lr': 0.030590521561647196, 'num_epochs': 1}. Best is trial 2 with value: 0.07190425393452309.


Best trial: 2. Best value: 0.0719043:  70%|███████   | 7/10 [02:21<01:05, 21.98s/it]

[I 2025-08-17 18:56:10,319] Trial 6 finished with value: -0.0051989051297017565 and parameters: {'lr': 0.006225043298701339, 'num_epochs': 8}. Best is trial 2 with value: 0.07190425393452309.


Best trial: 2. Best value: 0.0719043:  80%|████████  | 8/10 [02:23<00:35, 17.92s/it]

[I 2025-08-17 18:56:11,729] Trial 7 finished with value: 0.06390395626225005 and parameters: {'lr': 0.0026362426348245987, 'num_epochs': 7}. Best is trial 2 with value: 0.07190425393452309.





KeyboardInterrupt: 

In [None]:
df6

Unnamed: 0,policy_rewards,ipw,reg_dm,conv_dm,conv_dr,conv_sndr,ipw_var,reg_dm_var,conv_dm_var,conv_dr_var,conv_sndr_var,action_diff_to_real,action_delta,context_diff_to_real,context_delta
0,0.014,0.0206,0.0158,0.0163,0.017,0.019,0.0,0.0,0.0,0.0,0.0,0.2189,0.0,0.3154,0.0
3000,0.0139,0.0206,0.0158,0.0163,0.017,0.019,0.0,0.0,0.0,0.0,0.0,0.2189,0.0,0.3154,0.0
6000,0.014,0.0156,0.0129,0.0135,0.0135,0.0136,0.0,0.0,0.0,0.0,0.0,0.2079,0.06,0.3195,0.0415
8000,0.0138,0.0194,0.0155,0.0168,0.0167,0.0163,0.0,0.0,0.0,0.0,0.0,0.2189,0.0001,0.3154,0.0001
9000,0.0143,0.0164,0.015,0.0165,0.016,0.0146,0.0,0.0,0.0,0.0,0.0,0.2155,0.0121,0.3156,0.0089


### 4

$$emb = 0.7 * gt + 0.3 * noise$$
$$lr = 0.05$$
$$n_{epochs} = 10$$
$$BatchSize=150$$

In [None]:
df7 = trainer_trial(num_runs, num_neighbors, num_rounds_list[:-3], train_dataset, batch_size+100, num_epochs=10, lr=0.05)

TypeError: trainer_trial() got an unexpected keyword argument 'num_epochs'

In [None]:
df7