In [1]:
import warnings
warnings.filterwarnings("ignore")
from copy import deepcopy
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import sys
sys.path.append("/code")

from tqdm import tqdm
import torch
# device = torch.device('cpu')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
import gym
import recogym

import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim

from sklearn.utils import check_random_state

# implementing OPE of the IPWLearner using synthetic bandit data
from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt

from scipy.special import softmax
import optuna


from estimators import (
    DirectMethod as DM
)

from simulation_utils import (
    eval_policy,
    generate_dataset,
    create_simulation_data_from_pi,
    get_train_data,
    get_opl_results_dict,
    CustomCFDataset,
    calc_reward
)

from models import (    
    CFModel,
    NeighborhoodModel,
    BPRModel
)

from training_utils import (
    fit_bpr,
    train,
    validation_loop
 )

from custom_losses import (
    SNDRPolicyLoss,
    BPRLoss
    )

random_state=12345
random_ = check_random_state(random_state)

Using device: cpu
Using device: cpu
Using device: cpu


In [2]:
pd.options.display.float_format = '{:,.4f}'.format

## `trainer_trial` Function

This function runs policy learning experiments using offline bandit data and evaluates various estimators.

### Parameters
- **num_runs** (int): Number of experimental runs per training size
- **num_neighbors** (int): Number of neighbors to consider in the neighborhood model
- **num_rounds_list** (list): List of training set sizes to evaluate
- **dataset** (dict): Contains dataset information including embeddings, action probabilities, and reward probabilities
- **batch_size** (int): Batch size for training the policy model
- **num_epochs** (int): Number of training epochs for each experiment
- **lr** (float, default=0.001): Learning rate for the optimizer

### Process Flow
1. Initializes result structures and retrieval models
2. For each training size in `num_rounds_list`:
   - Creates a uniform logging policy and simulates data
   - Generates training data for offline learning
   - Fits regression and neighborhood models for reward estimation
   - Initializes and trains a counterfactual policy model
   - Evaluates policy performance using various estimators
   - Collects metrics on policy reward and embedding quality

### Returns
- **DataFrame**: Results table with rows indexed by training size and columns for various metrics:
  - `policy_rewards`: True expected reward of the learned policy
  - Various estimator errors (`ipw`, `reg_dm`, `conv_dm`, `conv_dr`, `conv_sndr`)
  - Variance metrics for each estimator
  - Embedding quality metrics comparing learned representations to ground truth

### Implementation Notes
- Uses uniform random logging policy for collecting offline data
- Employs Self-Normalized Doubly Robust (SNDR) policy learning
- Measures embedding quality via RMSE to original/ground truth embeddings

In [3]:
def trainer_trial(
                  num_runs,
                  num_neighbors,
                  num_rounds_list,
                  dataset,
                  batch_size,
                  val_size=2000
                  ):
    
    # Define device at the beginning
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    def objective(trial):
    
        # Optuna objective function
        lr = trial.suggest_loguniform("lr", 1e-5, 1e-1)
        epochs = trial.suggest_int("num_epochs", 1, 10)

        trial_neigh_model = NeighborhoodModel(
                                                train_data['x_idx'],
                                                train_data['a'], 
                                                our_a,
                                                our_x, 
                                                train_data['r'], 
                                                num_neighbors=num_neighbors
                                            )
        

        trial_model = CFModel(
                        n_users, 
                        n_actions, 
                        emb_dim, 
                        initial_user_embeddings=torch.tensor(our_x, device=device), 
                        initial_actions_embeddings=torch.tensor(our_a, device=device)
                        )
        
        # Training
        train(trial_model, train_loader, trial_neigh_model, criterion=SNDRPolicyLoss(), num_epochs=epochs, lr=lr, device=device)
        return validation_loop(trial_model, val_loader, trial_neigh_model)
    
    device = torch.device('cpu')

    dm = DM()
    results = {}

    our_x, our_a = dataset["our_x"], dataset["our_a"]
    emb_x, emb_a = dataset["emb_x"], dataset["emb_a"]
    original_x, original_a = dataset["original_x"], dataset["original_a"]
    n_users, n_actions, emb_dim = dataset["n_users"], dataset["n_actions"], dataset["emb_dim"]
    
    first = True

    for train_size in num_rounds_list:
        reg_results, conv_results = [], []
        
        for run in range(num_runs):

            pi_0 = softmax(our_x @ our_a.T, axis=1)
            original_policy_prob = np.expand_dims(pi_0, -1)
            simulation_data = create_simulation_data_from_pi(
                                                            dataset['env'],
                                                            pi_0,
                                                            train_size + val_size
                                                            )
            # test_data = get_test_data(dataset, simulation_data, n_test_data)

            # idx = np.arange(train_size) + n_test_data
            idx = np.arange(train_size)

            train_data = get_train_data(n_actions, train_size, simulation_data, idx, our_x)
            val_data = get_train_data(n_actions, val_size, simulation_data, np.arange(val_size) + train_size, our_x)

            # bpr_model = BPRModel(
            #                     n_users,
            #                     n_actions,
            #                     emb_x.shape[1], 
            #                     initial_user_embeddings=torch.tensor(our_x, device=device), 
            #                     initial_actions_embeddings=torch.tensor(our_a, device=device)
            #                     )
            
            neighberhoodmodel = NeighborhoodModel(
                                                    train_data['x_idx'],
                                                    train_data['a'], 
                                                    our_a,
                                                    our_x, 
                                                    train_data['r'], 
                                                    num_neighbors=num_neighbors
                                                )
            

            model = CFModel(
                            n_users, 
                            n_actions, 
                            emb_dim, 
                            initial_user_embeddings=torch.tensor(our_x, device=device), 
                            initial_actions_embeddings=torch.tensor(our_a, device=device)
                            )
            
            cf_dataset =  CustomCFDataset(
                                       train_data['x_idx'], 
                                       train_data['a'], 
                                       train_data['r'], 
                                       original_policy_prob
                                       )
            
            train_loader = DataLoader(cf_dataset, batch_size=batch_size, shuffle=True)

            val_dataset =  CustomCFDataset(
                            val_data['x_idx'], 
                            val_data['a'], 
                            val_data['r'], 
                            original_policy_prob
                            )
            
            val_loader = DataLoader(val_dataset, batch_size=len(val_data['r']), shuffle=True)

            if first:
                policy = np.expand_dims(softmax(our_x @ our_a.T, axis=1), -1)
                conv_results.append(eval_policy(neighberhoodmodel, train_data, original_policy_prob, policy))
                conv_results[-1] = np.append(calc_reward(dataset, policy), conv_results[-1])
                conv_results[-1] = np.append(conv_results[-1], [np.sqrt(np.mean((emb_a-our_a)**2)), np.sqrt(np.mean((original_a-our_a)**2))])
                conv_results[-1] = np.append(conv_results[-1], [np.sqrt(np.mean((emb_x-our_x)**2)), np.sqrt(np.mean((original_x-our_x)**2))])
                # bpr_scores = bpr_model.calc_scores(torch.tensor(train_data['x_idx'], device=device, dtype=torch.long)).detach().cpu().numpy()
                # reg_dm = dm.estimate_policy_value(policy[train_data['x_idx']], bpr_scores)
                reg_dm = 0.0
                reg_results.append(reg_dm)
                first = False
                reg_results = np.array(reg_results)
                conv_results = np.array(conv_results)
                results[0] = get_opl_results_dict(reg_results, conv_results)
                reg_results, conv_results = [], []
            
            # Bloss = BPRLoss()

            
            study = optuna.create_study(direction="minimize")
            study.optimize(objective, n_trials=5)
            best_params = study.best_params

            neighberhoodmodel = NeighborhoodModel(
                                                    train_data['x_idx'],
                                                    train_data['a'], 
                                                    our_a,
                                                    our_x, 
                                                    train_data['r'], 
                                                    num_neighbors=num_neighbors
                                                )
            

            model = CFModel(
                            n_users, 
                            n_actions, 
                            emb_dim, 
                            initial_user_embeddings=torch.tensor(our_x, device=device), 
                            initial_actions_embeddings=torch.tensor(our_a, device=device)
                            )

            train(model, train_loader, neighberhoodmodel, criterion=SNDRPolicyLoss(), num_epochs=best_params['num_epochs'], lr=best_params['lr'], device=device)
            # fit_bpr(bpr_model, Bloss, train_loader, num_epochs=3, lr=0.001, device=device)
            # neighborhood_model.update(model.get_params()[0].detach().numpy(), model.get_params()[1].detach().numpy())'

            our_x, our_a = model.get_params()
            our_a, our_x = our_a.detach().cpu().numpy(), our_x.detach().cpu().numpy()


            policy = np.expand_dims(softmax(our_x @ our_a.T, axis=1), -1)

            # bpr_scores = bpr_model.calc_scores(torch.tensor(train_data['x_idx'], device=device, dtype=torch.long)).detach().cpu().numpy()
            # reg_dm = dm.estimate_policy_value(policy[train_data['x_idx']], bpr_scores)
            reg_dm = 0.0

            reg_results.append(reg_dm)

            # conv_results.append(eval_policy(neighberhoodmodel, test_data, original_policy_prob[test_data['x_idx']], policy))
            conv_results.append(eval_policy(neighberhoodmodel, train_data, original_policy_prob, policy))

            conv_results[-1] = np.append(calc_reward(dataset, policy), conv_results[-1])
            conv_results[-1] = np.append(conv_results[-1], [np.sqrt(np.mean((emb_a-our_a)**2)), np.sqrt(np.mean((original_a-our_a)**2))])

            # temp.append(np.mean((emb_a-our_a)**2, axis=0))

            conv_results[-1] = np.append(conv_results[-1], [np.sqrt(np.mean((emb_x-our_x)**2)), np.sqrt(np.mean((original_x-our_x)**2))])
            
            our_a, our_x = original_a.copy(), original_x.copy()

        reg_results = np.array(reg_results)
        conv_results = np.array(conv_results)

        results[train_size] = get_opl_results_dict(reg_results, conv_results)
    
    return pd.DataFrame.from_dict(results, orient='index')

## Learning

We will run several simulations on a generated dataset, the dataset is generated like this:
$$ \text{We have users U and actions A } u_i \sim N(0, I_{emb_dim}) \ a_i \sim N(0, I_{emb_dim})$$
$$ p_{ij} = 1 / (5 + e^{-(u_i.T a_j)}) $$
$$r_{ij} \sim Bin(p_{ij})$$

We have a policy $\pi$
and it's ground truth reward is calculated by
$$R_{gt} = \sum_{i}{\sum_{j}{\pi_{ij} * p_{ij}}} $$

Our parameters for the dataset will be
$$EmbDim = 5$$
$$NumActions= 150$$
$$NumUsers = 150$$
$$NeighborhoodSize = 6$$

to learn a new policy from $\pi$ we will sample from:
$$\pi_{start} = (1-\epsilon)*\pi + \epsilon * \pi_{random}$$

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Using device: cpu


In [5]:
num_runs = 1

In [6]:
dataset_params = dict(
                    n_actions= 180,
                    n_users = 180,
                    emb_dim = 5,
                    # sigma = 0.1,
                    eps = 0.2 # this is the epsilon for the noise in the ground truth policy representation
                    )

train_dataset = generate_dataset(dataset_params)

In [17]:
num_runs = 1
batch_size = 200
num_neighbors = 51
num_rounds_list = [1000, 2000, 3000, 4000]

In [14]:
num_rounds_list = [2000]

### 1

$$emb = 0.7 * gt + 0.3 * noise$$
$$lr = 0.005$$
$$n_{epochs} = 1$$
$$BatchSize=50$$

In [18]:
df4 = trainer_trial(num_runs, num_neighbors, num_rounds_list, train_dataset, batch_size, val_size=5000)

[I 2025-07-12 11:38:48,647] A new study created in memory with name: no-name-d51a3529-5882-4495-a263-f200e32486e0
[I 2025-07-12 11:38:50,412] Trial 0 finished with value: -0.001421813136377667 and parameters: {'lr': 0.001667000269167642, 'num_epochs': 4}. Best is trial 0 with value: -0.001421813136377667.


0.053679542930237224 0.017338296539337973 0.005555555555555556 0.0182 88.56600189208984
Estimated rewards variance: 0.01143674616268241
Estimated rewards mean: 0.017393447009291976
Estimated rewards dm: 0.01739326335618298
Estimated rewards iw: 1.0000658706169503


[I 2025-07-12 11:38:52,208] Trial 1 finished with value: -0.010262909056418707 and parameters: {'lr': 0.0415548211213662, 'num_epochs': 9}. Best is trial 1 with value: -0.010262909056418707.


0.053679542930237224 0.017338296539337973 0.005555555555555556 0.0182 88.56600189208984
Estimated rewards variance: 0.017574149986079448
Estimated rewards mean: 0.01864935314854631
Estimated rewards dm: 0.018646335209822815
Estimated rewards iw: 0.9902100905592409


[I 2025-07-12 11:38:54,103] Trial 2 finished with value: -0.0013806729402571216 and parameters: {'lr': 2.868994421386672e-05, 'num_epochs': 7}. Best is trial 1 with value: -0.010262909056418707.


0.053679542930237224 0.017338296539337973 0.005555555555555556 0.0182 88.56600189208984
Estimated rewards variance: 0.01141900638303754
Estimated rewards mean: 0.01740540245616678
Estimated rewards dm: 0.0174052297603873
Estimated rewards iw: 0.9999923432666546


[I 2025-07-12 11:38:55,864] Trial 3 finished with value: -0.001426719511954666 and parameters: {'lr': 0.008377871856152458, 'num_epochs': 2}. Best is trial 1 with value: -0.010262909056418707.


0.053679542930237224 0.017338296539337973 0.005555555555555556 0.0182 88.56600189208984
Estimated rewards variance: 0.01144847303391115
Estimated rewards mean: 0.017407833195578724
Estimated rewards dm: 0.01740768975244712
Estimated rewards iw: 0.9997847003922323


[I 2025-07-12 11:38:57,529] Trial 4 finished with value: -0.0013804041610357039 and parameters: {'lr': 2.597879663273124e-05, 'num_epochs': 1}. Best is trial 1 with value: -0.010262909056418707.


0.053679542930237224 0.017338296539337973 0.005555555555555556 0.0182 88.56600189208984
Estimated rewards variance: 0.011418953870294547
Estimated rewards mean: 0.01740558484360278
Estimated rewards dm: 0.01740541250205122
Estimated rewards iw: 0.999997675360792


[I 2025-07-12 11:39:15,360] A new study created in memory with name: no-name-8b6f4bf5-cdf4-4638-90ea-f4f74fafdb0b
[I 2025-07-12 11:39:18,824] Trial 0 finished with value: 0.0007022101963990359 and parameters: {'lr': 0.0008267064161580157, 'num_epochs': 1}. Best is trial 0 with value: 0.0007022101963990359.


0.05314686060029868 0.01352486851982936 0.005555555555555556 0.0138 88.19860076904297
Estimated rewards variance: 0.007766610436416377
Estimated rewards mean: 0.013479515361050255
Estimated rewards dm: 0.01347945891993797
Estimated rewards iw: 0.9997560490528876


[I 2025-07-12 11:39:22,259] Trial 1 finished with value: 0.0006988624747999994 and parameters: {'lr': 0.0011847953277015395, 'num_epochs': 2}. Best is trial 1 with value: 0.0006988624747999994.


0.05314686060029869 0.013524868519829363 0.005555555555555556 0.0138 88.19860076904297
Estimated rewards variance: 0.007769186268025502
Estimated rewards mean: 0.013480405290713888
Estimated rewards dm: 0.013480350056850415
Estimated rewards iw: 0.9996601870161991


[I 2025-07-12 11:39:25,876] Trial 2 finished with value: 0.0006743853592347179 and parameters: {'lr': 0.0024885158096756128, 'num_epochs': 7}. Best is trial 2 with value: 0.0006743853592347179.


0.05314686060029868 0.01352486851982936 0.005555555555555556 0.0138 88.19860076904297
Estimated rewards variance: 0.007800681037254123
Estimated rewards mean: 0.013507742062405972
Estimated rewards dm: 0.013507685823920883
Estimated rewards iw: 0.996096479896002


[I 2025-07-12 11:39:29,304] Trial 3 finished with value: 0.0006990516463400796 and parameters: {'lr': 0.0003300561468802669, 'num_epochs': 2}. Best is trial 2 with value: 0.0006743853592347179.


0.05314686060029868 0.013524868519829363 0.005555555555555556 0.0138 88.19860076904297
Estimated rewards variance: 0.007766758902631575
Estimated rewards mean: 0.013476601061446984
Estimated rewards dm: 0.013476547734123386
Estimated rewards iw: 0.9999312903161328


[I 2025-07-12 11:39:32,918] Trial 4 finished with value: 0.0006162381168083753 and parameters: {'lr': 0.006474796005172913, 'num_epochs': 5}. Best is trial 4 with value: 0.0006162381168083753.


0.05314686060029869 0.013524868519829363 0.005555555555555556 0.0138 88.19860076904297
Estimated rewards variance: 0.007824928032703913
Estimated rewards mean: 0.013489484970608945
Estimated rewards dm: 0.013489374477999642
Estimated rewards iw: 0.9916043721513467


[I 2025-07-12 11:39:55,193] A new study created in memory with name: no-name-b59523c6-4928-4a31-b53a-9f63b2af9511
[I 2025-07-12 11:40:00,622] Trial 0 finished with value: 0.0020380606936551797 and parameters: {'lr': 0.001926867315624549, 'num_epochs': 4}. Best is trial 0 with value: 0.0020380606936551797.


0.05830298610458417 0.017930466230908347 0.005555555555555556 0.0108 88.99579620361328
Estimated rewards variance: 0.009771880841086267
Estimated rewards mean: 0.018114353506505365
Estimated rewards dm: 0.018115780323191322
Estimated rewards iw: 0.9990450248153724


[I 2025-07-12 11:40:06,240] Trial 1 finished with value: 0.0019870202832318433 and parameters: {'lr': 6.451815191981529e-05, 'num_epochs': 7}. Best is trial 1 with value: 0.0019870202832318433.


0.05830298610458417 0.017930466230908347 0.005555555555555556 0.0108 88.99579620361328
Estimated rewards variance: 0.00983196181881963
Estimated rewards mean: 0.01816215582720323
Estimated rewards dm: 0.018163582130751373
Estimated rewards iw: 0.9999517341545877


[I 2025-07-12 11:40:11,521] Trial 2 finished with value: 0.001984715944478306 and parameters: {'lr': 1.0417350246698749e-05, 'num_epochs': 1}. Best is trial 2 with value: 0.001984715944478306.


0.05830298610458415 0.01793046623090835 0.005555555555555556 0.0108 88.99579620361328
Estimated rewards variance: 0.00983516228357327
Estimated rewards mean: 0.018165116760238558
Estimated rewards dm: 0.01816654293421198
Estimated rewards iw: 0.9999990937949363


[I 2025-07-12 11:40:16,794] Trial 3 finished with value: 0.0019318331259345276 and parameters: {'lr': 0.003041205182259255, 'num_epochs': 2}. Best is trial 3 with value: 0.0019318331259345276.


0.05830298610458415 0.01793046623090835 0.005555555555555556 0.0108 88.99579620361328
Estimated rewards variance: 0.00983090514135159
Estimated rewards mean: 0.018105230267989103
Estimated rewards dm: 0.018106666598292815
Estimated rewards iw: 0.9987375298653602


[I 2025-07-12 11:40:22,626] Trial 4 finished with value: 0.001841995537373433 and parameters: {'lr': 0.0014087109519266517, 'num_epochs': 10}. Best is trial 4 with value: 0.001841995537373433.


0.05830298610458416 0.01793046623090835 0.005555555555555556 0.0108 88.99579620361328
Estimated rewards variance: 0.009848445966640081
Estimated rewards mean: 0.018044250117223527
Estimated rewards dm: 0.018045715906220214
Estimated rewards iw: 0.9972312540219226


[I 2025-07-12 11:40:49,495] A new study created in memory with name: no-name-00bbb04c-f83e-43cb-b1eb-92a8a2982864
[I 2025-07-12 11:40:57,286] Trial 0 finished with value: 0.003049270090122928 and parameters: {'lr': 0.002124313970742067, 'num_epochs': 7}. Best is trial 0 with value: 0.003049270090122928.


0.05459684694381458 0.0146445249504824 0.005555555555555556 0.0128 88.21700286865234
Estimated rewards variance: 0.007063084609510306
Estimated rewards mean: 0.014669163761667227
Estimated rewards dm: 0.014669546049722442
Estimated rewards iw: 1.0007179370048387


[I 2025-07-12 11:41:05,028] Trial 1 finished with value: 0.0029944618923268343 and parameters: {'lr': 0.0001128782197339371, 'num_epochs': 7}. Best is trial 1 with value: 0.0029944618923268343.


0.05459684694381458 0.014644524950482398 0.005555555555555556 0.0128 88.21700286865234
Estimated rewards variance: 0.00705083472134069
Estimated rewards mean: 0.014594202556242202
Estimated rewards dm: 0.01459457304850975
Estimated rewards iw: 0.9999552599812157


[I 2025-07-12 11:41:12,574] Trial 2 finished with value: -0.0046365972546273775 and parameters: {'lr': 0.022848220993349037, 'num_epochs': 4}. Best is trial 2 with value: -0.0046365972546273775.


0.05459684694381458 0.014644524950482398 0.005555555555555556 0.0128 88.21700286865234
Estimated rewards variance: 0.013763071962569106
Estimated rewards mean: 0.018005837551197976
Estimated rewards dm: 0.018006286387830764
Estimated rewards iw: 0.8841507938290964


[I 2025-07-12 11:41:19,818] Trial 3 finished with value: 0.002765568847382504 and parameters: {'lr': 0.007605619228011804, 'num_epochs': 3}. Best is trial 2 with value: -0.0046365972546273775.


0.05459684694381459 0.014644524950482398 0.005555555555555556 0.0128 88.21700286865234
Estimated rewards variance: 0.007018549986818782
Estimated rewards mean: 0.01431219600592467
Estimated rewards dm: 0.01431257559857445
Estimated rewards iw: 0.9953142851872845


[I 2025-07-12 11:41:27,735] Trial 4 finished with value: 0.002000388279942946 and parameters: {'lr': 0.0038960450928688544, 'num_epochs': 10}. Best is trial 2 with value: -0.0046365972546273775.


0.05459684694381458 0.014644524950482398 0.005555555555555556 0.0128 88.21700286865234
Estimated rewards variance: 0.007925352794421442
Estimated rewards mean: 0.01503884978395125
Estimated rewards dm: 0.015039215456372648
Estimated rewards iw: 0.982427525098866


In [19]:
df4[['policy_rewards', 'ipw', 'reg_dm', 'conv_dm', 'conv_dr', 'conv_sndr', 'action_diff_to_real', 'action_delta', 'context_diff_to_real', 'context_delta']]

Unnamed: 0,policy_rewards,ipw,reg_dm,conv_dm,conv_dr,conv_sndr,action_diff_to_real,action_delta,context_diff_to_real,context_delta
0,0.0151,0.02,0.0,0.0181,0.0186,0.0196,0.2703,0.0,0.2661,0.0
1000,0.0151,0.0,0.0,0.0179,0.0137,0.0045,1.0946,1.0995,0.9902,1.0044
2000,0.0146,0.0153,0.0,0.0137,0.0141,0.0147,0.2892,0.1329,0.2963,0.1123
3000,0.0146,0.0177,0.0,0.0179,0.0174,0.0163,0.2717,0.0662,0.2726,0.0563
4000,0.0166,0.0293,0.0,0.0162,0.0234,0.0281,0.8088,0.7738,0.7965,0.7827


In [11]:
num_rounds_list = [1]

### 2

$$emb = 0.7 * gt + 0.3 * noise$$
$$lr = 0.001$$
$$n_{epochs} = 1$$
$$BatchSize=50$$

In [12]:
df5 = trainer_trial(num_runs, num_neighbors, num_rounds_list, train_dataset, batch_size)

ValueError: `pscore` must be 1D array, but got 0D array

In [None]:
df5

### 3

$$emb = 0.7 * gt + 0.3 * noise$$
$$lr = 0.003$$
$$n_{epochs} = 10$$
$$BatchSize=50$$

In [None]:
df6 = trainer_trial(num_runs, num_neighbors, num_rounds_list, train_dataset, batch_size)

In [None]:
df6

### 4

$$emb = 0.7 * gt + 0.3 * noise$$
$$lr = 0.05$$
$$n_{epochs} = 10$$
$$BatchSize=150$$

In [None]:
df7 = trainer_trial(num_runs, num_neighbors, num_rounds_list[:-3], train_dataset, batch_size+100, num_epochs=10, lr=0.05)

In [None]:
df7