In [1]:
import pandas as pd
import numpy as np
import re
import datetime
import os
import time
import json

from darts.allocation import Allocator
from darts.bandit import Bandit

In [2]:
# Read the target pool CSV into a DataFrame and report how long the read took.
file_loc = 'data/'
start = time.time()
allocation_pool_path = os.path.join(file_loc, 'target_voter_universe.csv')
allocation_pool_df = pd.read_csv(allocation_pool_path)
print(f"... completed in {time.time() - start} seconds")

... completed in 0.05939030647277832 seconds


In [3]:
# Preview the first 10 rows of the target pool.
allocation_pool_df.head(10)

Unnamed: 0,target_id,model_id,probability,target_round,target_result,target_reward,target_regret
0,0,model1_rf,0.0,0,0,0,0
1,0,model2_lr,1.0,0,0,0,0
2,0,model3_lr,0.0,0,0,0,0
3,0,model4_xgb,1.0,0,0,0,0
4,0,model5_lgbm,0.0,0,0,0,0
5,0,model_baseline,1.0,0,0,0,0
6,1,model1_rf,0.0,0,0,0,0
7,1,model2_lr,0.0,0,0,0,0
8,1,model3_lr,0.0,0,0,0,0
9,1,model4_xgb,0.0,0,0,0,0


In [4]:
# Rough number of rounds the pool can supply: total rows divided by the number
# of distinct models, divided by a batch of 250 targets per round (250 matches
# the `num_targets` value set in the configuration cell further down).
# The model count is derived from the data rather than hard-coded.
n_models = allocation_pool_df['model_id'].nunique()
(allocation_pool_df.shape[0] / n_models) / 250

54.54

In [5]:
# Read the observed target results CSV and report how long the read took.
file_loc = 'data/'
start = time.time()
target_results_path = os.path.join(file_loc, 'voter_responses.csv')
target_results_df = pd.read_csv(target_results_path)
print(f"... completed in {time.time() - start} seconds")

... completed in 0.009937524795532227 seconds


In [6]:
# Preview the first 10 rows of the target results.
target_results_df.head(10)

Unnamed: 0,target_id,target_result
0,0,1
1,1,0
2,3,0
3,4,0
4,16,1
5,22,0
6,26,0
7,27,1
8,29,0
9,31,0


In [7]:
# ---------------------------------------------------------------------------
# Column names used in the allocation pool
# ---------------------------------------------------------------------------
target_id_col = 'target_id'          # id of an individual target
confidence_rank_col = 'probability'  # confidence ranking (e.g. probability) a model assigns
                                     # to this target resulting in a 'reward'
arm_name_col = 'model_id'            # name of the 'arm' (model) in the target pool
pool_round_col = 'target_round'      # round in which a target was allocated (0 = not yet allocated)
result_col = 'target_result'         # the 'result'
reward_col = 'target_reward'         # the 'reward'
regret_col = 'target_regret'         # the 'regret'
picked_col = 'model_picked'          # flags which arm was 'picked'

# ---------------------------------------------------------------------------
# Initial allocation across the pool of models
# ---------------------------------------------------------------------------
# The shares must sum to 1. For the simulation every model starts with an
# equal share; the simulation loop replaces this dict with the bandit's
# allocations after each round.
allocation_method = dict.fromkeys(
    ['model_baseline', 'model1_rf', 'model2_lr', 'model3_lr', 'model4_xgb', 'model5_lgbm'],
    (1/6),
)

# ---------------------------------------------------------------------------
# Simulation settings
# ---------------------------------------------------------------------------
simulation_rounds = 50                # number of rounds to simulate
num_targets = 250                     # number of targets requested from the allocator per round
allocation_policy = 'UCB1'            # policy handed to the Bandit
allocation_strategy = 'round-robin'   # strategy handed to the Allocator
allocation_order = 'best'             # order handed to the Allocator

# ---------------------------------------------------------------------------
# Bandit parameters (passed through to the Bandit constructor)
# ---------------------------------------------------------------------------
ucb_scale = 1.96
epsilon = 0.1
greed_factor = 1

In [8]:
# Index the responses by target id so that DataFrame.update() in the simulation
# loop can align them with the pool rows.
# Guarded on the index name: re-running this cell after the index has already
# been set is a no-op instead of raising KeyError on the missing column.
if target_results_df.index.name != target_id_col:
    target_results_df = target_results_df.set_index([target_id_col])

In [9]:
def _reward_and_regret(pool_df, picked_only):
    """Compute per-row reward and regret Series for an allocation pool.

    Column names are taken from the notebook-level configuration variables
    ``result_col``, ``confidence_rank_col`` and ``picked_col``.

    Args:
        pool_df: DataFrame containing the result, confidence and picked columns.
        picked_only: when True, a reward is only credited on rows whose arm
            was picked and a regret is only charged on rows whose arm was NOT
            picked; when False the picked flag is ignored entirely.

    Returns:
        Tuple ``(reward, regret)`` of Series aligned with ``pool_df``:
        reward is ``result * confidence`` (true positive) and regret is
        ``result * (1 - confidence)`` (false negative). False positives are
        not counted as regret.
    """
    result = pool_df[result_col]
    confidence = pool_df[confidence_rank_col]
    reward = result * confidence
    regret = result * (1 - confidence)
    if picked_only:
        picked = pool_df[picked_col]
        reward = reward * picked
        regret = regret * (1 - picked)
    return reward, regret


# NOTE: this cell mutates `allocation_pool_df` (round / picked / result columns)
# and rebinds `allocation_method`. Re-run the loading and configuration cells
# before re-running it, otherwise the simulation continues from the mutated state.

# Set up defaults - nothing picked yet
allocation_pool_df[picked_col] = 0

# Per-round statistics, keyed by round number
timesteps = {}

# Columns used to line up the current round's rows with the master pool when
# the synced results are written back (every column except the result itself).
match_cols = [target_id_col, arm_name_col, confidence_rank_col, pool_round_col,
              reward_col, regret_col, picked_col]

for allocation_round in range(1, simulation_rounds+1):

    # Only targets that have not been assigned to a round yet are eligible
    remaining_allocation_pool_df = allocation_pool_df[allocation_pool_df[pool_round_col] == 0].copy()

    # set up an allocator
    # TODO: I think this should be in the sequence of:
    #  allocation_pool_df, arm_name_col, target_id_col, confidence_rank_col, allocation_method, strategy, order
    # TODO: Would be good to explain that Allocator introduces a picked column to the dataframe. This is masked behavior.
    #       I may rely on this column if I know about it, but not sure if it will ever go away?
    allocator = Allocator(allocation_method, num_targets, remaining_allocation_pool_df, arm_name_col,
                          confidence_rank_col, target_id_col, strategy=allocation_strategy,
                          order=allocation_order) # another idea is to provide a dictionary mapping to simplify the calling interface

    # retrieve targets
    targets = allocator.allocate_pool() # TODO: can we change this to allocator.retrieve_targets(num_targets) ? Also, might be nice to have a DF return option

    # one row per allocated (target, arm) pair; both joins below are derived from it
    picks_df = pd.DataFrame(targets, columns=[target_id_col, arm_name_col])

    # stamp the allocated targets with the current round
    targets_df = picks_df[[target_id_col]].copy()
    targets_df[pool_round_col] = allocation_round

    # join targets with universe of allocations (every arm row of a target gets the round)
    targets_df = targets_df.set_index([target_id_col])
    allocation_pool_df = allocation_pool_df.set_index([target_id_col])
    allocation_pool_df.update(targets_df)
    allocation_pool_df = allocation_pool_df.reset_index()

    # add indicator for the arm we picked
    target_arm_picked_df = picks_df[[target_id_col, arm_name_col]].copy()
    target_arm_picked_df[picked_col] = 1

    # join target model we picked with the universe of allocations
    target_arm_picked_df = target_arm_picked_df.set_index([target_id_col, arm_name_col])
    allocation_pool_df = allocation_pool_df.set_index([target_id_col, arm_name_col])
    allocation_pool_df.update(target_arm_picked_df)
    allocation_pool_df = allocation_pool_df.reset_index()

    # rows allocated in this round
    last_allocation_pool_df = allocation_pool_df[allocation_pool_df[pool_round_col] == allocation_round]

    # sync up results: overwrite the result column with the observed responses
    last_allocation_pool_df = last_allocation_pool_df.set_index([target_id_col])
    last_allocation_pool_df.update(target_results_df)
    last_allocation_pool_df = last_allocation_pool_df.reset_index(level=0)

    # update results in the master pool set
    allocation_pool_df = allocation_pool_df.set_index(match_cols)
    last_allocation_pool_df = last_allocation_pool_df.set_index(match_cols)
    allocation_pool_df.update(last_allocation_pool_df)
    allocation_pool_df = allocation_pool_df.reset_index()

    # update rewards (true positives) and regrets (false negatives) from the
    # results we synced, taking the picked arm into account
    rewards, regrets = _reward_and_regret(allocation_pool_df, picked_only=True)
    allocation_pool_df[reward_col] = rewards
    allocation_pool_df[regret_col] = regrets

    # prepare a dataframe with just our results of this round for the bandit to evaluate
    results_df = allocation_pool_df[allocation_pool_df[pool_round_col] == allocation_round].copy()

    picked_hits = (results_df[result_col] == 1) & (results_df[reward_col] == 1)
    if not picked_hits.any():
        # no more results left after a steady state - reset selections to include
        # possibilities from the other models by ignoring the picked flag
        rewards, regrets = _reward_and_regret(allocation_pool_df, picked_only=False)
        allocation_pool_df[reward_col] = rewards
        allocation_pool_df[regret_col] = regrets

        # rebuild this round's results from the rescored pool
        results_df = allocation_pool_df[allocation_pool_df[pool_round_col] == allocation_round].copy()

    # prepare dictionary of results for timestep tracking
    stats = results_df[[arm_name_col, reward_col, regret_col]].groupby(arm_name_col).agg({reward_col: ['count', 'sum'],
                                                                                          regret_col: ['sum']})
    # flatten the (column, aggregate) MultiIndex into '<column>_<aggregate>' names
    stats.columns = ['_'.join(tup).rstrip('_') for tup in stats.columns.to_flat_index()]
    stats = stats.reset_index()
    # derive the flattened names from the configured columns instead of hard-coding them
    stats = stats.rename(columns={f'{reward_col}_count': 'total_pool_size',
                                  f'{reward_col}_sum': 'rewards',
                                  f'{regret_col}_sum': 'regrets'})
    stats = stats.set_index([arm_name_col])
    # record the allocation that was in effect for this round
    stats['allocation'] = 0.0
    allocs = pd.DataFrame.from_dict(allocation_method, orient='index', columns=['allocation'])
    allocs.index.name = arm_name_col
    stats.update(allocs)
    stats = stats.reset_index()
    timesteps[allocation_round] = stats.to_dict(orient='records')

    # set up a multi-arm bandit and calculate allocations to each arm.
    bandit = Bandit(results_df, arm_name_col, reward_col, regret_col, policy = allocation_policy, t = allocation_round, ucb_scale = ucb_scale, epsilon = epsilon, greed_factor = greed_factor)

    # use these allocations for the next round
    allocation_method = bandit.get_new_allocations()# bandit.make_allocs().set_index(arm_name_col)['allocation'].to_dict()
    print("Allocations after round", allocation_round, ":\n", allocation_method)
    display(bandit.get_allocation_stats())

    # useful for outputting the result of each timestep into its own directory
    # and keeping an updated master file
    
    #allocation_pool_df[[target_id_col, arm_name_col, confidence_rank_col, pool_round_col, reward_col, result_col]].sort_values(by=[target_id_col,arm_name_col]).to_csv(file_loc + 'simulated_target_universe_updates.csv', index=False)
    
    #round_loc = file_loc + 'round_{:05}/'.format(allocation_round)
    #if not os.path.exists(round_loc):
    #    os.makedirs(round_loc)
    #    
    #targets_df.sort_values(by=[target_id_col])[target_id_col].to_csv(round_loc + 'targets.csv', index=False)
    #allocation_pool_df[[target_id_col, arm_name_col, confidence_rank_col, pool_round_col, reward_col, result_col]].sort_values(by=[target_id_col,arm_name_col]).to_csv(round_loc + 'simulated_allocation_round_{:05}'.format(allocation_round) +'.csv', index=False)
    

Allocations after round 1 :
 {'model3_lr': 0.23076923076923075, 'model_baseline': 0.1923076923076923, 'model2_lr': 0.17307692307692304, 'model4_xgb': 0.16346153846153846, 'model5_lgbm': 0.12499999999999999, 'model1_rf': 0.11538461538461538}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.096,250,0.295182,24.0,0.224,250,0.417758,56.0,0.096,0.096,0.230769
5,model_baseline,0.08,250,0.271837,20.0,0.212,250,0.409545,53.0,0.08,0.08,0.192308
1,model2_lr,0.072,250,0.259006,18.0,0.176,250,0.381584,44.0,0.072,0.072,0.173077
3,model4_xgb,0.068,250,0.252251,17.0,0.04,250,0.196352,10.0,0.068,0.068,0.163462
4,model5_lgbm,0.052,250,0.222472,13.0,0.156,250,0.363583,39.0,0.052,0.052,0.125
0,model1_rf,0.048,250,0.214195,12.0,0.212,250,0.409545,53.0,0.048,0.048,0.115385


Allocations after round 2 :
 {'model3_lr': 0.23874416193769316, 'model2_lr': 0.17018264204574113, 'model4_xgb': 0.15963471590851774, 'model_baseline': 0.15436075283990605, 'model1_rf': 0.13853886363407097, 'model5_lgbm': 0.13853886363407097}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.132,250,0.33917,33.0,0.26,250,0.439514,65.0,0.181074,0.181074,0.238744
1,model2_lr,0.08,250,0.271837,20.0,0.184,250,0.388261,46.0,0.129074,0.129074,0.170183
3,model4_xgb,0.072,250,0.259006,18.0,0.052,250,0.222472,13.0,0.121074,0.121074,0.159635
5,model_baseline,0.068,250,0.252251,17.0,0.236,250,0.425474,59.0,0.117074,0.117074,0.154361
0,model1_rf,0.056,250,0.230383,14.0,0.2,250,0.400802,50.0,0.105074,0.105074,0.138539
4,model5_lgbm,0.056,250,0.230383,14.0,0.168,250,0.374616,42.0,0.105074,0.105074,0.138539


Allocations after round 3 :
 {'model3_lr': 0.19755058046645751, 'model2_lr': 0.1775177715152418, 'model5_lgbm': 0.16249316480183007, 'model_baseline': 0.16249316480183007, 'model4_xgb': 0.15748496256402614, 'model1_rf': 0.14246035585061437}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.096,250,0.295182,24.0,0.28,250,0.4499,70.0,0.157782,0.157782,0.197551
1,model2_lr,0.08,250,0.271837,20.0,0.212,250,0.409545,53.0,0.141782,0.141782,0.177518
4,model5_lgbm,0.068,250,0.252251,17.0,0.136,250,0.343476,34.0,0.129782,0.129782,0.162493
5,model_baseline,0.068,250,0.252251,17.0,0.228,250,0.420384,57.0,0.129782,0.129782,0.162493
3,model4_xgb,0.064,250,0.245244,16.0,0.064,250,0.245244,16.0,0.125782,0.125782,0.157485
0,model1_rf,0.052,250,0.222472,13.0,0.212,250,0.409545,53.0,0.113782,0.113782,0.14246


Allocations after round 4 :
 {'model3_lr': 0.2024752619779382, 'model1_rf': 0.17445114390824742, 'model2_lr': 0.169780457563299, 'model5_lgbm': 0.169780457563299, 'model4_xgb': 0.1464270258385567, 'model_baseline': 0.1370856531486598}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.104,250,0.305873,26.0,0.308,250,0.462593,77.0,0.173401,0.173401,0.202475
0,model1_rf,0.08,250,0.271837,20.0,0.196,250,0.397765,49.0,0.149401,0.149401,0.174451
1,model2_lr,0.076,250,0.26553,19.0,0.192,250,0.394663,48.0,0.145401,0.145401,0.16978
4,model5_lgbm,0.076,250,0.26553,19.0,0.148,250,0.355812,37.0,0.145401,0.145401,0.16978
3,model4_xgb,0.056,250,0.230383,14.0,0.076,250,0.26553,19.0,0.125401,0.125401,0.146427
5,model_baseline,0.048,250,0.214195,12.0,0.264,250,0.441684,66.0,0.117401,0.117401,0.137086


Allocations after round 5 :
 {'model3_lr': 0.18738803669847476, 'model2_lr': 0.1827832878025174, 'model5_lgbm': 0.1827832878025174, 'model1_rf': 0.17817853890656005, 'model4_xgb': 0.13673579884294393, 'model_baseline': 0.13213104994698657}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.088,250,0.283863,22.0,0.288,250,0.453739,72.0,0.162778,0.162778,0.187388
1,model2_lr,0.084,250,0.277944,21.0,0.192,250,0.394663,48.0,0.158778,0.158778,0.182783
4,model5_lgbm,0.084,250,0.277944,21.0,0.148,250,0.355812,37.0,0.158778,0.158778,0.182783
0,model1_rf,0.08,250,0.271837,20.0,0.196,250,0.397765,49.0,0.154778,0.154778,0.178179
3,model4_xgb,0.044,250,0.205507,11.0,0.064,250,0.245244,16.0,0.118778,0.118778,0.136736
5,model_baseline,0.04,250,0.196352,10.0,0.256,250,0.437297,64.0,0.114778,0.114778,0.132131


Allocations after round 6 :
 {'model3_lr': 0.1908835247213194, 'model1_rf': 0.18150925708726026, 'model5_lgbm': 0.1721349894532012, 'model2_lr': 0.16276072181914206, 'model_baseline': 0.16276072181914206, 'model4_xgb': 0.12995078509993516}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.084,250,0.277944,21.0,0.236,250,0.425474,59.0,0.1629,0.1629,0.190884
0,model1_rf,0.076,250,0.26553,19.0,0.16,250,0.367341,40.0,0.1549,0.1549,0.181509
4,model5_lgbm,0.068,250,0.252251,17.0,0.124,250,0.330243,31.0,0.1469,0.1469,0.172135
1,model2_lr,0.06,250,0.237963,15.0,0.2,250,0.400802,50.0,0.1389,0.1389,0.162761
5,model_baseline,0.06,250,0.237963,15.0,0.208,250,0.406691,52.0,0.1389,0.1389,0.162761
3,model4_xgb,0.032,250,0.176353,8.0,0.068,250,0.252251,17.0,0.1109,0.1109,0.129951


Allocations after round 7 :
 {'model3_lr': 0.18360313996317917, 'model5_lgbm': 0.18360313996317917, 'model1_rf': 0.17476671911282482, 'model2_lr': 0.1659302982624705, 'model_baseline': 0.16151208783729332, 'model4_xgb': 0.13058461486105313}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.084,250,0.277944,21.0,0.268,250,0.443806,67.0,0.166224,0.166224,0.183603
4,model5_lgbm,0.084,250,0.277944,21.0,0.18,250,0.384958,45.0,0.166224,0.166224,0.183603
0,model1_rf,0.076,250,0.26553,19.0,0.208,250,0.406691,52.0,0.158224,0.158224,0.174767
1,model2_lr,0.068,250,0.252251,17.0,0.204,250,0.403777,51.0,0.150224,0.150224,0.16593
5,model_baseline,0.064,250,0.245244,16.0,0.196,250,0.397765,49.0,0.146224,0.146224,0.161512
3,model4_xgb,0.036,250,0.186664,9.0,0.068,250,0.252251,17.0,0.118224,0.118224,0.130585


Allocations after round 8 :
 {'model5_lgbm': 0.18476010320867872, 'model1_rf': 0.18058469477590672, 'model3_lr': 0.18058469477590672, 'model2_lr': 0.1764092863431347, 'model_baseline': 0.14718142731373057, 'model4_xgb': 0.1304797935826425}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.092,250,0.289606,23.0,0.16,250,0.367341,40.0,0.176998,0.176998,0.18476
0,model1_rf,0.088,250,0.283863,22.0,0.172,250,0.378137,43.0,0.172998,0.172998,0.180585
2,model3_lr,0.088,250,0.283863,22.0,0.296,250,0.457407,74.0,0.172998,0.172998,0.180585
1,model2_lr,0.084,250,0.277944,21.0,0.192,250,0.394663,48.0,0.168998,0.168998,0.176409
5,model_baseline,0.056,250,0.230383,14.0,0.276,250,0.447914,69.0,0.140998,0.140998,0.147181
3,model4_xgb,0.04,250,0.196352,10.0,0.064,250,0.245244,16.0,0.124998,0.124998,0.13048


Allocations after round 9 :
 {'model5_lgbm': 0.18883884403908824, 'model2_lr': 0.1848075390622843, 'model3_lr': 0.17271362413187255, 'model1_rf': 0.16465101417826472, 'model4_xgb': 0.14449448929424513, 'model_baseline': 0.14449448929424513}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.1,250,0.300602,25.0,0.184,250,0.388261,46.0,0.187372,0.187372,0.188839
1,model2_lr,0.096,250,0.295182,24.0,0.244,250,0.430354,61.0,0.183372,0.183372,0.184808
2,model3_lr,0.084,250,0.277944,21.0,0.324,250,0.468939,81.0,0.171372,0.171372,0.172714
0,model1_rf,0.076,250,0.26553,19.0,0.24,250,0.42794,60.0,0.163372,0.163372,0.164651
3,model4_xgb,0.056,250,0.230383,14.0,0.084,250,0.277944,21.0,0.143372,0.143372,0.144494
5,model_baseline,0.056,250,0.230383,14.0,0.28,250,0.4499,70.0,0.143372,0.143372,0.144494


Allocations after round 10 :
 {'model5_lgbm': 0.19880727958314137, 'model3_lr': 0.1944244687308948, 'model1_rf': 0.15936198191292242, 'model4_xgb': 0.15936198191292242, 'model2_lr': 0.14621354935618275, 'model_baseline': 0.1418307385039362}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.092,250,0.289606,23.0,0.108,250,0.311003,27.0,0.181443,0.181443,0.198807
2,model3_lr,0.088,250,0.283863,22.0,0.228,250,0.420384,57.0,0.177443,0.177443,0.194424
0,model1_rf,0.056,250,0.230383,14.0,0.196,250,0.397765,49.0,0.145443,0.145443,0.159362
3,model4_xgb,0.056,250,0.230383,14.0,0.06,250,0.237963,15.0,0.145443,0.145443,0.159362
1,model2_lr,0.044,250,0.205507,11.0,0.184,250,0.388261,46.0,0.133443,0.133443,0.146214
5,model_baseline,0.04,250,0.196352,10.0,0.204,250,0.403777,51.0,0.129443,0.129443,0.141831


Allocations after round 11 :
 {'model3_lr': 0.1875948193304116, 'model5_lgbm': 0.1875948193304116, 'model1_rf': 0.17503792773216467, 'model2_lr': 0.1624810361339177, 'model_baseline': 0.14573851400292173, 'model4_xgb': 0.14155288347017272}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.088,250,0.283863,22.0,0.264,250,0.441684,66.0,0.179275,0.179275,0.187595
4,model5_lgbm,0.088,250,0.283863,22.0,0.156,250,0.363583,39.0,0.179275,0.179275,0.187595
0,model1_rf,0.076,250,0.26553,19.0,0.192,250,0.394663,48.0,0.167275,0.167275,0.175038
1,model2_lr,0.064,250,0.245244,16.0,0.196,250,0.397765,49.0,0.155275,0.155275,0.162481
5,model_baseline,0.048,250,0.214195,12.0,0.208,250,0.406691,52.0,0.139275,0.139275,0.145739
3,model4_xgb,0.044,250,0.205507,11.0,0.064,250,0.245244,16.0,0.135275,0.135275,0.141553


Allocations after round 12 :
 {'model3_lr': 0.1932651821964211, 'model5_lgbm': 0.1932651821964211, 'model1_rf': 0.16462062701053173, 'model_baseline': 0.15234438907372197, 'model2_lr': 0.14825230976145204, 'model4_xgb': 0.14825230976145204}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.096,250,0.295182,24.0,0.252,250,0.435032,63.0,0.188916,0.188916,0.193265
4,model5_lgbm,0.096,250,0.295182,24.0,0.132,250,0.33917,33.0,0.188916,0.188916,0.193265
0,model1_rf,0.068,250,0.252251,17.0,0.188,250,0.391496,47.0,0.160916,0.160916,0.164621
5,model_baseline,0.056,250,0.230383,14.0,0.212,250,0.409545,53.0,0.148916,0.148916,0.152344
1,model2_lr,0.052,250,0.222472,13.0,0.164,250,0.371018,41.0,0.144916,0.144916,0.148252
3,model4_xgb,0.052,250,0.222472,13.0,0.04,250,0.196352,10.0,0.144916,0.144916,0.148252


Allocations after round 13 :
 {'model3_lr': 0.1852145169728598, 'model5_lgbm': 0.1852145169728598, 'model1_rf': 0.1653418202162243, 'model2_lr': 0.1613672808648972, 'model4_xgb': 0.1573927415135701, 'model_baseline': 0.14546912345958884}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.092,250,0.289606,23.0,0.288,250,0.453739,72.0,0.186401,0.186401,0.185215
4,model5_lgbm,0.092,250,0.289606,23.0,0.188,250,0.391496,47.0,0.186401,0.186401,0.185215
0,model1_rf,0.072,250,0.259006,18.0,0.232,250,0.422956,58.0,0.166401,0.166401,0.165342
1,model2_lr,0.068,250,0.252251,17.0,0.228,250,0.420384,57.0,0.162401,0.162401,0.161367
3,model4_xgb,0.064,250,0.245244,16.0,0.084,250,0.277944,21.0,0.158401,0.158401,0.157393
5,model_baseline,0.052,250,0.222472,13.0,0.224,250,0.417758,56.0,0.146401,0.146401,0.145469


Allocations after round 14 :
 {'model5_lgbm': 0.188067795729832, 'model1_rf': 0.17094689247929976, 'model3_lr': 0.17094689247929976, 'model_baseline': 0.16666666666666669, 'model2_lr': 0.16238644085403361, 'model4_xgb': 0.14098531179086826}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.08,250,0.271837,20.0,0.132,250,0.33917,33.0,0.175755,0.175755,0.188068
0,model1_rf,0.064,250,0.245244,16.0,0.204,250,0.403777,51.0,0.159755,0.159755,0.170947
2,model3_lr,0.064,250,0.245244,16.0,0.248,250,0.432718,62.0,0.159755,0.159755,0.170947
5,model_baseline,0.06,250,0.237963,15.0,0.16,250,0.367341,40.0,0.155755,0.155755,0.166667
1,model2_lr,0.056,250,0.230383,14.0,0.204,250,0.403777,51.0,0.151755,0.151755,0.162386
3,model4_xgb,0.036,250,0.186664,9.0,0.096,250,0.295182,24.0,0.131755,0.131755,0.140985


Allocations after round 15 :
 {'model5_lgbm': 0.1890596803688816, 'model1_rf': 0.18522087801993048, 'model3_lr': 0.16986566862412592, 'model2_lr': 0.1660268662751748, 'model_baseline': 0.15067165687937029, 'model4_xgb': 0.1391552498325169}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.1,250,0.300602,25.0,0.18,250,0.384958,45.0,0.196999,0.196999,0.18906
0,model1_rf,0.096,250,0.295182,24.0,0.216,250,0.41234,54.0,0.192999,0.192999,0.185221
2,model3_lr,0.08,250,0.271837,20.0,0.304,250,0.460905,76.0,0.176999,0.176999,0.169866
1,model2_lr,0.076,250,0.26553,19.0,0.232,250,0.422956,58.0,0.172999,0.172999,0.166027
5,model_baseline,0.06,250,0.237963,15.0,0.256,250,0.437297,64.0,0.156999,0.156999,0.150672
3,model4_xgb,0.048,250,0.214195,12.0,0.084,250,0.277944,21.0,0.144999,0.144999,0.139155


Allocations after round 16 :
 {'model1_rf': 0.1869901537940228, 'model5_lgbm': 0.1869901537940228, 'model3_lr': 0.17125584117929546, 'model4_xgb': 0.1633886848719318, 'model2_lr': 0.15945510671824997, 'model_baseline': 0.13192005964247713}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
0,model1_rf,0.092,250,0.289606,23.0,0.196,250,0.397765,49.0,0.190148,0.190148,0.18699
4,model5_lgbm,0.092,250,0.289606,23.0,0.168,250,0.374616,42.0,0.190148,0.190148,0.18699
2,model3_lr,0.076,250,0.26553,19.0,0.308,250,0.462593,77.0,0.174148,0.174148,0.171256
3,model4_xgb,0.068,250,0.252251,17.0,0.064,250,0.245244,16.0,0.166148,0.166148,0.163389
1,model2_lr,0.064,250,0.245244,16.0,0.224,250,0.417758,56.0,0.162148,0.162148,0.159455
5,model_baseline,0.036,250,0.186664,9.0,0.236,250,0.425474,59.0,0.134148,0.134148,0.13192


Allocations after round 17 :
 {'model5_lgbm': 0.18640015165817594, 'model2_lr': 0.18258076746627092, 'model1_rf': 0.17494199908246091, 'model3_lr': 0.17494199908246091, 'model4_xgb': 0.14820630973912574, 'model_baseline': 0.13292877297150568}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.096,250,0.295182,24.0,0.144,250,0.351794,36.0,0.195215,0.195215,0.1864
1,model2_lr,0.092,250,0.289606,23.0,0.232,250,0.422956,58.0,0.191215,0.191215,0.182581
0,model1_rf,0.084,250,0.277944,21.0,0.212,250,0.409545,53.0,0.183215,0.183215,0.174942
2,model3_lr,0.084,250,0.277944,21.0,0.276,250,0.447914,69.0,0.183215,0.183215,0.174942
3,model4_xgb,0.056,250,0.230383,14.0,0.06,250,0.237963,15.0,0.155215,0.155215,0.148206
5,model_baseline,0.04,250,0.196352,10.0,0.224,250,0.417758,56.0,0.139215,0.139215,0.132929


Allocations after round 18 :
 {'model3_lr': 0.1877139796265939, 'model2_lr': 0.18006041127752945, 'model5_lgbm': 0.17623362710299723, 'model1_rf': 0.15709970623033612, 'model4_xgb': 0.15709970623033612, 'model_baseline': 0.14179256953220726}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.096,250,0.295182,24.0,0.252,250,0.435032,63.0,0.196211,0.196211,0.187714
1,model2_lr,0.088,250,0.283863,22.0,0.204,250,0.403777,51.0,0.188211,0.188211,0.18006
4,model5_lgbm,0.084,250,0.277944,21.0,0.164,250,0.371018,41.0,0.184211,0.184211,0.176234
0,model1_rf,0.064,250,0.245244,16.0,0.216,250,0.41234,54.0,0.164211,0.164211,0.1571
3,model4_xgb,0.064,250,0.245244,16.0,0.072,250,0.259006,18.0,0.164211,0.164211,0.1571
5,model_baseline,0.048,250,0.214195,12.0,0.26,250,0.439514,65.0,0.148211,0.148211,0.141793


Allocations after round 19 :
 {'model5_lgbm': 0.1837954679271963, 'model1_rf': 0.17998906764707862, 'model_baseline': 0.17618266736696092, 'model2_lr': 0.1685698668067255, 'model3_lr': 0.14953786540613706, 'model4_xgb': 0.14192506484590164}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.092,250,0.289606,23.0,0.176,250,0.381584,44.0,0.193144,0.193144,0.183795
0,model1_rf,0.088,250,0.283863,22.0,0.22,250,0.415077,55.0,0.189144,0.189144,0.179989
5,model_baseline,0.084,250,0.277944,21.0,0.224,250,0.417758,56.0,0.185144,0.185144,0.176183
1,model2_lr,0.076,250,0.26553,19.0,0.204,250,0.403777,51.0,0.177144,0.177144,0.16857
2,model3_lr,0.056,250,0.230383,14.0,0.296,250,0.457407,74.0,0.157144,0.157144,0.149538
3,model4_xgb,0.048,250,0.214195,12.0,0.056,250,0.230383,14.0,0.149144,0.149144,0.141925


Allocations after round 20 :
 {'model3_lr': 0.1877444385050537, 'model5_lgbm': 0.18379235628535612, 'model_baseline': 0.16798402740656584, 'model2_lr': 0.16403194518686828, 'model1_rf': 0.15217569852777554, 'model4_xgb': 0.14427153408838042}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.088,250,0.283863,22.0,0.272,250,0.445883,68.0,0.190021,0.190021,0.187744
4,model5_lgbm,0.084,250,0.277944,21.0,0.152,250,0.359741,38.0,0.186021,0.186021,0.183792
5,model_baseline,0.068,250,0.252251,17.0,0.232,250,0.422956,58.0,0.170021,0.170021,0.167984
1,model2_lr,0.064,250,0.245244,16.0,0.224,250,0.417758,56.0,0.166021,0.166021,0.164032
0,model1_rf,0.052,250,0.222472,13.0,0.22,250,0.415077,55.0,0.154021,0.154021,0.152176
3,model4_xgb,0.044,250,0.205507,11.0,0.068,250,0.252251,17.0,0.146021,0.146021,0.144272


Allocations after round 21 :
 {'model5_lgbm': 0.18298910515672637, 'model3_lr': 0.17123694944388337, 'model_baseline': 0.17123694944388337, 'model1_rf': 0.16731956420626906, 'model2_lr': 0.16340217896865475, 'model4_xgb': 0.1438152527805831}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.084,250,0.277944,21.0,0.14,250,0.347683,35.0,0.186848,0.186848,0.182989
2,model3_lr,0.072,250,0.259006,18.0,0.248,250,0.432718,62.0,0.174848,0.174848,0.171237
5,model_baseline,0.072,250,0.259006,18.0,0.212,250,0.409545,53.0,0.174848,0.174848,0.171237
0,model1_rf,0.068,250,0.252251,17.0,0.188,250,0.391496,47.0,0.170848,0.170848,0.16732
1,model2_lr,0.064,250,0.245244,16.0,0.192,250,0.394663,48.0,0.166848,0.166848,0.163402
3,model4_xgb,0.044,250,0.205507,11.0,0.052,250,0.222472,13.0,0.146848,0.146848,0.143815


Allocations after round 22 :
 {'model5_lgbm': 0.18655161392438183, 'model3_lr': 0.1706436561182097, 'model4_xgb': 0.16666666666666666, 'model_baseline': 0.16666666666666666, 'model1_rf': 0.15473569831203754, 'model2_lr': 0.15473569831203754}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.084,250,0.277944,21.0,0.124,250,0.330243,31.0,0.187631,0.187631,0.186552
2,model3_lr,0.068,250,0.252251,17.0,0.256,250,0.437297,64.0,0.171631,0.171631,0.170644
3,model4_xgb,0.064,250,0.245244,16.0,0.072,250,0.259006,18.0,0.167631,0.167631,0.166667
5,model_baseline,0.064,250,0.245244,16.0,0.236,250,0.425474,59.0,0.167631,0.167631,0.166667
0,model1_rf,0.052,250,0.222472,13.0,0.204,250,0.403777,51.0,0.155631,0.155631,0.154736
1,model2_lr,0.052,250,0.222472,13.0,0.18,250,0.384958,45.0,0.155631,0.155631,0.154736


Allocations after round 23 :
 {'model5_lgbm': 0.18427504124418717, 'model3_lr': 0.1803620691158493, 'model4_xgb': 0.17644909698751138, 'model_baseline': 0.16079720847415982, 'model1_rf': 0.14905829208914612, 'model2_lr': 0.14905829208914612}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.084,250,0.277944,21.0,0.152,250,0.359741,38.0,0.188373,0.188373,0.184275
2,model3_lr,0.08,250,0.271837,20.0,0.272,250,0.445883,68.0,0.184373,0.184373,0.180362
3,model4_xgb,0.076,250,0.26553,19.0,0.064,250,0.245244,16.0,0.180373,0.180373,0.176449
5,model_baseline,0.06,250,0.237963,15.0,0.228,250,0.420384,57.0,0.164373,0.164373,0.160797
0,model1_rf,0.048,250,0.214195,12.0,0.236,250,0.425474,59.0,0.152373,0.152373,0.149058
1,model2_lr,0.048,250,0.214195,12.0,0.252,250,0.435032,63.0,0.152373,0.152373,0.149058


Allocations after round 24 :
 {'model3_lr': 0.18305299549006107, 'model_baseline': 0.1794115890848623, 'model5_lgbm': 0.17577018267966354, 'model1_rf': 0.16484596346406727, 'model4_xgb': 0.15392174424847102, 'model2_lr': 0.14299752503287472}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.096,250,0.295182,24.0,0.316,250,0.465846,79.0,0.201079,0.201079,0.183053
5,model_baseline,0.092,250,0.289606,23.0,0.228,250,0.420384,57.0,0.197079,0.197079,0.179412
4,model5_lgbm,0.088,250,0.283863,22.0,0.188,250,0.391496,47.0,0.193079,0.193079,0.17577
0,model1_rf,0.076,250,0.26553,19.0,0.216,250,0.41234,54.0,0.181079,0.181079,0.164846
3,model4_xgb,0.064,250,0.245244,16.0,0.064,250,0.245244,16.0,0.169079,0.169079,0.153922
1,model2_lr,0.052,250,0.222472,13.0,0.22,250,0.415077,55.0,0.157079,0.157079,0.142998


Allocations after round 25 :
 {'model3_lr': 0.1813572580942397, 'model1_rf': 0.17783151615162215, 'model5_lgbm': 0.17430577420900464, 'model2_lr': 0.1672542903237696, 'model_baseline': 0.16020280643853455, 'model4_xgb': 0.1390483547828294}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.1,250,0.300602,25.0,0.296,250,0.457407,74.0,0.205752,0.205752,0.181357
0,model1_rf,0.096,250,0.295182,24.0,0.228,250,0.420384,57.0,0.201752,0.201752,0.177832
4,model5_lgbm,0.092,250,0.289606,23.0,0.156,250,0.363583,39.0,0.197752,0.197752,0.174306
1,model2_lr,0.084,250,0.277944,21.0,0.244,250,0.430354,61.0,0.189752,0.189752,0.167254
5,model_baseline,0.076,250,0.26553,19.0,0.264,250,0.441684,66.0,0.181752,0.181752,0.160203
3,model4_xgb,0.052,250,0.222472,13.0,0.08,250,0.271837,20.0,0.157752,0.157752,0.139048


Allocations after round 26 :
 {'model3_lr': 0.18816455913876953, 'model1_rf': 0.18437081340839842, 'model5_lgbm': 0.18437081340839842, 'model2_lr': 0.15402084756542972, 'model4_xgb': 0.15022710183505864, 'model_baseline': 0.13884586464394535}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.092,250,0.289606,23.0,0.264,250,0.441684,66.0,0.198394,0.198394,0.188165
0,model1_rf,0.088,250,0.283863,22.0,0.176,250,0.381584,44.0,0.194394,0.194394,0.184371
4,model5_lgbm,0.088,250,0.283863,22.0,0.132,250,0.33917,33.0,0.194394,0.194394,0.184371
1,model2_lr,0.056,250,0.230383,14.0,0.18,250,0.384958,45.0,0.162394,0.162394,0.154021
3,model4_xgb,0.052,250,0.222472,13.0,0.04,250,0.196352,10.0,0.158394,0.158394,0.150227
5,model_baseline,0.04,250,0.196352,10.0,0.22,250,0.415077,55.0,0.146394,0.146394,0.138846


Allocations after round 27 :
 {'model3_lr': 0.19097763732296727, 'model1_rf': 0.16794619143805092, 'model_baseline': 0.16794619143805092, 'model5_lgbm': 0.16410761712389818, 'model2_lr': 0.16026904280974544, 'model4_xgb': 0.14875331986728724}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.092,250,0.289606,23.0,0.244,250,0.430354,61.0,0.199009,0.199009,0.190978
0,model1_rf,0.068,250,0.252251,17.0,0.208,250,0.406691,52.0,0.175009,0.175009,0.167946
5,model_baseline,0.068,250,0.252251,17.0,0.224,250,0.417758,56.0,0.175009,0.175009,0.167946
4,model5_lgbm,0.064,250,0.245244,16.0,0.16,250,0.367341,40.0,0.171009,0.171009,0.164108
1,model2_lr,0.06,250,0.237963,15.0,0.228,250,0.420384,57.0,0.167009,0.167009,0.160269
3,model4_xgb,0.048,250,0.214195,12.0,0.08,250,0.271837,20.0,0.155009,0.155009,0.148753


Allocations after round 28 :
 {'model4_xgb': 0.1910663825606386, 'model5_lgbm': 0.18731258011541213, 'model3_lr': 0.16478976544405347, 'model1_rf': 0.16103596299882703, 'model_baseline': 0.1497745556631477, 'model2_lr': 0.14602075321792127}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
3,model4_xgb,0.096,250,0.295182,24.0,0.036,250,0.186664,9.0,0.203598,0.203598,0.191066
4,model5_lgbm,0.092,250,0.289606,23.0,0.172,250,0.378137,43.0,0.199598,0.199598,0.187313
2,model3_lr,0.068,250,0.252251,17.0,0.308,250,0.462593,77.0,0.175598,0.175598,0.16479
0,model1_rf,0.064,250,0.245244,16.0,0.256,250,0.437297,64.0,0.171598,0.171598,0.161036
5,model_baseline,0.052,250,0.222472,13.0,0.232,250,0.422956,58.0,0.159598,0.159598,0.149775
1,model2_lr,0.048,250,0.214195,12.0,0.26,250,0.439514,65.0,0.155598,0.155598,0.146021


Allocations after round 29 :
 {'model4_xgb': 0.18976044405778048, 'model3_lr': 0.1824676722500603, 'model2_lr': 0.16788212863462004, 'model1_rf': 0.15694297092303983, 'model_baseline': 0.15694297092303983, 'model5_lgbm': 0.1460038132114596}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
3,model4_xgb,0.1,250,0.300602,25.0,0.084,250,0.277944,21.0,0.208163,0.208163,0.18976
2,model3_lr,0.092,250,0.289606,23.0,0.3,250,0.459177,75.0,0.200163,0.200163,0.182468
1,model2_lr,0.076,250,0.26553,19.0,0.216,250,0.41234,54.0,0.184163,0.184163,0.167882
0,model1_rf,0.064,250,0.245244,16.0,0.22,250,0.415077,55.0,0.172163,0.172163,0.156943
5,model_baseline,0.064,250,0.245244,16.0,0.28,250,0.4499,70.0,0.172163,0.172163,0.156943
4,model5_lgbm,0.052,250,0.222472,13.0,0.184,250,0.388261,46.0,0.160163,0.160163,0.146004


Allocations after round 30 :
 {'model3_lr': 0.18443243190096878, 'model1_rf': 0.17340540520381575, 'model5_lgbm': 0.17340540520381575, 'model2_lr': 0.16972972963809807, 'model4_xgb': 0.15502702737522736, 'model_baseline': 0.14400000067807434}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.092,250,0.289606,23.0,0.284,250,0.451841,71.0,0.200706,0.200706,0.184432
0,model1_rf,0.08,250,0.271837,20.0,0.184,250,0.388261,46.0,0.188706,0.188706,0.173405
4,model5_lgbm,0.08,250,0.271837,20.0,0.192,250,0.394663,48.0,0.188706,0.188706,0.173405
1,model2_lr,0.076,250,0.26553,19.0,0.232,250,0.422956,58.0,0.184706,0.184706,0.16973
3,model4_xgb,0.06,250,0.237963,15.0,0.076,250,0.26553,19.0,0.168706,0.168706,0.155027
5,model_baseline,0.048,250,0.214195,12.0,0.24,250,0.42794,60.0,0.156706,0.156706,0.144


Allocations after round 31 :
 {'model5_lgbm': 0.1857429127717924, 'model1_rf': 0.1820507361062842, 'model2_lr': 0.17097420610975958, 'model3_lr': 0.16358985277874316, 'model4_xgb': 0.15620549944772674, 'model_baseline': 0.1414367927856939}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.092,250,0.289606,23.0,0.144,250,0.351794,36.0,0.201229,0.201229,0.185743
0,model1_rf,0.088,250,0.283863,22.0,0.196,250,0.397765,49.0,0.197229,0.197229,0.182051
1,model2_lr,0.076,250,0.26553,19.0,0.2,250,0.400802,50.0,0.185229,0.185229,0.170974
2,model3_lr,0.068,250,0.252251,17.0,0.28,250,0.4499,70.0,0.177229,0.177229,0.16359
3,model4_xgb,0.06,250,0.237963,15.0,0.072,250,0.259006,18.0,0.169229,0.169229,0.156205
5,model_baseline,0.044,250,0.205507,11.0,0.24,250,0.42794,60.0,0.153229,0.153229,0.141437


Allocations after round 32 :
 {'model5_lgbm': 0.18585326234127758, 'model2_lr': 0.1781786240714332, 'model1_rf': 0.174341304936511, 'model3_lr': 0.16666666666666666, 'model_baseline': 0.1513173901269779, 'model4_xgb': 0.14364275185713357}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.084,250,0.277944,21.0,0.136,250,0.343476,34.0,0.193732,0.193732,0.185853
1,model2_lr,0.076,250,0.26553,19.0,0.188,250,0.391496,47.0,0.185732,0.185732,0.178179
0,model1_rf,0.072,250,0.259006,18.0,0.192,250,0.394663,48.0,0.181732,0.181732,0.174341
2,model3_lr,0.064,250,0.245244,16.0,0.236,250,0.425474,59.0,0.173732,0.173732,0.166667
5,model_baseline,0.048,250,0.214195,12.0,0.232,250,0.422956,58.0,0.157732,0.157732,0.151317
3,model4_xgb,0.04,250,0.196352,10.0,0.036,250,0.186664,9.0,0.149732,0.149732,0.143643


Allocations after round 33 :
 {'model1_rf': 0.1888228031098067, 'model3_lr': 0.1888228031098067, 'model5_lgbm': 0.16367259417435046, 'model2_lr': 0.15289393320201206, 'model4_xgb': 0.15289393320201206, 'model_baseline': 0.15289393320201206}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
0,model1_rf,0.1,250,0.300602,25.0,0.24,250,0.42794,60.0,0.210218,0.210218,0.188823
2,model3_lr,0.1,250,0.300602,25.0,0.296,250,0.457407,74.0,0.210218,0.210218,0.188823
4,model5_lgbm,0.072,250,0.259006,18.0,0.204,250,0.403777,51.0,0.182218,0.182218,0.163673
1,model2_lr,0.06,250,0.237963,15.0,0.236,250,0.425474,59.0,0.170218,0.170218,0.152894
3,model4_xgb,0.06,250,0.237963,15.0,0.072,250,0.259006,18.0,0.170218,0.170218,0.152894
5,model_baseline,0.06,250,0.237963,15.0,0.248,250,0.432718,62.0,0.170218,0.170218,0.152894


Allocations after round 34 :
 {'model1_rf': 0.18160754745424157, 'model5_lgbm': 0.17126386075515124, 'model_baseline': 0.16781596518878783, 'model2_lr': 0.16092017405606096, 'model3_lr': 0.16092017405606096, 'model4_xgb': 0.15747227848969753}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
0,model1_rf,0.1,250,0.300602,25.0,0.204,250,0.403777,51.0,0.210688,0.210688,0.181608
4,model5_lgbm,0.088,250,0.283863,22.0,0.18,250,0.384958,45.0,0.198688,0.198688,0.171264
5,model_baseline,0.084,250,0.277944,21.0,0.26,250,0.439514,65.0,0.194688,0.194688,0.167816
1,model2_lr,0.076,250,0.26553,19.0,0.248,250,0.432718,62.0,0.186688,0.186688,0.16092
2,model3_lr,0.076,250,0.26553,19.0,0.316,250,0.465846,79.0,0.186688,0.186688,0.16092
3,model4_xgb,0.072,250,0.259006,18.0,0.064,250,0.245244,16.0,0.182688,0.182688,0.157472


Allocations after round 35 :
 {'model3_lr': 0.18554288607298813, 'model5_lgbm': 0.18554288607298813, 'model1_rf': 0.1672755769700964, 'model2_lr': 0.16362211514951805, 'model4_xgb': 0.15631519150836135, 'model_baseline': 0.141701344226048}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.092,250,0.289606,23.0,0.228,250,0.420384,57.0,0.203142,0.203142,0.185543
4,model5_lgbm,0.092,250,0.289606,23.0,0.132,250,0.33917,33.0,0.203142,0.203142,0.185543
0,model1_rf,0.072,250,0.259006,18.0,0.192,250,0.394663,48.0,0.183142,0.183142,0.167276
1,model2_lr,0.068,250,0.252251,17.0,0.16,250,0.367341,40.0,0.179142,0.179142,0.163622
3,model4_xgb,0.06,250,0.237963,15.0,0.056,250,0.230383,14.0,0.171142,0.171142,0.156315
5,model_baseline,0.044,250,0.205507,11.0,0.232,250,0.422956,58.0,0.155142,0.155142,0.141701


Allocations after round 36 :
 {'model1_rf': 0.18425203206551458, 'model3_lr': 0.18425203206551458, 'model5_lgbm': 0.17294715430911234, 'model2_lr': 0.1541056913817753, 'model4_xgb': 0.1541056913817753, 'model_baseline': 0.15033739879630786}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
0,model1_rf,0.084,250,0.277944,21.0,0.192,250,0.394663,48.0,0.195581,0.195581,0.184252
2,model3_lr,0.084,250,0.277944,21.0,0.28,250,0.4499,70.0,0.195581,0.195581,0.184252
4,model5_lgbm,0.072,250,0.259006,18.0,0.192,250,0.394663,48.0,0.183581,0.183581,0.172947
1,model2_lr,0.052,250,0.222472,13.0,0.18,250,0.384958,45.0,0.163581,0.163581,0.154106
3,model4_xgb,0.052,250,0.222472,13.0,0.088,250,0.283863,22.0,0.163581,0.163581,0.154106
5,model_baseline,0.048,250,0.214195,12.0,0.228,250,0.420384,57.0,0.159581,0.159581,0.150337


Allocations after round 37 :
 {'model3_lr': 0.1934296014296626, 'model5_lgbm': 0.17883163701348298, 'model1_rf': 0.1642336725973034, 'model2_lr': 0.1569346903892136, 'model_baseline': 0.1569346903892136, 'model4_xgb': 0.1496357081811238}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.1,250,0.300602,25.0,0.272,250,0.445883,68.0,0.212007,0.212007,0.19343
4,model5_lgbm,0.084,250,0.277944,21.0,0.16,250,0.367341,40.0,0.196007,0.196007,0.178832
0,model1_rf,0.068,250,0.252251,17.0,0.22,250,0.415077,55.0,0.180007,0.180007,0.164234
1,model2_lr,0.06,250,0.237963,15.0,0.212,250,0.409545,53.0,0.172007,0.172007,0.156935
5,model_baseline,0.06,250,0.237963,15.0,0.248,250,0.432718,62.0,0.172007,0.172007,0.156935
3,model4_xgb,0.052,250,0.222472,13.0,0.068,250,0.252251,17.0,0.164007,0.164007,0.149636


Allocations after round 38 :
 {'model3_lr': 0.18721742070634795, 'model1_rf': 0.1722714177683979, 'model2_lr': 0.1647984162994229, 'model4_xgb': 0.1610619155649354, 'model5_lgbm': 0.1610619155649354, 'model_baseline': 0.15358891409596043}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.088,250,0.283863,22.0,0.276,250,0.447914,69.0,0.20042,0.20042,0.187217
0,model1_rf,0.072,250,0.259006,18.0,0.168,250,0.374616,42.0,0.18442,0.18442,0.172271
1,model2_lr,0.064,250,0.245244,16.0,0.216,250,0.41234,54.0,0.17642,0.17642,0.164798
3,model4_xgb,0.06,250,0.237963,15.0,0.052,250,0.222472,13.0,0.17242,0.17242,0.161062
4,model5_lgbm,0.06,250,0.237963,15.0,0.144,250,0.351794,36.0,0.17242,0.17242,0.161062
5,model_baseline,0.052,250,0.222472,13.0,0.22,250,0.415077,55.0,0.16442,0.16442,0.153589


Allocations after round 39 :
 {'model3_lr': 0.18459209798708104, 'model4_xgb': 0.18459209798708104, 'model5_lgbm': 0.17418378302684043, 'model2_lr': 0.16724490638668002, 'model_baseline': 0.14989771478627903, 'model1_rf': 0.13948939982603847}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.1,250,0.300602,25.0,0.332,250,0.471876,83.0,0.212821,0.212821,0.184592
3,model4_xgb,0.1,250,0.300602,25.0,0.04,250,0.196352,10.0,0.212821,0.212821,0.184592
4,model5_lgbm,0.088,250,0.283863,22.0,0.172,250,0.378137,43.0,0.200821,0.200821,0.174184
1,model2_lr,0.08,250,0.271837,20.0,0.272,250,0.445883,68.0,0.192821,0.192821,0.167245
5,model_baseline,0.06,250,0.237963,15.0,0.312,250,0.464239,78.0,0.172821,0.172821,0.149898
0,model1_rf,0.048,250,0.214195,12.0,0.296,250,0.457407,74.0,0.160821,0.160821,0.139489


Allocations after round 40 :
 {'model3_lr': 0.18643330861890364, 'model1_rf': 0.1716083271547259, 'model5_lgbm': 0.16790208178868146, 'model4_xgb': 0.16419583642263702, 'model2_lr': 0.16048959105659258, 'model_baseline': 0.14937085495845925}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.088,250,0.283863,22.0,0.248,250,0.432718,62.0,0.20121,0.20121,0.186433
0,model1_rf,0.072,250,0.259006,18.0,0.208,250,0.406691,52.0,0.18521,0.18521,0.171608
4,model5_lgbm,0.068,250,0.252251,17.0,0.132,250,0.33917,33.0,0.18121,0.18121,0.167902
3,model4_xgb,0.064,250,0.245244,16.0,0.06,250,0.237963,15.0,0.17721,0.17721,0.164196
1,model2_lr,0.06,250,0.237963,15.0,0.224,250,0.417758,56.0,0.17321,0.17321,0.16049
5,model_baseline,0.048,250,0.214195,12.0,0.244,250,0.430354,61.0,0.16121,0.16121,0.149371


Allocations after round 41 :
 {'model3_lr': 0.18570500377720198, 'model1_rf': 0.17096564601420688, 'model4_xgb': 0.16359596713270935, 'model5_lgbm': 0.16359596713270935, 'model2_lr': 0.1599111276919606, 'model_baseline': 0.15622628825121182}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.088,250,0.283863,22.0,0.228,250,0.420384,57.0,0.201588,0.201588,0.185705
0,model1_rf,0.072,250,0.259006,18.0,0.172,250,0.378137,43.0,0.185588,0.185588,0.170966
3,model4_xgb,0.064,250,0.245244,16.0,0.044,250,0.205507,11.0,0.177588,0.177588,0.163596
4,model5_lgbm,0.064,250,0.245244,16.0,0.152,250,0.359741,38.0,0.177588,0.177588,0.163596
1,model2_lr,0.06,250,0.237963,15.0,0.196,250,0.397765,49.0,0.173588,0.173588,0.159911
5,model_baseline,0.056,250,0.230383,14.0,0.236,250,0.425474,59.0,0.169588,0.169588,0.156226


Allocations after round 42 :
 {'model3_lr': 0.20324607327230115, 'model5_lgbm': 0.17086430349026407, 'model1_rf': 0.1636683546498114, 'model2_lr': 0.1636683546498114, 'model4_xgb': 0.1528744313891324, 'model_baseline': 0.1456784825486797}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.112,250,0.315999,28.0,0.268,250,0.443806,67.0,0.225956,0.225956,0.203246
4,model5_lgbm,0.076,250,0.26553,19.0,0.164,250,0.371018,41.0,0.189956,0.189956,0.170864
0,model1_rf,0.068,250,0.252251,17.0,0.216,250,0.41234,54.0,0.181956,0.181956,0.163668
1,model2_lr,0.068,250,0.252251,17.0,0.196,250,0.397765,49.0,0.181956,0.181956,0.163668
3,model4_xgb,0.056,250,0.230383,14.0,0.056,250,0.230383,14.0,0.169956,0.169956,0.152874
5,model_baseline,0.048,250,0.214195,12.0,0.244,250,0.430354,61.0,0.161956,0.161956,0.145678


Allocations after round 43 :
 {'model5_lgbm': 0.18967784794607737, 'model3_lr': 0.17197693926960758, 'model2_lr': 0.1648965757990197, 'model1_rf': 0.15781621232843182, 'model4_xgb': 0.15781621232843182, 'model_baseline': 0.15781621232843182}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.1,250,0.300602,25.0,0.164,250,0.371018,41.0,0.214314,0.214314,0.189678
2,model3_lr,0.08,250,0.271837,20.0,0.308,250,0.462593,77.0,0.194314,0.194314,0.171977
1,model2_lr,0.072,250,0.259006,18.0,0.208,250,0.406691,52.0,0.186314,0.186314,0.164897
0,model1_rf,0.064,250,0.245244,16.0,0.204,250,0.403777,51.0,0.178314,0.178314,0.157816
3,model4_xgb,0.064,250,0.245244,16.0,0.076,250,0.26553,19.0,0.178314,0.178314,0.157816
5,model_baseline,0.064,250,0.245244,16.0,0.284,250,0.451841,71.0,0.178314,0.178314,0.157816


Allocations after round 44 :
 {'model5_lgbm': 0.1802471843501859, 'model2_lr': 0.16913585170003378, 'model1_rf': 0.16543207414998307, 'model3_lr': 0.16543207414998307, 'model_baseline': 0.16172829659993238, 'model4_xgb': 0.15802451904988168}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.08,250,0.271837,20.0,0.148,250,0.355812,37.0,0.194663,0.194663,0.180247
1,model2_lr,0.068,250,0.252251,17.0,0.204,250,0.403777,51.0,0.182663,0.182663,0.169136
0,model1_rf,0.064,250,0.245244,16.0,0.208,250,0.406691,52.0,0.178663,0.178663,0.165432
2,model3_lr,0.064,250,0.245244,16.0,0.28,250,0.4499,70.0,0.178663,0.178663,0.165432
5,model_baseline,0.06,250,0.237963,15.0,0.224,250,0.417758,56.0,0.174663,0.174663,0.161728
3,model4_xgb,0.056,250,0.230383,14.0,0.06,250,0.237963,15.0,0.170663,0.170663,0.158025


Allocations after round 45 :
 {'model3_lr': 0.17999978825260676, 'model5_lgbm': 0.17304337699037714, 'model2_lr': 0.16608696572814755, 'model_baseline': 0.16608696572814755, 'model4_xgb': 0.15913055446591795, 'model1_rf': 0.15565234883480314}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.092,250,0.289606,23.0,0.296,250,0.457407,74.0,0.207003,0.207003,0.18
4,model5_lgbm,0.084,250,0.277944,21.0,0.176,250,0.381584,44.0,0.199003,0.199003,0.173043
1,model2_lr,0.076,250,0.26553,19.0,0.184,250,0.388261,46.0,0.191003,0.191003,0.166087
5,model_baseline,0.076,250,0.26553,19.0,0.24,250,0.42794,60.0,0.191003,0.191003,0.166087
3,model4_xgb,0.068,250,0.252251,17.0,0.096,250,0.295182,24.0,0.183003,0.183003,0.159131
0,model1_rf,0.064,250,0.245244,16.0,0.192,250,0.394663,48.0,0.179003,0.179003,0.155652


Allocations after round 46 :
 {'model3_lr': 0.18121202322902943, 'model2_lr': 0.17030300580725735, 'model1_rf': 0.16666666666666669, 'model5_lgbm': 0.16666666666666669, 'model4_xgb': 0.15939398838548527, 'model_baseline': 0.15575764924489457}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.084,250,0.277944,21.0,0.216,250,0.41234,54.0,0.199335,0.199335,0.181212
1,model2_lr,0.072,250,0.259006,18.0,0.176,250,0.381584,44.0,0.187335,0.187335,0.170303
0,model1_rf,0.068,250,0.252251,17.0,0.172,250,0.378137,43.0,0.183335,0.183335,0.166667
4,model5_lgbm,0.068,250,0.252251,17.0,0.152,250,0.359741,38.0,0.183335,0.183335,0.166667
3,model4_xgb,0.06,250,0.237963,15.0,0.048,250,0.214195,12.0,0.175335,0.175335,0.159394
5,model_baseline,0.056,250,0.230383,14.0,0.248,250,0.432718,62.0,0.171335,0.171335,0.155758


Allocations after round 47 :
 {'model3_lr': 0.18118640468665306, 'model2_lr': 0.17392653567665986, 'model1_rf': 0.17029660117166323, 'model4_xgb': 0.16303673216167006, 'model_baseline': 0.15940679765667345, 'model5_lgbm': 0.15214692864668025}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.084,250,0.277944,21.0,0.284,250,0.451841,71.0,0.199658,0.199658,0.181186
1,model2_lr,0.076,250,0.26553,19.0,0.2,250,0.400802,50.0,0.191658,0.191658,0.173927
0,model1_rf,0.072,250,0.259006,18.0,0.196,250,0.397765,49.0,0.187658,0.187658,0.170297
3,model4_xgb,0.064,250,0.245244,16.0,0.06,250,0.237963,15.0,0.179658,0.179658,0.163037
5,model_baseline,0.06,250,0.237963,15.0,0.212,250,0.409545,53.0,0.175658,0.175658,0.159407
4,model5_lgbm,0.052,250,0.222472,13.0,0.152,250,0.359741,38.0,0.167658,0.167658,0.152147


Allocations after round 48 :
 {'model3_lr': 0.20217109594287921, 'model5_lgbm': 0.16967551660532876, 'model4_xgb': 0.1624542767525398, 'model2_lr': 0.1588436568261453, 'model_baseline': 0.1588436568261453, 'model1_rf': 0.1480117970469618}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.108,250,0.311003,27.0,0.248,250,0.432718,62.0,0.223974,0.223974,0.202171
4,model5_lgbm,0.072,250,0.259006,18.0,0.168,250,0.374616,42.0,0.187974,0.187974,0.169676
3,model4_xgb,0.064,250,0.245244,16.0,0.06,250,0.237963,15.0,0.179974,0.179974,0.162454
1,model2_lr,0.06,250,0.237963,15.0,0.228,250,0.420384,57.0,0.175974,0.175974,0.158844
5,model_baseline,0.06,250,0.237963,15.0,0.228,250,0.420384,57.0,0.175974,0.175974,0.158844
0,model1_rf,0.048,250,0.214195,12.0,0.208,250,0.406691,52.0,0.163974,0.163974,0.148012


Allocations after round 49 :
 {'model3_lr': 0.19994966896980298, 'model4_xgb': 0.17855345320350105, 'model1_rf': 0.1642893093592998, 'model_baseline': 0.15715723743719914, 'model2_lr': 0.1500251655150985, 'model5_lgbm': 0.1500251655150985}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.108,250,0.311003,27.0,0.26,250,0.439514,65.0,0.224282,0.224282,0.19995
3,model4_xgb,0.084,250,0.277944,21.0,0.06,250,0.237963,15.0,0.200282,0.200282,0.178553
0,model1_rf,0.068,250,0.252251,17.0,0.204,250,0.403777,51.0,0.184282,0.184282,0.164289
5,model_baseline,0.06,250,0.237963,15.0,0.264,250,0.441684,66.0,0.176282,0.176282,0.157157
1,model2_lr,0.052,250,0.222472,13.0,0.22,250,0.415077,55.0,0.168282,0.168282,0.150025
4,model5_lgbm,0.052,250,0.222472,13.0,0.152,250,0.359741,38.0,0.168282,0.168282,0.150025


Allocations after round 50 :
 {'model1_rf': 0.17750185935610696, 'model2_lr': 0.17750185935610696, 'model5_lgbm': 0.17027839756314678, 'model3_lr': 0.16305493577018657, 'model4_xgb': 0.15583147397722638, 'model_baseline': 0.15583147397722638}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
0,model1_rf,0.08,250,0.271837,20.0,0.196,250,0.397765,49.0,0.196584,0.196584,0.177502
1,model2_lr,0.08,250,0.271837,20.0,0.18,250,0.384958,45.0,0.196584,0.196584,0.177502
4,model5_lgbm,0.072,250,0.259006,18.0,0.152,250,0.359741,38.0,0.188584,0.188584,0.170278
2,model3_lr,0.064,250,0.245244,16.0,0.264,250,0.441684,66.0,0.180584,0.180584,0.163055
3,model4_xgb,0.056,250,0.230383,14.0,0.056,250,0.230383,14.0,0.172584,0.172584,0.155831
5,model_baseline,0.056,250,0.230383,14.0,0.208,250,0.406691,52.0,0.172584,0.172584,0.155831


In [10]:
# Inspect the simulation log built by the run above. As the output below shows,
# it is a dict with integer keys (presumably the round number -- confirm against
# the loop that fills it), each mapping to a list of per-model records with the
# fields model_id, total_pool_size, rewards, regrets and allocation.
timesteps

{1: [{'model_id': 'model1_rf',
   'total_pool_size': 250,
   'rewards': 12.0,
   'regrets': 53.0,
   'allocation': 0.16666666666666666},
  {'model_id': 'model2_lr',
   'total_pool_size': 250,
   'rewards': 18.0,
   'regrets': 44.0,
   'allocation': 0.16666666666666666},
  {'model_id': 'model3_lr',
   'total_pool_size': 250,
   'rewards': 24.0,
   'regrets': 56.0,
   'allocation': 0.16666666666666666},
  {'model_id': 'model4_xgb',
   'total_pool_size': 250,
   'rewards': 17.0,
   'regrets': 10.0,
   'allocation': 0.16666666666666666},
  {'model_id': 'model5_lgbm',
   'total_pool_size': 250,
   'rewards': 13.0,
   'regrets': 39.0,
   'allocation': 0.16666666666666666},
  {'model_id': 'model_baseline',
   'total_pool_size': 250,
   'rewards': 20.0,
   'regrets': 53.0,
   'allocation': 0.16666666666666666}],
 2: [{'model_id': 'model1_rf',
   'total_pool_size': 250,
   'rewards': 14.0,
   'regrets': 50.0,
   'allocation': 0.11538461538461538},
  {'model_id': 'model2_lr',
   'total_pool_size

In [11]:
# Persist the full simulation sequence to disk as a JSON document.
# Note: JSON object keys are always strings, so the integer keys of
# `timesteps` are stored as "1", "2", ... in the file.
with open('timesteps.json', mode='w') as out_handle:
    json.dump(timesteps, fp=out_handle)