In [1]:
import pandas as pd
import numpy as np
import re
import datetime
import os
import time
import json

from darts.allocation import Allocator
from darts.bandit import Bandit

In [2]:
# Read the target pool (one row per target/model pair) from disk and report
# how long the read took.
file_loc = 'data/'
pool_csv_path = file_loc + 'target_voter_universe.csv'

start = time.time()
allocation_pool_df = pd.read_csv(pool_csv_path)
elapsed_seconds = time.time() - start
print(f"... completed in {elapsed_seconds} seconds")

... completed in 0.019558191299438477 seconds


In [3]:
# Preview the first 10 rows of the target pool to check the columns that were loaded.
allocation_pool_df.head(10)

Unnamed: 0,target_id,model_id,probability,target_round,target_result,target_reward,target_regret
0,0,model1_rf,0.0,0,0,0,0
1,0,model2_lr,1.0,0,0,0,0
2,0,model3_lr,0.0,0,0,0,0
3,0,model4_xgb,1.0,0,0,0,0
4,0,model5_lgbm,0.0,0,0,0,0
5,0,model_baseline,1.0,0,0,0,0
6,1,model1_rf,0.0,0,0,0,0
7,1,model2_lr,0.0,0,0,0,0
8,1,model3_lr,0.0,0,0,0,0
9,1,model4_xgb,0.0,0,0,0,0


In [4]:
# Rough number of 250-target rounds the pool can supply.
# Assumes six rows per target (one per model) -- TODO confirm against the data.
len(allocation_pool_df) / 6 / 250

54.54

In [5]:
# Read the recorded target results (target id -> result) from disk and report
# how long the read took.
file_loc = 'data/'
results_csv_path = file_loc + 'voter_responses.csv'

start = time.time()
target_results_df = pd.read_csv(results_csv_path)
elapsed_seconds = time.time() - start
print(f"... completed in {elapsed_seconds} seconds")

... completed in 0.0018126964569091797 seconds


In [6]:
# Preview the first 10 rows of the recorded target results.
target_results_df.head(10)

Unnamed: 0,target_id,target_result
0,0,1
1,1,0
2,3,0
3,4,0
4,16,1
5,22,0
6,26,0
7,27,1
8,29,0
9,31,0


In [7]:
# ---------------------------------------------------------------------------
# Column names used throughout the simulation
# ---------------------------------------------------------------------------

# id of an individual target
target_id_col = 'target_id'

# confidence ranking (e.g. probability) that any of the models have on this
# target resulting in a 'reward' (e.g. the target outcome of the model)
confidence_rank_col = 'probability'

# names of the different 'arms' (models) used in the target pool
arm_name_col = 'model_id'

# round in which a target was pulled from the pool
pool_round_col = 'target_round'

# the 'result', 'reward' and 'regret' recorded for a target
result_col = 'target_result'
reward_col = 'target_reward'
regret_col = 'target_regret'

# indicator for which arm was 'picked'
picked_col = 'model_picked'

# ---------------------------------------------------------------------------
# Initial allocation and simulation settings
# ---------------------------------------------------------------------------

# Initial allocation across the pool of models. The shares must sum to 1.
# For the simulation, every model starts with an equal share.
arm_names = ['model_baseline', 'model1_rf', 'model2_lr',
             'model3_lr', 'model4_xgb', 'model5_lgbm']
allocation_method = {arm: 1 / len(arm_names) for arm in arm_names}

# number of rounds to simulate
simulation_rounds = 50

# number of targets to pull in the first round
num_targets = 250

# allocation policy, strategy and order
allocation_policy = 'UCB1'
allocation_strategy = 'round-robin'
allocation_order = 'best'


In [8]:
# Set up defaults - nothing picked yet.
# NOTE: re-running this cell resets every pick indicator back to 0.
allocation_pool_df[picked_col] = 0

# Index the responses by target id so they can be aligned onto the pool with
# DataFrame.update(). The guard keeps the cell re-runnable: once the id column
# has been moved into the index, a second set_index() would raise a KeyError.
if target_results_df.index.name != target_id_col:
    target_results_df = target_results_df.set_index([target_id_col])

In [9]:
# Run the allocation simulation for `simulation_rounds` rounds.
#
# Each round:
#   1. asks the Allocator for targets from the not-yet-used part of the pool,
#   2. stamps those targets with the round number and the arm that picked them,
#   3. syncs the recorded results onto the pool and derives rewards / regrets,
#   4. records per-arm statistics in `timesteps`,
#   5. lets the Bandit compute the allocations for the next round.
#
# NOTE: this cell mutates `allocation_pool_df` and overwrites `allocation_method`,
# so re-running it on its own continues from the previous state; re-run the
# earlier cells first to start from scratch.


def confusion_terms(pool_df, weight_by_pick=True):
    """Return the row-wise (TP, TN, FN, FP) products for every row of ``pool_df``.

    Each term multiplies the observed result, the model confidence and the
    picked indicator, or their complements:

        TP = result * confidence * picked
        TN = (1 - result) * (1 - confidence) * (1 - picked)
        FN = result * (1 - confidence) * (1 - picked)
        FP = (1 - result) * confidence * picked

    With ``weight_by_pick=False`` the picked factor is left out of all four
    products.
    """
    result = pool_df[result_col]
    confidence = pool_df[confidence_rank_col]

    true_pos = result * confidence
    true_neg = (1 - result) * (1 - confidence)
    false_neg = result * (1 - confidence)
    false_pos = (1 - result) * confidence

    if weight_by_pick:
        picked = pool_df[picked_col]
        true_pos = true_pos * picked
        true_neg = true_neg * (1 - picked)
        false_neg = false_neg * (1 - picked)
        false_pos = false_pos * picked

    return true_pos, true_neg, false_neg, false_pos


# Per-round summary: {round number: [one record per arm, ...]}
timesteps = {}

# Every column except the result column; used as the alignment key when the
# synced results of a round are written back into the master pool.
pool_key_cols = [target_id_col, arm_name_col, confidence_rank_col, pool_round_col,
                 reward_col, regret_col, picked_col]

# Flattened aggregate names -> names used in the timestep records. Derived from
# the configured column names so the rename keeps working if those change.
stats_renames = {
    f'{reward_col}_count': 'total_pool_size',
    f'{reward_col}_sum': 'rewards',
    f'{regret_col}_sum': 'regrets',
}

for allocation_round in range(1, simulation_rounds + 1):

    # rows whose round is still 0 have not been pulled in any earlier round
    remaining_allocation_pool_df = allocation_pool_df[allocation_pool_df[pool_round_col] == 0].copy()

    # set up an allocator
    # TODO: I think this should be in the sequence of:
    #  allocation_pool_df, arm_name_col, target_id_col, confidence_rank_col, allocation_method, strategy, order
    # TODO: Would be good to explain that Allocator introduces a picked column to the dataframe. This is masked behavior.
    #       I may rely on this column if I know about it, but not sure if it will ever go away?
    allocator = Allocator(allocation_method, num_targets, remaining_allocation_pool_df, arm_name_col,
                          confidence_rank_col, target_id_col, strategy=allocation_strategy,
                          order=allocation_order) # another idea is to provide a dictionary mapping to simplify the calling interface

    # retrieve targets
    targets = allocator.allocate_pool() # TODO: can we change this to allocator.retrieve_targets(num_targets) ? Also, might be nice to have a DF return option

    # convert to dataframe: one row per target, stamped with the current round
    targets_df = pd.DataFrame(targets, columns=[target_id_col, arm_name_col])
    targets_df[pool_round_col] = allocation_round
    targets_df = targets_df[[target_id_col, pool_round_col]]

    # join targets with universe of allocations (update() aligns on target id)
    targets_df = targets_df.set_index([target_id_col])
    allocation_pool_df = allocation_pool_df.set_index([target_id_col])
    allocation_pool_df.update(targets_df)
    allocation_pool_df = allocation_pool_df.reset_index()

    # add indicator for the arm we picked
    target_arm_picked_df = pd.DataFrame(targets, columns=[target_id_col, arm_name_col])
    target_arm_picked_df[picked_col] = 1
    target_arm_picked_df = target_arm_picked_df[[target_id_col, arm_name_col, picked_col]]

    # join target model we picked with the universe of allocations
    # (update() aligns on the (target id, arm) pair)
    target_arm_picked_df = target_arm_picked_df.set_index([target_id_col, arm_name_col])
    allocation_pool_df = allocation_pool_df.set_index([target_id_col, arm_name_col])
    allocation_pool_df.update(target_arm_picked_df)
    allocation_pool_df = allocation_pool_df.reset_index()

    # evaluate this allocation pool with the bandit
    last_allocation_pool_df = allocation_pool_df[allocation_pool_df[pool_round_col] == allocation_round]

    # sync up results: copy the recorded result of each target onto this round's rows
    last_allocation_pool_df = last_allocation_pool_df.set_index([target_id_col])
    last_allocation_pool_df.update(target_results_df)
    last_allocation_pool_df = last_allocation_pool_df.reset_index(level=0)

    # update results in the master pool set; with every other column in the
    # index, only the result column is left to be updated
    allocation_pool_df = allocation_pool_df.set_index(pool_key_cols)
    last_allocation_pool_df = last_allocation_pool_df.set_index(pool_key_cols)
    allocation_pool_df.update(last_allocation_pool_df)
    allocation_pool_df = allocation_pool_df.reset_index()

    # TN and FP are not used below; they are kept so all four terms stay
    # available for inspection after the loop.
    TP, TN, FN, FP = confusion_terms(allocation_pool_df)

    # update rewards from the results we synced
    allocation_pool_df[reward_col] = TP

    # update regrets from the results we synced
    # regrets could be false positives or false negatives (type 1 and type 2
    # error); only the false negatives are used here
    allocation_pool_df[regret_col] = FN

    # prepare a dataframe with just our results of this round for the bandit to evaluate
    results_df = allocation_pool_df[allocation_pool_df[pool_round_col] == allocation_round].copy()

    rewarded_hits = (results_df[result_col] == 1) & (results_df[reward_col] == 1)
    if not rewarded_hits.any():
        # no more results left after a steady state - reset selections to include
        # possibilities from the other models (drop the picked factor)
        TP, TN, FN, FP = confusion_terms(allocation_pool_df, weight_by_pick=False)

        # update rewards and regrets from the results we synced
        allocation_pool_df[reward_col] = TP
        allocation_pool_df[regret_col] = FN

        # prepare a dataframe with just our results of this round for the bandit to evaluate
        results_df = allocation_pool_df[allocation_pool_df[pool_round_col] == allocation_round].copy()

    # prepare dictionary of results for timestep tracking
    stats = (results_df[[arm_name_col, reward_col, regret_col]]
             .groupby(arm_name_col)
             .agg({reward_col: ['count', 'sum'], regret_col: ['sum']}))
    # flatten the (column, aggregate) pairs into '<column>_<aggregate>' names
    stats.columns = ['_'.join(tup).rstrip('_') for tup in stats.columns.to_flat_index()]
    # the groupby result is already indexed by arm name, which update() needs below
    stats = stats.rename(columns=stats_renames)
    # allocation that was in effect for this round; arms missing from
    # allocation_method keep the 0.0 default
    stats['allocation'] = 0.0
    allocs = pd.DataFrame.from_dict(allocation_method, orient='index', columns=['allocation'])
    allocs.index.name = arm_name_col
    stats.update(allocs)
    stats = stats.reset_index()
    timesteps[allocation_round] = stats.to_dict(orient='records')

    # set up a multi-arm bandit and calculate allocations to each arm.
    bandit = Bandit(results_df, arm_name_col, reward_col, regret_col, policy = allocation_policy)

    # use these allocations for the next round
    allocation_method = bandit.get_new_allocations()# bandit.make_allocs().set_index(arm_name_col)['allocation'].to_dict()
    print("Allocations after round", allocation_round, ":\n", allocation_method)
    display(bandit.get_allocation_stats())

    # useful for outputting the result of each timestep into its own directory
    # and keeping an updated master file
    # NOTE(review): targets_df is indexed by target id at this point, so the
    # targets.csv line below looks like it needs a reset_index() first - confirm
    # before re-enabling.

    #allocation_pool_df[[target_id_col, arm_name_col, confidence_rank_col, pool_round_col, reward_col, result_col]].sort_values(by=[target_id_col,arm_name_col]).to_csv(file_loc + 'simulated_target_universe_updates.csv', index=False)

    #round_loc = file_loc + 'round_{:05}/'.format(allocation_round)
    #if not os.path.exists(round_loc):
    #    os.makedirs(round_loc)
    #
    #targets_df.sort_values(by=[target_id_col])[target_id_col].to_csv(round_loc + 'targets.csv', index=False)
    #allocation_pool_df[[target_id_col, arm_name_col, confidence_rank_col, pool_round_col, reward_col, result_col]].sort_values(by=[target_id_col,arm_name_col]).to_csv(round_loc + 'simulated_allocation_round_{:05}'.format(allocation_round) +'.csv', index=False)
    

Allocations after round 1 :
 {'model3_lr': 0.23076923076923075, 'model_baseline': 0.1923076923076923, 'model2_lr': 0.17307692307692304, 'model4_xgb': 0.16346153846153846, 'model5_lgbm': 0.12499999999999999, 'model1_rf': 0.11538461538461538}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.096,250,0.295182,24.0,0.224,250,0.417758,56.0,0.096,0.096,0.230769
5,model_baseline,0.08,250,0.271837,20.0,0.212,250,0.409545,53.0,0.08,0.08,0.192308
1,model2_lr,0.072,250,0.259006,18.0,0.176,250,0.381584,44.0,0.072,0.072,0.173077
3,model4_xgb,0.068,250,0.252251,17.0,0.04,250,0.196352,10.0,0.068,0.068,0.163462
4,model5_lgbm,0.052,250,0.222472,13.0,0.156,250,0.363583,39.0,0.052,0.052,0.125
0,model1_rf,0.048,250,0.214195,12.0,0.212,250,0.409545,53.0,0.048,0.048,0.115385


Allocations after round 2 :
 {'model3_lr': 0.28448275862068967, 'model2_lr': 0.17241379310344826, 'model4_xgb': 0.15517241379310343, 'model_baseline': 0.14655172413793105, 'model1_rf': 0.12068965517241378, 'model5_lgbm': 0.12068965517241378}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.132,250,0.33917,33.0,0.26,250,0.439514,65.0,0.132,0.132,0.284483
1,model2_lr,0.08,250,0.271837,20.0,0.184,250,0.388261,46.0,0.08,0.08,0.172414
3,model4_xgb,0.072,250,0.259006,18.0,0.052,250,0.222472,13.0,0.072,0.072,0.155172
5,model_baseline,0.068,250,0.252251,17.0,0.236,250,0.425474,59.0,0.068,0.068,0.146552
0,model1_rf,0.056,250,0.230383,14.0,0.2,250,0.400802,50.0,0.056,0.056,0.12069
4,model5_lgbm,0.056,250,0.230383,14.0,0.168,250,0.374616,42.0,0.056,0.056,0.12069


Allocations after round 3 :
 {'model3_lr': 0.2990654205607477, 'model2_lr': 0.19626168224299068, 'model5_lgbm': 0.14018691588785046, 'model_baseline': 0.14018691588785046, 'model4_xgb': 0.12149532710280374, 'model1_rf': 0.102803738317757}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.128,250,0.33476,32.0,0.252,250,0.435032,63.0,0.128,0.128,0.299065
1,model2_lr,0.084,250,0.277944,21.0,0.188,250,0.391496,47.0,0.084,0.084,0.196262
4,model5_lgbm,0.06,250,0.237963,15.0,0.116,250,0.320867,29.0,0.06,0.06,0.140187
5,model_baseline,0.06,250,0.237963,15.0,0.24,250,0.42794,60.0,0.06,0.06,0.140187
3,model4_xgb,0.052,250,0.222472,13.0,0.048,250,0.214195,12.0,0.052,0.052,0.121495
0,model1_rf,0.044,250,0.205507,11.0,0.192,250,0.394663,48.0,0.044,0.044,0.102804


Allocations after round 4 :
 {'model3_lr': 0.2857142857142857, 'model2_lr': 0.25, 'model5_lgbm': 0.14285714285714285, 'model_baseline': 0.125, 'model4_xgb': 0.10714285714285714, 'model1_rf': 0.08928571428571429}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.128,250,0.33476,32.0,0.304,250,0.460905,76.0,0.128,0.128,0.285714
1,model2_lr,0.112,250,0.315999,28.0,0.208,250,0.406691,52.0,0.112,0.112,0.25
4,model5_lgbm,0.064,250,0.245244,16.0,0.156,250,0.363583,39.0,0.064,0.064,0.142857
5,model_baseline,0.056,250,0.230383,14.0,0.26,250,0.439514,65.0,0.056,0.056,0.125
3,model4_xgb,0.048,250,0.214195,12.0,0.092,250,0.289606,23.0,0.048,0.048,0.107143
0,model1_rf,0.04,250,0.196352,10.0,0.204,250,0.403777,51.0,0.04,0.04,0.089286


Allocations after round 5 :
 {'model3_lr': 0.3142857142857143, 'model2_lr': 0.27619047619047615, 'model5_lgbm': 0.18095238095238092, 'model1_rf': 0.07619047619047618, 'model4_xgb': 0.07619047619047618, 'model_baseline': 0.07619047619047618}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.132,250,0.33917,33.0,0.288,250,0.453739,72.0,0.132,0.132,0.314286
1,model2_lr,0.116,250,0.320867,29.0,0.184,250,0.388261,46.0,0.116,0.116,0.27619
4,model5_lgbm,0.076,250,0.26553,19.0,0.144,250,0.351794,36.0,0.076,0.076,0.180952
0,model1_rf,0.032,250,0.176353,8.0,0.196,250,0.397765,49.0,0.032,0.032,0.07619
3,model4_xgb,0.032,250,0.176353,8.0,0.068,250,0.252251,17.0,0.032,0.032,0.07619
5,model_baseline,0.032,250,0.176353,8.0,0.256,250,0.437297,64.0,0.032,0.032,0.07619


Allocations after round 6 :
 {'model3_lr': 0.33333333333333326, 'model2_lr': 0.27272727272727265, 'model5_lgbm': 0.1616161616161616, 'model1_rf': 0.11111111111111108, 'model4_xgb': 0.060606060606060594, 'model_baseline': 0.060606060606060594}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.132,250,0.33917,33.0,0.252,250,0.435032,63.0,0.132,0.132,0.333333
1,model2_lr,0.108,250,0.311003,27.0,0.18,250,0.384958,45.0,0.108,0.108,0.272727
4,model5_lgbm,0.064,250,0.245244,16.0,0.136,250,0.343476,34.0,0.064,0.064,0.161616
0,model1_rf,0.044,250,0.205507,11.0,0.176,250,0.381584,44.0,0.044,0.044,0.111111
3,model4_xgb,0.024,250,0.153356,6.0,0.044,250,0.205507,11.0,0.024,0.024,0.060606
5,model_baseline,0.024,250,0.153356,6.0,0.204,250,0.403777,51.0,0.024,0.024,0.060606


Allocations after round 7 :
 {'model3_lr': 0.33636363636363636, 'model2_lr': 0.2909090909090909, 'model5_lgbm': 0.16363636363636364, 'model1_rf': 0.11818181818181818, 'model_baseline': 0.06363636363636364, 'model4_xgb': 0.027272727272727275}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.148,250,0.355812,37.0,0.256,250,0.437297,64.0,0.148,0.148,0.336364
1,model2_lr,0.128,250,0.33476,32.0,0.204,250,0.403777,51.0,0.128,0.128,0.290909
4,model5_lgbm,0.072,250,0.259006,18.0,0.16,250,0.367341,40.0,0.072,0.072,0.163636
0,model1_rf,0.052,250,0.222472,13.0,0.164,250,0.371018,41.0,0.052,0.052,0.118182
5,model_baseline,0.028,250,0.165304,7.0,0.236,250,0.425474,59.0,0.028,0.028,0.063636
3,model4_xgb,0.012,250,0.109104,3.0,0.088,250,0.283863,22.0,0.012,0.012,0.027273


Allocations after round 8 :
 {'model3_lr': 0.3592233009708738, 'model2_lr': 0.3009708737864078, 'model5_lgbm': 0.1650485436893204, 'model1_rf': 0.14563106796116504, 'model_baseline': 0.01941747572815534, 'model4_xgb': 0.00970873786407767}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.148,250,0.355812,37.0,0.264,250,0.441684,66.0,0.148,0.148,0.359223
1,model2_lr,0.124,250,0.330243,31.0,0.152,250,0.359741,38.0,0.124,0.124,0.300971
4,model5_lgbm,0.068,250,0.252251,17.0,0.136,250,0.343476,34.0,0.068,0.068,0.165049
0,model1_rf,0.06,250,0.237963,15.0,0.156,250,0.363583,39.0,0.06,0.06,0.145631
5,model_baseline,0.008,250,0.089263,2.0,0.248,250,0.432718,62.0,0.008,0.008,0.019417
3,model4_xgb,0.004,250,0.063246,1.0,0.068,250,0.252251,17.0,0.004,0.004,0.009709


Allocations after round 9 :
 {'model3_lr': 0.42016806722689076, 'model2_lr': 0.25210084033613445, 'model5_lgbm': 0.17647058823529413, 'model1_rf': 0.12605042016806722, 'model_baseline': 0.025210084033613443, 'model4_xgb': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.2,250,0.400802,50.0,0.276,250,0.447914,69.0,0.2,0.2,0.420168
1,model2_lr,0.12,250,0.325613,30.0,0.216,250,0.41234,54.0,0.12,0.12,0.252101
4,model5_lgbm,0.084,250,0.277944,21.0,0.12,250,0.325613,30.0,0.084,0.084,0.176471
0,model1_rf,0.06,250,0.237963,15.0,0.2,250,0.400802,50.0,0.06,0.06,0.12605
5,model_baseline,0.012,250,0.109104,3.0,0.292,250,0.455594,73.0,0.012,0.012,0.02521
3,model4_xgb,0.0,250,0.0,0.0,0.08,250,0.271837,20.0,0.0,0.0,0.0


Allocations after round 10 :
 {'model3_lr': 0.43362831858407086, 'model2_lr': 0.22123893805309738, 'model5_lgbm': 0.19469026548672566, 'model1_rf': 0.13274336283185842, 'model_baseline': 0.01769911504424779, 'model4_xgb': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.196,250,0.397765,49.0,0.252,250,0.435032,63.0,0.196,0.196,0.433628
1,model2_lr,0.1,250,0.300602,25.0,0.16,250,0.367341,40.0,0.1,0.1,0.221239
4,model5_lgbm,0.088,250,0.283863,22.0,0.124,250,0.330243,31.0,0.088,0.088,0.19469
0,model1_rf,0.06,250,0.237963,15.0,0.196,250,0.397765,49.0,0.06,0.06,0.132743
5,model_baseline,0.008,250,0.089263,2.0,0.224,250,0.417758,56.0,0.008,0.008,0.017699
3,model4_xgb,0.0,250,0.0,0.0,0.068,250,0.252251,17.0,0.0,0.0,0.0


Allocations after round 11 :
 {'model3_lr': 0.4954954954954955, 'model5_lgbm': 0.1891891891891892, 'model2_lr': 0.18018018018018017, 'model1_rf': 0.13513513513513511, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.22,250,0.415077,55.0,0.224,250,0.417758,56.0,0.22,0.22,0.495495
4,model5_lgbm,0.084,250,0.277944,21.0,0.1,250,0.300602,25.0,0.084,0.084,0.189189
1,model2_lr,0.08,250,0.271837,20.0,0.18,250,0.384958,45.0,0.08,0.08,0.18018
0,model1_rf,0.06,250,0.237963,15.0,0.18,250,0.384958,45.0,0.06,0.06,0.135135
3,model4_xgb,0.0,250,0.0,0.0,0.04,250,0.196352,10.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.22,250,0.415077,55.0,0.0,0.0,0.0


Allocations after round 12 :
 {'model3_lr': 0.47619047619047616, 'model2_lr': 0.19047619047619047, 'model5_lgbm': 0.19047619047619047, 'model1_rf': 0.14285714285714285, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.2,250,0.400802,50.0,0.22,250,0.415077,55.0,0.2,0.2,0.47619
1,model2_lr,0.08,250,0.271837,20.0,0.188,250,0.391496,47.0,0.08,0.08,0.190476
4,model5_lgbm,0.08,250,0.271837,20.0,0.088,250,0.283863,22.0,0.08,0.08,0.190476
0,model1_rf,0.06,250,0.237963,15.0,0.184,250,0.388261,46.0,0.06,0.06,0.142857
3,model4_xgb,0.0,250,0.0,0.0,0.044,250,0.205507,11.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.228,250,0.420384,57.0,0.0,0.0,0.0


Allocations after round 13 :
 {'model3_lr': 0.5436893203883496, 'model5_lgbm': 0.17475728155339806, 'model2_lr': 0.14563106796116504, 'model1_rf': 0.1359223300970874, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.224,250,0.417758,56.0,0.188,250,0.391496,47.0,0.224,0.224,0.543689
4,model5_lgbm,0.072,250,0.259006,18.0,0.104,250,0.305873,26.0,0.072,0.072,0.174757
1,model2_lr,0.06,250,0.237963,15.0,0.16,250,0.367341,40.0,0.06,0.06,0.145631
0,model1_rf,0.056,250,0.230383,14.0,0.164,250,0.371018,41.0,0.056,0.056,0.135922
3,model4_xgb,0.0,250,0.0,0.0,0.064,250,0.245244,16.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.224,250,0.417758,56.0,0.0,0.0,0.0


Allocations after round 14 :
 {'model3_lr': 0.5736434108527131, 'model5_lgbm': 0.17054263565891473, 'model2_lr': 0.14728682170542634, 'model1_rf': 0.10852713178294573, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.296,250,0.457407,74.0,0.22,250,0.415077,55.0,0.296,0.296,0.573643
4,model5_lgbm,0.088,250,0.283863,22.0,0.14,250,0.347683,35.0,0.088,0.088,0.170543
1,model2_lr,0.076,250,0.26553,19.0,0.2,250,0.400802,50.0,0.076,0.076,0.147287
0,model1_rf,0.056,250,0.230383,14.0,0.172,250,0.378137,43.0,0.056,0.056,0.108527
3,model4_xgb,0.0,250,0.0,0.0,0.064,250,0.245244,16.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.316,250,0.465846,79.0,0.0,0.0,0.0


Allocations after round 15 :
 {'model3_lr': 0.5853658536585366, 'model5_lgbm': 0.16260162601626016, 'model1_rf': 0.13821138211382114, 'model2_lr': 0.11382113821138212, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.288,250,0.453739,72.0,0.204,250,0.403777,51.0,0.288,0.288,0.585366
4,model5_lgbm,0.08,250,0.271837,20.0,0.108,250,0.311003,27.0,0.08,0.08,0.162602
0,model1_rf,0.068,250,0.252251,17.0,0.172,250,0.378137,43.0,0.068,0.068,0.138211
1,model2_lr,0.056,250,0.230383,14.0,0.176,250,0.381584,44.0,0.056,0.056,0.113821
3,model4_xgb,0.0,250,0.0,0.0,0.044,250,0.205507,11.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.252,250,0.435032,63.0,0.0,0.0,0.0


Allocations after round 16 :
 {'model3_lr': 0.582608695652174, 'model5_lgbm': 0.2173913043478261, 'model1_rf': 0.13043478260869565, 'model2_lr': 0.06956521739130435, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.268,250,0.443806,67.0,0.192,250,0.394663,48.0,0.268,0.268,0.582609
4,model5_lgbm,0.1,250,0.300602,25.0,0.092,250,0.289606,23.0,0.1,0.1,0.217391
0,model1_rf,0.06,250,0.237963,15.0,0.164,250,0.371018,41.0,0.06,0.06,0.130435
1,model2_lr,0.032,250,0.176353,8.0,0.172,250,0.378137,43.0,0.032,0.032,0.069565
3,model4_xgb,0.0,250,0.0,0.0,0.048,250,0.214195,12.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.252,250,0.435032,63.0,0.0,0.0,0.0


Allocations after round 17 :
 {'model3_lr': 0.5514018691588785, 'model5_lgbm': 0.26168224299065423, 'model1_rf': 0.11214953271028037, 'model2_lr': 0.07476635514018692, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.236,250,0.425474,59.0,0.192,250,0.394663,48.0,0.236,0.236,0.551402
4,model5_lgbm,0.112,250,0.315999,28.0,0.072,250,0.259006,18.0,0.112,0.112,0.261682
0,model1_rf,0.048,250,0.214195,12.0,0.204,250,0.403777,51.0,0.048,0.048,0.11215
1,model2_lr,0.032,250,0.176353,8.0,0.196,250,0.397765,49.0,0.032,0.032,0.074766
3,model4_xgb,0.0,250,0.0,0.0,0.04,250,0.196352,10.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.228,250,0.420384,57.0,0.0,0.0,0.0


Allocations after round 18 :
 {'model3_lr': 0.6129032258064516, 'model5_lgbm': 0.2338709677419355, 'model1_rf': 0.10483870967741934, 'model2_lr': 0.04838709677419355, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.304,250,0.460905,76.0,0.192,250,0.394663,48.0,0.304,0.304,0.612903
4,model5_lgbm,0.116,250,0.320867,29.0,0.092,250,0.289606,23.0,0.116,0.116,0.233871
0,model1_rf,0.052,250,0.222472,13.0,0.184,250,0.388261,46.0,0.052,0.052,0.104839
1,model2_lr,0.024,250,0.153356,6.0,0.172,250,0.378137,43.0,0.024,0.024,0.048387
3,model4_xgb,0.0,250,0.0,0.0,0.06,250,0.237963,15.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.276,250,0.447914,69.0,0.0,0.0,0.0


Allocations after round 19 :
 {'model3_lr': 0.6153846153846154, 'model5_lgbm': 0.2820512820512821, 'model1_rf': 0.06837606837606838, 'model2_lr': 0.03418803418803419, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.288,250,0.453739,72.0,0.18,250,0.384958,45.0,0.288,0.288,0.615385
4,model5_lgbm,0.132,250,0.33917,33.0,0.08,250,0.271837,20.0,0.132,0.132,0.282051
0,model1_rf,0.032,250,0.176353,8.0,0.192,250,0.394663,48.0,0.032,0.032,0.068376
1,model2_lr,0.016,250,0.125727,4.0,0.196,250,0.397765,49.0,0.016,0.016,0.034188
3,model4_xgb,0.0,250,0.0,0.0,0.028,250,0.165304,7.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.26,250,0.439514,65.0,0.0,0.0,0.0


Allocations after round 20 :
 {'model3_lr': 0.6, 'model5_lgbm': 0.2818181818181818, 'model1_rf': 0.07272727272727272, 'model2_lr': 0.045454545454545456, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.264,250,0.441684,66.0,0.176,250,0.381584,44.0,0.264,0.264,0.6
4,model5_lgbm,0.124,250,0.330243,31.0,0.088,250,0.283863,22.0,0.124,0.124,0.281818
0,model1_rf,0.032,250,0.176353,8.0,0.168,250,0.374616,42.0,0.032,0.032,0.072727
1,model2_lr,0.02,250,0.140281,5.0,0.16,250,0.367341,40.0,0.02,0.02,0.045455
3,model4_xgb,0.0,250,0.0,0.0,0.028,250,0.165304,7.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.304,250,0.460905,76.0,0.0,0.0,0.0


Allocations after round 21 :
 {'model3_lr': 0.6666666666666666, 'model5_lgbm': 0.28703703703703703, 'model1_rf': 0.02777777777777778, 'model2_lr': 0.018518518518518517, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.288,250,0.453739,72.0,0.144,250,0.351794,36.0,0.288,0.288,0.666667
4,model5_lgbm,0.124,250,0.330243,31.0,0.072,250,0.259006,18.0,0.124,0.124,0.287037
0,model1_rf,0.012,250,0.109104,3.0,0.168,250,0.374616,42.0,0.012,0.012,0.027778
1,model2_lr,0.008,250,0.089263,2.0,0.16,250,0.367341,40.0,0.008,0.008,0.018519
3,model4_xgb,0.0,250,0.0,0.0,0.02,250,0.140281,5.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.264,250,0.441684,66.0,0.0,0.0,0.0


Allocations after round 22 :
 {'model3_lr': 0.6610169491525423, 'model5_lgbm': 0.2966101694915254, 'model1_rf': 0.02542372881355932, 'model2_lr': 0.01694915254237288, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.312,250,0.464239,78.0,0.16,250,0.367341,40.0,0.312,0.312,0.661017
4,model5_lgbm,0.14,250,0.347683,35.0,0.08,250,0.271837,20.0,0.14,0.14,0.29661
0,model1_rf,0.012,250,0.109104,3.0,0.208,250,0.406691,52.0,0.012,0.012,0.025424
1,model2_lr,0.008,250,0.089263,2.0,0.18,250,0.384958,45.0,0.008,0.008,0.016949
3,model4_xgb,0.0,250,0.0,0.0,0.04,250,0.196352,10.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.328,250,0.470427,82.0,0.0,0.0,0.0


Allocations after round 23 :
 {'model3_lr': 0.7073170731707317, 'model5_lgbm': 0.2682926829268293, 'model2_lr': 0.016260162601626018, 'model1_rf': 0.008130081300813009, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.348,250,0.477292,87.0,0.144,250,0.351794,36.0,0.348,0.348,0.707317
4,model5_lgbm,0.132,250,0.33917,33.0,0.052,250,0.222472,13.0,0.132,0.132,0.268293
1,model2_lr,0.008,250,0.089263,2.0,0.168,250,0.374616,42.0,0.008,0.008,0.01626
0,model1_rf,0.004,250,0.063246,1.0,0.168,250,0.374616,42.0,0.004,0.004,0.00813
3,model4_xgb,0.0,250,0.0,0.0,0.012,250,0.109104,3.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.276,250,0.447914,69.0,0.0,0.0,0.0


Allocations after round 24 :
 {'model3_lr': 0.688, 'model5_lgbm': 0.288, 'model1_rf': 0.016, 'model2_lr': 0.008, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.344,250,0.475994,86.0,0.156,250,0.363583,39.0,0.344,0.344,0.688
4,model5_lgbm,0.144,250,0.351794,36.0,0.056,250,0.230383,14.0,0.144,0.144,0.288
0,model1_rf,0.008,250,0.089263,2.0,0.196,250,0.397765,49.0,0.008,0.008,0.016
1,model2_lr,0.004,250,0.063246,1.0,0.176,250,0.381584,44.0,0.004,0.004,0.008
3,model4_xgb,0.0,250,0.0,0.0,0.02,250,0.140281,5.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.292,250,0.455594,73.0,0.0,0.0,0.0


Allocations after round 25 :
 {'model3_lr': 0.7107438016528925, 'model5_lgbm': 0.2809917355371901, 'model1_rf': 0.008264462809917356, 'model2_lr': 0.0, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.344,250,0.475994,86.0,0.14,250,0.347683,35.0,0.344,0.344,0.710744
4,model5_lgbm,0.136,250,0.343476,34.0,0.048,250,0.214195,12.0,0.136,0.136,0.280992
0,model1_rf,0.004,250,0.063246,1.0,0.22,250,0.415077,55.0,0.004,0.004,0.008264
1,model2_lr,0.0,250,0.0,0.0,0.24,250,0.42794,60.0,0.0,0.0,0.0
3,model4_xgb,0.0,250,0.0,0.0,0.016,250,0.125727,4.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.288,250,0.453739,72.0,0.0,0.0,0.0


Allocations after round 26 :
 {'model3_lr': 0.696, 'model5_lgbm': 0.288, 'model1_rf': 0.016, 'model2_lr': 0.0, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.348,250,0.477292,87.0,0.152,250,0.359741,38.0,0.348,0.348,0.696
4,model5_lgbm,0.144,250,0.351794,36.0,0.064,250,0.245244,16.0,0.144,0.144,0.288
0,model1_rf,0.008,250,0.089263,2.0,0.168,250,0.374616,42.0,0.008,0.008,0.016
1,model2_lr,0.0,250,0.0,0.0,0.16,250,0.367341,40.0,0.0,0.0,0.0
3,model4_xgb,0.0,250,0.0,0.0,0.012,250,0.109104,3.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.272,250,0.445883,68.0,0.0,0.0,0.0


Allocations after round 27 :
 {'model3_lr': 0.7181818181818181, 'model5_lgbm': 0.2727272727272727, 'model1_rf': 0.00909090909090909, 'model2_lr': 0.0, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.316,250,0.465846,79.0,0.124,250,0.330243,31.0,0.316,0.316,0.718182
4,model5_lgbm,0.12,250,0.325613,30.0,0.024,250,0.153356,6.0,0.12,0.12,0.272727
0,model1_rf,0.004,250,0.063246,1.0,0.188,250,0.391496,47.0,0.004,0.004,0.009091
1,model2_lr,0.0,250,0.0,0.0,0.188,250,0.391496,47.0,0.0,0.0,0.0
3,model4_xgb,0.0,250,0.0,0.0,0.02,250,0.140281,5.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.296,250,0.457407,74.0,0.0,0.0,0.0


Allocations after round 28 :
 {'model3_lr': 0.7190082644628099, 'model5_lgbm': 0.2644628099173554, 'model1_rf': 0.01652892561983471, 'model2_lr': 0.0, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.348,250,0.477292,87.0,0.136,250,0.343476,34.0,0.348,0.348,0.719008
4,model5_lgbm,0.128,250,0.33476,32.0,0.056,250,0.230383,14.0,0.128,0.128,0.264463
0,model1_rf,0.008,250,0.089263,2.0,0.18,250,0.384958,45.0,0.008,0.008,0.016529
1,model2_lr,0.0,250,0.0,0.0,0.172,250,0.378137,43.0,0.0,0.0,0.0
3,model4_xgb,0.0,250,0.0,0.0,0.008,250,0.089263,2.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.292,250,0.455594,73.0,0.0,0.0,0.0


Allocations after round 29 :
 {'model3_lr': 0.7166666666666667, 'model5_lgbm': 0.26666666666666666, 'model1_rf': 0.016666666666666666, 'model2_lr': 0.0, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.344,250,0.475994,86.0,0.136,250,0.343476,34.0,0.344,0.344,0.716667
4,model5_lgbm,0.128,250,0.33476,32.0,0.072,250,0.259006,18.0,0.128,0.128,0.266667
0,model1_rf,0.008,250,0.089263,2.0,0.172,250,0.378137,43.0,0.008,0.008,0.016667
1,model2_lr,0.0,250,0.0,0.0,0.236,250,0.425474,59.0,0.0,0.0,0.0
3,model4_xgb,0.0,250,0.0,0.0,0.012,250,0.109104,3.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.3,250,0.459177,75.0,0.0,0.0,0.0


Allocations after round 30 :
 {'model3_lr': 0.7130434782608696, 'model5_lgbm': 0.2782608695652174, 'model1_rf': 0.008695652173913044, 'model2_lr': 0.0, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
2,model3_lr,0.328,250,0.470427,82.0,0.132,250,0.33917,33.0,0.328,0.328,0.713043
4,model5_lgbm,0.128,250,0.33476,32.0,0.032,250,0.176353,8.0,0.128,0.128,0.278261
0,model1_rf,0.004,250,0.063246,1.0,0.2,250,0.400802,50.0,0.004,0.004,0.008696
1,model2_lr,0.0,250,0.0,0.0,0.16,250,0.367341,40.0,0.0,0.0,0.0
3,model4_xgb,0.0,250,0.0,0.0,0.004,250,0.063246,1.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.26,250,0.439514,65.0,0.0,0.0,0.0


Allocations after round 31 :
 {'model5_lgbm': 1.0, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.156,250,0.363583,39.0,0.212,250,0.409545,53.0,0.156,0.156,1.0
0,model1_rf,0.0,250,0.0,0.0,0.296,250,0.457407,74.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.312,250,0.464239,78.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.156,250,0.363583,39.0,0.0,0.0,0.0
3,model4_xgb,0.0,250,0.0,0.0,0.184,250,0.388261,46.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.284,250,0.451841,71.0,0.0,0.0,0.0


Allocations after round 32 :
 {'model5_lgbm': 1.0, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.512,250,0.500859,128.0,0.0,250,0.0,0.0,0.512,0.512,1.0
0,model1_rf,0.0,250,0.0,0.0,0.308,250,0.462593,77.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.344,250,0.475994,86.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.512,250,0.500859,128.0,0.0,0.0,0.0
3,model4_xgb,0.0,250,0.0,0.0,0.012,250,0.109104,3.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.296,250,0.457407,74.0,0.0,0.0,0.0


Allocations after round 33 :
 {'model5_lgbm': 1.0, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.424,250,0.495182,106.0,0.0,250,0.0,0.0,0.424,0.424,1.0
0,model1_rf,0.0,250,0.0,0.0,0.232,250,0.422956,58.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.276,250,0.447914,69.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.424,250,0.495182,106.0,0.0,0.0,0.0
3,model4_xgb,0.0,250,0.0,0.0,0.016,250,0.125727,4.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.24,250,0.42794,60.0,0.0,0.0,0.0


Allocations after round 34 :
 {'model5_lgbm': 1.0, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.428,250,0.495781,107.0,0.0,250,0.0,0.0,0.428,0.428,1.0
0,model1_rf,0.0,250,0.0,0.0,0.268,250,0.443806,67.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.296,250,0.457407,74.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.428,250,0.495781,107.0,0.0,0.0,0.0
3,model4_xgb,0.0,250,0.0,0.0,0.012,250,0.109104,3.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.276,250,0.447914,69.0,0.0,0.0,0.0


Allocations after round 35 :
 {'model5_lgbm': 1.0, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.476,250,0.500426,119.0,0.0,250,0.0,0.0,0.476,0.476,1.0
0,model1_rf,0.0,250,0.0,0.0,0.276,250,0.447914,69.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.268,250,0.443806,67.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.476,250,0.500426,119.0,0.0,0.0,0.0
3,model4_xgb,0.0,250,0.0,0.0,0.016,250,0.125727,4.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.272,250,0.445883,68.0,0.0,0.0,0.0


Allocations after round 36 :
 {'model5_lgbm': 1.0, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.436,250,0.496882,109.0,0.0,250,0.0,0.0,0.436,0.436,1.0
0,model1_rf,0.0,250,0.0,0.0,0.26,250,0.439514,65.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.276,250,0.447914,69.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.436,250,0.496882,109.0,0.0,0.0,0.0
3,model4_xgb,0.0,250,0.0,0.0,0.008,250,0.089263,2.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.208,250,0.406691,52.0,0.0,0.0,0.0


Allocations after round 37 :
 {'model5_lgbm': 1.0, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model4_xgb': 0.0, 'model_baseline': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
4,model5_lgbm,0.336,250,0.473286,84.0,0.0,250,0.0,0.0,0.336,0.336,1.0
0,model1_rf,0.0,250,0.0,0.0,0.284,250,0.451841,71.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.316,250,0.465846,79.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.432,250,0.496348,108.0,0.0,0.0,0.0
3,model4_xgb,0.0,250,0.0,0.0,0.044,250,0.205507,11.0,0.0,0.0,0.0
5,model_baseline,0.0,250,0.0,0.0,0.24,250,0.42794,60.0,0.0,0.0,0.0


Allocations after round 38 :
 {'model4_xgb': 0.5909090909090908, 'model_baseline': 0.40909090909090906, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model5_lgbm': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
3,model4_xgb,0.156,250,0.363583,39.0,0.164,250,0.371018,41.0,0.156,0.156,0.590909
5,model_baseline,0.108,250,0.311003,27.0,0.212,250,0.409545,53.0,0.108,0.108,0.409091
0,model1_rf,0.0,250,0.0,0.0,0.32,250,0.467412,80.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.32,250,0.467412,80.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.32,250,0.467412,80.0,0.0,0.0,0.0
4,model5_lgbm,0.0,250,0.0,0.0,0.32,250,0.467412,80.0,0.0,0.0,0.0


Allocations after round 39 :
 {'model4_xgb': 0.6351351351351352, 'model_baseline': 0.36486486486486486, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model5_lgbm': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
3,model4_xgb,0.188,250,0.391496,47.0,0.076,250,0.26553,19.0,0.188,0.188,0.635135
5,model_baseline,0.108,250,0.311003,27.0,0.112,250,0.315999,28.0,0.108,0.108,0.364865
0,model1_rf,0.0,250,0.0,0.0,0.296,250,0.457407,74.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.296,250,0.457407,74.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.296,250,0.457407,74.0,0.0,0.0,0.0
4,model5_lgbm,0.0,250,0.0,0.0,0.296,250,0.457407,74.0,0.0,0.0,0.0


Allocations after round 40 :
 {'model4_xgb': 0.5595238095238095, 'model_baseline': 0.4404761904761905, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model5_lgbm': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
3,model4_xgb,0.188,250,0.391496,47.0,0.112,250,0.315999,28.0,0.188,0.188,0.559524
5,model_baseline,0.148,250,0.355812,37.0,0.136,250,0.343476,34.0,0.148,0.148,0.440476
0,model1_rf,0.0,250,0.0,0.0,0.308,250,0.462593,77.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.284,250,0.451841,71.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.336,250,0.473286,84.0,0.0,0.0,0.0
4,model5_lgbm,0.0,250,0.0,0.0,0.336,250,0.473286,84.0,0.0,0.0,0.0


Allocations after round 41 :
 {'model4_xgb': 0.5048543689320388, 'model_baseline': 0.49514563106796117, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model5_lgbm': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
3,model4_xgb,0.208,250,0.406691,52.0,0.16,250,0.367341,40.0,0.208,0.208,0.504854
5,model_baseline,0.204,250,0.403777,51.0,0.132,250,0.33917,33.0,0.204,0.204,0.495146
0,model1_rf,0.0,250,0.0,0.0,0.272,250,0.445883,68.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.284,250,0.451841,71.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.412,250,0.493182,103.0,0.0,0.0,0.0
4,model5_lgbm,0.0,250,0.0,0.0,0.412,250,0.493182,103.0,0.0,0.0,0.0


Allocations after round 42 :
 {'model4_xgb': 0.6136363636363636, 'model_baseline': 0.3863636363636364, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model5_lgbm': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
3,model4_xgb,0.216,250,0.41234,54.0,0.088,250,0.283863,22.0,0.216,0.216,0.613636
5,model_baseline,0.136,250,0.343476,34.0,0.148,250,0.355812,37.0,0.136,0.136,0.386364
0,model1_rf,0.0,250,0.0,0.0,0.22,250,0.415077,55.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.248,250,0.432718,62.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.352,250,0.478552,88.0,0.0,0.0,0.0
4,model5_lgbm,0.0,250,0.0,0.0,0.352,250,0.478552,88.0,0.0,0.0,0.0


Allocations after round 43 :
 {'model4_xgb': 0.6395348837209303, 'model_baseline': 0.3604651162790698, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model5_lgbm': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
3,model4_xgb,0.22,250,0.415077,55.0,0.076,250,0.26553,19.0,0.22,0.22,0.639535
5,model_baseline,0.124,250,0.330243,31.0,0.152,250,0.359741,38.0,0.124,0.124,0.360465
0,model1_rf,0.0,250,0.0,0.0,0.224,250,0.417758,56.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.236,250,0.425474,59.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.344,250,0.475994,86.0,0.0,0.0,0.0
4,model5_lgbm,0.0,250,0.0,0.0,0.344,250,0.475994,86.0,0.0,0.0,0.0


Allocations after round 44 :
 {'model4_xgb': 0.6521739130434783, 'model_baseline': 0.34782608695652173, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model5_lgbm': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
3,model4_xgb,0.24,250,0.42794,60.0,0.08,250,0.271837,20.0,0.24,0.24,0.652174
5,model_baseline,0.128,250,0.33476,32.0,0.148,250,0.355812,37.0,0.128,0.128,0.347826
0,model1_rf,0.0,250,0.0,0.0,0.236,250,0.425474,59.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.236,250,0.425474,59.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.368,250,0.483229,92.0,0.0,0.0,0.0
4,model5_lgbm,0.0,250,0.0,0.0,0.368,250,0.483229,92.0,0.0,0.0,0.0


Allocations after round 45 :
 {'model4_xgb': 0.6585365853658536, 'model_baseline': 0.3414634146341463, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model5_lgbm': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
3,model4_xgb,0.216,250,0.41234,54.0,0.1,250,0.300602,25.0,0.216,0.216,0.658537
5,model_baseline,0.112,250,0.315999,28.0,0.148,250,0.355812,37.0,0.112,0.112,0.341463
0,model1_rf,0.0,250,0.0,0.0,0.208,250,0.406691,52.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.18,250,0.384958,45.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.328,250,0.470427,82.0,0.0,0.0,0.0
4,model5_lgbm,0.0,250,0.0,0.0,0.328,250,0.470427,82.0,0.0,0.0,0.0


Allocations after round 46 :
 {'model4_xgb': 0.6956521739130435, 'model_baseline': 0.30434782608695654, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model5_lgbm': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
3,model4_xgb,0.256,250,0.437297,64.0,0.08,250,0.271837,20.0,0.256,0.256,0.695652
5,model_baseline,0.112,250,0.315999,28.0,0.18,250,0.384958,45.0,0.112,0.112,0.304348
0,model1_rf,0.0,250,0.0,0.0,0.228,250,0.420384,57.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.24,250,0.42794,60.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.368,250,0.483229,92.0,0.0,0.0,0.0
4,model5_lgbm,0.0,250,0.0,0.0,0.368,250,0.483229,92.0,0.0,0.0,0.0


Allocations after round 47 :
 {'model4_xgb': 0.8023255813953488, 'model_baseline': 0.19767441860465115, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model5_lgbm': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
3,model4_xgb,0.276,250,0.447914,69.0,0.048,250,0.214195,12.0,0.276,0.276,0.802326
5,model_baseline,0.068,250,0.252251,17.0,0.172,250,0.378137,43.0,0.068,0.068,0.197674
0,model1_rf,0.0,250,0.0,0.0,0.228,250,0.420384,57.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.216,250,0.41234,54.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.344,250,0.475994,86.0,0.0,0.0,0.0
4,model5_lgbm,0.0,250,0.0,0.0,0.344,250,0.475994,86.0,0.0,0.0,0.0


Allocations after round 48 :
 {'model4_xgb': 0.8275862068965517, 'model_baseline': 0.1724137931034483, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model5_lgbm': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
3,model4_xgb,0.288,250,0.453739,72.0,0.06,250,0.237963,15.0,0.288,0.288,0.827586
5,model_baseline,0.06,250,0.237963,15.0,0.168,250,0.374616,42.0,0.06,0.06,0.172414
0,model1_rf,0.0,250,0.0,0.0,0.2,250,0.400802,50.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.22,250,0.415077,55.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.348,250,0.477292,87.0,0.0,0.0,0.0
4,model5_lgbm,0.0,250,0.0,0.0,0.348,250,0.477292,87.0,0.0,0.0,0.0


Allocations after round 49 :
 {'model4_xgb': 0.7710843373493975, 'model_baseline': 0.2289156626506024, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model5_lgbm': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
3,model4_xgb,0.256,250,0.437297,64.0,0.076,250,0.26553,19.0,0.256,0.256,0.771084
5,model_baseline,0.076,250,0.26553,19.0,0.196,250,0.397765,49.0,0.076,0.076,0.228916
0,model1_rf,0.0,250,0.0,0.0,0.236,250,0.425474,59.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.248,250,0.432718,62.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.352,250,0.478552,88.0,0.0,0.0,0.0
4,model5_lgbm,0.0,250,0.0,0.0,0.352,250,0.478552,88.0,0.0,0.0,0.0


Allocations after round 50 :
 {'model_baseline': 1.0, 'model1_rf': 0.0, 'model2_lr': 0.0, 'model3_lr': 0.0, 'model4_xgb': 0.0, 'model5_lgbm': 0.0}


Unnamed: 0,model_id,target_reward_mean,target_reward_count,target_reward_std,target_reward_sum,target_regret_mean,target_regret_count,target_regret_std,target_regret_sum,score,exp_score,allocation
5,model_baseline,0.064,250,0.245244,16.0,0.172,250,0.378137,43.0,0.064,0.064,1.0
0,model1_rf,0.0,250,0.0,0.0,0.212,250,0.409545,53.0,0.0,0.0,0.0
1,model2_lr,0.0,250,0.0,0.0,0.204,250,0.403777,51.0,0.0,0.0,0.0
2,model3_lr,0.0,250,0.0,0.0,0.236,250,0.425474,59.0,0.0,0.0,0.0
3,model4_xgb,0.0,250,0.0,0.0,0.064,250,0.245244,16.0,0.0,0.0,0.0
4,model5_lgbm,0.0,250,0.0,0.0,0.236,250,0.425474,59.0,0.0,0.0,0.0


In [10]:
# Inspect the simulation log. Per the output below it is a dict keyed by integer
# (presumably the round number -- confirm), where each value is a list of
# per-model dicts holding model_id, total_pool_size, rewards, regrets and allocation.
timesteps

{1: [{'model_id': 'model1_rf',
   'total_pool_size': 250,
   'rewards': 12.0,
   'regrets': 53.0,
   'allocation': 0.16666666666666666},
  {'model_id': 'model2_lr',
   'total_pool_size': 250,
   'rewards': 18.0,
   'regrets': 44.0,
   'allocation': 0.16666666666666666},
  {'model_id': 'model3_lr',
   'total_pool_size': 250,
   'rewards': 24.0,
   'regrets': 56.0,
   'allocation': 0.16666666666666666},
  {'model_id': 'model4_xgb',
   'total_pool_size': 250,
   'rewards': 17.0,
   'regrets': 10.0,
   'allocation': 0.16666666666666666},
  {'model_id': 'model5_lgbm',
   'total_pool_size': 250,
   'rewards': 13.0,
   'regrets': 39.0,
   'allocation': 0.16666666666666666},
  {'model_id': 'model_baseline',
   'total_pool_size': 250,
   'rewards': 20.0,
   'regrets': 53.0,
   'allocation': 0.16666666666666666}],
 2: [{'model_id': 'model1_rf',
   'total_pool_size': 250,
   'rewards': 14.0,
   'regrets': 50.0,
   'allocation': 0.11538461538461538},
  {'model_id': 'model2_lr',
   'total_pool_size

In [11]:
# Persist the simulation sequence held in `timesteps` to disk as JSON.
# The file is created (or overwritten) relative to the current working directory.
# JSON object keys are always strings, so integer dict keys are written as "1", "2", ...
with open('timesteps.json', mode='w') as json_file:
    json.dump(obj=timesteps, fp=json_file)