In [1]:
import pandas as pd
import numpy as np
import re
import datetime
import os
import time

from darts.allocation import Allocator
from darts.bandit import Bandit

In [2]:
# Read the target pool (one row per target/model pair) from the data
# directory and report how long the read took.
file_loc = 'data/'
start = time.time()
allocation_pool_df = pd.read_csv(f"{file_loc}target_voter_universe.csv")
elapsed_seconds = time.time() - start
print(f"... completed in {elapsed_seconds} seconds")

... completed in 0.018276453018188477 seconds


In [3]:
# Preview the first ten rows of the target pool to sanity-check its columns.
allocation_pool_df.head(10)

Unnamed: 0,target_id,model_id,probability,target_round,target_result,target_reward
0,0,model1_rf,0.0,0,0,0
1,0,model2_lr,1.0,0,0,0
2,0,model3_lr,0.0,0,0,0
3,0,model4_xgb,1.0,0,0,0
4,0,model5_lgbm,0.0,0,0,0
5,0,model_baseline,1.0,0,0,0
6,1,model1_rf,0.0,0,0,0
7,1,model2_lr,0.0,0,0,0
8,1,model3_lr,0.0,0,0,0
9,1,model4_xgb,0.0,0,0,0


In [4]:
# Rough number of rounds the pool can support: rows / 6 rows per target
# (one per model in the preview above) / 250 targets per round.
# NOTE(review): 250 presumably mirrors `num_targets` set in the
# configuration cell further down -- keep the two in sync.
len(allocation_pool_df) / 6 / 250

54.54

In [5]:
# Read the observed target results (voter responses) from the data
# directory and report how long the read took.
file_loc = 'data/'
start = time.time()
target_results_df = pd.read_csv(f"{file_loc}voter_responses.csv")
elapsed_seconds = time.time() - start
print(f"... completed in {elapsed_seconds} seconds")

... completed in 0.003355741500854492 seconds


In [6]:
# Preview the first ten rows of the response file to sanity-check its columns.
target_results_df.head(10)

Unnamed: 0,target_id,target_result
0,0,1
1,1,0
2,3,0
3,4,0
4,16,1
5,22,0
6,26,0
7,27,1
8,29,0
9,31,0


In [7]:
# ---- Column names used throughout the simulation ----

# Identifier of an individual target.
target_id_col = 'target_id'

# Confidence ranking (e.g. probability) each model assigns to a target
# resulting in a 'reward' (e.g. the target outcome of the model).
confidence_rank_col = 'probability'

# Name of the 'arm' (model) a pool row belongs to.
arm_name_col = 'model_id'

# Round in which a target was pulled from the pool.
pool_round_col = 'target_round'

# Observed 'result' for a target.
result_col = 'target_result'

# 'Reward' credited to a model for a target.
reward_col = 'target_reward'

# ---- Simulation parameters ----

# Initial allocation across the pool of models; the shares must sum to 1.
# The simulation starts from an equal share for each of the six models.
allocation_method = {
    arm: (1/6)
    for arm in (
        'model_baseline',
        'model1_rf',
        'model2_lr',
        'model3_lr',
        'model4_xgb',
        'model5_lgbm',
    )
}

# Number of rounds to simulate.
simulation_rounds = 50

# Number of targets to pull in the first round.
num_targets = 250

In [8]:
# This is our voter response file. Here is where we retrieve our results after a
# targeting round to see how we did.
# Index it by the target id so it can be aligned against the pool by target.
# The guard keeps the cell safe to re-run: once the id column has been moved
# into the index, a second set_index on the same column would raise a KeyError.
if target_results_df.index.name != target_id_col:
    target_results_df = target_results_df.set_index(target_id_col)

In [9]:
# Simulate `simulation_rounds` rounds of allocate -> observe -> re-weight.
# Each round:
#   1. draws targets from the not-yet-targeted part of the pool via the Allocator,
#   2. stamps those targets with the round number in the master pool,
#   3. copies their observed results in from `target_results_df`,
#   4. recomputes rewards and lets the Bandit produce the next round's allocation.
# NOTE: this cell mutates `allocation_pool_df` and `allocation_method`. Reload the
# data and re-run the configuration cell before running it again from scratch.
for allocation_round in range(1, simulation_rounds+1):

    # Only rows that have not been assigned to a round yet (round == 0) are eligible.
    remaining_allocation_pool_df = allocation_pool_df[allocation_pool_df[pool_round_col] == 0].copy()

    # Stop early once the pool is exhausted instead of allocating from an empty frame.
    if remaining_allocation_pool_df.empty:
        print("No untargeted rows left in the pool; stopping before round", allocation_round)
        break

    # set up an allocator
    # TODO: I think this should be in the sequence of:
    #  allocation_pool_df, arm_name_col, target_id_col, confidence_rank_col, allocation_method, strategy, order
    # TODO: Would be good to explain that Allocator introduces a picked column to the dataframe. This is masked behavior.
    #       I may rely on this column if I know about it, but not sure if it will ever go away?
    allocator = Allocator(allocation_method, num_targets, remaining_allocation_pool_df, arm_name_col,
                          confidence_rank_col, target_id_col, strategy='round-robin',
                          order='best') # another idea is to provide a dictionary mapping to simplify the calling interface

    # retrieve targets
    targets = allocator.allocate_pool() # TODO: can we change this to allocator.retrieve_targets(num_targets) ? Also, might be nice to have a DF return option

    # convert to dataframe, keeping only the target id and the round it was pulled in
    targets_df = pd.DataFrame(targets, columns=[target_id_col, arm_name_col])
    targets_df[pool_round_col] = allocation_round
    targets_df = targets_df[[target_id_col, pool_round_col]]

    # join targets with universe of allocations: aligning on target id stamps the
    # round number onto every pool row belonging to a picked target
    targets_df = targets_df.set_index([target_id_col])
    allocation_pool_df = allocation_pool_df.set_index([target_id_col])
    allocation_pool_df.update(targets_df)
    targets_df = targets_df.reset_index(level=0)
    allocation_pool_df = allocation_pool_df.reset_index(level=0)

    # evaluate this allocation pool with the bandit
    last_allocation_pool_df = allocation_pool_df[allocation_pool_df[pool_round_col] == allocation_round]

    # sync up results
    # Only the result column is copied across. DataFrame.update overwrites every
    # column the two frames share, so passing the whole response frame would also
    # overwrite unrelated same-named columns (e.g. the 'Unnamed: 0' column that
    # both CSVs carry).
    last_allocation_pool_df = last_allocation_pool_df.set_index([target_id_col])
    last_allocation_pool_df.update(target_results_df[[result_col]])
    last_allocation_pool_df = last_allocation_pool_df.reset_index(level=0)

    # update results in the master pool set; the composite index lines up each
    # row of this round with its counterpart in the master pool
    allocation_pool_df = allocation_pool_df.set_index([target_id_col, arm_name_col, confidence_rank_col, pool_round_col, reward_col])
    last_allocation_pool_df = last_allocation_pool_df.set_index([target_id_col, arm_name_col, confidence_rank_col, pool_round_col, reward_col])
    allocation_pool_df.update(last_allocation_pool_df)
    allocation_pool_df = allocation_pool_df.reset_index()

    # update rewards from the results we synced
    allocation_pool_df[reward_col] = allocation_pool_df[result_col] * allocation_pool_df[confidence_rank_col]

    # prepare a dataframe with just our results of this round for the bandit to evaluate
    results_df = allocation_pool_df[allocation_pool_df[pool_round_col] == allocation_round]

    # set up a multi-arm bandit and calculate allocations to each arm.
    bandit = Bandit(results_df, arm_name_col, reward_col, policy = 'Bayes_UCB')

    # use these allocations for the next round
    allocation_method = bandit.make_allocs().set_index(arm_name_col)['allocation'].to_dict()
    print("Allocations from round", allocation_round, ":\n", bandit.allocs)

    # useful for outputting the result of each timestep into its own directory
    # and keeping an updated master file


    #allocation_pool_df[[target_id_col, arm_name_col, confidence_rank_col, pool_round_col, reward_col, result_col]].sort_values(by=[target_id_col,arm_name_col]).to_csv(file_loc + 'simulated_target_universe_updates.csv', index=False)

    #round_loc = file_loc + 'round_{:05}/'.format(allocation_round)
    #if not os.path.exists(round_loc):
    #    os.makedirs(round_loc)
    #    
    #targets_df.sort_values(by=[target_id_col])[target_id_col].to_csv(round_loc + 'targets.csv', index=False)
    #allocation_pool_df[[target_id_col, arm_name_col, confidence_rank_col, pool_round_col, reward_col, result_col]].sort_values(by=[target_id_col,arm_name_col]).to_csv(round_loc + 'simulated_allocation_round_{:05}'.format(allocation_round) +'.csv', index=False)
    

Allocations from round 1 :
          model_id   mean  count       std     score  exp_score  allocation
3      model4_xgb  0.376    250  0.485352  0.436165   0.436165    0.243297
4     model5_lgbm  0.260    250  0.439514  0.314483   0.314483    0.175422
1       model2_lr  0.240    250  0.427940  0.293048   0.293048    0.163465
5  model_baseline  0.204    250  0.403777  0.254053   0.254053    0.141713
0       model1_rf  0.204    250  0.403777  0.254053   0.254053    0.141713
2       model3_lr  0.192    250  0.394663  0.240923   0.240923    0.134389
Allocations from round 2 :
          model_id   mean  count       std     score  exp_score  allocation
3      model4_xgb  0.408    250  0.492449  0.469045   0.469045    0.239458
4     model5_lgbm  0.288    250  0.453739  0.344246   0.344246    0.175746
1       model2_lr  0.268    250  0.443806  0.323015   0.323015    0.164907
0       model1_rf  0.260    250  0.439514  0.314483   0.314483    0.160551
5  model_baseline  0.220    250  0.415077  0