In [166]:
import pandas as pd
import numpy as np
import re
import datetime
import os
import time
import json

from darts.allocation import Allocator
from darts.bandit import Bandit

In [167]:
# Read the target pool from the data directory and report how long the read took.
file_loc = 'data/'
start = time.time()
allocation_pool_df = pd.read_csv(file_loc + 'target_voter_universe.csv')
print(f"... completed in {time.time() - start} seconds")

... completed in 0.024604082107543945 seconds


In [168]:
# Preview the first ten rows of the target pool to check the columns that were loaded.
allocation_pool_df.head(10)

Unnamed: 0,target_id,model_id,probability,target_round,target_result,target_reward,target_regret
0,0,model1_rf,0.0,0,0,0,0
1,0,model2_lr,1.0,0,0,0,0
2,0,model3_lr,0.0,0,0,0,0
3,0,model4_xgb,1.0,0,0,0,0
4,0,model5_lgbm,0.0,0,0,0,0
5,0,model_baseline,1.0,0,0,0,0
6,1,model1_rf,0.0,0,0,0,0
7,1,model2_lr,0.0,0,0,0,0
8,1,model3_lr,0.0,0,0,0,0
9,1,model4_xgb,0.0,0,0,0,0


In [169]:
# Rough count of allocation rounds the pool can supply: pool rows / 6 / 250.
# NOTE(review): 6 presumably is the number of model rows per target and 250 the
# number of targets pulled per round (both match the configuration cell) - confirm.
len(allocation_pool_df) / 6 / 250

54.54

In [170]:
# Read the observed target results from the data directory and report how long the read took.
file_loc = 'data/'
start = time.time()
target_results_df = pd.read_csv(file_loc + 'voter_responses.csv')
print(f"... completed in {time.time() - start} seconds")

... completed in 0.0019414424896240234 seconds


In [171]:
# Preview the first ten rows of the observed results to check the columns that were loaded.
target_results_df.head(10)

Unnamed: 0,target_id,target_result
0,0,1
1,1,0
2,3,0
3,4,0
4,16,1
5,22,0
6,26,0
7,27,1
8,29,0
9,31,0


In [172]:
# --- Column names used throughout the simulation ---------------------------

# Identifier of an individual target.
target_id_col = 'target_id'

# Confidence ranking (e.g. probability) a model assigns to a target
# producing a 'reward' (i.e. the outcome the model is predicting).
confidence_rank_col = 'probability'

# Name of the 'arm' (model) a pool row belongs to.
arm_name_col = 'model_id'

# Round in which a target was pulled from the pool.
pool_round_col = 'target_round'

# Observed 'result' for a target.
result_col = 'target_result'

# 'Reward' credited to an arm for a target.
reward_col = 'target_reward'

# 'Regret' charged to an arm for a target.
regret_col = 'target_regret'

# --- Simulation parameters -------------------------------------------------

# Initial share of each round's targets given to every model. Shares must sum
# to 1; the simulation starts from an equal split across the six models.
allocation_method = dict.fromkeys(
    (
        'model_baseline',
        'model1_rf',
        'model2_lr',
        'model3_lr',
        'model4_xgb',
        'model5_lgbm',
    ),
    1/6,
)

# Number of rounds to simulate.
simulation_rounds = 50

# Number of targets to pull in the first round.
num_targets = 250

In [173]:
# Index the observed results by target id so that DataFrame.update() in the
# simulation loop can align them with pool rows on that id.
# Guarded so the cell can be re-run safely: once the id has become the index it
# is no longer a column, and an unconditional set_index would raise a KeyError.
if target_id_col in target_results_df.columns:
    target_results_df = target_results_df.set_index([target_id_col])

In [174]:
# Per-round summary records, keyed by round number; written to JSON after the loop.
timesteps = {}

# Each iteration: allocate targets for the round, sync their observed results,
# score rewards/regrets, record per-arm stats, and let the bandit produce the
# allocation used by the next round.
# NOTE: this loop mutates `allocation_pool_df` and rebinds `allocation_method`,
# so re-running the cell continues from the previous state rather than starting
# over; reload the data and re-run the configuration cell for a fresh simulation.
for allocation_round in range(1, simulation_rounds+1):

    # only rows whose round marker is still 0 (not yet pulled) are eligible
    remaining_allocation_pool_df = allocation_pool_df[allocation_pool_df[pool_round_col] == 0].copy()

    # set up an allocator
    # TODO: I think this should be in the sequence of:
    #  allocation_pool_df, arm_name_col, target_id_col, confidence_rank_col, allocation_method, strategy, order
    # TODO: Would be good to explain that Allocator introduces a picked column to the dataframe. This is masked behavior.
    #       I may rely on this column if I know about it, but not sure if it will ever go away?
    allocator = Allocator(allocation_method, num_targets, remaining_allocation_pool_df, arm_name_col,
                          confidence_rank_col, target_id_col, strategy='round-robin',
                          order='best') # another idea is to provide a dictionary mapping to simplify the calling interface

    # retrieve targets
    targets = allocator.allocate_pool() # TODO: can we change this to allocator.retrieve_targets(num_targets) ? Also, might be nice to have a DF return option

    # convert to dataframe, keeping only the target id tagged with the current round
    targets_df = pd.DataFrame(targets, columns=[target_id_col, arm_name_col])
    targets_df[pool_round_col] = allocation_round
    targets_df = targets_df[[target_id_col, pool_round_col]]

    # join targets with universe of allocations: stamp the round number onto
    # every pool row that belongs to a selected target id
    targets_df = targets_df.set_index([target_id_col])
    allocation_pool_df = allocation_pool_df.set_index([target_id_col])
    allocation_pool_df.update(targets_df)
    targets_df = targets_df.reset_index(level=0)
    allocation_pool_df = allocation_pool_df.reset_index(level=0)

    # evaluate this allocation pool with the bandit
    last_allocation_pool_df = allocation_pool_df[allocation_pool_df[pool_round_col] == allocation_round]

    # sync up results (target_results_df is expected to be indexed by target id already)
    last_allocation_pool_df = last_allocation_pool_df.set_index([target_id_col])
    last_allocation_pool_df.update(target_results_df)
    last_allocation_pool_df = last_allocation_pool_df.reset_index(level=0)

    # update results in the master pool set; every column except the result is
    # moved into the index so update() only overwrites the result values
    pool_key_cols = [target_id_col, arm_name_col, confidence_rank_col, pool_round_col, reward_col, regret_col]
    allocation_pool_df = allocation_pool_df.set_index(pool_key_cols)
    last_allocation_pool_df = last_allocation_pool_df.set_index(pool_key_cols)
    allocation_pool_df.update(last_allocation_pool_df)
    allocation_pool_df = allocation_pool_df.reset_index()

    # confusion terms per row, from the observed result and the model's confidence
    # (recomputed for the whole pool; only this round's rows are passed to the bandit)
    TP = allocation_pool_df[result_col] * allocation_pool_df[confidence_rank_col]
    FN = allocation_pool_df[result_col] * (1 - allocation_pool_df[confidence_rank_col])
    FP = (1 - allocation_pool_df[result_col]) * allocation_pool_df[confidence_rank_col]

    # update rewards from the results we synced
    allocation_pool_df[reward_col] = TP

    # update regrets from the results we synced
    # regrets are false positives + false negatives (type 1 and type 2 error)
    # (previously computed as FP + FP, which dropped the false negatives entirely)
    allocation_pool_df[regret_col] = FP + FN

    # prepare a dataframe with just our results of this round for the bandit to evaluate
    results_df = allocation_pool_df[allocation_pool_df[pool_round_col] == allocation_round].copy()

    # prepare dictionary of results for timestep tracking
    stats = results_df[[arm_name_col, reward_col, regret_col]].groupby(arm_name_col).agg({reward_col:['count','sum'],
                                                                                          regret_col:['sum']})
    # flatten the (column, aggregation) MultiIndex into '<column>_<aggregation>' names
    stats.columns = stats.columns.to_flat_index()
    stats.columns = ['_'.join(tup).rstrip('_') for tup in stats.columns.values]
    stats = stats.reset_index()
    # derive the flattened names from the configured columns so the rename
    # keeps working if reward_col / regret_col are changed
    stats = stats.rename(columns={
        f'{reward_col}_count': 'total_pool_size',
        f'{reward_col}_sum': 'rewards',
        f'{regret_col}_sum': 'regrets',
    })
    stats = stats.set_index([arm_name_col])
    # record the allocation that was in effect for this round
    stats['allocation'] = 0.0
    allocs = pd.DataFrame.from_dict(allocation_method, orient='index', columns = ['allocation'])
    allocs.index.name=arm_name_col
    stats.update(allocs)
    stats = stats.reset_index()
    timesteps[allocation_round] = stats.to_dict(orient='records')

    # set up a multi-arm bandit and calculate allocations to each arm.
    bandit = Bandit(results_df, arm_name_col, reward_col, regret_col, policy = 'Bayes_UCB')

    # use these allocations for the next round
    allocation_method = bandit.get_new_allocations()# bandit.make_allocs().set_index(arm_name_col)['allocation'].to_dict()
    print("Allocations from round", allocation_round, ":\n", allocation_method)
    #display(bandit.get_allocation_stats())

    # useful for outputting the result of each timestep into its own directory
    # and keeping an updated master file


    #allocation_pool_df[[target_id_col, arm_name_col, confidence_rank_col, pool_round_col, reward_col, result_col]].sort_values(by=[target_id_col,arm_name_col]).to_csv(file_loc + 'simulated_target_universe_updates.csv', index=False)

    #round_loc = file_loc + 'round_{:05}/'.format(allocation_round)
    #if not os.path.exists(round_loc):
    #    os.makedirs(round_loc)
    #
    #targets_df.sort_values(by=[target_id_col])[target_id_col].to_csv(round_loc + 'targets.csv', index=False)
    #allocation_pool_df[[target_id_col, arm_name_col, confidence_rank_col, pool_round_col, reward_col, result_col]].sort_values(by=[target_id_col,arm_name_col]).to_csv(round_loc + 'simulated_allocation_round_{:05}'.format(allocation_round) +'.csv', index=False)
    

Statistics from round 1 :
 {'model4_xgb': 0.2432972490632955, 'model5_lgbm': 0.17542172568363634, 'model2_lr': 0.16346519292097564, 'model_baseline': 0.14171325724470418, 'model1_rf': 0.14171325724470415, 'model3_lr': 0.13438931784268407}
Statistics from round 2 :
 {'model4_xgb': 0.23945849006685005, 'model5_lgbm': 0.1757458806131036, 'model2_lr': 0.16490676633437204, 'model1_rf': 0.160550958243706, 'model_baseline': 0.13858350997884208, 'model3_lr': 0.12075439476312623}
Statistics from round 3 :
 {'model4_xgb': 0.24623573978615584, 'model5_lgbm': 0.1992260827547115, 'model2_lr': 0.1533385361516783, 'model1_rf': 0.14867834237471825, 'model_baseline': 0.13931299914871087, 'model3_lr': 0.1132082997840253}
Statistics from round 4 :
 {'model4_xgb': 0.23210412832923263, 'model5_lgbm': 0.1999596671569204, 'model2_lr': 0.1723409047981013, 'model1_rf': 0.1672727724193213, 'model_baseline': 0.13132323885915712, 'model3_lr': 0.09699928843726718}
Statistics from round 5 :
 {'model4_xgb': 0.252135

In [178]:
# Persist the per-round statistics collected during the simulation as JSON.
with open('timesteps.json', 'w') as out_fh:
    json.dump(timesteps, out_fh)