In [83]:
import pandas as pd
import numpy as np
import re
import datetime
import os
import time
import json

from darts.allocation import Allocator
from darts.bandit import Bandit

In [84]:
# Read the universe of target/model rows that the simulation allocates from,
# and report how long the read took.
# NOTE(review): the 'Unnamed: 0' column in the preview below suggests the CSV
# carries a saved index - consider index_col=0 if it is not needed.
file_loc = 'data/'
start = time.time()
allocation_pool_df = pd.read_csv(f"{file_loc}target_voter_universe.csv")
print(f"... completed in {time.time() - start} seconds")

... completed in 0.028928518295288086 seconds


In [85]:
# Preview the first ten rows of the target pool.
allocation_pool_df.head(n=10)

Unnamed: 0,target_id,model_id,probability,target_round,target_result,target_reward,target_regret
0,0,model1_rf,0.0,0,0,0,0
1,0,model2_lr,1.0,0,0,0,0
2,0,model3_lr,0.0,0,0,0,0
3,0,model4_xgb,1.0,0,0,0,0
4,0,model5_lgbm,0.0,0,0,0,0
5,0,model_baseline,1.0,0,0,0,0
6,1,model1_rf,0.0,0,0,0,0
7,1,model2_lr,0.0,0,0,0,0
8,1,model3_lr,0.0,0,0,0,0
9,1,model4_xgb,0.0,0,0,0,0


In [86]:
# Rough count of rounds the pool can feed: rows / 6 / 250.
# NOTE(review): 6 appears to be the number of models per target and 250 the
# per-round target count configured further down - confirm.
len(allocation_pool_df) / 6 / 250

54.54

In [87]:
# Read the recorded responses (one result per target id) and report how long
# the read took.
file_loc = 'data/'
start = time.time()
target_results_df = pd.read_csv(f"{file_loc}voter_responses.csv")
print(f"... completed in {time.time() - start} seconds")

... completed in 0.007098674774169922 seconds


In [88]:
# Preview the first ten recorded responses.
target_results_df.head(n=10)

Unnamed: 0,target_id,target_result
0,0,1
1,1,0
2,3,0
3,4,0
4,16,1
5,22,0
6,26,0
7,27,1
8,29,0
9,31,0


In [89]:
# --- Column names used throughout the simulation -------------------------

# Identifier of an individual target.
target_id_col = 'target_id'

# Confidence ranking (e.g. probability) a model assigns to the target
# producing a 'reward' (e.g. the target outcome of the model).
confidence_rank_col = 'probability'

# Name of the 'arm' (model) a row of the target pool belongs to.
arm_name_col = 'model_id'

# Round in which a target was pulled from the pool.
pool_round_col = 'target_round'

# Observed 'result' for the target.
result_col = 'target_result'

# 'Reward' credited for the row.
reward_col = 'target_reward'

# 'Regret' charged for the row.
regret_col = 'target_regret'

# Flag marking which arm was 'picked' for the target.
picked_col = 'model_picked'

# --- Initial allocation ---------------------------------------------------

# Share of each round's targets given to every model. These must sum to 1.
# For the simulation, every model starts with an equal share.
allocation_method = dict.fromkeys(
    [
        'model_baseline',
        'model1_rf',
        'model2_lr',
        'model3_lr',
        'model4_xgb',
        'model5_lgbm',
    ],
    1 / 6,
)

# --- Simulation size --------------------------------------------------------

# Number of rounds to simulate.
simulation_rounds = 50

# Number of targets to pull in the first round.
num_targets = 250

# --- Allocation behaviour ---------------------------------------------------

# Allocation policy.
allocation_policy = 'UCB1'

# Allocation strategy.
allocation_strategy = 'round-robin'

# Allocation order.
allocation_order = 'best'

# --- Bandit parameters ------------------------------------------------------
ucb_scale = 1.96
epsilon = 0.1
greed_factor = 1

In [90]:
# Index the file of responses by target id so results can be aligned onto the
# pool when they are synced in.
# Guarded so that re-running this cell is a no-op instead of a KeyError: after
# the first run the id column has already been moved into the index.
if target_id_col in target_results_df.columns:
    target_results_df = target_results_df.set_index([target_id_col])

In [91]:
# Grid of simulations to run, keyed by an arbitrary sequence number.
# Each entry names a bandit policy, the option values to sweep for it, and the
# (strategy, order) allocation pairings to combine with every option value.
# NOTE: the 'episilon' spelling is the key the simulation loop looks up, so it
# is kept as-is here.
permutations = {
    number: {
        'policy': policy_name,
        'options': policy_options,
        'allocations': [('round-robin', 'best'), ('round-robin', 'random'), ('greedy', 'best')],
    }
    for number, (policy_name, policy_options) in enumerate(
        [
            ('UCB1', {}),
            ('Bayes_UCB', {'ucb_scale': [1, 2, 3]}),
            ('epsilon_greedy', {'episilon': [0.05, 0.10, 0.25],
                                'greed_factor': [1, 10, 100]}),
        ],
        start=1,
    )
}


In [92]:
def make_perm_idx(idx, policy, ucb_scale, epsilon, greed_factor, strategy, order):
    """Build the label that identifies one simulation permutation.

    The label is a tuple of ``(name, value)`` pairs, in a fixed order, so it
    can be used as a dictionary key.

    Args:
        idx: Sequence number of the permutation.
        policy: Name of the bandit policy.
        ucb_scale: UCB scale used for the run, or None when not applicable.
        epsilon: Epsilon used for the run, or None when not applicable.
        greed_factor: Greed factor used for the run, or None when not applicable.
        strategy: Allocation strategy.
        order: Allocation order.

    Returns:
        tuple: Seven ``(name, value)`` pairs describing the permutation.
    """
    names = ('perm_idx', 'policy', 'ucb_scale', 'epsilon', 'greed_factor', 'strategy', 'order')
    values = (idx, policy, ucb_scale, epsilon, greed_factor, strategy, order)
    return tuple(zip(names, values))

In [95]:
def perform_simulation(allocation_pool_df, target_results_df, allocation_method, allocation_policy, ucb_scale, epsilon, greed_factor, allocation_strategy, allocation_order):
    """Simulate repeated rounds of allocating targets to arms with a bandit.

    Every round: targets are drawn from the not-yet-used part of the pool by an
    ``Allocator``, the recorded results for those targets are synced in,
    rewards and regrets are computed, per-arm statistics are stored, and a
    ``Bandit`` produces the allocation used for the next round.

    Relies on notebook-level configuration: ``simulation_rounds``,
    ``num_targets`` and the ``*_col`` column-name variables.

    Args:
        allocation_pool_df: Pool of (target, arm) rows. It is copied, so the
            caller's frame is left untouched.
        target_results_df: Recorded results; expected to be indexed by the
            target id column so it can be aligned onto the pool.
        allocation_method: Mapping of arm name to its share for the first round.
        allocation_policy: Policy name handed to ``Bandit``.
        ucb_scale: Passed through to ``Bandit``.
        epsilon: Passed through to ``Bandit``.
        greed_factor: Passed through to ``Bandit``.
        allocation_strategy: Strategy handed to ``Allocator``.
        allocation_order: Order handed to ``Allocator``.

    Returns:
        dict: round number -> list of per-arm records with the keys
        ``total_pool_size``, ``rewards``, ``regrets``, ``allocation``,
        ``cummulative_rewards`` and ``cummulative_regrets`` (plus the arm name).
    """

    def _rewards_and_regrets(pool_df, picked_only):
        # Reward = true positive, regret = false negative. With picked_only the
        # reward is limited to rows of the picked arm, and the regret to rows
        # of arms that were not picked.
        # NOTE: only false negatives are counted as regret; false positives are not.
        outcome = pool_df[result_col]
        confidence = pool_df[confidence_rank_col]
        if picked_only:
            picked = pool_df[picked_col]
            return outcome * confidence * picked, outcome * (1 - confidence) * (1 - picked)
        return outcome * confidence, outcome * (1 - confidence)

    # Work on a private copy so the caller's DataFrame is not modified.
    allocation_pool_df = allocation_pool_df.copy()

    # Set up defaults - nothing picked yet
    allocation_pool_df[picked_col] = 0
    allocation_pool_df[pool_round_col] = 0
    allocation_pool_df[reward_col] = 0
    allocation_pool_df[regret_col] = 0
    target_reward_cummulative_sum = 0
    target_regret_cummulative_sum = 0

    # Columns used as the alignment key when round results are merged back.
    pool_key_cols = [target_id_col, arm_name_col, confidence_rank_col, pool_round_col,
                     reward_col, regret_col, picked_col]

    timesteps = {}

    # Defined up front so the summary print below also works for zero rounds.
    allocation_round = 0

    for allocation_round in range(1, simulation_rounds + 1):

        # Only rows that have not been assigned to a round are still available.
        remaining_allocation_pool_df = allocation_pool_df[allocation_pool_df[pool_round_col] == 0].copy()

        # set up an allocator
        # TODO: I think this should be in the sequence of:
        #  allocation_pool_df, arm_name_col, target_id_col, confidence_rank_col, allocation_method, strategy, order
        # TODO: Would be good to explain that Allocator introduces a picked column to the dataframe. This is masked behavior.
        #       I may rely on this column if I know about it, but not sure if it will ever go away?
        # Another idea is to provide a dictionary mapping to simplify the calling interface.
        allocator = Allocator(allocation_method, num_targets, remaining_allocation_pool_df, arm_name_col,
                              confidence_rank_col, target_id_col, strategy=allocation_strategy,
                              order=allocation_order)

        # retrieve targets
        # TODO: can we change this to allocator.retrieve_targets(num_targets) ? Also, might be nice to have a DF return option
        targets = allocator.allocate_pool()

        # (target id, arm) pairs chosen this round
        picks_df = pd.DataFrame(targets, columns=[target_id_col, arm_name_col])

        # stamp every row of the chosen targets with this round
        round_marks_df = picks_df[[target_id_col]].copy()
        round_marks_df[pool_round_col] = allocation_round
        round_marks_df = round_marks_df.set_index([target_id_col])
        allocation_pool_df = allocation_pool_df.set_index([target_id_col])
        allocation_pool_df.update(round_marks_df)
        allocation_pool_df = allocation_pool_df.reset_index()

        # flag the specific arm that was picked for each chosen target
        picked_marks_df = picks_df.copy()
        picked_marks_df[picked_col] = 1
        picked_marks_df = picked_marks_df.set_index([target_id_col, arm_name_col])
        allocation_pool_df = allocation_pool_df.set_index([target_id_col, arm_name_col])
        allocation_pool_df.update(picked_marks_df)
        allocation_pool_df = allocation_pool_df.reset_index()

        # rows belonging to this round
        last_allocation_pool_df = allocation_pool_df[allocation_pool_df[pool_round_col] == allocation_round]

        # sync up results
        last_allocation_pool_df = last_allocation_pool_df.set_index([target_id_col])
        last_allocation_pool_df.update(target_results_df)
        last_allocation_pool_df = last_allocation_pool_df.reset_index(level=0)

        # update results in the master pool set
        allocation_pool_df = allocation_pool_df.set_index(pool_key_cols)
        last_allocation_pool_df = last_allocation_pool_df.set_index(pool_key_cols)
        allocation_pool_df.update(last_allocation_pool_df)
        allocation_pool_df = allocation_pool_df.reset_index()

        # update rewards and regrets from the results we synced
        allocation_pool_df[reward_col], allocation_pool_df[regret_col] = _rewards_and_regrets(
            allocation_pool_df, picked_only=True)

        # prepare a dataframe with just our results of this round for the bandit to evaluate
        results_df = allocation_pool_df[allocation_pool_df[pool_round_col] == allocation_round].copy()

        has_rewarded_result = ((results_df[result_col] == 1) & (results_df[reward_col] == 1)).any()
        if not has_rewarded_result:
            # no more results left after a steady state - reset selections to
            # include possibilities from the other models
            allocation_pool_df[reward_col], allocation_pool_df[regret_col] = _rewards_and_regrets(
                allocation_pool_df, picked_only=False)
            results_df = allocation_pool_df[allocation_pool_df[pool_round_col] == allocation_round].copy()

        # prepare dictionary of results for timestep tracking
        # (groupby already leaves the arm name as the index)
        stats = results_df[[arm_name_col, reward_col, regret_col]].groupby(arm_name_col).agg(
            {reward_col: ['count', 'sum'], regret_col: ['sum']})
        stats.columns = ['_'.join(tup).rstrip('_') for tup in stats.columns.to_flat_index()]
        # Derive the flattened names from the configured columns instead of hard-coding them.
        stats = stats.rename(columns={f'{reward_col}_count': 'total_pool_size',
                                      f'{reward_col}_sum': 'rewards',
                                      f'{regret_col}_sum': 'regrets'})
        stats['allocation'] = 0.0
        allocs = pd.DataFrame.from_dict(allocation_method, orient='index', columns=['allocation'])
        allocs.index.name = arm_name_col
        stats.update(allocs)
        stats = stats.reset_index()
        target_reward_cummulative_sum = pd.Series(target_reward_cummulative_sum + stats['rewards'])
        stats['cummulative_rewards'] = target_reward_cummulative_sum
        target_regret_cummulative_sum = pd.Series(target_regret_cummulative_sum + stats['regrets'])
        stats['cummulative_regrets'] = target_regret_cummulative_sum
        timesteps[allocation_round] = stats.to_dict(orient='records')

        # set up a multi-arm bandit and calculate allocations to each arm.
        bandit = Bandit(results_df, arm_name_col, reward_col, regret_col, policy=allocation_policy,
                        t=allocation_round, ucb_scale=ucb_scale, epsilon=epsilon, greed_factor=greed_factor)

        # use these allocations for the next round
        allocation_method = bandit.get_new_allocations()

    # Printed once, after the final round.
    print("Allocations after round", allocation_round, ":\n", allocation_method)

    return timesteps

In [96]:
# Run every policy / option / allocation permutation and collect its per-round stats.
#
# The swept values use their own names (perm_*) so that:
#   * the epsilon being swept is the epsilon actually passed to the simulation
#     (previously the config-level `epsilon` was always passed, so every
#     epsilon_greedy run used the same value regardless of its label), and
#   * the config-level ucb_scale / epsilon / greed_factor are not overwritten
#     by loop variables.
# Options a policy does not sweep are labelled None and fall back to the
# config-level defaults when calling perform_simulation.
i = 1
simulations = {}
for perm in permutations.values():
    policy = perm['policy']
    options = perm['options']

    # (ucb_scale, epsilon, greed_factor) combinations to try for this policy
    if policy == 'UCB1':
        option_grid = [(None, None, None)]
    elif policy == 'Bayes_UCB':
        option_grid = [(scale, None, None) for scale in options['ucb_scale']]
    elif policy == 'epsilon_greedy':
        # accept the correctly spelled key as well as the legacy 'episilon'
        epsilon_values = options['epsilon'] if 'epsilon' in options else options['episilon']
        option_grid = [(None, eps, greed)
                       for eps in epsilon_values
                       for greed in options['greed_factor']]
    else:
        # policies without a known option grid are skipped
        option_grid = []

    for strategy, order in perm['allocations']:
        for perm_ucb_scale, perm_epsilon, perm_greed_factor in option_grid:
            perm_idx = make_perm_idx(i, policy, perm_ucb_scale, perm_epsilon, perm_greed_factor,
                                     strategy, order)
            print(perm_idx)
            simulations[perm_idx] = perform_simulation(
                allocation_pool_df, target_results_df, allocation_method, policy,
                ucb_scale if perm_ucb_scale is None else perm_ucb_scale,
                epsilon if perm_epsilon is None else perm_epsilon,
                greed_factor if perm_greed_factor is None else perm_greed_factor,
                strategy, order)
            i = i + 1

(('perm_idx', 1), ('policy', 'UCB1'), ('ucb_scale', None), ('epsilon', None), ('greed_factor', None), ('strategy', 'round-robin'), ('order', 'best'))
Allocations after round 50 :
 {'model1_rf': 0.17750185935610696, 'model2_lr': 0.17750185935610696, 'model5_lgbm': 0.17027839756314678, 'model3_lr': 0.16305493577018657, 'model4_xgb': 0.15583147397722638, 'model_baseline': 0.15583147397722638}
(('perm_idx', 2), ('policy', 'UCB1'), ('ucb_scale', None), ('epsilon', None), ('greed_factor', None), ('strategy', 'round-robin'), ('order', 'random'))
Allocations after round 50 :
 {'model4_xgb': 0.1964696828677392, 'model1_rf': 0.16666666666666666, 'model2_lr': 0.1624090929236563, 'model5_lgbm': 0.1624090929236563, 'model_baseline': 0.15815151918064596, 'model3_lr': 0.1538939454376356}
(('perm_idx', 3), ('policy', 'UCB1'), ('ucb_scale', None), ('epsilon', None), ('greed_factor', None), ('strategy', 'greedy'), ('order', 'best'))
Allocations after round 50 :
 {'model1_rf': 0.19989420882502837, 'model

In [98]:
def key_to_json(data):
    """Convert a dictionary key into something ``json`` accepts as an object key.

    Args:
        data: The key. None, bool, int, float and str are returned unchanged;
            tuple and frozenset keys are replaced by their ``str()`` form.

    Returns:
        The key itself, or its string representation for tuple/frozenset keys.

    Raises:
        TypeError: If the key is of any other type.
    """
    # float is accepted as well: json can encode float keys just like ints.
    if data is None or isinstance(data, (bool, int, float, str)):
        return data
    if isinstance(data, (tuple, frozenset)):
        return str(data)
    raise TypeError(f"Unsupported dictionary key type for JSON: {type(data).__name__}")

def to_json(data):
    """Recursively convert a nested structure into JSON-serializable values.

    Args:
        data: None, bool, int, float, str, list, tuple, range, set, frozenset
            or dict, nested arbitrarily.

    Returns:
        Scalars unchanged; lists and tuples with every element converted;
        ranges as lists; sets and frozensets as sorted lists; dicts with keys
        passed through ``key_to_json`` and values converted.

    Raises:
        TypeError: If a value (or a dict key) has an unsupported type.
    """
    # float is a valid JSON scalar and must not be rejected.
    if data is None or isinstance(data, (bool, int, float, str)):
        return data
    # Recurse into sequences so nested dicts/sets inside them are converted too.
    if isinstance(data, list):
        return [to_json(item) for item in data]
    if isinstance(data, tuple):
        return tuple(to_json(item) for item in data)
    # range objects are not JSON serializable; emit their values.
    if isinstance(data, range):
        return list(data)
    if isinstance(data, (set, frozenset)):
        return [to_json(item) for item in sorted(data)]
    if isinstance(data, dict):
        return {key_to_json(key): to_json(value) for key, value in data.items()}
    raise TypeError(f"Object of type {type(data).__name__} is not JSON serializable")

In [99]:
# write simulation sequence to a json file
# Serialize before opening the file: opening in 'w' mode truncates it, so a
# conversion or encoding error would otherwise leave an empty or partial file.
simulations_json = json.dumps(to_json(simulations))
with open('simulations.json', 'w') as json_file:
    json_file.write(simulations_json)