# MAB experiment

In this notebook we simulate a multi-armed bandit (MAB) experiment and compare different approaches to solving it.

In [1]:
import os
import sys

# Directory appended to sys.path so that the `contextual_mab` imports below resolve.
# The original developer-machine location is kept as the default so existing setups
# behave exactly as before; set the CONTEXTUAL_MAB_PATH environment variable to point
# at a different checkout instead of editing this cell.
_DEFAULT_MODULES_DIR = '/Users/fci02/Documents/GitHub/decisioning-analysis/test_and_learn/production_modules/'
modules_dir = os.environ.get('CONTEXTUAL_MAB_PATH', _DEFAULT_MODULES_DIR)

# Guarded so that re-running the cell does not keep adding duplicate entries.
if modules_dir not in sys.path:
    sys.path.append(modules_dir)

import numpy as np
import pandas as pd

from scipy.stats import f_oneway
from tqdm import tqdm

from contextual_mab.experiments.framework import MABFramework
from contextual_mab.experiments.framework import run_experiment
from contextual_mab.data_utils.data_generation  import generate_experimental_dataset

import matplotlib.pyplot as plt
import seaborn as sns

## Experimental data generation

In [2]:
# Generation settings for group 1; every name defined here is later passed to
# generate_experimental_dataset together with the group 2 and group 3 equivalents.
# NOTE(review): the comments below are inferred from names and values only —
# confirm against generate_experimental_dataset.
n_1 = 5000
noise_scale_1 = 5.0

# One value per action (three actions) — presumably the target reward rates.
cw_1 = [0.05, 0.03, 0.01]

# 'loc' / 'scale' pairs for the X* columns.
ct_prm_1 = {
    'X1': dict(loc=0.0, scale=1.0),
    'X2': dict(loc=10.0, scale=5.0),
    'X3': dict(loc=-0.3, scale=0.5),
}

# 'start' / 'stop' bounds and per-level weights for the O* columns.
ord_prm_1 = {
    'O1': dict(start=1, stop=11, weights=[0.1] * 10),
    'O2': dict(start=1, stop=4, weights=[0.3, 0.4, 0.3]),
}

# Level labels and per-level weights for the C* columns.
catg_prm_1 = {
    'C1': dict(levels=['Yes', 'No'], weights=[0.4, 0.6]),
    'C2': dict(levels=['Green', 'Yellow'], weights=[0.2, 0.8]),
    'C3': dict(levels=['A', 'B', 'C'], weights=[0.4, 0.1, 0.5]),
}

# Three rows of ten coefficients — presumably one row per action; TODO confirm.
weights_1 = [
    [
        -0.85427315, 1.16572722, 0.8890073, -0.57988417, 0.15193386,
        -0.28800726, -0.06606457, 0.36732494, -0.03666541, -0.61067511,
    ],
    [
        0.46719077, -1.64435559, 0.69791627, -0.14981489, 0.26023682,
        0.67528998, 1.52150038, 1.05417964, 0.37329345, 0.79700709,
    ],
    [
        1.62075116, 0.15865047, -0.85728784, 1.23667642, -0.58707557,
        0.05713119, -0.47387454, 0.51293855, -0.55820087, -2.14815787,
    ],
]

In [3]:
# Generation settings for group 2; every name defined here is later passed to
# generate_experimental_dataset together with the group 1 and group 3 equivalents.
# NOTE(review): the comments below are inferred from names and values only —
# confirm against generate_experimental_dataset.
n_2 = 5000

# One value per action (three actions) — presumably the target reward rates.
cw_2 = [0.02, 0.07, 0.03]

# 'loc' / 'scale' pairs for the X* columns.
ct_prm_2 = {
    'X1': dict(loc=5.0, scale=1.0),
    'X2': dict(loc=1.0, scale=5.0),
    'X3': dict(loc=1.3, scale=0.5),
}

# 'start' / 'stop' bounds and per-level weights for the O* columns.
ord_prm_2 = {
    'O1': dict(
        start=1,
        stop=11,
        weights=[0.2, 0.2, 0.05, 0.05, 0.05, 0.05, 0.1, 0.1, 0.1, 0.1],
    ),
    'O2': dict(start=1, stop=4, weights=[0.1, 0.6, 0.3]),
}

# Level labels and per-level weights for the C* columns.
catg_prm_2 = {
    'C1': dict(levels=['Yes', 'No'], weights=[0.1, 0.9]),
    'C2': dict(levels=['Green', 'Yellow'], weights=[0.5, 0.5]),
    'C3': dict(levels=['A', 'B', 'C'], weights=[0.6, 0.2, 0.2]),
}

noise_scale_2 = 5.0

# Three rows of ten coefficients — presumably one row per action; TODO confirm.
weights_2 = [
    [
        0.2249653, 0.54732847, 0.76620536, 0.7461608, -0.76568111,
        -0.13241893, -1.82046231, -0.47742618, -0.96465132, -0.68848216,
    ],
    [
        -0.04717597, 1.49105257, -0.6332578, -1.03206255, -1.30374031,
        -0.48575409, 0.01466847, 0.54927814, 0.72014772, 0.42807199,
    ],
    [
        -0.56907754, 0.04875765, 0.89346343, 0.62619356, -2.19116666,
        1.70168624, 0.34768686, 0.26208243, 1.27787397, -2.07476064,
    ],
]

In [4]:
# Generation settings for group 3; every name defined here is later passed to
# generate_experimental_dataset together with the group 1 and group 2 equivalents.
# NOTE(review): the comments below are inferred from names and values only —
# confirm against generate_experimental_dataset.
n_3 = 5000

# One value per action (three actions) — presumably the target reward rates.
cw_3 = [0.01, 0.03, 0.06]

# 'loc' / 'scale' pairs for the X* columns.
ct_prm_3 = {
    'X1': dict(loc=-2.0, scale=1.0),
    'X2': dict(loc=-20.0, scale=5.0),
    'X3': dict(loc=11.3, scale=0.5),
}

# 'start' / 'stop' bounds and per-level weights for the O* columns
# (three of the O1 weights are zero).
ord_prm_3 = {
    'O1': dict(
        start=1,
        stop=11,
        weights=[0.5, 0.0, 0.0, 0.0, 0.05, 0.05, 0.1, 0.1, 0.1, 0.1],
    ),
    'O2': dict(start=1, stop=4, weights=[0.5, 0.1, 0.4]),
}

# Level labels and per-level weights for the C* columns.
catg_prm_3 = {
    'C1': dict(levels=['Yes', 'No'], weights=[0.5, 0.5]),
    'C2': dict(levels=['Green', 'Yellow'], weights=[0.8, 0.2]),
    'C3': dict(levels=['A', 'B', 'C'], weights=[0.35, 0.35, 0.3]),
}

noise_scale_3 = 5.0

# Three rows of ten coefficients — presumably one row per action; TODO confirm.
# NOTE(review): these values are identical to weights_2 from the group 2 cell —
# confirm that reusing the same coefficients for group 3 is intentional.
weights_3 = [
    [
        0.2249653, 0.54732847, 0.76620536, 0.7461608, -0.76568111,
        -0.13241893, -1.82046231, -0.47742618, -0.96465132, -0.68848216,
    ],
    [
        -0.04717597, 1.49105257, -0.6332578, -1.03206255, -1.30374031,
        -0.48575409, 0.01466847, 0.54927814, 0.72014772, 0.42807199,
    ],
    [
        -0.56907754, 0.04875765, 0.89346343, 0.62619356, -2.19116666,
        1.70168624, 0.34768686, 0.26208243, 1.27787397, -2.07476064,
    ],
]

In [5]:
# Build the dataset used by the single-run cells from the three groups' settings.
# Every list holds the group 1, group 2 and group 3 value of one setting, in that order.
seed = 0
experiment_data = generate_experimental_dataset(
    [n_1, n_2, n_3],
    [cw_1, cw_2, cw_3],
    [ct_prm_1, ct_prm_2, ct_prm_3],
    [ord_prm_1, ord_prm_2, ord_prm_3],
    [catg_prm_1, catg_prm_2, catg_prm_3],
    [noise_scale_1, noise_scale_2, noise_scale_3],
    [weights_1, weights_2, weights_3],
    seed,
    output_info=True,  # presumably what prints the per-group reward summary — TODO confirm
)

Group 1


action_1_reward    0.0416
action_2_reward    0.0290
action_3_reward    0.0052
dtype: float64

Group 2


action_1_reward    0.0188
action_2_reward    0.0676
action_3_reward    0.0260
dtype: float64

Group 3


action_1_reward    0.0096
action_2_reward    0.0300
action_3_reward    0.0464
dtype: float64

Overall


action_1_reward    0.023333
action_2_reward    0.042200
action_3_reward    0.025867
dtype: float64

## Single run

### ABC greedy

In [6]:
# Batch size shared by all the single-run cells in this section.
batch_size = 2000

# Static "one fits all" strategy over three actions. The bare call is the last
# expression of the cell, so its return value is what the cell displays.
run_experiment(
    experiment_data,
    batch_size,
    MABFramework,
    {
        'strategy': 'static-one-fits-all',
        'n_actions': 3,
        'static_min_steps': 2,
    },
)

100%|██████████| 7/7 [00:00<00:00, 70.39it/s]


0.038066666666666665

### Thompson Sampling

In [7]:
# Dynamic "one fits all" strategy over three actions, with every alpha and beta
# set to 1. The return value of the call is the cell's displayed output.
run_experiment(
    experiment_data,
    batch_size,
    MABFramework,
    {
        'strategy': 'dynamic-one-fits-all',
        'n_actions': 3,
        'alphas': [1., 1., 1.],
        'betas': [1., 1., 1.],
    },
)

100%|██████████| 7/7 [00:00<00:00, 85.74it/s]


0.04033333333333333

### Contextual Thompson Sampling
#### Bayesian Logistic

In [8]:
from contextual_mab.predictive_models.bayesian_logistic import BayesianLogisticRegression

In [9]:
# Columns whose name contains 'action' are set aside; the remaining columns are
# dummy-encoded (first level of each categorical dropped) and the resulting
# column names become the predictor list.
action_cols = [name for name in experiment_data.columns if 'action' in name]
non_action_frame = experiment_data.drop(columns=action_cols)
encoded_frame = pd.get_dummies(non_action_frame, drop_first=True)
predictors = encoded_frame.columns.tolist()
predictors

['X1', 'X2', 'X3', 'O1', 'O2', 'C1_Yes', 'C2_Yellow', 'C3_B', 'C3_C']

In [10]:
# Contextual strategy using BayesianLogisticRegression as the modelling approach.
# A copy of `predictors` is passed so the notebook-level list is never the object
# handed to the model.
run_experiment(
    experiment_data,
    batch_size,
    MABFramework,
    {
        'strategy': 'contextual-one-fits-one',
        'n_actions': 3,
        'modelling_approach': BayesianLogisticRegression,
        'modelling_approach_pms': {
            'n_samples': 500,
            'n_chains': 2,
            'predictors': predictors.copy(),
            'tune': 1000,
            'check_prog': False,
        },
    },
)

  0%|          | 0/7 [00:00<?, ?it/s]Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [C3_C, C3_B, C2_Yellow, C1_Yes, O2, O1, X3, X2, X1, Intercept]
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [C3_C, C3_B, C2_Yellow, C1_Yes, O2, O1, X3, X2, X1, Intercept]
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [C3_C, C3_B, C2_Yellow, C1_Yes, O2, O1, X3, X2, X1, Intercept]
 29%|██▊       | 2/7 [00:42<01:47, 21.47s/it]Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [C3_C, C3_B, C2_Yellow, C1_Yes, O2, O1, X3, X2, X1, Intercept]
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [C3_C, C3_B, C2_Yellow, C1_Yes, O2, O1, X3, X2, X1, Intercept]
Auto-assignin

0.058133333333333335

#### Bootstrap Oracles

In [11]:
from contextual_mab.predictive_models.oracles import BootstrapOracle
from sklearn.linear_model import LogisticRegression

In [12]:
# Keyword settings passed as 'learner_class_params' alongside LogisticRegression
# in the bootstrap-oracle run.
logistic_params = dict(
    solver='lbfgs',
    max_iter=500,
    random_state=0,
)

In [13]:
# Contextual strategy using BootstrapOracle with LogisticRegression as the learner
# class. The return value of the call is the cell's displayed output.
run_experiment(
    experiment_data,
    batch_size,
    MABFramework,
    {
        'strategy': 'contextual-one-fits-one',
        'n_actions': 3,
        'modelling_approach': BootstrapOracle,
        'modelling_approach_pms': {
            'n_bootstrap': 1000,
            'learner_class': LogisticRegression,
            'learner_class_params': logistic_params,
            'check_prog': False,
        },
    },
)

100%|██████████| 7/7 [05:25<00:00, 56.02s/it]


0.05926666666666667

## Running many experiments

In [14]:
# Batch size passed to run_experiment by the repeated-run loops below
# (same value as the one set in the single-run section).
batch_size = 2000

In [15]:
# Repeat the static "one fits all" strategy on ten datasets generated with
# seeds 0-9 and collect the value returned by run_experiment for each of them.
# `seed` and `experiment_data` are deliberately left as notebook-level names,
# so after the loop they hold the values from the last iteration.
abc_approval_rates = []
for seed in range(10):
    experiment_data = generate_experimental_dataset(
        [n_1, n_2, n_3],
        [cw_1, cw_2, cw_3],
        [ct_prm_1, ct_prm_2, ct_prm_3],
        [ord_prm_1, ord_prm_2, ord_prm_3],
        [catg_prm_1, catg_prm_2, catg_prm_3],
        [noise_scale_1, noise_scale_2, noise_scale_3],
        [weights_1, weights_2, weights_3],
        seed,
    )
    # The settings dict is rebuilt on every iteration so each run gets a fresh object.
    abc_rate = run_experiment(
        experiment_data,
        batch_size,
        MABFramework,
        {'strategy': 'static-one-fits-all', 'n_actions': 3, 'static_min_steps': 2},
    )
    abc_approval_rates.append(abc_rate)

100%|██████████| 7/7 [00:00<00:00, 100.57it/s]
100%|██████████| 7/7 [00:00<00:00, 96.38it/s]
100%|██████████| 7/7 [00:00<00:00, 98.88it/s]
100%|██████████| 7/7 [00:00<00:00, 100.06it/s]
100%|██████████| 7/7 [00:00<00:00, 99.99it/s]
100%|██████████| 7/7 [00:00<00:00, 103.35it/s]
100%|██████████| 7/7 [00:00<00:00, 99.58it/s]
100%|██████████| 7/7 [00:00<00:00, 93.36it/s]
100%|██████████| 7/7 [00:00<00:00, 91.86it/s]
100%|██████████| 7/7 [00:00<00:00, 99.69it/s]


In [16]:
# Repeat the dynamic "one fits all" strategy on ten datasets generated with
# seeds 0-9 and collect the value returned by run_experiment for each of them.
# `seed` and `experiment_data` are deliberately left as notebook-level names,
# so after the loop they hold the values from the last iteration.
ts_approval_rates = []
for seed in range(10):
    experiment_data = generate_experimental_dataset(
        [n_1, n_2, n_3],
        [cw_1, cw_2, cw_3],
        [ct_prm_1, ct_prm_2, ct_prm_3],
        [ord_prm_1, ord_prm_2, ord_prm_3],
        [catg_prm_1, catg_prm_2, catg_prm_3],
        [noise_scale_1, noise_scale_2, noise_scale_3],
        [weights_1, weights_2, weights_3],
        seed,
    )
    # The settings dict is rebuilt on every iteration so each run gets a fresh object.
    ts_rate = run_experiment(
        experiment_data,
        batch_size,
        MABFramework,
        {
            'strategy': 'dynamic-one-fits-all',
            'n_actions': 3,
            'alphas': [1., 1., 1.],
            'betas': [1., 1., 1.],
        },
    )
    ts_approval_rates.append(ts_rate)

100%|██████████| 7/7 [00:00<00:00, 94.99it/s]
100%|██████████| 7/7 [00:00<00:00, 97.08it/s]
100%|██████████| 7/7 [00:00<00:00, 97.71it/s]
100%|██████████| 7/7 [00:00<00:00, 96.70it/s]
100%|██████████| 7/7 [00:00<00:00, 97.18it/s]
100%|██████████| 7/7 [00:00<00:00, 98.32it/s]
100%|██████████| 7/7 [00:00<00:00, 95.63it/s]
100%|██████████| 7/7 [00:00<00:00, 98.21it/s]
100%|██████████| 7/7 [00:00<00:00, 97.85it/s]
100%|██████████| 7/7 [00:00<00:00, 93.42it/s]


In [17]:
# Two-sample z-test comparing the values collected in abc_approval_rates with
# those in ts_approval_rates; the (statistic, p-value) tuple is the cell output.
# NOTE(review): both lists were built from seeds 0-9 with identical generation
# arguments, so the observations are presumably paired by seed, and each sample
# has only 10 values. A two-sample z-test assumes independent samples and relies
# on a normal approximation — consider a paired t-test (e.g. scipy.stats.ttest_rel)
# and confirm which test is intended before relying on this p-value.
from statsmodels.stats.weightstats import ztest
ztest(abc_approval_rates,ts_approval_rates)

(-7.372048750698871, 1.6802554565386437e-13)