In [1]:
from mab_todel import MAB
import numpy as np
from defaults import N_ARMS
# N_ARMS = 3

In [14]:
# Create the skeleton of the bandit state: one entry per arm, keyed by the
# arm number as a string ('1' .. str(N_ARMS)), each value still unset (None).
state = {str(arm_number): None for arm_number in np.arange(1, N_ARMS + 1)}
state

{'1': None, '2': None, '3': None}

In [15]:
# Replace every placeholder with its own default record. The record is built
# anew for each arm, so the 'success' / 'trials' / 'probability' lists are
# never shared between arms. `state` is updated in place (same dict object).
state.update({
    arm_name: dict(
        description='',
        current_alpha=1,
        current_beta=1,
        success=[0],
        trials=[0],
        probability=[],
        is_active=1,
        params_weight=None,
        is_filled=False,
    )
    for arm_name in state
})
state

{'1': {'description': '',
  'current_alpha': 1,
  'current_beta': 1,
  'success': [0],
  'trials': [0],
  'probability': [],
  'is_active': 1,
  'params_weight': None,
  'is_filled': False},
 '2': {'description': '',
  'current_alpha': 1,
  'current_beta': 1,
  'success': [0],
  'trials': [0],
  'probability': [],
  'is_active': 1,
  'params_weight': None,
  'is_filled': False},
 '3': {'description': '',
  'current_alpha': 1,
  'current_beta': 1,
  'success': [0],
  'trials': [0],
  'probability': [],
  'is_active': 1,
  'params_weight': None,
  'is_filled': False}}

In [16]:
# Create the multi-armed bandit from the prepared per-arm state.
# The per-arm records are displayed later through `m.arms_dict_params`.
m = MAB(state=state)

In [17]:
# Attach a description (and a weights dict, None for every arm here) to each arm.
# The three lists are positional: arm '1' gets 'str1', '2' gets 'str2',
# '3' gets 'ml optimization'. In the displayed records each arm's
# 'description' is filled in and 'is_filled' has switched to True.
# NOTE(review): the arm names are hard-coded for three arms while the state is
# sized by N_ARMS -- confirm N_ARMS == 3 or derive these lists from the state.
_ = m.strategy_weights_description(arms=['1', '2', '3'],
                                    weights_dicts=[None, None, None],
                                    descs=['str1', 'str2', 'ml optimization'])
# Show the bandit's per-arm records after the update.
m.arms_dict_params

{'1': {'description': 'str1',
  'current_alpha': 1,
  'current_beta': 1,
  'success': [0],
  'trials': [0],
  'probability': [],
  'is_active': 1,
  'params_weight': None,
  'is_filled': True},
 '2': {'description': 'str2',
  'current_alpha': 1,
  'current_beta': 1,
  'success': [0],
  'trials': [0],
  'probability': [],
  'is_active': 1,
  'params_weight': None,
  'is_filled': True},
 '3': {'description': 'ml optimization',
  'current_alpha': 1,
  'current_beta': 1,
  'success': [0],
  'trials': [0],
  'probability': [],
  'is_active': 1,
  'params_weight': None,
  'is_filled': True}}

In [18]:
# Number of arms that are currently switched on (is_active == 1);
# all three initial arms are active at this point.
m.number_active_strategies

3

In [19]:
# Add one more arm; the call prints the active-arm count before and after.
# With arm_name=None the new arm shows up under key '4' in the later state
# dumps -- presumably the name is auto-assigned; verify in MAB.add_arms.
# The return value is bound to `_` so it is not echoed as cell output.
_ = m.add_arms(arm_name=None)

Current number of active arms is 3
Now the number of active arms is 4


In [20]:
# Switch the new arm off. The arm is not deleted: later state dumps still
# contain '4', now with is_active == 0. The call prints the active-arm count
# before and after.
_ = m.remove_arm('4')

Current number of active arms is 4
Now the number of active arms is 3


In [21]:
# Sample a probability for each arm from a Beta distribution with alpha=beta=1.
# The displayed result is a pair: (full per-arm state, {arm: sampled probability}).
# The sampled value is also appended to the arm's 'probability' list; the
# inactive arm '4' gets no sample and is absent from the second dict.
# NOTE(review): no random seed is set in the visible cells, so these values
# presumably change on every run -- confirm how MAB draws its samples.
m.arms_probabilities()

({'1': {'description': 'str1',
   'current_alpha': 1,
   'current_beta': 1,
   'success': [0],
   'trials': [0],
   'probability': [0.16111564467040976],
   'is_active': 1,
   'params_weight': None,
   'is_filled': True},
  '2': {'description': 'str2',
   'current_alpha': 1,
   'current_beta': 1,
   'success': [0],
   'trials': [0],
   'probability': [0.9321228476598821],
   'is_active': 1,
   'params_weight': None,
   'is_filled': True},
  '3': {'description': 'ml optimization',
   'current_alpha': 1,
   'current_beta': 1,
   'success': [0],
   'trials': [0],
   'probability': [0.9814650164021689],
   'is_active': 1,
   'params_weight': None,
   'is_filled': True},
  '4': {'description': '',
   'current_alpha': 1,
   'current_beta': 1,
   'success': [0],
   'trials': [0],
   'probability': [],
   'is_active': 0,
   'params_weight': None,
   'is_filled': False}},
 {'1': 0.16111564467040976, '2': 0.9321228476598821, '3': 0.9814650164021689})

In [22]:
# Get the rewards per arm: a dict {arm: {'success': ..., 'trials': ...}}
# covering the active arms only.
# NOTE(review): the meaning of the None argument is not visible here. The
# counts shown by this cell differ from the ones appended to the state in the
# next cell, so each call presumably yields fresh results -- confirm in
# MAB.results_from_db.
m.results_from_db(None)

{'1': {'success': 29, 'trials': 121},
 '2': {'success': 14, 'trials': 111},
 '3': {'success': 63, 'trials': 194}}

In [23]:
# Get the rewards, append them to each active arm's 'success' / 'trials'
# history, and sample new probabilities from the updated distribution.
# In the displayed output current_alpha == success + 1 and
# current_beta == trials - success + 1 (e.g. arm '1': 58 -> 59, 191 - 58 -> 134),
# and the new sample is appended to the arm's 'probability' list.
# The inactive arm '4' is left untouched.
# NOTE(review): the meaning of the None argument is not visible here.
m.update_distr_and_return_proba(None)

({'1': {'description': 'str1',
   'current_alpha': 59,
   'current_beta': 134,
   'success': [0, 58],
   'trials': [0, 191],
   'probability': [0.16111564467040976, 0.2840127350473365],
   'is_active': 1,
   'params_weight': None,
   'is_filled': True},
  '2': {'description': 'str2',
   'current_alpha': 46,
   'current_beta': 95,
   'success': [0, 45],
   'trials': [0, 139],
   'probability': [0.9321228476598821, 0.279950548090563],
   'is_active': 1,
   'params_weight': None,
   'is_filled': True},
  '3': {'description': 'ml optimization',
   'current_alpha': 47,
   'current_beta': 93,
   'success': [0, 46],
   'trials': [0, 138],
   'probability': [0.9814650164021689, 0.33913082090341573],
   'is_active': 1,
   'params_weight': None,
   'is_filled': True},
  '4': {'description': '',
   'current_alpha': 1,
   'current_beta': 1,
   'success': [0],
   'trials': [0],
   'probability': [],
   'is_active': 0,
   'params_weight': None,
   'is_filled': False}},
 {'1': 0.2840127350473365, '2'

In [24]:
# Re-activate the previously switched-off arm '4' (set is_active back to 1).
# The call prints the active-arm count before and after (3 -> 4).
_ = m.turn_on_arm('4')

Current number of active arms is 3
Now the number of active arms is 4
