# A/B Testing from Scratch: Multi-armed Bandits

In [1]:
import numpy as np
import pandas as pd

#widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

#plots
import matplotlib.pyplot as plt
from plotnine import *

#stats
import scipy as sp
import statsmodels as sm

In [217]:
class Arm:
    """A single Bernoulli bandit arm that counts impressions and actions.

    Every pull is one impression; it produces an action (1) with probability
    ``true_p`` and nothing (0) otherwise.
    """

    def __init__(self, true_p):
        """Store the true action probability and zero the counters."""
        self.true_p = true_p
        self.reset()

    def reset(self):
        """Set the impression and action counters back to zero."""
        self.impressions = 0
        self.actions = 0

    def get_state(self):
        """Return the counters as an ``(impressions, actions)`` tuple."""
        return self.impressions, self.actions

    def get_rate(self):
        """Return the observed action rate, or 0.0 if the arm was never pulled."""
        # A fresh (or just-reset) arm has no impressions; dividing would raise
        # ZeroDivisionError, so report a rate of zero instead.
        if self.impressions == 0:
            return 0.0
        return self.actions / self.impressions

    def pull(self):
        """Simulate one impression and return 1 if it led to an action, else 0."""
        self.impressions += 1
        # np.random.random() is uniform on [0, 1), so this succeeds with
        # probability true_p.
        res = 1 if np.random.random() < self.true_p else 0
        self.actions += res
        return res
# Sanity check: pull a 10% arm 100 times and show its (impressions, actions).
a = Arm(0.1)
for i in range(100):
    a.pull()
a.get_state()

(100, 16)

In [218]:
class MusketeerEnv:
    """Batch multi-armed bandit environment made of Bernoulli arms.

    Each call to ``step`` draws a random number of impressions, distributes
    them over the arms according to the supplied probabilities, and appends a
    row of cumulative statistics to ``self.ds``.
    """

    def __init__(self, true_ps, avg_impressions):
        """Create one arm per entry of ``true_ps``.

        Args:
            true_ps: true action probability of each arm.
            avg_impressions: mode of the per-step impression count.
        """
        self.true_ps = true_ps
        self.avg_impressions = avg_impressions
        self.nb_arms = len(true_ps)
        self.reset()

    def reset(self):
        """Start a new run with fresh arms and an empty history; return the state."""
        self.t = -1  # incremented before use, so the first step is t == 0
        self.ds = []
        self.arms = [Arm(p) for p in self.true_ps]
        return self.get_state()

    def get_state(self):
        """Return a list with each arm's ``(impressions, actions)`` tuple."""
        return [arm.get_state() for arm in self.arms]

    def get_impressions(self):
        """Draw this step's impression count.

        The count is sampled from a triangular distribution on
        [0.5, 1.5] * ``avg_impressions`` with mode ``avg_impressions`` and
        truncated to an int.
        """
        return int(np.random.triangular(self.avg_impressions / 2,
                                        self.avg_impressions,
                                        self.avg_impressions * 1.5))

    def step(self, ps):
        """Serve one batch of impressions and return the new state.

        Args:
            ps: per-arm selection probabilities, passed to
                ``np.random.choice`` as ``p``.
        """
        self.t += 1
        impressions = self.get_impressions()
        for i in np.random.choice(a=self.nb_arms, size=impressions, p=ps):
            self.arms[i].pull()
        self.record()
        return self.get_state()

    def record(self):
        """Append a row of cumulative statistics for the current step to ``self.ds``.

        Rates are empirical (actions / impressions). An arm with no
        impressions gets a rate of 0.0, and the aggregate rates are 0.0 while
        no impression has been served at all, so a one-hot allocation or an
        empty batch does not raise ``ZeroDivisionError``.
        """
        d = {'t': self.t, 'max_rate': 0, 'opt_impressions': 0}
        total_impressions = 0
        total_actions = 0
        for i, arm in enumerate(self.arms):
            impressions, actions = arm.get_state()
            rate = actions / impressions if impressions else 0.0
            d[f'impressions_{i}'] = impressions
            d[f'actions_{i}'] = actions
            d[f'rate_{i}'] = rate
            # The "optimal" arm is the one with the best *observed* rate so far.
            if rate > d['max_rate']:
                d['max_rate'] = rate
                d['opt_impressions'] = impressions
            total_impressions += impressions
            total_actions += actions
        d['total_impressions'] = total_impressions
        d['opt_impressions_rate'] = (
            d['opt_impressions'] / total_impressions if total_impressions else 0.0
        )
        d['total_actions'] = total_actions
        d['total_rate'] = total_actions / total_impressions if total_impressions else 0.0
        d['regret_rate'] = d['max_rate'] - d['total_rate']
        d['regret'] = d['regret_rate'] * total_impressions
        self.ds.append(d)

    def show_df(self):
        """Return the recorded history as a DataFrame with a fixed column order."""
        df = pd.DataFrame(self.ds)
        arm_ids = range(self.nb_arms)
        cols = ['t'] + [f'rate_{i}' for i in arm_ids] + \
               [f'impressions_{i}' for i in arm_ids] + \
               [f'actions_{i}' for i in arm_ids] + \
               ['total_impressions', 'total_actions', 'total_rate'] + \
               ['regret_rate', 'regret'] + \
               ['opt_impressions', 'opt_impressions_rate']
        df = df[cols]
        return df
# Three arms with true action rates 0.10, 0.12 and 0.13; the per-step
# impression count has mode 500.
env = MusketeerEnv(
    true_ps=[0.1, 0.12, 0.13],
    avg_impressions=500,
)

In [219]:
# Serve ten batches with a fixed 70/20/10 split and print the cumulative
# per-arm (impressions, actions) after each batch.
for i in range(10):
    state = env.step([0.7, 0.2, 0.1])
    print(state)

[(430, 40), (149, 15), (76, 10)]
[(748, 70), (254, 28), (119, 14)]
[(1141, 117), (359, 43), (164, 19)]
[(1605, 162), (504, 62), (226, 25)]
[(1890, 186), (583, 73), (258, 32)]
[(2300, 243), (706, 85), (310, 38)]
[(2693, 272), (820, 102), (363, 47)]
[(3046, 305), (919, 116), (426, 54)]
[(3365, 335), (1012, 127), (472, 65)]
[(3714, 373), (1123, 145), (529, 77)]


In [225]:
# Display the per-step history recorded by the environment so far.
env.show_df()

Unnamed: 0,t,rate_0,rate_1,rate_2,impressions_0,impressions_1,impressions_2,actions_0,actions_1,actions_2,total_impressions,total_actions,total_rate,regret_rate,regret,opt_impressions,opt_impressions_rate
0,0,0.169811,0.119048,0.131661,53,42,319,9,5,42,414,56,0.135266,0.034546,14.301887,53,0.128019
1,1,0.112150,0.117647,0.112994,107,102,708,12,12,80,917,104,0.113413,0.004234,3.882353,102,0.111232
2,2,0.125000,0.130137,0.111992,136,146,1009,17,19,113,1291,149,0.115414,0.014723,19.006849,146,0.113091
3,3,0.112613,0.138889,0.115385,222,216,1586,25,30,183,2024,238,0.117589,0.021300,43.111111,216,0.106719
4,4,0.114695,0.133829,0.117021,279,269,1974,32,36,231,2522,299,0.118557,0.015272,38.516729,269,0.106661
5,5,0.107463,0.148867,0.120682,335,309,2345,36,46,283,2989,365,0.122114,0.026753,79.964401,309,0.103379
6,6,0.102564,0.146552,0.125716,390,348,2792,40,51,351,3530,442,0.125212,0.021339,75.327586,348,0.098584
7,7,0.105618,0.145038,0.124304,445,393,3234,47,57,402,4072,506,0.124263,0.020775,84.595420,393,0.096513
8,8,0.102510,0.144186,0.123826,478,430,3513,49,62,435,4421,546,0.123501,0.020685,91.446512,430,0.097263
9,9,0.097928,0.141649,0.123869,531,473,3867,52,67,479,4871,598,0.122767,0.018882,91.972516,473,0.097105


In [221]:
class Agent:
    """Allocation policies for a batch multi-armed bandit environment.

    Every policy returns an array with one weight per arm that sums to 1, for
    use as the selection probabilities of the next batch of impressions.
    Policies read only ``env.nb_arms`` and each arm's ``get_state()``.
    """

    def __init__(self, env):
        """Bind the agent to ``env``; policies always read this environment."""
        self.env = env

    def _counts(self):
        """Return per-arm ``(impressions, actions)`` as two float arrays."""
        states = [self.env.arms[i].get_state() for i in range(self.env.nb_arms)]
        impressions = np.array([s[0] for s in states], dtype=float)
        actions = np.array([s[1] for s in states], dtype=float)
        return impressions, actions

    def _rates(self):
        """Return per-arm observed rates; arms with no impressions get 0.0."""
        impressions, actions = self._counts()
        rates = np.zeros_like(actions)
        # Only divide where there is data, so a fresh arm does not raise or
        # produce NaN.
        np.divide(actions, impressions, out=rates, where=impressions > 0)
        return rates

    def equal_weights(self):
        """Split traffic evenly across all arms."""
        return np.full(self.env.nb_arms, 1 / self.env.nb_arms)

    def randomize(self):
        """Return random weights, normalized to sum to 1."""
        res = np.random.rand(self.env.nb_arms)
        res /= res.sum()
        return res

    def greedy(self):
        """Send all traffic to the arm with the best observed rate.

        Returns an integer one-hot array; ties go to the lowest index.
        """
        res = np.zeros(self.env.nb_arms, dtype=int)
        res[np.argmax(self._rates())] = 1
        return res

    def eps_greedy(self, eps=0.3):
        """Spread ``eps`` evenly over all arms and give the rest to the best arm."""
        res = np.full(self.env.nb_arms, eps / self.env.nb_arms)
        res[np.argmax(self._rates())] += 1 - eps
        return res

    def softmax(self, tau=0.05):
        """Weight arms by ``exp(rate / tau)``; smaller ``tau`` is greedier."""
        scaled = self._rates() / tau
        # Subtracting the max leaves the result unchanged mathematically but
        # keeps exp() from overflowing when tau is tiny.
        weights = np.exp(scaled - scaled.max())
        return weights / weights.sum()

    def ucb(self):
        """Send all traffic to the arm with the highest UCB1 bound.

        The bound is ``rate_i + sqrt(2 * ln(N) / n_i)`` with ``n_i`` the arm's
        impressions and ``N`` the total. While some arms have no impressions,
        traffic is split evenly among those untried arms instead.
        """
        impressions, _ = self._counts()
        untried = impressions == 0
        if untried.any():
            return untried / untried.sum()
        bounds = self._rates() + np.sqrt(2 * np.log(impressions.sum()) / impressions)
        res = np.zeros(self.env.nb_arms)
        res[np.argmax(bounds)] = 1.0
        return res

    def thompson(self, n_samples=1000):
        """Weight each arm by its estimated probability of being the best.

        Each arm's rate gets a ``Beta(1 + actions, 1 + failures)`` posterior
        (uniform prior). ``n_samples`` joint draws are taken and each arm's
        weight is the fraction of draws in which it had the highest rate.
        """
        impressions, actions = self._counts()
        draws = np.random.beta(1 + actions, 1 + impressions - actions,
                               size=(n_samples, self.env.nb_arms))
        wins = np.bincount(draws.argmax(axis=1), minlength=self.env.nb_arms)
        return wins / n_samples

# Build an agent on the current environment and show what each policy
# proposes for the next batch.
a = Agent(env)
(
    a.equal_weights(),
    a.randomize(),
    a.greedy(),
    a.eps_greedy(),
    a.softmax(),
)

(array([0.33333333, 0.33333333, 0.33333333]),
 array([0.45814235, 0.50189778, 0.03995987]),
 array([0, 0, 1]),
 array([0.1, 0.1, 0.8]),
 array([0.19081179, 0.33867473, 0.47051348]))

In [222]:
# Observed action rate of every arm in the current environment.
[arm.get_rate() for arm in env.arms]

[0.10043080236941303, 0.12911843276936777, 0.14555765595463138]

In [223]:
# Run 100 steps of epsilon-greedy on a fresh environment.
env = MusketeerEnv(true_ps=[0.1, 0.12, 0.13], avg_impressions=500)
# Rebuild the agent: the previous `a` still holds the old environment, so its
# policies would keep reading stale statistics and never learn from this run.
a = Agent(env)
# Warm up with an even split so every arm has an observed rate before the
# first rate-based decision.
env.step(a.equal_weights())
for i in range(99):
    action = a.eps_greedy()
    env.step(action)

In [224]:
# Scratch cell: draw ten random numbers with np.random.rand.
np.random.rand(10)

array([0.22921063, 0.25874669, 0.49074226, 0.41564119, 0.86889909,
       0.31964825, 0.83444461, 0.75162391, 0.77964637, 0.84843834])