In [1]:
import numpy as np
import pandas as pd
import time

from functools import partial
from scipy.stats import randint, uniform
from sim_lib import simulation
from scipy.stats import beta

pd.options.mode.chained_assignment = None

In [3]:
def eps_greedy(history: pd.DataFrame, eps: float):
    if uniform.rvs() < eps:
        n = history.shape[0]
        return history.index[randint.rvs(0, n)]

    ctr = history['clicks'] / (history['impressions'] + 10)
    n = np.argmax(ctr)
    return history.index[n]


In [7]:
# seed for homework
np.random.seed(seed=384758917)

start = time.time()
output = simulation(t_sampler, n=200000)
end = time.time()
end - start

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


2475.5922060012817

In [8]:
# baseline regret
output['regret'], output['regret']/output['rounds'],  output['total_banners']

(14125.238382271236, 0.07062619191135618, 207)

In [9]:
output['history']

Unnamed: 0,impressions,clicks,lifetime,p
186,2294.0,130.0,29326.096778,0.055519
189,2242.0,17.0,3391.914442,0.007924
195,1849.0,43.0,11664.13632,0.020399
204,654.0,14.0,26138.49166,0.028943
205,474.0,32.0,46021.78926,0.057974
206,223.0,8.0,6280.605263,0.036934


In [6]:
def bandit_decorator(func):
#для первого вызова
    curr_ind = None    
    curr_click = None    
    old_score = None
    def inner(new_score):
        curr_ind = None 
        curr_click = None   
        old_score = None 
        if old_score is None:    
            new_score[["alpha", "betta"]] = 1
            old_score, curr_ind, curr_click = func(new_score)
            return curr_ind
        else:
            new_score = pd.merge(old_score[old_score.columns.difference(new_score.columns) ], new_score, left_index=True, right_index=True, how="right")
            new_score[["alpha", "betta"]].fillna(1, inplace=True)
            
            if curr_ind in new_score.index:
                if new_score.loc[curr_ind, "clicks"] < curr_click: # провал
                                new_score.loc[curr_ind, "betta"] += 1  #повышаем значение бетта, понижаем шанс на получение хорошего скора
                else: # успех
                    new_score.loc[curr_ind, "alpha"] += 2 #повышаем значение альфа, повышаем шанс на получение хорошего скора  
            
            old_score, curr_ind, curr_click = func(new_score)
            return curr_ind
    return inner


@bandit_decorator
def t_sampler(new_score):
 #получаем значение параметра из распределения бернули
    new_score["score"] = new_score[["alpha", "betta"]].apply(lambda parameters: beta.rvs(parameters[0], parameters[1]), axis=1)
    curr_ind = int(new_score[["score"]].idxmax())
    curr_click = new_score.loc[curr_ind, "clicks"]
    return new_score, curr_ind, curr_click