In [3]:
import numpy as np
import pandas as pd
import time
from sim_lib import simulation

# Silence pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None

In [30]:
# Homework seed: fed to np.random.seed and passed to simulation(seed=...)
# so that every run below is reproducible.
seed = 18475

In [31]:
class UCB1:
    """UCB1-style bandit policy with a tunable exploration weight.

    Every call scores each row of ``history`` as::

        clicks / n_i + explor_mult * sqrt(2 * ln(t) / n_i)

    where ``n_i = impressions + impression_bias`` and ``t`` is the 1-based
    number of the current call, and returns the index label of the row with
    the highest score.
    """

    def __init__(self, explor_mult : float, impression_bias : float):
        """Store the policy hyper-parameters.

        Args:
            explor_mult: Weight applied to the exploration (confidence) term.
            impression_bias: Constant added to every impression count before
                it is used as the denominator ``n_i``.
        """
        # Call counter; starts at 1, so ln(t) == 0 on the very first call.
        self.t = 1
        self.explor_mult = explor_mult
        self.impression_bias = impression_bias

    def __call__(self, history):
        """Pick the row of ``history`` with the highest upper confidence bound.

        Args:
            history: Table with ``'impressions'`` and ``'clicks'`` columns and
                an ``index`` of arm labels (presumably the pandas DataFrame
                maintained by the simulation -- verify against the caller).

        Returns:
            The index label of the selected row.
        """
        n_i = history['impressions'] + self.impression_bias

        # Arms whose effective count is not positive (e.g. never shown while
        # impression_bias == 0) would yield 0/0 = NaN and then be silently
        # skipped by the NaN-ignoring argmax, i.e. never explored. Give them
        # an infinite score instead so they are tried first.
        seen = n_i > 0
        safe_n = n_i.where(seen, 1.0)  # placeholder denominator for unseen arms

        exploitation = history['clicks'] / safe_n
        exploration = np.sqrt(2 * np.log(self.t) / safe_n)
        scores = (exploitation + self.explor_mult * exploration).where(seen, np.inf)

        best = np.argmax(scores)  # position of the first maximum
        self.t += 1
        return history.index[best]

In [32]:
def test_ucb(explor_mult, impr_bias, n_rounds=200000):
    """Run one seeded simulation with a UCB1 policy and print a short report.

    The report consists of the parameters, the elapsed wall-clock seconds and
    a ``regret| regret/rounds | total_banners`` line taken from the result
    returned by ``simulation``.

    Args:
        explor_mult: Exploration multiplier passed to ``UCB1``.
        impr_bias: Impression bias passed to ``UCB1``.
        n_rounds: Value forwarded to ``simulation`` as ``n`` (defaults to the
            200000 used throughout this notebook).

    Returns:
        The object returned by ``simulation``, so callers can inspect it
        without re-running the (slow) simulation.
    """
    print("----------------------------------------")
    print(f"for mult = {explor_mult}; bias={impr_bias}")
    np.random.seed(seed)
    policy = UCB1(explor_mult=explor_mult, impression_bias=impr_bias)
    # perf_counter is monotonic, so the measurement is immune to clock changes.
    start = time.perf_counter()
    output = simulation(policy, n=n_rounds, seed=seed)
    elapsed = time.perf_counter() - start
    print(elapsed)
    print(f"{output['regret']}| {output['regret']/output['rounds']} | {output['total_banners']}")
    print("----------------------------------------")
    return output

Протестируем различные наборы параметров (множитель exploration и дополнительное слагаемое в знаменателе n_i) с достаточно большим шагом, чтобы определить оптимальный диапазон для дальнейшей, более точечной проверки.

In [34]:
# Coarse sweep: every (multiplier, bias) pair from the same three magnitudes,
# with the multiplier varying slowest.
coarse_values = [0.1, 1, 10]
for explor_mult, impr_bias in [(m, b) for m in coarse_values for b in coarse_values]:
    test_ucb(explor_mult, impr_bias)
        

----------------------------------------
for mult = 0.1; bias=0.1
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
569.7854039669037
256.7479590979893| 0.0012837397954899464 | 184
----------------------------------------
----------------------------------------
for mult = 0.1; bias=

Видно, что лучшие результаты получаются при множителе 0.1. Для него лучшее слагаемое в знаменателе — 0.1. Зафиксируем bias = 0.1 и попробуем улучшить баланс между exploration и exploitation с помощью множителя.

In [35]:
# Bias stays at 0.1; probe one multiplier on each side of 0.1.
nearby_multipliers = (0.05, 0.15)
for explor_mult in nearby_multipliers:
    test_ucb(explor_mult, impr_bias=0.1)

----------------------------------------
for mult = 0.05; bias=0.1
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
543.4070310592651
167.47235173965575| 0.0008373617586982788 | 184
----------------------------------------
----------------------------------------
for mult = 0.15; bi

Множитель 0.05 уменьшил regret (167.5 против 256.7 при множителе 0.1); проверим ещё значения между 0.05 и 0.1.

In [39]:
# Bias stays at 0.1; probe multipliers strictly between 0.05 and 0.1.
intermediate_multipliers = (0.06, 0.08)
for explor_mult in intermediate_multipliers:
    test_ucb(explor_mult, impr_bias=0.1)
        

----------------------------------------
for mult = 0.06; bias=0.1
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
612.0384089946747
132.01002053307033| 0.0006600501026653516 | 184
----------------------------------------
----------------------------------------
for mult = 0.08; bi

Получили ещё более низкий regret (132.0 при множителе 0.06). Можно попробовать улучшить результат ещё, однако regret и так сильно уменьшился.

Исходя из всех результатов, наилучшим будет UCB, у которого к знаменателю n_i добавлено 0.1, а множитель перед корнем равен 0.06.

In [40]:
# Re-run the simulation with the chosen parameters and show the final history.
np.random.seed(seed)
best_policy = UCB1(explor_mult=0.06, impression_bias=0.1)
output = simulation(best_policy, n=200000, seed=seed)
regret, rounds = output['regret'], output['rounds']
print(f"{regret}| {regret/rounds} | {output['total_banners']}")
output['history']

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated
132.01002053307033| 0.0006600501026653516 | 184


Unnamed: 0,impressions,clicks,lifetime,p
153,3.0,0.0,18003.025431,0.220134
162,3.0,0.0,1537.166719,0.11378
172,21203.0,4624.0,19648.592394,0.219968
173,2.0,0.0,12771.47499,0.122694
180,2.0,0.0,4655.819793,0.020061
182,2.0,0.0,889.624649,0.004621
183,2.0,0.0,15187.163761,0.073886
