In [3]:
import numpy as np
import pandas as pd
import time
from functools import partial
from tqdm import tqdm

from scipy.stats import randint, uniform
from sim_lib import simulation

pd.options.mode.chained_assignment = None


In [6]:
def eps_greedy(history: pd.DataFrame, eps: float):
    """Choose a row label from ``history`` using an epsilon-greedy rule.

    A single uniform draw decides the mode: when it falls below ``eps`` a
    row is picked uniformly at random, otherwise the row with the highest
    smoothed click-through rate is picked.

    Args:
        history: frame with ``'clicks'`` and ``'impressions'`` columns; its
            index supplies the labels that are returned.
        eps: threshold compared against the uniform draw.

    Returns:
        The ``history.index`` entry of the selected row.
    """
    should_explore = uniform.rvs() < eps
    if not should_explore:
        # Exploit: the constant 10 added to the denominator pulls the
        # estimate down for rows that have only a few impressions.
        smoothed_ctr = history['clicks'] / (history['impressions'] + 10)
        best_pos = np.argmax(smoothed_ctr)
        return history.index[best_pos]

    # Explore: uniform random position in [0, number of rows).
    row_count = history.shape[0]
    random_pos = randint.rvs(0, row_count)
    return history.index[random_pos]

# Baseline policy: eps_greedy with eps bound to 0.06, leaving a callable that takes only `history`.
policy = partial(eps_greedy, eps=0.06)

In [7]:
# seed for homework
seed = 18475
# Seed NumPy's global RNG; the same value is also handed to simulation() below.
np.random.seed(seed=seed)

# Time a single run of the eps-greedy `policy` with n=200000.
start = time.time()
output = simulation(policy, n=200000, seed=seed)
end = time.time()
# Last expression of the cell: elapsed wall-clock seconds are shown as the cell output.
end - start

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


148.60962390899658

In [8]:
# baseline regret: (regret, regret divided by rounds, total_banners) read from `output`
# NOTE(review): `output` is overwritten by later cells — this is the baseline only if run right after the eps-greedy cell.
output['regret'], output['regret']/output['rounds'],  output['total_banners']

(1540.7609683932544, 0.007703804841966272, 184)

In [9]:
# Display the 'history' entry of the most recent simulation output.
output['history']

Unnamed: 0,impressions,clicks,lifetime,p
153,18970.0,4199.0,18003.025431,0.220134
162,228.0,26.0,1537.166719,0.11378
172,163.0,32.0,19648.592394,0.219968
173,170.0,18.0,12771.47499,0.122694
180,26.0,3.0,4655.819793,0.020061
182,6.0,0.0,889.624649,0.004621
183,1.0,0.0,15187.163761,0.073886


In [2]:
class UCB_policy:
    """Stateful upper-confidence-bound selection rule.

    Each call scores every row of ``history`` as

        clicks / (impressions + 1) + c * sqrt(log(t) / (impressions + 1))

    and returns the index label of the highest-scoring row. ``t`` starts at 1
    and grows by one on every call, so the bonus term is zero on the first
    call (``log(1) == 0``).
    """

    def __init__(self, c: float):
        """Store the bonus multiplier ``c`` and start the call counter at 1."""
        self.c = c
        self.t = 1

    def __call__(self, history: pd.DataFrame):
        """Return the ``history.index`` label with the largest UCB score.

        Args:
            history: frame with ``'clicks'`` and ``'impressions'`` columns.

        Returns:
            The index label of the selected row. The call counter ``t`` is
            incremented as a side effect.
        """
        # The +1 keeps both denominators non-zero for rows with 0 impressions.
        pulls = history['impressions'] + 1
        mean_reward = history['clicks'] / pulls
        bonus = self.c * np.sqrt(np.log(self.t) / pulls)
        best_pos = np.argmax(mean_reward + bonus)
        self.t += 1
        return history.index[best_pos]

In [11]:
# Coarse sweep of the UCB coefficient c over np.arange(0.2, 2.2, 0.2).
# A fresh UCB_policy is built for every value, so its call counter t starts from 1 each time.
# NOTE: the float step in np.arange yields values such as 0.6000000000000001 (see the printed output).
for c in tqdm(np.arange(0.2, 2.2, 0.2)):
    start = time.time()
    # Same n and seed as the baseline run; `output` is overwritten on every iteration.
    output = simulation(UCB_policy(c=c), n=200000, seed=seed)
    end = time.time()
    print(f'\tc: {c}')
    # Prints the tuple (regret, regret / rounds, total_banners) and the elapsed seconds.
    print(f"\t regret: {output['regret'], output['regret']/output['rounds'],  output['total_banners']}  | time: {end - start}")       

  0%|                                                                                                                            | 0/10 [00:00<?, ?it/s]

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


 10%|███████████▌                                                                                                       | 1/10 [02:52<25:55, 172.85s/it]

	c: 0.2
	 regret: (444.341685174941, 0.002221708425874705, 184)  | time: 172.85487699508667
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


 20%|███████████████████████                                                                                            | 2/10 [05:46<23:05, 173.22s/it]

	c: 0.4
	 regret: (1527.7127745403288, 0.007638563872701645, 184)  | time: 173.47760200500488
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


 30%|██████████████████████████████████▌                                                                                | 3/10 [08:40<20:14, 173.47s/it]

	c: 0.6000000000000001
	 regret: (3033.9557902005904, 0.015169778951002952, 184)  | time: 173.77307510375977
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


 40%|██████████████████████████████████████████████                                                                     | 4/10 [11:33<17:21, 173.56s/it]

	c: 0.8
	 regret: (4925.028862704537, 0.02462514431352268, 184)  | time: 173.69226384162903
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


 50%|█████████████████████████████████████████████████████████▌                                                         | 5/10 [14:27<14:27, 173.51s/it]

	c: 1.0
	 regret: (7170.813119876884, 0.03585406559938442, 184)  | time: 173.41926097869873
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


 60%|█████████████████████████████████████████████████████████████████████                                              | 6/10 [17:20<11:33, 173.45s/it]

	c: 1.2
	 regret: (9258.906910162113, 0.04629453455081057, 184)  | time: 173.32935190200806
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


 70%|████████████████████████████████████████████████████████████████████████████████▌                                  | 7/10 [20:13<08:40, 173.34s/it]

	c: 1.4000000000000001
	 regret: (10837.109478138087, 0.05418554739069043, 184)  | time: 173.10748505592346
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


 80%|████████████████████████████████████████████████████████████████████████████████████████████                       | 8/10 [23:06<05:46, 173.30s/it]

	c: 1.6
	 regret: (12996.105736636038, 0.06498052868318019, 184)  | time: 173.21233296394348
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████▌           | 9/10 [25:59<02:53, 173.23s/it]

	c: 1.8
	 regret: (14403.0821112556, 0.072015410556278, 184)  | time: 173.09453177452087
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [28:54<00:00, 173.41s/it]

	c: 2.0
	 regret: (15826.129257727003, 0.07913064628863502, 184)  | time: 174.1071228981018





Лучший параметр — ```c=0.2```; все остальные значения сильно хуже с точки зрения награды. При этом с ```c=0.2``` мы побили baseline:

| model | regret | regret / rounds | 
|---|---|---|
| baseline | 1540.7609683932544 | 0.007703804841966272 |
| c=0.2 | 444.341685174941 | 0.002221708425874705 | 

Чтобы затюнить в моей политике баланс exploration/exploitation, подберем параметры вокруг ```0.2```.

In [12]:
# Finer sweep of the UCB coefficient c around 0.2 (0.05, 0.1, 0.15, 0.25).
# A fresh UCB_policy is built for every value, so its call counter t starts from 1 each time.
for c in tqdm([0.05, 0.1, 0.15, 0.25]):
    start = time.time()
    # Same n and seed as the earlier runs; `output` is overwritten on every iteration.
    output = simulation(UCB_policy(c=c), n=200000, seed=seed)
    end = time.time()
    print(f'\tc: {c}')
    # Prints the tuple (regret, regret / rounds, total_banners) and the elapsed seconds.
    print(f"\t regret: {output['regret'], output['regret']/output['rounds'],  output['total_banners']}  | time: {end - start}")       

  0%|                                                                                                                             | 0/4 [00:00<?, ?it/s]

1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


 25%|█████████████████████████████                                                                                       | 1/4 [02:52<08:38, 172.95s/it]

	c: 0.05
	 regret: (51.2872086223395, 0.0002564360431116975, 184)  | time: 172.95283794403076
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


 50%|██████████████████████████████████████████████████████████                                                          | 2/4 [05:46<05:46, 173.45s/it]

	c: 0.1
	 regret: (154.70073030357108, 0.0007735036515178554, 184)  | time: 173.79476594924927
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


 75%|███████████████████████████████████████████████████████████████████████████████████████                             | 3/4 [08:40<02:53, 173.48s/it]

	c: 0.15
	 regret: (224.10542525583185, 0.0011205271262791593, 184)  | time: 173.52623414993286
1 impressions have been simulated
10001 impressions have been simulated
20001 impressions have been simulated
30001 impressions have been simulated
40001 impressions have been simulated
50001 impressions have been simulated
60001 impressions have been simulated
70001 impressions have been simulated
80001 impressions have been simulated
90001 impressions have been simulated
100001 impressions have been simulated
110001 impressions have been simulated
120001 impressions have been simulated
130001 impressions have been simulated
140001 impressions have been simulated
150001 impressions have been simulated
160001 impressions have been simulated
170001 impressions have been simulated
180001 impressions have been simulated
190001 impressions have been simulated


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [11:33<00:00, 173.29s/it]

	c: 0.25
	 regret: (577.9432202661922, 0.0028897161013309606, 184)  | time: 172.90010619163513





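Оптимальная политика среди проверенных значений — при ```c=0.05``` (regret ≈ 51.29). Так как ```0.05``` — наименьшее из проверенных значений, а regret продолжал убывать с уменьшением ```c```, оптимум может лежать ещё ниже.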