In [1]:
from datetime import datetime, timedelta
import itertools 
import numpy as np
import os
import pandas as pd
import plotly.graph_objects as go
import sys

# In Github
from bandits.multi_armed_bandits import *
from simulation_env.advertisement import Advertisement
from simulation_env.util import rand_update_ads

# Different exploration algorithms

eGreedy (ε-greedy), UCB, and Thompson Sampling are compared as strategies for handling the exploration vs. exploitation dilemma.

In [None]:
# --- Experiment settings ---------------------------------------------------
# NOTE(review): batch_size_i is never read in this cell; every constructor
# below is given a literal 1 instead -- confirm whether they should share it.
batch_size_i = 1
n_exp = 1000  # argument passed to each run_test() call
eps = 0.1     # third argument of eGreedy
c = 0.1       # third argument of UpperConfidenceBounds

# --- Simulated advertisements ----------------------------------------------
# (first constructor argument, name) pairs; the first argument is presumably
# the ad's true CTR -- verify against the Advertisement class.
ad_specs = [
    (0.115, "Ad_A"),
    (0.118, "Ad_B"),
    (0.103, "Ad_C"),
    (0.114, "Ad_D"),
    (0.1004, "Ad_E"),
    (0.088, "Ad_F"),
    (0.09, "Ad_G"),
]
ads = [Advertisement(rate, label) for rate, label in ad_specs]
ad_A, ad_B, ad_C, ad_D, ad_E, ad_F, ad_G = ads


def _run_and_score(alg):
    """Call ``run_test(n_exp)`` and ``calc_avg_reward()`` on ``alg``; return ``alg``."""
    alg.run_test(n_exp)
    alg.calc_avg_reward()
    return alg


# --- Run each strategy in turn ---------------------------------------------
# Each strategy is built only after the previous one has finished, matching
# the original construct -> run -> score order.
# NOTE(review): all three strategies receive the same Advertisement objects;
# if run_test() mutates them, later strategies start from the state left by
# earlier ones -- confirm this is intended.
egreedy_alg = _run_and_score(eGreedy(1, ads, eps))               # eGreedy
ucb_alg = _run_and_score(UpperConfidenceBounds(1, ads, c))       # UCB
ts_alg = _run_and_score(ThompsonSampling(1, ads))                # TS

# Overlay the average-reward curve of every strategy on a single figure.
fig = go.Figure()
exposure_idx = np.arange(1, n_exp + 1)  # 1-based exposure counter, same for every trace
for alg in (egreedy_alg, ucb_alg, ts_alg):
    fig.add_scatter(name=alg.name, x=exposure_idx, y=alg.avg_reward)

layout_opts = {
    "title": "<b>Explore Exploitation strategies comparison plot</b>",
    "xaxis_title": "# of Exposure",
    "yaxis_title": "Average reward",
}
fig.update_layout(**layout_opts)

fig.show()

# Thompson Sampling

## Dynamic reward Environment

CTR changes randomly every random(n) days

<b>Things to try</b>:

- when CTR rank changes (constant change):
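  - Set one ad up so that its CTR catches up with another ad's CTR, and check whether Thompson Sampling reflects this by adjusting the number of impressions.
  - Effect of different slopes. Ex: one ad is overtaken, but by a little vs. by a lot.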

<br>

- when new advertisement added:

<br>

- when CTR rank changes (one-time change)

----

<b>Performance Metrics</b>:
- Average of multiple simulations

###  CTR rank changes (one-time) 

1. Find the optimal "n_exp" and "day_i_reset". See how impressions and CTR change as these hyperparameters change.
2. Add a time penalty -> put more weight on more recent actions.
<ul> 
    <li> ${reward \over t}$, t = # of days after alpha reset</li>
    <li> </li>
</ul>


In [38]:
# One-time CTR rank change: two ads are served by Thompson Sampling for 100
# simulated days. At the end of day 50 the sampler's parameters are reset and
# the two ads' `ctr` attributes are overwritten with each other's starting
# values, so the new CTRs apply from the following iteration onward.

# NOTE(review): batch_size_i is not read anywhere in this cell; the sampler is
# built with a literal 1 -- confirm whether it should use this value.
batch_size_i = 1
ad_A = Advertisement(0.115, "Ad_A")
ad_C = Advertisement(0.103, "Ad_C")
ads = [ad_A, ad_C]

n_exp = 100  # argument passed to run_test() once per simulated day
ts_alg = ThompsonSampling(1, ads)

# Impression counts per ad; one value is appended per simulated day.
ad_A_daily_imps = []
ad_C_daily_imps = []

for day_i in range(100):
    ts_alg.run_test(n_exp)

    # Record the ad's `imps` value for this day, then reset it so the next
    # day's reading does not include today's count.
    ad_A_daily_imps.append(ad_A.imps)
    ad_A.reset_imps()
    ad_C_daily_imps.append(ad_C.imps)
    ad_C.reset_imps()

    # Logical `and` (not bitwise `&`) for a boolean test. Within range(100)
    # this is true only for day_i == 50.
    if day_i != 0 and day_i % 50 == 0:
        ts_alg.reset_params()
        print(f"updated CTR at day = {day_i}")
        # Swap the CTR ranking: each ad takes the other's starting value.
        ad_A.ctr = 0.103
        ad_C.ctr = 0.115

updated CTR at day = 50


In [39]:
# Gather both per-day impression lists into one frame, one column per ad.
df = pd.DataFrame(dict(ad_A=ad_A_daily_imps, ad_C=ad_C_daily_imps))

In [40]:
# One line per ad: daily impression count against the frame's row index.
fig = go.Figure()
for col_nm in ("ad_A", "ad_C"):
    fig.add_scatter(name=col_nm, x=df.index, y=df[col_nm])

layout_opts = {
    "title": "<b>daily impressions W/ alpha reset</b>",
    "xaxis_title": "# of days",
    "yaxis_title": "# of impressions",
}
fig.update_layout(**layout_opts)

fig.show()

In [34]:
# NOTE(review): this cell draws the same `df` columns as the
# "daily impressions W/ alpha reset" plot and differs only in its title. Its
# execution count (34) precedes the simulation cell (38), so any saved output
# presumably came from an earlier run -- confirm whether it should be kept.
fig = go.Figure()
for col_nm in ("ad_A", "ad_C"):
    fig.add_scatter(name=col_nm, x=df.index, y=df[col_nm])

layout_opts = {
    "title": "<b>daily impressions</b>",
    "xaxis_title": "# of days",
    "yaxis_title": "# of impressions",
}
fig.update_layout(**layout_opts)

fig.show()

In [None]:
# Plot each ad's recorded `ctr_history` as one line.
fig = go.Figure()
for ad in [ad_A, ad_C]:
    fig.add_trace(
        go.Scatter(
            name = ad.name,
            # 1-based index with exactly one x value per history entry.
            # The previous upper bound, len(ad.ctr_history), produced one
            # x value fewer than there are y values.
            x = np.arange(1, len(ad.ctr_history) + 1),
            y = ad.ctr_history
        )
    )

fig.update_layout(title="<b>CTR changes of Advertisements</b>",
                  xaxis_title="# of days",
                  yaxis_title="CTR")

fig.show()

In [None]:
# Recompute the average-reward series before plotting it.
ts_alg.calc_avg_reward()

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        name = ts_alg.name,
        # 1-based index sized from the plotted series itself, so x and y
        # always have the same length. The previous
        # np.arange(1, len(ts_alg.rewards)) was one element short of
        # len(ts_alg.rewards).
        x = np.arange(1, len(ts_alg.avg_reward) + 1),
        y = ts_alg.avg_reward
    )
)

fig.update_layout(title="<b>Explore Exploitation strategies comparison plot</b>",
                  xaxis_title="# of Exposure",
                  yaxis_title="Average reward")

fig.show()