# Online Advertisement Problem

A company intends to advertise a product online and wants to identify the banner that produces the highest click-to-view ratio. The reward is therefore binary (click or no click).

Create a simulation environment to simulate this for a set of 5 ads.

In [None]:
# Import
import numpy as np
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go
from matplotlib import pyplot as plt

In [None]:
# Creating a Bandit class
class BernoullisBandit(object):
  """A single ad modelled as a Bernoulli bandit arm.

  Attributes:
    p: Success probability passed to the binomial draw in ``display_ad``.
  """

  def __init__(self, p):
    self.p = p

  def display_ad(self):
    """Show the ad once and return the outcome of a single Bernoulli trial.

    Returns:
      The result of one binomial draw with ``n=1`` and probability ``self.p``
      (1 for a click, 0 for no click).
    """
    return np.random.binomial(n=1, p=self.p)

# Notations

Action (a) => The ad chosen for display

Rewards (R(i)) => The reward at the i-th time step


* 1 for a click
* 0 for no click


Average Reward ($Q_n$) => The average of the rewards received prior to the n-th selection
*   $Q_n = \frac{R_1 + R_2 + \dots + R_{n-1}}{n-1}$
*   $Q_{n+1} = Q_n + \frac{1}{n}(R_n - Q_n)$

Alpha (α) => The step size, a number between 0 and 1. For stationary problems alpha is 1/N(a), where N(a) is the number of times action a has been taken (previously denoted by n).





In [None]:
# Simulation settings
n_prods = 90000  # Iterations (ad impressions) per simulation run

# One Bernoulli bandit per banner, built from its click probability
ads = [BernoullisBandit(p) for p in (0.004, 0.016, 0.02, 0.028, 0.031)]
adA, adB, adC, adD, adE = ads
ad_name = ['adA','adB', 'adC', 'adD', 'adE']

In [None]:
# Create a class to simulate Epsilon Greedy
class SimulateEpsilonGreedy(object):
  """Epsilon-greedy simulation of a multi-armed bandit over a set of ads.

  At every step the currently selected ad is displayed, its reward is folded
  into that ad's running average (the action value ``Q``), and the next ad is
  selected: with probability ``eps`` uniformly at random (exploration),
  otherwise the ad with the highest ``Q`` (exploitation).

  Args:
    eps: Probability of exploring (picking a random ad) at each step.
    n_prods: Number of iterations (ad displays) performed by ``simulate``.
    ads: Ad objects; each must provide ``display_ad()`` returning a numeric
      reward.
    ad_name: Display names, index-aligned with ``ads``.
  """

  def __init__(self, eps: float, n_prods: int, ads: list, ad_name: list):
    self.eps = eps  # Exploration probability
    self.n_prods = n_prods  # Number of iterations run by simulate()
    self.ads = ads  # List of all ads
    self.ad_name = ad_name  # List of the name of all ads
    self.n_ads = len(ads)  # Number of ads
    self.Q = np.zeros(self.n_ads)  # Q, estimated action value per ad
    self.N = np.zeros(self.n_ads)  # N, number of impressions per ad
    self.total_rewards = 0
    self.avg_rewards = []  # Running average reward after each iteration
    self.total_runs = 0
    # Placeholder until simulate() has run: the first ad.
    self.best_ad_index = 0
    self.best_ad_name = self.ad_name[self.best_ad_index]

  def get_best_ad(self) -> tuple:
    """Return ``(best_ad_index, best_ad_name, ads)`` as currently stored."""
    return (self.best_ad_index, self.best_ad_name, self.ads)

  def get_stats(self) -> tuple:
    """Return ``(total_runs, avg_rewards, total_rewards, Q, N)``."""
    return (self.total_runs, self.avg_rewards, self.total_rewards, self.Q, self.N)

  def simulate(self) -> tuple:
    """Run ``n_prods`` epsilon-greedy iterations and record the statistics.

    State accumulates on the instance, so calling this again continues from
    the previous counts rather than starting over.

    Returns:
      ``(best_ad_index, best_ad_name, total_rewards, avg_rewards, total_runs)``
      where ``avg_rewards`` is the list of running averages, one entry per
      iteration performed so far.
    """
    # The very first ad is chosen uniformly at random.
    ad_chosen = np.random.randint(self.n_ads)
    for _ in range(self.n_prods):
      # Calculating reward for selected ad
      R = self.ads[ad_chosen].display_ad()

      # Incremental mean update: Q <- Q + (1/N) * (R - Q)
      self.N[ad_chosen] += 1
      self.Q[ad_chosen] += (1/self.N[ad_chosen]) * (R - self.Q[ad_chosen])

      # Updating rewards and average rewards
      self.total_runs += 1
      self.total_rewards += R
      self.avg_rewards.append(self.total_rewards / self.total_runs)

      # Selecting the next ad. uniform() samples [0, 1), so a strict "<"
      # explores with probability exactly eps (and never when eps == 0).
      if np.random.uniform() < self.eps:
        ad_chosen = np.random.randint(self.n_ads)
      else:
        # argmax returns the first maximum, so ties go to the lowest index.
        ad_chosen = np.argmax(self.Q)

    self.best_ad_index = np.argmax(self.Q)
    self.best_ad_name = self.ad_name[self.best_ad_index]
    return (self.best_ad_index, self.best_ad_name, self.total_rewards, self.avg_rewards, self.total_runs)

  # NOTE: the return annotations below are strings on purpose; the previous
  # "-> go.Figure()" built a throwaway Figure every time the class was defined.

  def plot_avg_over_iterations(self) -> "go.Figure":
    """Return a line chart of the running average reward per iteration."""
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=np.arange(1,self.total_runs+1), y=self.avg_rewards, mode='lines', name='lines', line = dict(color='firebrick')))
    fig.update_layout(title='Average Reward Value with Iterations', xaxis_title='Iterations', yaxis_title='Average Rewards', showlegend=False)
    return fig

  def plot_ad_view_freq(self) -> "go.Figure":
    """Return a bar chart of the impression count ``N`` for each ad."""
    fig = go.Figure()
    fig.add_trace(go.Bar(x=self.ad_name, y=self.N, marker_color='firebrick'))
    fig.update_layout(title="Ad Frequency during Exploration",  xaxis_title='Ads', yaxis_title='View Frequency', showlegend=False)
    return fig

  def plot_ad_action_value(self) -> "go.Figure":
    """Return a bar chart of the estimated action value ``Q`` for each ad."""
    fig = go.Figure()
    fig.add_trace(go.Bar(x=self.ad_name, y=self.Q, marker_color='firebrick'))
    fig.update_layout(title="Action Value for Ads Post Exploration",  xaxis_title='Ads', yaxis_title='Action Value', showlegend=False)
    return fig


# Create a simulation instance for eps 0.01

In [None]:
# Run the epsilon-greedy simulation with an exploration probability of 0.01.
simulation001 = SimulateEpsilonGreedy(0.01, n_prods, ads, ad_name)
best_ad_index001, best_ad_name001, total_r001, avg_r001, iterations001 = simulation001.simulate()
print(f"The best performing ad is {best_ad_name001}")
# avg_r001 holds the running average after every iteration; report only the
# final value instead of dumping the whole history.
print(f"The total rewards for {iterations001} iterations is {total_r001}. With an average reward of {avg_r001[-1]}.")

The best performing ad is adE
The total rewards for 90000 iterations is 2799. With an average reward of [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.019230769230769232, 0.018867924528301886, 0.018518518518518517, 0.01818181818181818, 0.017857142857142856, 0.017543859649122806, 0.017241379310344827, 0.01694915254237288, 0.016666666666666666, 0.01639344262295082, 0.016129032258064516, 0.015873015873015872, 0.015625, 0.015384615384615385, 0.015151515151515152, 0.014925373134328358, 0.014705882352941176, 0.014492753623188406, 0.014285714285714285, 0.014084507042253521, 0.013888888888888888, 0.0136986301369863, 0.013513513513513514, 0.013333333333333334, 0.013157894736842105, 0.012987012987012988, 0.01282051282051282, 0.012658227848101266, 0.0125, 0.012345679012345678, 0.0121951219

In [None]:
# Bar chart of how many times each ad was shown (N) in the eps = 0.01 run.
simulation001.plot_ad_view_freq()

In [None]:
# Bar chart of each ad's estimated action value (Q) after the eps = 0.01 run.
simulation001.plot_ad_action_value()

# Create a simulation instance for eps 0.05

In [None]:
# Run the epsilon-greedy simulation with an exploration probability of 0.05.
simulation005 = SimulateEpsilonGreedy(0.05, n_prods, ads, ad_name)
best_ad_index005, best_ad_name005, total_r005, avg_r005, iterations005 = simulation005.simulate()
print(f"The best performing ad is {best_ad_name005}")
# avg_r005 holds the running average after every iteration; report only the
# final value instead of dumping the whole history.
print(f"The total rewards for {iterations005} iterations is {total_r005}. With an average reward of {avg_r005[-1]}.")

The best performing ad is adE
The total rewards for 90000 iterations is 2741. With an average reward of [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 

In [None]:
# Bar chart of how many times each ad was shown (N) in the eps = 0.05 run.
simulation005.plot_ad_view_freq()

In [None]:
# Bar chart of each ad's estimated action value (Q) after the eps = 0.05 run.
simulation005.plot_ad_action_value()

# Create a simulation instance for eps 0.1

In [None]:
# Run the epsilon-greedy simulation with an exploration probability of 0.1.
simulation01 = SimulateEpsilonGreedy(0.1, n_prods, ads, ad_name)
best_ad_index01, best_ad_name01, total_r01, avg_r01, iterations01 = simulation01.simulate()
print(f"The best performing ad is {best_ad_name01}")
# avg_r01 holds the running average after every iteration; report only the
# final value instead of dumping the whole history.
print(f"The total rewards for {iterations01} iterations is {total_r01}. With an average reward of {avg_r01[-1]}.")

The best performing ad is adE
The total rewards for 90000 iterations is 2570. With an average reward of [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.047619047619047616, 0.045454545454545456, 0.043478260869565216, 0.041666666666666664, 0.04, 0.038461538461538464, 0.037037037037037035, 0.03571428571428571, 0.034482758620689655, 0.03333333333333333, 0.03225806451612903, 0.03125, 0.030303030303030304, 0.029411764705882353, 0.02857142857142857, 0.027777777777777776, 0.02702702702702703, 0.02631578947368421, 0.02564102564102564, 0.025, 0.024390243902439025, 0.023809523809523808, 0.023255813953488372, 0.022727272727272728, 0.022222222222222223, 0.021739130434782608, 0.02127659574468085, 0.020833333333333332, 0.02040816326530612, 0.02, 0.0196078431372549, 0.019230769230769232, 0.018867924528301886, 0.018518518518518517, 0.01818181818181818, 0.017857142857142856, 0.017543859649122806, 0.017241379310344827, 0.01694915254237288, 0.01666666

In [None]:
# Bar chart of how many times each ad was shown (N) in the eps = 0.1 run.
simulation01.plot_ad_view_freq()

In [None]:
# Bar chart of each ad's estimated action value (Q) after the eps = 0.1 run.
simulation01.plot_ad_action_value()

# Create a simulation instance for eps 0.2

In [None]:
# Run the epsilon-greedy simulation with an exploration probability of 0.2.
simulation02 = SimulateEpsilonGreedy(0.2, n_prods, ads, ad_name)
best_ad_index02, best_ad_name02, total_r02, avg_r02, iterations02 = simulation02.simulate()
print(f"The best performing ad is {best_ad_name02}")
# avg_r02 holds the running average after every iteration; report only the
# final value instead of dumping the whole history.
print(f"The total rewards for {iterations02} iterations is {total_r02}. With an average reward of {avg_r02[-1]}.")

The best performing ad is adE
The total rewards for 90000 iterations is 2528. With an average reward of [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06666666666666667, 0.0625, 0.058823529411764705, 0.05555555555555555, 0.05263157894736842, 0.05, 0.047619047619047616, 0.045454545454545456, 0.043478260869565216, 0.041666666666666664, 0.04, 0.038461538461538464, 0.037037037037037035, 0.03571428571428571, 0.034482758620689655, 0.03333333333333333, 0.03225806451612903, 0.03125, 0.030303030303030304, 0.029411764705882353, 0.02857142857142857, 0.027777777777777776, 0.02702702702702703, 0.02631578947368421, 0.02564102564102564, 0.025, 0.024390243902439025, 0.023809523809523808, 0.023255813953488372, 0.022727272727272728, 0.022222222222222223, 0.021739130434782608, 0.02127659574468085, 0.020833333333333332, 0.02040816326530612, 0.02, 0.0196078431372549, 0.019230769230769232, 0.018867924528301886, 0.018518518518518517, 0.01818181818181818, 0.017857142857142856, 0.0175

In [None]:
# Bar chart of how many times each ad was shown (N) in the eps = 0.2 run.
simulation02.plot_ad_view_freq()

In [None]:
# Bar chart of each ad's estimated action value (Q) after the eps = 0.2 run.
simulation02.plot_ad_action_value()

# Visualise the average rewards over iterations for various epsilons

In [None]:
# Stack the iteration index and the four running-average histories as rows,
# then transpose so that each series becomes one column of the frame.
x = np.array([np.arange(1,iterations02+1), avg_r001, avg_r005, avg_r01, avg_r02]).T
df = pd.DataFrame(
    x,
    columns=[
        "Iteration",
        "Average Rewards for E 0.01",
        "Average Rewards for E 0.05",
        "Average Rewards for E 0.1",
        "Average Rewards for E 0.2",
    ],
)

# One line per epsilon: (data column, legend label, line colour)
fig = go.Figure()
for column, label, colour in [
    ('Average Rewards for E 0.01', 'epsilon = 0.01', 'orange'),
    ('Average Rewards for E 0.05', 'epsilon = 0.05', 'royalblue'),
    ('Average Rewards for E 0.1', 'epsilon = 0.1', 'crimson'),
    ('Average Rewards for E 0.2', 'epsilon = 0.2', 'green'),
]:
    fig.add_trace(go.Scatter(x=df["Iteration"], y=df[column], mode='lines', name=label, line=dict(color=colour)))

fig.update_layout(
    title='Average Reward Value with Iterations',
    xaxis_title='Iterations',
    yaxis_title='Average Rewards',
    showlegend=True,
)
fig.show()