# Online Advertisement Problem

A company intends to advertise a product online and wants to identify the banner with the highest click-through rate (clicks per view). The reward is therefore binary (click or no click).

Create a simulation environment to simulate this for a set of 5 ads.

In [139]:
# Import
import numpy as np
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go
from matplotlib import pyplot as plt

In [140]:
# Creating a Bandit class
class BernoullisBandit:
  """A single advert modelled as a Bernoulli arm with click probability ``p``."""

  def __init__(self, p):
    # Probability that one impression of this ad results in a click.
    self.p = p

  def display_ad(self):
    """Show the ad once; return 1 for a click and 0 for no click."""
    # A binomial draw with a single trial is exactly one Bernoulli(p) sample.
    return np.random.binomial(n=1, p=self.p)

In [141]:
# Scratch check of np.arange: the stop value (10) is excluded from the result.
x = np.arange(1, 10, 1)
print(x)

[1 2 3 4 5 6 7 8 9]


# Notations

Action (a) => The ad chosen for display

Rewards (R(i)) => The reward received at the i-th time step


* 1 for a click
* 0 for no click


Average Reward (Qn) => The average of the rewards received prior to the n-th selection
*   $Q_n = \dfrac{R_1 + R_2 + \dots + R_{n-1}}{n-1}$
*   Incremental form: $Q_{n+1} = Q_n + \dfrac{1}{n}\,(R_n - Q_n)$

Alpha => The step size in the update $Q_{n+1} = Q_n + \alpha\,(R_n - Q_n)$; it is a number between 0 and 1. For stationary problems alpha is 1/N(a), where N(a) is the number of times action a has been taken (previously denoted by n).





In [142]:
# Experiment budget: impressions spent exploring versus exploiting
n_test = 10_000  # Number of exploratory iterations
n_prods = 90_000  # Number of exploitive iterations

# One Bernoulli bandit per advert, each created with its own click probability
ad_name = ['adA', 'adB', 'adC', 'adD', 'adE']
adA, adB, adC, adD, adE = (
  BernoullisBandit(p) for p in (0.004, 0.016, 0.02, 0.028, 0.031)
)
ads = [adA, adB, adC, adD, adE]

In [143]:
# Create a class to simulate an A/B/n test (explore first, then exploit)
class SimulateABNTest(object):
  """Simulate an A/B/n test: uniform random exploration, then pure exploitation.

  During ``explore`` an ad is picked uniformly at random for ``n_test``
  impressions while per-ad action values (``Q``) and impression counts (``N``)
  are tracked. During ``exploit`` the ad with the highest ``Q`` is shown for
  ``n_prods`` impressions. The running average reward over all impressions is
  appended to ``avg_rewards`` after every impression.

  Each element of ``ads`` must provide a ``display_ad()`` method returning the
  reward of one impression; ``ad_name`` holds the matching display names.
  """

  def __init__(self, n_test:int, n_prods:int, ads:list, ad_name:list):
    self.n_test = n_test # Number of exploratory iterations
    self.n_prods = n_prods # Number of exploitive iterations
    self.ads = ads # List of all ads
    self.ad_name = ad_name # List of the names of all ads
    self.n_ads = len(ads) # Number of ads
    self.Q = np.zeros(self.n_ads) # Q, action value (mean reward) per ad
    self.N = np.zeros(self.n_ads) # N, impressions per ad during exploration
    self.total_rewards = 0 # Sum of all rewards observed so far
    self.avg_rewards = [] # Running average reward after each impression
    self.total_runs = 0 # Total impressions served so far
    self.best_ad_index = 0 # Placeholder until explore() has been run
    self.best_ad_name = self.ad_name[self.best_ad_index]

  def _record_reward(self, R) -> None:
    """Add reward R to the totals and log the new overall average reward."""
    self.total_rewards += R
    self.total_runs += 1
    self.avg_rewards.append(self.total_rewards / self.total_runs)

  def get_best_ad(self) -> tuple:
    """Return ``(best_ad_index, best_ad_name, ads)``."""
    return (self.best_ad_index, self.best_ad_name, self.ads)

  def get_stats(self) -> tuple:
    """Return ``(total_runs, avg_rewards, total_rewards, Q, N)``."""
    return (self.total_runs, self.avg_rewards, self.total_rewards, self.Q, self.N)

  def explore(self) -> tuple:
    """Show randomly chosen ads ``n_test`` times and pick the best one.

    Returns:
      ``(best_ad_index, best_ad_name)`` for the ad with the highest action
      value after exploration.
    """
    for _ in range(self.n_test):
      ad_chosen = np.random.randint(self.n_ads) # Choose an ad uniformly at random
      R = self.ads[ad_chosen].display_ad() # Display it to get a reward
      self.N[ad_chosen] += 1 # Count this impression for the chosen ad
      # Incremental mean: Q <- Q + (1/N) * (R - Q)
      self.Q[ad_chosen] += (1/self.N[ad_chosen]) * (R - self.Q[ad_chosen])
      self._record_reward(R)

    # Remember the ad with the highest estimated action value
    self.best_ad_index = np.argmax(self.Q)
    self.best_ad_name = self.ad_name[self.best_ad_index]
    return (self.best_ad_index, self.best_ad_name)

  def exploit(self) -> tuple:
    """Show the current best ad ``n_prods`` times.

    Q and N are not updated here; only the reward totals and running average.

    Returns:
      ``(total_rewards, final_average_reward, total_runs)``. The average is
      ``0.0`` when no impression has been served at all.
    """
    best_ad = self.ads[self.best_ad_index]
    for _ in range(self.n_prods):
      self._record_reward(best_ad.display_ad()) # Reward of the best performing ad

    # Guard against an empty history (n_test == 0 and n_prods == 0)
    final_avg = self.avg_rewards[-1] if self.avg_rewards else 0.0
    return (self.total_rewards, final_avg, self.total_runs)

  # NOTE: the return annotations below are strings on purpose. The previous
  # ``-> go.Figure()`` built a Figure object every time the class was defined.
  def plot_avg_over_iterations(self) -> "go.Figure":
    """Return a line chart of the running average reward per iteration."""
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=np.arange(1,self.total_runs+1), y=self.avg_rewards, mode='lines', name='lines', line = dict(color='firebrick')))
    fig.update_layout(title='Average Reward Value with Iterations', xaxis_title='Iterations', yaxis_title='Average Rewards', showlegend=False)
    return fig

  def plot_ad_view_freq(self) -> "go.Figure":
    """Return a bar chart of how often each ad was shown during exploration."""
    fig = go.Figure()

    fig.add_trace(go.Bar(x=self.ad_name, y=self.N, marker_color='firebrick'))
    fig.update_layout(title="Ad Frequency during Exploration",  xaxis_title='Ads', yaxis_title='View Frequency', showlegend=False)
    return fig

  def plot_ad_action_value(self) -> "go.Figure":
    """Return a bar chart of each ad's action value (Q) after exploration."""
    fig = go.Figure()
    fig.add_trace(go.Bar(x=self.ad_name, y=self.Q, marker_color='firebrick'))
    fig.update_layout(title="Action Value for Ads Post Exploration",  xaxis_title='Ads', yaxis_title='Action Value', showlegend=False)
    return fig


In [144]:
# Create a simulation instance and run the two phases in order:
# explore() shows randomly chosen ads n_test times and returns the ad with the
# highest action value; exploit() then shows only that ad n_prods times.
simulation1 = SimulateABNTest(n_test, n_prods, ads, ad_name)
best_ad_index, best_ad_name = simulation1.explore()
print(f"The best performing ad is {best_ad_name}")
# exploit() returns the overall totals across both phases, not just exploitation
total_r, avg_r, iterations = simulation1.exploit()
print(f"The total rewards for {iterations} iterations is {total_r}. With an average reward of {avg_r}.")

The best performing ad is adD
The total rewards for 100000 iterations is 2695. With an average reward of 0.02695.


In [145]:
# Get model statistics: total impressions served, the running average reward
# after each impression, the total reward, and the per-ad action values (Q)
# and exploration impression counts (N)
total_iterations, avg_rewards, total_rewards, Q, N = simulation1.get_stats()

In [146]:
# Plot the running average reward against the iteration number
x = simulation1.plot_avg_over_iterations()
x.show()

In [147]:
# Plot how many times each ad was shown during exploration
x = simulation1.plot_ad_view_freq()
x.show()

In [148]:
# Plot the action value (Q) estimated for each ad after exploration
x = simulation1.plot_ad_action_value()
x.show()