# Case study: Contextual online advertising with synthetic user data

User click behavior follows a logistic function:
$$ p_a(x) = \frac{1}{1 + e^{-f_a(x)}} $$
$$ f_a(x = [location, device, age]) = \beta^a_0 + \beta^a_1 \cdot location + \beta^a_2 \cdot device + \beta^a_3 \cdot age $$

## Imports

In [1]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from scipy import stats
import plotly.offline
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import cufflinks as cf

In [2]:
# Configure cufflinks once, before any DataFrame/Series `.iplot()` call.
# NOTE(review): presumably this switches plotly rendering to offline
# (in-notebook) mode and sets global defaults -- confirm against the
# cufflinks documentation.
cf.go_offline()
cf.set_config_file(world_readable=True, theme='white')

In [3]:
class UserGenerator(object):
    """Simulate users and their click responses to five ads ('A'..'E').

    Each ad has a coefficient vector ``beta[ad]``. The click probability for
    the current user is ``1 / (1 + exp(-dot(beta[ad], context)))``, where the
    context is laid out as ``[1, location, device, age]`` (the leading 1 is
    the intercept term).
    """

    def __init__(self) -> None:
        # One coefficient vector per ad, aligned with the context layout
        # produced by generate_user_with_context().
        self.beta = {}
        self.beta['A'] = np.array([-4, -0.1, -3, 0.1])
        self.beta['B'] = np.array([-6, -0.1, 1, 0.1])
        self.beta['C'] = np.array([2, 0.1, 1, -0.1])
        self.beta['D'] = np.array([4, 0.1, -3, -0.2])
        self.beta['E'] = np.array([0.1, 0, 0.5, -0.01])
        # Context of the most recently generated user; None until
        # generate_user_with_context() has been called.
        self.context = None

    def logistic(self, beta, context):
        """Return the click probability for coefficients *beta* and *context*."""
        f = np.dot(beta, context)
        p = 1 / (1 + np.exp(-f))
        return p

    def display_ad(self, ad):
        """Show *ad* to the current user and return the click outcome (0 or 1).

        Raises:
            ValueError: if *ad* is not one of the known ads.
            RuntimeError: if no user has been generated yet.
        """
        # Look the ad up in self.beta so the set of valid ads has a single
        # source of truth. Unhashable inputs are reported as unknown ads too.
        try:
            beta = self.beta[ad]
        except (KeyError, TypeError):
            raise ValueError('Unknown ad!') from None
        if self.context is None:
            raise RuntimeError(
                'No current user: call generate_user_with_context() first.'
            )
        p = self.logistic(beta, self.context)
        reward = np.random.binomial(n=1, p=p)
        return reward

    def generate_user_with_context(self):
        """Draw a new random user, store it as the current context, return it.

        The three random draws happen in the order location, device, age, so
        results are reproducible under a fixed ``np.random.seed``.
        """
        # 0: international, 1: U.S.
        location = np.random.binomial(n=1, p=0.6)
        # 0: desktop, 1: mobile
        device = np.random.binomial(n=1, p=0.8)
        # Integer age in [10, 69]; Beta(2, 3) has mean 0.4, so the mean age
        # is roughly 34.
        age = 10 + int(np.random.beta(2, 3) * 60)
        # Leading 1 is the intercept feature.
        self.context = [1, location, device, age]
        return self.context


In [4]:
# Single simulated-user generator shared by the experiment loop below.
ug = UserGenerator()

### Function approximation with regularized logistic regression

### Implementing regularized logistic regression


In [5]:
from scipy.optimize import minimize

In [6]:
class RegularizedLR(object):
    """Regularized logistic regression with a per-weight Gaussian belief.

    The model keeps, for each of the ``n_dim`` weights, a mean ``m`` and a
    precision-like term ``q``. ``fit`` minimizes the logistic loss plus a
    quadratic penalty ``0.5 * sum(q * (w - m)**2)`` on a batch, then moves
    ``m`` to the new optimum and increases ``q``.

    Args:
        name: Label for this model (e.g. the ad it belongs to).
        alpha: Scale applied to the sampling standard deviation and to the
            UCB confidence term.
        rlambda: Initial value of every entry of ``q``.
        n_dim: Number of features (length of a context vector).
    """

    def __init__(self, name, alpha, rlambda, n_dim):
        self.name = name
        self.alpha = alpha
        self.rlambda = rlambda
        self.n_dim = n_dim
        self.m = np.zeros(n_dim)
        self.q = np.ones(n_dim) * rlambda
        # Starting point for the first optimization; drawn from the prior.
        self.w = self.get_sampled_weights()

    def get_sampled_weights(self):
        """
        Sample parameters of the logistic regression function

        Each weight is drawn independently from a normal distribution with
        mean ``m`` and standard deviation ``alpha / sqrt(q)``.
        """
        w = np.random.normal(self.m, self.alpha * self.q**(-1/2))
        return w

    def loss(self, w, *args):
        """Return the regularized negative log-likelihood of ``w``.

        Args:
            w: Candidate weight vector.
            *args: ``(X, y)`` -- feature rows and 0/1 labels of the batch.
        """
        X, y = args
        regularizer = 0.5 * np.dot(self.q, (w - self.m)**2)
        if len(y) == 0:
            return regularizer
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        z = X @ w
        # logaddexp(0, z) == log(1 + exp(z)) without overflowing for large z.
        pred_loss = np.sum(np.logaddexp(0, z) - y * z)
        return regularizer + pred_loss

    def fit(self, X, y, bounds=None):
        """Update the model with a batch of observations.

        Does nothing when the batch is empty.

        Args:
            X: Sequence of feature rows, each of length ``n_dim``.
            y: Sequence of 0/1 labels, one per row of ``X``.
            bounds: Optional list of ``(low, high)`` pairs, one per weight,
                passed to L-BFGS-B. Defaults to ``(-10, 10)`` for every
                weight except the last, which is limited to ``(-1, 1)``.
        """
        # len() instead of truthiness so numpy arrays are accepted too.
        if y is None or len(y) == 0:
            return
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        if bounds is None:
            # Derived from n_dim so the model is not tied to 4 features.
            bounds = [(-10, 10)] * (self.n_dim - 1) + [(-1, 1)]
        minimization = minimize(
            self.loss,
            self.w,
            args=(X, y),
            method="L-BFGS-B",
            bounds=bounds,
            options={'maxiter': 50},
        )
        self.w = minimization.x
        # Copy so that m and w never share the same underlying array.
        self.m = self.w.copy()
        p = self.calc_sigmoid(self.w, X.T)
        self.q = self.q + np.matmul(p * (1 - p), X**2)

    def calc_sigmoid(self, w, context):
        """Return ``1 / (1 + exp(-dot(w, context)))``."""
        return 1 / (1 + np.exp(-np.dot(w, context)))

    def get_ucb(self, context):
        """Return the mean prediction plus an ``alpha``-scaled confidence bonus."""
        pred = self.calc_sigmoid(self.m, context)
        confidence = self.alpha * np.sqrt(np.sum(np.divide(np.array(context)**2, self.q)))
        ucb = pred + confidence
        return ucb

    def get_prediction(self, context):
        """Return the click probability predicted with the mean weights ``m``."""
        return self.calc_sigmoid(self.m, context)

    def sample_prediction(self, context):
        """Return the click probability predicted with freshly sampled weights."""
        w = self.get_sampled_weights()
        return self.calc_sigmoid(w, context)



### Objective: Regret minimization
$$ \text{Total regret} = \sum_{k=1}^{K} \left[ p_{a^*}(x_k) - p_a(x_k) \right] $$
$x_k$: Context for $k^{th}$ user\
$a^*$: best action (ad) to take, that gives the highest expected CTR\
$a$: the selected action (ad); $p_a(x_k)$ is its expected CTR for user $k$\
$K$: total number of users (impressions)


In [7]:
def calculate_regret(ug, context, ad_options, ad):
    """Return ``(regret, best_action)`` for showing *ad* in *context*.

    The true click probability of every ad in *ad_options* is computed from
    the generator's coefficients. Regret is the gap between the highest of
    those probabilities and the probability of the ad that was shown.
    """
    expected_ctr = {}
    for option in ad_options:
        expected_ctr[option] = ug.logistic(ug.beta[option], context)
    # max() keeps the first maximal entry, so ties resolve to the earliest ad.
    best_action, best_ctr = max(expected_ctr.items(), key=lambda pair: pair[1])
    return best_ctr - expected_ctr[ad], best_action


### Solving the online advertising problem

In [8]:
def select_ad_eps_greed(ad_models, context, eps):
    """Pick an ad with the epsilon-greedy rule.

    With probability *eps* any ad is chosen uniformly at random. Otherwise
    the ad whose model predicts the highest click probability for *context*
    is chosen, with ties broken uniformly at random.
    """
    explore = np.random.uniform() < eps
    if explore:
        candidates = list(ad_models.keys())
    else:
        scores = {}
        for name in ad_models:
            scores[name] = ad_models[name].get_prediction(context)
        top_score = max(scores.values())
        candidates = [name for name, score in scores.items() if score == top_score]
    return np.random.choice(candidates)


In [9]:
# The candidate ads, one single-letter name each.
ad_options = list('ABCDE')
# Per-strategy outputs, filled in by the simulation loop.
exploration_data = dict()
# Column names of the per-impression results table.
data_columns = [
    'context', 'ad', 'click',
    'best_action', 'regret', 'total_regret',
]
# Exploration strategies to evaluate.
exploration_strategies = ['eps-greedy']

In [10]:
# Simulate 10,000 impressions per exploration strategy: pick an ad for each
# generated user, record click and regret, and refit the per-ad models on the
# accumulated batch every 500 impressions.
for strategy in exploration_strategies:
    print(f"--- Now using {strategy}")
    # Fixed seed so every strategy sees a reproducible random stream.
    np.random.seed(0)
    # Create LR models for each ad
    alpha, rlambda, n_dim = 0.5, 0.5, 4
    ad_models = {ad: RegularizedLR(ad, alpha, rlambda, n_dim) for ad in ad_options}
    # Initialize data structures (per-ad training batches since last update)
    X = {ad: [] for ad in ad_options}
    y = {ad: [] for ad in ad_options}
    results = []
    total_regret = 0
    # run active strategy
    for i in range(10000):
        context = ug.generate_user_with_context()
        if strategy == 'eps-greedy':
            eps = 0.1
            ad = select_ad_eps_greed(ad_models, context, eps)
        else:
            # Fail loudly instead of reusing a stale or undefined `ad`.
            raise ValueError(f"Unknown exploration strategy: {strategy}")
        click = ug.display_ad(ad)
        X[ad].append(context)
        y[ad].append(click)
        regret, best_action = calculate_regret(ug, context, ad_options, ad)
        total_regret += regret
        results.append((context, ad, click, best_action, regret, total_regret))
        # Update models with latest batch
        if (i + 1) % 500 == 0:
            print(f"Updating the models at i: {i+1}")
            # Separate loop name so the selected `ad` is not overwritten.
            for model_ad in ad_options:
                ad_models[model_ad].fit(X[model_ad], y[model_ad])
            X = {ad: [] for ad in ad_options}
            y = {ad: [] for ad in ad_options}
    # Build the results table once per strategy, not once per impression.
    exploration_data[strategy] = {'models': ad_models, 'results': pd.DataFrame(results, columns=data_columns)}

--- Now using eps-greedy
Updating the models at i: 500
Updating the models at i: 1000
Updating the models at i: 1500
Updating the models at i: 2000
Updating the models at i: 2500
Updating the models at i: 3000
Updating the models at i: 3500
Updating the models at i: 4000
Updating the models at i: 4500
Updating the models at i: 5000
Updating the models at i: 5500
Updating the models at i: 6000
Updating the models at i: 6500
Updating the models at i: 7000
Updating the models at i: 7500
Updating the models at i: 8000
Updating the models at i: 8500
Updating the models at i: 9000
Updating the models at i: 9500
Updating the models at i: 10000


In [11]:
# Plot cumulative regret per impression. `strategy` here is the loop variable
# left over from the simulation loop, i.e. the last strategy that was run.
exploration_data[strategy]['results']['total_regret'].iplot(title=strategy, xTitle='Impressions', yTitle='Total Regret', color='black')