<a href="https://colab.research.google.com/github/AnshuMishra01/Contextual-Ads/blob/main/ContextualBandit(Ads).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from scipy import stats
import plotly.offline
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import cufflinks as cf
# One-time cufflinks configuration for the plotting cells of this notebook.
# NOTE(review): presumably go_offline() makes cufflinks/plotly render figures
# locally instead of through the hosted service -- confirm against the
# cufflinks documentation.
cf.go_offline()
# Default figure settings: white theme, figures flagged as world readable.
cf.set_config_file(world_readable=True, theme='white')

In [4]:
class UserGenerator(object):
    """Simulated environment: random users and their true click behavior.

    Each ad has a fixed coefficient vector in ``self.beta``. The probability
    that the current user clicks an ad is the logistic function of the dot
    product between that vector and the user's context
    ``[1, device, location, age]``.
    """

    def __init__(self):
        # True coefficients per ad, ordered like the context vector:
        # [intercept, device, location, age].
        self.beta = {}
        self.beta['A'] = np.array([-4, -0.1, -3, 0.1])
        self.beta['B'] = np.array([-6, -0.1, 1, 0.1])
        self.beta['C'] = np.array([2, 0.1, 1, -0.1])
        self.beta['D'] = np.array([4, 0.1, -3, -0.2])
        self.beta['E'] = np.array([-0.1, 0, 0.5, -0.01])
        # Context of the most recently generated user; stays None until
        # generate_user_with_context() has been called.
        self.context = None

    def logistic(self, beta, context):
        """Return the click probability 1 / (1 + exp(-beta . context))."""
        f = np.dot(beta, context)
        p = 1 / (1 + np.exp(-f))
        return p

    def display_ad(self, ad):
        """Show ``ad`` to the current user and return the click (0 or 1).

        The click is a Bernoulli draw with the ad's true click probability
        for ``self.context``.

        Raises:
            Exception: if ``ad`` is not a key of ``self.beta``.
            RuntimeError: if no user has been generated yet.
        """
        # Check against the coefficient table itself, so the set of valid ads
        # cannot drift away from the ads that actually have coefficients.
        if ad not in self.beta:
            raise Exception('Unknown ad!')
        if self.context is None:
            raise RuntimeError('No user context available; call '
                               'generate_user_with_context() first.')
        p = self.logistic(self.beta[ad], self.context)
        reward = np.random.binomial(n=1, p=p)
        return reward

    def generate_user_with_context(self):
        """Draw a random user, store its context on the instance, return it."""
        # 0: International, 1: U.S.
        location = np.random.binomial(n=1, p=0.6)
        # 0: Desktop, 1: Mobile
        device = np.random.binomial(n=1, p=0.8)
        # Integer age from 10 to 69: a Beta(2, 3) draw (mean 0.4) scaled by 60
        # and truncated, so the average age is roughly 34.
        age = 10 + int(np.random.beta(2, 3) * 60)
        # Prepend 1 to the context for the intercept
        self.context = [1, device, location, age]
        return self.context

In [5]:
def get_scatter(x, y, name, showlegend):
    """Build the blue line trace for one ad.

    ``name`` must be one of 'A'..'E'; it selects the dash pattern and is also
    used as the trace name and legend group.
    """
    dash_by_ad = dict(A='solid',
                      B='dot',
                      C='dash',
                      D='dashdot',
                      E='longdash')
    line_style = {'color': 'blue', 'dash': dash_by_ad[name]}
    return go.Scatter(x=x,
                      y=y,
                      name=name,
                      legendgroup=name,
                      showlegend=showlegend,
                      line=line_style)

def visualize_bandits(ug):
    """Plot each ad's true click probability against age.

    Draws a 2x2 grid: rows are the device value (0, 1), columns the location
    value (0, 1). Every panel holds one line per ad, computed with
    ``ug.logistic`` and ``ug.beta``.
    """
    panel_titles = ("Desktop, International",
                    "Desktop, U.S.",
                    "Mobile, International",
                    "Mobile, U.S.")
    fig = make_subplots(rows=2, cols=2, subplot_titles=panel_titles)
    ages = np.linspace(10, 70)
    for device in (0, 1):
        for loc in (0, 1):
            # Only the first panel contributes legend entries.
            in_first_panel = device == 0 and loc == 0
            for ad in 'ABCDE':
                probs = []
                for age in ages:
                    probs.append(ug.logistic(ug.beta[ad],
                                             [1, device, loc, age]))
                trace = get_scatter(ages, probs, ad, in_first_panel)
                fig.add_trace(trace, row=device + 1, col=loc + 1)
    fig.update_layout(template="presentation")
    fig.show()

In [6]:
# Create the simulated user population and plot every ad's true click
# probability versus age for each device/location combination.
ug = UserGenerator()
visualize_bandits(ug)

## Regularized Logistic Regression

In [7]:
class RegularizedLR(object):
    """Online regularized logistic regression with a diagonal Gaussian belief.

    The belief over the weights has mean ``m`` and per-weight precision ``q``.
    ``fit`` refines both from a batch of observations, using the current
    belief as the regularizer. The remaining methods turn the belief into
    greedy, sampled (Thompson) or upper-confidence-bound click estimates.

    Args:
        name: label of the model (e.g. the ad it belongs to).
        alpha: exploration scale applied to the sampling standard deviation
            and to the UCB confidence term.
        rlambda: initial precision of every weight.
        n_dim: number of weights (length of a context vector).
        bounds: optional list of ``(low, high)`` pairs, one per weight, used
            by the optimizer. Defaults to ``(-10, 10)`` for every weight
            except the last, which gets ``(-1, 1)``.
    """

    def __init__(self, name, alpha, rlambda, n_dim, bounds=None):
        self.name = name
        self.alpha = alpha
        self.rlambda = rlambda
        self.n_dim = n_dim
        self.m = np.zeros(n_dim)
        self.q = np.ones(n_dim) * rlambda
        if bounds is None:
            # Derive the default from n_dim so that models with a different
            # number of features can still be fitted.
            bounds = [(-10, 10)] * (n_dim - 1) + [(-1, 1)]
        self.bounds = list(bounds)
        self.w = self.get_sampled_weights()

    def get_sampled_weights(self):
        """Draw one weight vector from N(m, (alpha * q**-0.5)**2)."""
        w = np.random.normal(self.m, self.alpha * self.q**(-1/2))
        return w

    def loss(self, w, *args):
        """Return the regularized negative log-likelihood of ``w``.

        ``args`` is ``(X, y)``: one context per row of ``X`` and the 0/1
        outcomes ``y``. The regularizer pulls ``w`` towards ``m`` with
        per-weight strength ``q``.
        """
        X, y = args
        w = np.asarray(w, dtype=float)
        # reshape keeps an empty batch two-dimensional so the product works.
        X = np.asarray(X, dtype=float).reshape(-1, w.shape[0])
        y = np.asarray(y, dtype=float)
        regularizer = 0.5 * np.dot(self.q, (w - self.m)**2)
        z = X @ w
        # logaddexp(0, z) == log(1 + exp(z)) but does not overflow for large z.
        pred_loss = np.sum(np.logaddexp(0, z) - y * z)
        return regularizer + pred_loss

    def fit(self, X, y):
        """Update ``m`` and ``q`` from a batch; an empty batch is a no-op."""
        # len() works for lists and numpy arrays alike (``if y:`` raises on
        # arrays with more than one element).
        if len(y) == 0:
            return
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        minimization = minimize(self.loss,
                                self.w,
                                args=(X, y),
                                method="L-BFGS-B",
                                bounds=self.bounds,
                                options={'maxiter': 50})
        self.w = minimization.x
        self.m = self.w
        # Precision update: add sum_j x_ij^2 * p_j * (1 - p_j) per weight i.
        p = (1 + np.exp(-np.matmul(self.w, X.T)))**(-1)
        self.q = self.q + np.matmul(p * (1 - p), X**2)

    def calc_sigmoid(self, w, context):
        """Return sigmoid(w . context)."""
        return 1 / (1 + np.exp(-np.dot(w, context)))

    def get_prediction(self, context):
        """Return the click probability under the mean weights ``m``."""
        return self.calc_sigmoid(self.m, context)

    def sample_prediction(self, context):
        """Return the click probability under freshly sampled weights."""
        w = self.get_sampled_weights()
        return self.calc_sigmoid(w, context)

    def get_ucb(self, context):
        """Return the mean prediction plus ``alpha * sqrt(sum(x_i^2 / q_i))``."""
        pred = self.calc_sigmoid(self.m, context)
        confidence = self.alpha * np.sqrt(np.sum(np.divide(np.array(context)**2, self.q)))
        ucb = pred + confidence
        return ucb

In [8]:
def calculate_regret(ug, context, ad_options, ad):
    """Return ``(regret, best_action)`` for showing ``ad`` in ``context``.

    Regret is the gap between the highest true click probability among
    ``ad_options`` and the true click probability of ``ad``. Ties for the
    best action go to the option listed first.
    """
    true_probs = {}
    for option in ad_options:
        true_probs[option] = ug.logistic(ug.beta[option], context)
    best_action = max(true_probs, key=lambda option: true_probs[option])
    return true_probs[best_action] - true_probs[ad], best_action

In [9]:
def select_ad_eps_greedy(ad_models, context, eps):
    """Pick an ad epsilon-greedily.

    With probability ``eps`` a uniformly random ad is returned; otherwise the
    ad with the highest mean prediction, ties broken at random.
    """
    explore = np.random.uniform() < eps
    if explore:
        return np.random.choice(list(ad_models.keys()))

    predictions = {}
    for ad in ad_models:
        predictions[ad] = ad_models[ad].get_prediction(context)
    top_value = max(predictions.values())
    top_ads = [ad for ad in predictions if predictions[ad] == top_value]
    return np.random.choice(top_ads)

In [10]:
def select_ad_ucb(ad_models, context):
    """Return the ad with the largest upper confidence bound.

    Ties are broken uniformly at random.
    """
    ucbs = {}
    for ad in ad_models:
        ucbs[ad] = ad_models[ad].get_ucb(context)
    highest = max(ucbs.values())
    contenders = [ad for ad in ucbs if ucbs[ad] == highest]
    return np.random.choice(contenders)

In [11]:
def select_ad_thompson(ad_models, context):
    """Thompson sampling: sample one prediction per ad and return the best.

    Ties are broken uniformly at random.
    """
    samples = {}
    for ad in ad_models:
        samples[ad] = ad_models[ad].sample_prediction(context)
    highest = max(samples.values())
    contenders = [ad for ad in samples if samples[ad] == highest]
    return np.random.choice(contenders)

In [12]:
# Compare the three exploration strategies on the same simulated traffic.
# For each strategy the random seed is reset, fresh per-ad models are built,
# and the cumulative regret is recorded per impression.
ad_options = ['A', 'B', 'C', 'D', 'E']
exploration_data = {}
data_columns = ['context',
                'ad',
                'click',
                'best_action',
                'regret',
                'total_regret']
exploration_strategies = ['eps-greedy',
                          'ucb',
                          'Thompson']
# Hyper-parameters shared by every strategy (hoisted out of the loops).
alpha, rlambda, n_dim = 0.5, 0.5, 4
eps = 0.1
n_impressions = 10**4
update_every = 500
# Start comparisons
for strategy in exploration_strategies:
    print("--- Now using", strategy)
    np.random.seed(0)
    # Create the LR models for each ad (same ad list as used everywhere else)
    ad_models = {ad: RegularizedLR(ad,
                                   alpha,
                                   rlambda,
                                   n_dim)
                 for ad in ad_options}
    # Initialize data structures
    X = {ad: [] for ad in ad_options}
    y = {ad: [] for ad in ad_options}
    results = []
    total_regret = 0
    # Start ad display
    for i in range(n_impressions):
        context = ug.generate_user_with_context()
        if strategy == 'eps-greedy':
            ad = select_ad_eps_greedy(ad_models,
                                      context,
                                      eps)
        elif strategy == 'ucb':
            ad = select_ad_ucb(ad_models, context)
        elif strategy == 'Thompson':
            ad = select_ad_thompson(ad_models, context)
        else:
            # Without this branch a misspelled strategy would silently reuse
            # the ad selected for the previous impression.
            raise ValueError("Unknown exploration strategy: " + str(strategy))
        # Display the selected ad
        click = ug.display_ad(ad)
        # Store the outcome
        X[ad].append(context)
        y[ad].append(click)
        regret, best_action = calculate_regret(ug,
                                               context,
                                               ad_options,
                                               ad)
        total_regret += regret
        results.append((context,
                        ad,
                        click,
                        best_action,
                        regret,
                        total_regret))
        # Update the models with the latest batch of data, then start a new
        # batch: each model's previous fit acts as the prior for the next one.
        if (i + 1) % update_every == 0:
            print("Updating the models at i:", i + 1)
            for ad in ad_options:
                ad_models[ad].fit(X[ad], y[ad])
            X = {ad: [] for ad in ad_options}
            y = {ad: [] for ad in ad_options}

    exploration_data[strategy] = {'models': ad_models,
                                  'results': pd.DataFrame(results,
                                                          columns=data_columns)}

--- Now using eps-greedy
Updating the models at i: 500
Updating the models at i: 1000
Updating the models at i: 1500
Updating the models at i: 2000
Updating the models at i: 2500
Updating the models at i: 3000
Updating the models at i: 3500
Updating the models at i: 4000
Updating the models at i: 4500
Updating the models at i: 5000
Updating the models at i: 5500
Updating the models at i: 6000
Updating the models at i: 6500
Updating the models at i: 7000
Updating the models at i: 7500
Updating the models at i: 8000
Updating the models at i: 8500
Updating the models at i: 9000
Updating the models at i: 9500
Updating the models at i: 10000
--- Now using ucb
Updating the models at i: 500
Updating the models at i: 1000
Updating the models at i: 1500
Updating the models at i: 2000
Updating the models at i: 2500
Updating the models at i: 3000
Updating the models at i: 3500
Updating the models at i: 4000
Updating the models at i: 4500
Updating the models at i: 5000
Updating the models at i: 55

In [13]:
# One column of cumulative regret per exploration strategy, indexed by
# impression number.
regret_by_strategy = {}
for strategy in exploration_strategies:
    strategy_results = exploration_data[strategy]['results']
    regret_by_strategy[strategy] = strategy_results.total_regret
df_regret_comparisons = pd.DataFrame(regret_by_strategy)

regret_plot_options = dict(dash=['solid', 'dash', 'dot'],
                           xTitle='Impressions',
                           yTitle='Total Regret',
                           color='black')
df_regret_comparisons.iplot(**regret_plot_options)

In [14]:
# Plot the learned belief over each coefficient of ad A's model
# (eps-greedy run) as a normal density with mean m and std q**-0.5.
lrmodel = exploration_data['eps-greedy']['models']['A']
mean = lrmodel.m
std_dev = lrmodel.q ** (-1/2)

beta_grid = np.arange(-4, 1, 0.01)
beta_densities = {}
for i in range(lrmodel.n_dim):
    coefficient_belief = stats.norm(loc=mean[i], scale=std_dev[i])
    beta_densities['beta_' + str(i)] = coefficient_belief.pdf(beta_grid)
df_beta_dist = pd.DataFrame(beta_densities, index=beta_grid)

df_beta_dist.iplot(dash=['dashdot', 'dot', 'dash', 'solid'],
                   yTitle='p.d.f.',
                   color='black')

In [15]:
# Print the learned mean coefficients of every ad model, per strategy,
# for comparison with the true coefficients in UserGenerator.
for strategy in exploration_strategies:
    print(strategy)
    strategy_models = exploration_data[strategy]['models']
    for ad_name in 'ABCDE':
        print(strategy_models[ad_name].m)

eps-greedy
[-3.44227326 -0.0039223  -2.75325418  0.08734624]
[-4.43350246 -0.45196664  0.98930135  0.0752194 ]
[ 2.30450775 -0.04629187  0.64741616 -0.09750206]
[ 1.94368059  1.2793214  -1.2084242  -0.18089474]
[ 0.10932254  0.20920869  0.71327797 -0.02671893]
ucb
[-1.97999499 -0.2029213  -2.53660429  0.05457882]
[-3.21384497 -0.17566291  0.71902497  0.05149285]
[ 0.59388961  1.14819653  0.94063468 -0.08505643]
[ 1.68914104  0.09824264 -1.97870229 -0.08847244]
[ 0.33126419 -0.06551462  0.65471319 -0.02436713]
Thompson
[-3.19263779  0.00539657 -2.23835883  0.08077323]
[-2.56364341 -0.03226467  0.53747484  0.0419582 ]
[ 0.56716885  0.5373284   1.27792063 -0.07215639]
[ 2.52612988  0.44007939 -1.75879694 -0.13799652]
[-0.26701669  0.21947116  0.35699892 -0.00895418]


## NN with Bayesian Approximation

In [16]:
from collections import namedtuple
from numpy.random import uniform as U
import pandas as pd
import numpy as np
import io
import requests
from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout

In [17]:
# Download the UCI "adult" census file and load the columns used as context.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
# Bound the wait and fail loudly on an HTTP error; otherwise an error page
# would be handed to the CSV parser as if it were data.
response = requests.get(url, timeout=30)
response.raise_for_status()
s = response.content
# The file has no header row; these are the column names in file order.
names = ['age',
         'workclass',
         'fnlwgt',
         'education',
         'education_num',
         'marital_status',
         'occupation',
         'relationship',
         'race',
         'gender',
         'capital_gain',
         'capital_loss',
         'hours_per_week',
         'native_country',
         'income']
# Subset of columns kept for the experiment.
usecols = ['age',
           'workclass',
           'education',
           'marital_status',
           'occupation',
           'relationship',
           'race',
           'gender',
           'hours_per_week',
           'native_country',
           'income']
df_census = pd.read_csv(io.StringIO(s.decode('utf-8')),
                        sep=',',
                        skipinitialspace=True,
                        names=names,
                        header=None,
                        usecols=usecols)

In [18]:
# Cleanup
df_census = df_census.replace('?', np.nan).dropna()
edu_map = {'Preschool': 'Elementary',
           '1st-4th': 'Elementary',
           '5th-6th': 'Elementary',
           '7th-8th': 'Elementary',
           '9th': 'Middle',
           '10th': 'Middle',
           '11th': 'Middle',
           '12th': 'Middle',
           'Some-college': 'Undergraduate',
           'Bachelors': 'Undergraduate',
           'Assoc-acdm': 'Undergraduate',
           'Assoc-voc': 'Undergraduate',
           'Prof-school': 'Graduate',
           'Masters': 'Graduate',
           'Doctorate': 'Graduate'}
for from_level, to_level in edu_map.items():
    df_census.education.replace(from_level, to_level, inplace=True)

In [19]:
# Convert raw data to processed data
context_cols = [c for c in usecols if c != 'education']
df_data = pd.concat([pd.get_dummies(df_census[context_cols]),
           df_census['education']], axis=1)

In [20]:
def get_ad_inventory():
    """Return a random, non-empty list of the ads available right now.

    Each education-targeted ad is included independently with its own
    availability probability; the draw is repeated until at least one ad
    is available.
    """
    ad_inv_prob = {'Elementary': 0.9,
                   'Middle':  0.7,
                   'HS-grad':  0.7,
                   'Undergraduate':  0.9,
                   'Graduate':  0.8}
    while True:
        ad_inventory = [level
                        for level, prob in ad_inv_prob.items()
                        if U() < prob]
        # An empty inventory is discarded and drawn again.
        if ad_inventory:
            return ad_inventory

In [21]:
def get_ad_click_probs():
    """Return the true click probabilities as a nested dict.

    The probability for a pair of education levels is 0.8 when they match and
    drops by 0.3 for every level of distance between them, floored at 0.
    """
    base_prob = 0.8
    delta = 0.3
    ed_levels = {'Elementary': 1,
                 'Middle':  2,
                 'HS-grad':  3,
                 'Undergraduate':  4,
                 'Graduate':  5}
    ad_click_probs = {}
    for l1, rank1 in ed_levels.items():
        row = {}
        for l2, rank2 in ed_levels.items():
            row[l2] = max(0, base_prob - delta * abs(rank1 - rank2))
        ad_click_probs[l1] = row
    return ad_click_probs

In [22]:
def display_ad(ad_click_probs, user, ad):
    """Simulate showing ``ad`` to ``user``; return 1 for a click, else 0.

    The click probability is looked up by the ad and the user's
    'education' value.
    """
    probs_for_ad = ad_click_probs[ad]
    prob = probs_for_ad[user['education']]
    clicked = U() < prob
    return 1 if clicked else 0

In [23]:
def calc_regret(user, ad_inventory, ad_click_probs, ad_selected):
    """Return the regret of showing ``ad_selected`` to ``user``.

    Regret is the best click probability available in ``ad_inventory`` minus
    the click probability of the selected ad. A selected ad that is not in
    the inventory counts as probability 0.
    """
    inventory_probs = [ad_click_probs[ad][user['education']]
                       for ad in ad_inventory]
    selected_probs = [p for ad, p in zip(ad_inventory, inventory_probs)
                      if ad == ad_selected]
    this_p = selected_probs[-1] if selected_probs else 0
    # The leading 0 mirrors the floor used when nothing beats zero.
    max_p = max([0] + inventory_probs)
    return max_p - this_p

In [24]:
def get_model(n_input, dropout):
    """Build and compile the click-probability network.

    Two 256-unit ReLU layers followed by a single sigmoid output. When
    ``dropout`` is positive, a dropout layer follows each hidden layer and is
    called with ``training=True``, so it stays active at prediction time.
    """
    inputs = keras.Input(shape=(n_input,))
    hidden = inputs
    for _ in range(2):
        hidden = Dense(256, activation='relu')(hidden)
        if dropout > 0:
            hidden = Dropout(dropout)(hidden, training=True)
    phat = Dense(1, activation='sigmoid')(hidden)

    model = keras.Model(inputs, phat)
    model.compile(optimizer=keras.optimizers.Adam(),
                  loss=keras.losses.BinaryCrossentropy(),
                  metrics=[keras.metrics.binary_accuracy])
    return model

In [25]:
def update_model(model, X, y):
    """Train ``model`` for 10 epochs on the collected batch and return it.

    ``X`` is a sequence of arrays whose stacked shape is indexed as
    (samples, _, features); the middle axis is dropped before fitting.
    ``y`` is flattened to one label per sample.
    """
    stacked = np.array(X)
    features = stacked.reshape((stacked.shape[0], stacked.shape[2]))
    labels = np.array(y).reshape(-1)
    model.fit(features, labels, epochs=10)
    return model

In [26]:
def ad_to_one_hot(ad):
    """Return the one-hot list encoding of an ad's target education level.

    An ad that is not one of the five known levels encodes as all zeros.
    """
    ed_levels = ('Elementary',
                 'Middle',
                 'HS-grad',
                 'Undergraduate',
                 'Graduate')
    return [1 if level == ad else 0 for level in ed_levels]

In [27]:
def select_ad(model, context, ad_inventory):
    """Pick the ad with the highest predicted click probability.

    Args:
        model: fitted model whose ``predict`` returns one value per input row.
        context: list with the user's context features.
        ad_inventory: ads currently available.

    Returns:
        ``(selected_ad, selected_x)`` where ``selected_x`` is the
        ``(1, n_features)`` input row (context followed by the ad's one-hot
        code) that was scored. Both are ``None`` when the inventory is empty.
        When several ads share the highest prediction, the last one in
        ``ad_inventory`` is returned.
    """
    ad_inventory = list(ad_inventory)
    if not ad_inventory:
        return None, None
    # Score every candidate in a single predict call instead of one call per
    # ad; verbose=0 keeps the per-call progress output out of the notebook.
    candidates = np.array([context + ad_to_one_hot(ad)
                           for ad in ad_inventory])
    action_vals = np.asarray(model.predict(candidates, verbose=0)).reshape(-1)
    selected_ad = None
    selected_x = None
    # Start below any possible prediction rather than relying on the model
    # output being non-negative.
    max_action_val = -np.inf
    for idx, ad in enumerate(ad_inventory):
        if action_vals[idx] >= max_action_val:
            selected_ad = ad
            # Slice (not index) so the row keeps its (1, n_features) shape.
            selected_x = candidates[idx:idx + 1]
            max_action_val = action_vals[idx]
    return selected_ad, selected_x

In [28]:
def generate_user(df_data):
    """Sample one row of ``df_data`` as a user.

    Returns the row as a dict together with its context: the values of every
    column except the last, as a list.
    """
    sampled_row = df_data.sample(1)
    feature_part = sampled_row.iloc[:, :-1]
    context = feature_part.values.tolist()[0]
    user_record = sampled_row.to_dict(orient='records')[0]
    return user_record, context

In [29]:
# Run the neural contextual bandit once per dropout level and record the
# cumulative regret of each run as a column of df_cbandits.
ad_click_probs = get_ad_click_probs()
df_cbandits = pd.DataFrame()
#dropout_levels = [0, 0.01, 0.05, 0.1, 0.2, 0.4]
dropout_levels = [0, 0.01]
for d in dropout_levels:
    print("Trying with dropout:", d)
    np.random.seed(0)
    # Model input = context features (all columns but 'education') followed
    # by the one-hot code of the candidate ad.
    context_n = df_data.shape[1] - 1
    ad_input_n = df_data.education.nunique()
    # Use the dropout level under test; a hard-coded rate here would make
    # every run train the same architecture.
    model = get_model(context_n + ad_input_n, d)
    X = []
    y = []
    regret_vec = []
    total_regret = 0
    for i in range(5000):
        if i % 20 == 0:
            print("# of impressions:", i)
        user, context = generate_user(df_data)
        ad_inventory = get_ad_inventory()
        ad, x = select_ad(model, context, ad_inventory)
        click = display_ad(ad_click_probs, user, ad)
        regret = calc_regret(user, ad_inventory, ad_click_probs, ad)
        total_regret += regret
        regret_vec.append(total_regret)
        X.append(x)
        y.append(click)
        # Train on the impressions gathered since the last update, then
        # start a fresh batch.
        if (i + 1) % 500 == 0:
            print('Updating the model at', i+1)
            model = update_model(model, X, y)
            X = []
            y = []

    df_cbandits['dropout: '+str(d)] = regret_vec

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
# of impressions: 3800
# of impressions: 3820
# of impressions: 3840
# of impressions: 3860
# of impressions: 3880
# of impressions: 3900
# of impressions: 3920
# of impressions: 3940
# of impressions: 3960
# of impressions: 3980
Updating the model at 4000
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
# of impressions: 4000
# of impressions: 4020
# of impressions: 4040
# of impressions: 4060
# of impressions: 4080
# of impressions: 4100
# of impressions: 4120
# of impressions: 4140
# of impressions: 4160
# of impressions: 4180
# of impressions: 4200
# of impressions: 4220
# of impressions: 4240
# of impressions: 4260
# of impressions: 4280
# of impressions: 4300
# of impressions: 4320
# of impressions: 4340
# of impressions: 4360
# of impressions: 4380
# of impressions: 4400
# of impressions: 4420
# of impressions: 4440
# of impressions: 4460
# of impression

In [31]:
# Cumulative regret per dropout level over the impressions served.
cbandit_plot_options = dict(dash=['dash', 'solid'],
                            xTitle='Impressions',
                            yTitle='Cumulative Regret')
df_cbandits.iplot(**cbandit_plot_options)