## Demo for Deep and Wide Bandits

In [11]:
import torch
import torch.nn as nn
import numpy as np
from random import random
from IPython.display import clear_output

In [12]:
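## Uncomment when running on Google Colab: the module path used below lives under /content/drive,
## which only exists after Google Drive has been mounted.
#from google.colab import drive
#drive.mount('/content/drive')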

In [13]:
import sys

## Directory that holds wide_deep_bandits.py and toy_problem_wu.py
## (a Google Drive location as seen from Colab -- adjust when running elsewhere)
path = '/content/drive/MyDrive/Fellowship_Deep_and_Wide_Bandit/Wide_and_Deep_Models/'

## Guard the append so re-running this cell does not stack duplicate entries on sys.path
if path not in sys.path:
  sys.path.append(path)

#from contextual_dataset_wu import ContextualDataset
from toy_problem_wu import generate_dataframe
from wide_deep_bandits import Wide_Deep_Bandits

# Toy problem
The space-bandits `toy_problem` divides users/customers into two groups, each with its own best action:

Group 1: age ~25, ARPU ~100, user_id range 0 - 19, best action = 0

Group 2: age ~45, ARPU ~50, user_id range 20 - 39, best action = 2


In [14]:
## Column groups used to slice the toy-problem frame.
## Each one is a list, so df[<group>] yields a DataFrame rather than a Series.
context_cols = ['age', 'ARPU']
action_col = ['action']
reward_col = ['reward']
user_id_col = ['user_id']

## Get training data from toy_problem
n_train_sample = 1000
df = generate_dataframe(n_train_sample)

## Preview the first rows
df.head()

Unnamed: 0,age,ARPU,action,reward,user_id
0,25.0,100.524144,0,10,2
1,25.0,127.456575,0,10,10
2,21.0,80.620042,0,10,14
3,28.0,102.703556,2,0,13
4,47.0,21.652255,1,0,20


In [15]:
## Problem dimensions derived from the training frame.
## action_col / user_id_col are one-element lists, so df[...] is a DataFrame and
## .nunique() returns a Series labelled by column name. Read its single entry by
## position with .iloc[0]; a bare [0] relies on the deprecated positional fallback
## of Series.__getitem__ and emits a FutureWarning on recent pandas.
num_actions = df[action_col].nunique().iloc[0]
num_features = len(context_cols)
num_users = df[user_id_col].nunique().iloc[0]
print("Number of actions:", num_actions)
print("Number of features:", num_features)
print("Number of users:", num_users)

Number of actions: 3
Number of features: 2
Number of users: 40


# Train models

In [16]:
%%time

### Train models 
demo_model_wide = Wide_Deep_Bandits(num_actions, num_features, wide_embed_size=num_users, model_type='wide') ## Wide only model
demo_model_deep = Wide_Deep_Bandits(num_actions, num_features, wide_embed_size=num_users, model_type='deep') ## Deep only model
demo_model_wide_deep = Wide_Deep_Bandits(num_actions, num_features, wide_embed_size=num_users, model_type='wide_deep') ## Wide + Deep model

for i in range(n_train_sample):
  test_context = df[context_cols].iloc[i].values
  test_action = df[action_col].iloc[i].values
  test_reward = df[reward_col].iloc[i].values
  test_user_id = df[user_id_col].iloc[i].values

  demo_model_wide.update(torch.tensor(df[user_id_col].iloc[i].values), test_context,test_action,test_reward)
  demo_model_deep.update(torch.tensor(df[user_id_col].iloc[i].values), test_context,test_action,test_reward)
  demo_model_wide_deep.update(torch.tensor(df[user_id_col].iloc[i].values), test_context,test_action,test_reward)

clear_output()

CPU times: user 1min 16s, sys: 58.7 s, total: 2min 14s
Wall time: 1min 8s


## Predict Best Action

In [17]:
## Query point: user 10 with age 25 and ARPU 100
## (matches the Group 1 profile of the toy problem, whose best action is 0)
user_id = torch.tensor([10])
context = torch.tensor([25.0, 100.0])

## Possible methods
## BLR - Use expected values of Bayesian Linear Regression
## BLR_TS - Use Bayesian Linear Regression + Thompson sampling
## forward - Use forward pass of neural networks
method = 'forward'

## Ask each trained model (wide, deep, wide + deep -- in that order) for its action
action_wide, action_deep, action_wide_deep = (
  bandit.action(user_id, context, method=method)
  for bandit in (demo_model_wide, demo_model_deep, demo_model_wide_deep)
)

In [18]:
## Report the action chosen by each model, one line per model
for line in (
  ("Wide model predicts action: ", action_wide),
  ("Deep model predicts action: ", action_deep),
  ("Wide + Deep model predicts action: ", action_wide_deep),
):
  print(*line)

Wide model predicts action:  0
Deep model predicts action:  0
Wide + Deep model predicts action:  0


## Get Expected Rewards for Each Action

In [None]:
## Possible methods
## BLR - Expected values of Bayesian Linear Regression
## forward - Forward pass of neural networks
method = 'BLR'

## Expected reward per action from each trained model (wide, deep, wide + deep -- in that order),
## evaluated for the user_id / context defined in the prediction cell
expected_values_wide, expected_values_deep, expected_values_wide_deep = (
  bandit.expected_values(user_id, context, method=method)
  for bandit in (demo_model_wide, demo_model_deep, demo_model_wide_deep)
)

In [22]:
## Report the per-action expected rewards of each model, one line per model
for line in (
  ("Wide model expected values: ", expected_values_wide),
  ("Deep model expected values: ", expected_values_deep),
  ("Wide + Deep model expected values: ", expected_values_wide_deep),
):
  print(*line)

Wide model expected values:  [ 8.84334732  1.46878973 -0.20481975]
Deep model expected values:  [12.26969536  4.71722552  1.69028515]
Wide + Deep model expected values:  [  8.11236442  11.44496876 -51.70120539]
