## Demo for Deep and Wide Bandits

In [None]:
import torch
import torch.nn as nn
import numpy as np
from random import random
from IPython.display import clear_output

In [None]:
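## Colab only: uncomment the two lines below to mount Google Drive; the import cell
## that follows reads its modules from a path under /content/drive.
#from google.colab import drive
#drive.mount('/content/drive')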

In [None]:
import sys

## Directory holding wide_deep_bandits.py and toy_problem_wu.py. This is a Google
## Drive location as mounted by Colab; edit `path` when running elsewhere.
path = '/content/drive/MyDrive/Fellowship_Deep_and_Wide_Bandit/Wide_and_Deep_Models/'

## Register the directory only once, so re-running this cell does not keep
## appending duplicate entries to sys.path.
if path not in sys.path:
    sys.path.append(path)

from toy_problem_wu import generate_dataframe
from wide_deep_bandits import Wide_Deep_Bandits

# Toy problem
The space-bandits `toy_problem` divides users/customers into two groups:

Group 1: age ~25, ARPU ~100, user_id range 0 - 19, best action = 0

Group 2: age ~45, ARPU ~50, user_id range 20 - 39, best action = 2


In [None]:
## Get training data from toy_problem
n_train_sample = 10000
df = generate_dataframe(n_train_sample)
context_cols = ['age','ARPU']
action_col = ['action']
reward_col = ['reward']
user_id_col = ['user_id']
df.head()

Unnamed: 0,age,ARPU,action,reward,user_id
0,31.0,100.265699,1,0,10
1,37.0,18.657117,2,0,23
2,29.0,100.990283,2,0,16
3,23.0,93.845837,0,10,17
4,57.0,44.111938,1,0,21


In [None]:
## Derive the model dimensions from the training frame.
## Selecting with a one-element column list gives a DataFrame, so .nunique()
## returns a Series labelled by column name. Read its single entry by position
## with .iloc[0]; plain [0] on a label-indexed Series relies on pandas'
## deprecated positional fallback.
num_actions = df[action_col].nunique().iloc[0]
num_features = len(context_cols)
num_users = df[user_id_col].nunique().iloc[0]
print("Number of actions:", num_actions)
print("Number of features:", num_features)
print("Number of users:", num_users)

Number of actions: 3
Number of features: 2
Number of users: 40


# Train models

In [None]:
%%time

### Train models 
demo_model_wide = Wide_Deep_Bandits(num_actions, num_features, wide_embed_size=num_users, model_type='wide', num_epochs=10) ## Wide only model
demo_model_deep = Wide_Deep_Bandits(num_actions, num_features, wide_embed_size=num_users, model_type='deep', num_epochs=10) ## Deep only model
demo_model_wide_deep = Wide_Deep_Bandits(num_actions, num_features, wide_embed_size=num_users, model_type='wide_deep', num_epochs=10) ## Wide + Deep model

test_context = df[context_cols].values
test_action = df[action_col].values[:,0]
test_reward = df[reward_col].values[:,0]
test_user_id = df[user_id_col].values[:,0]

## Use the 'fit' function to fit a batch of data. 
## Another option is to use the 'update' function to add one data point at a time. 
demo_model_wide.fit(test_user_id, test_context,test_action,test_reward)
demo_model_deep.fit(test_user_id, test_context,test_action,test_reward)
demo_model_wide_deep.fit(test_user_id, test_context,test_action,test_reward)

clear_output()

CPU times: user 1.18 s, sys: 4.95 ms, total: 1.19 s
Wall time: 1.19 s


## Predict Best Action

In [None]:
## A single query point: user 10 with context [age, ARPU] = [25, 100].
user_id = torch.tensor(10)
context = torch.tensor([25.0, 100.0])

## 'action' returns the best action the model predicts for one data point;
## use 'predict' instead to score a batch of data.
## NOTE(review): the recorded output ends with a `torch.tensor(context)` line that
## looks like a warning raised inside the model when it is handed a tensor --
## confirm against wide_deep_bandits.py.
action_wide, action_deep, action_wide_deep = (
    model.action(user_id, context)
    for model in (demo_model_wide, demo_model_deep, demo_model_wide_deep)
)

for message, chosen in (
    ("Wide model predicts action: ", action_wide),
    ("Deep model predicts action: ", action_deep),
    ("Wide + Deep model predicts action: ", action_wide_deep),
):
    print(message, chosen)

Wide model predicts action:  0
Deep model predicts action:  0
Wide + Deep model predicts action:  0


  context = torch.tensor(context).float().to(device)


## Get Expected Rewards for Each Action

In [None]:
## Score every action for the single (user_id, context) pair defined in the
## previous cell; multiple_rows=False marks the input as one data point.
## This is inference only, so run it under no_grad: autograd then records no
## graph and the returned tensors carry no grad_fn.
with torch.no_grad():
    expected_values_wide = demo_model_wide.expected_values(user_id, context, multiple_rows=False)
    expected_values_deep = demo_model_deep.expected_values(user_id, context, multiple_rows=False)
    expected_values_wide_deep = demo_model_wide_deep.expected_values(user_id, context, multiple_rows=False)

print("Wide model expected values: ", expected_values_wide)
print("Deep model expected values: ", expected_values_deep)
print("Wide + Deep model expected values: ", expected_values_wide_deep)

Wide model expected values:  tensor([ 8.5194,  2.6094, -0.5079], grad_fn=<SqueezeBackward1>)
Deep model expected values:  tensor([9.4291, 0.8213, 5.9998], grad_fn=<SqueezeBackward1>)
Wide + Deep model expected values:  tensor([8.4731, 1.7266, 5.3449], grad_fn=<SqueezeBackward1>)


  context = torch.tensor(context).float().to(device)
