In [None]:
!pip install space_bandits

In [13]:
import pandas as pd
import numpy as np

In [14]:
# Load our data
transactions = pd.read_csv('data/transactions.csv')
action_reward = pd.read_csv('data/actions_reward_transaction_df.csv')
predictions_for_transactions = pd.read_csv('data/predictions_for_transactions.csv')

In [15]:
transactions.dates = pd.to_datetime(transactions.dates)
predictions_for_transactions.dates = pd.to_datetime(predictions_for_transactions.dates) 
action_reward.dates = pd.to_datetime(action_reward.dates)

In [16]:
action_reward

Unnamed: 0,tr_id,dates,action,reward
0,1003923,2019-01-01,2,0.000
1,1018719,2019-01-01,3,96246.080
2,1028713,2019-01-01,3,98328.150
3,1030155,2019-01-01,3,96290.195
4,1047479,2019-01-01,2,0.000
...,...,...,...,...
9995,99977644,2019-12-30,3,97569.675
9996,99979376,2019-12-30,3,98179.700
9997,99979915,2019-12-30,1,98274.760
9998,99981943,2019-12-30,1,96469.850


In [17]:
transactions

Unnamed: 0,tr_id,user_id,dates,sale_price
0,1018719,27185,2019-01-01,96246.080
1,1028713,42513,2019-01-01,98328.150
2,1030155,33953,2019-01-01,96290.195
3,1055428,28434,2019-01-01,97184.400
4,1058208,15628,2019-01-01,96948.420
...,...,...,...,...
6325,99949208,19090,2019-12-30,99624.545
6326,99977644,45377,2019-12-30,97569.675
6327,99979376,75419,2019-12-30,98179.700
6328,99979915,13167,2019-12-30,98274.760


In [18]:
predictions_for_transactions

Unnamed: 0,tr_id,dates,model_1,model_2,model_3
0,1003923,2019-01-01,98073.000,96380.000,96656.000
1,1018719,2019-01-01,95755.080,95924.080,96246.080
2,1028713,2019-01-01,96318.150,96729.150,98328.150
3,1030155,2019-01-01,96636.195,98557.195,96290.195
4,1047479,2019-01-01,96149.280,96850.280,96170.280
...,...,...,...,...,...
9995,99977644,2019-12-30,98539.675,96468.675,97569.675
9996,99979376,2019-12-30,97875.700,97139.700,98179.700
9997,99979915,2019-12-30,98274.760,98599.760,97072.760
9998,99981943,2019-12-30,96469.850,98086.850,99745.850


In [19]:
# Let's build the 1st model
from space_bandits import LinearBandits

num_actions = 3 # three actions
num_features = 2 # two features

model = LinearBandits(num_actions, num_features)

In [21]:
action_reward['dates_int'] = action_reward.dates.astype(np.int64)

In [22]:
action_reward

Unnamed: 0,tr_id,dates,action,reward,dates_int
0,1003923,2019-01-01,2,0.000,1546300800000000000
1,1018719,2019-01-01,3,96246.080,1546300800000000000
2,1028713,2019-01-01,3,98328.150,1546300800000000000
3,1030155,2019-01-01,3,96290.195,1546300800000000000
4,1047479,2019-01-01,2,0.000,1546300800000000000
...,...,...,...,...,...
9995,99977644,2019-12-30,3,97569.675,1577664000000000000
9996,99979376,2019-12-30,3,98179.700,1577664000000000000
9997,99979915,2019-12-30,1,98274.760,1577664000000000000
9998,99981943,2019-12-30,1,96469.850,1577664000000000000


In [34]:
next(action_reward.iterrows())

(0,
 tr_id                    1003923
 dates        2019-01-01 00:00:00
 action                         2
 reward                       0.0
 dates_int    1546300800000000000
 Name: 0, dtype: object)

In [35]:
# The model is trained through the update() function
for index, row in action_reward.iterrows():
    context = row[['dates_int', 'tr_id']]
    action = row['action'] - 1
    reward = row['reward']
    model.update(context, action, reward)

In [36]:
# Get a recommendation which action to choose depending on the context use
pred_context = action_reward.iloc[-2][['dates_int', 'tr_id']].values
model.action(pred_context)

  multivariates = [np.random.multivariate_normal(mus[j], covs[j]) for j in range(n_rows)]


0

In [37]:
# Save the model
model.save('model_1.pkl')

In [38]:
# Let's build the 2nd model
# Add time, year/week and user and option data to the context
full_action_reward = action_reward.merge(predictions_for_transactions, how='left', on=['tr_id', 'dates'])
full_action_reward = full_action_reward.merge(transactions, how='left', on=['tr_id', 'dates'])

In [39]:
full_action_reward

Unnamed: 0,tr_id,dates,action,reward,dates_int,model_1,model_2,model_3,user_id,sale_price
0,1003923,2019-01-01,2,0.000,1546300800000000000,98073.000,96380.000,96656.000,,
1,1018719,2019-01-01,3,96246.080,1546300800000000000,95755.080,95924.080,96246.080,27185.0,96246.080
2,1028713,2019-01-01,3,98328.150,1546300800000000000,96318.150,96729.150,98328.150,42513.0,98328.150
3,1030155,2019-01-01,3,96290.195,1546300800000000000,96636.195,98557.195,96290.195,33953.0,96290.195
4,1047479,2019-01-01,2,0.000,1546300800000000000,96149.280,96850.280,96170.280,,
...,...,...,...,...,...,...,...,...,...,...
9995,99977644,2019-12-30,3,97569.675,1577664000000000000,98539.675,96468.675,97569.675,45377.0,97569.675
9996,99979376,2019-12-30,3,98179.700,1577664000000000000,97875.700,97139.700,98179.700,75419.0,98179.700
9997,99979915,2019-12-30,1,98274.760,1577664000000000000,98274.760,98599.760,97072.760,13167.0,98274.760
9998,99981943,2019-12-30,1,96469.850,1577664000000000000,96469.850,98086.850,99745.850,46652.0,96469.850


In [40]:
# Fill in the blanks
full_action_reward.user_id = full_action_reward.user_id.fillna(-1)
full_action_reward.sale_price = full_action_reward.sale_price.fillna(0)

In [41]:
# Add year, week
full_action_reward['week'] = full_action_reward.dates.dt.week
full_action_reward['year'] = full_action_reward.dates.dt.year

  full_action_reward['week'] = full_action_reward.dates.dt.week


In [42]:
# Add information about the last purchase for user
full_action_reward['previous purchase'] = full_action_reward.sort_values('dates').groupby('user_id').sale_price.shift(1)
full_action_reward['previous purchase'] = full_action_reward['previous purchase'].fillna(0)

In [43]:
# Check that we actually added information about the last purchase
full_action_reward[full_action_reward.user_id == 46652]

Unnamed: 0,tr_id,dates,action,reward,dates_int,model_1,model_2,model_3,user_id,sale_price,week,year,previous purchase
6297,63820317,2019-08-15,3,98994.73,1565827200000000000,98085.73,97723.73,98994.73,46652.0,98994.73,33,2019,0.0
9998,99981943,2019-12-30,1,96469.85,1577664000000000000,96469.85,98086.85,99745.85,46652.0,96469.85,1,2019,98994.73


In [44]:
full_action_reward

Unnamed: 0,tr_id,dates,action,reward,dates_int,model_1,model_2,model_3,user_id,sale_price,week,year,previous purchase
0,1003923,2019-01-01,2,0.000,1546300800000000000,98073.000,96380.000,96656.000,-1.0,0.000,1,2019,0.00
1,1018719,2019-01-01,3,96246.080,1546300800000000000,95755.080,95924.080,96246.080,27185.0,96246.080,1,2019,0.00
2,1028713,2019-01-01,3,98328.150,1546300800000000000,96318.150,96729.150,98328.150,42513.0,98328.150,1,2019,0.00
3,1030155,2019-01-01,3,96290.195,1546300800000000000,96636.195,98557.195,96290.195,33953.0,96290.195,1,2019,0.00
4,1047479,2019-01-01,2,0.000,1546300800000000000,96149.280,96850.280,96170.280,-1.0,0.000,1,2019,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,99977644,2019-12-30,3,97569.675,1577664000000000000,98539.675,96468.675,97569.675,45377.0,97569.675,1,2019,0.00
9996,99979376,2019-12-30,3,98179.700,1577664000000000000,97875.700,97139.700,98179.700,75419.0,98179.700,1,2019,98347.16
9997,99979915,2019-12-30,1,98274.760,1577664000000000000,98274.760,98599.760,97072.760,13167.0,98274.760,1,2019,0.00
9998,99981943,2019-12-30,1,96469.850,1577664000000000000,96469.850,98086.850,99745.850,46652.0,96469.850,1,2019,98994.73


In [45]:
full_action_reward.columns

Index(['tr_id', 'dates', 'action', 'reward', 'dates_int', 'model_1', 'model_2',
       'model_3', 'user_id', 'sale_price', 'week', 'year',
       'previous purchase'],
      dtype='object')

In [46]:
context_columns = ['tr_id','dates_int', 'model_1', 'model_2',
       'model_3', 'user_id', 'sale_price', 'week', 'year',
       'previous purchase']

In [47]:
# Create another model
num_actions = 3 # three actions
num_features = 10 # ten features


model_2 = LinearBandits(num_actions, num_features)

In [48]:
# Train it
for index, row in full_action_reward.iterrows():
    context = row[context_columns]
    action = row['action'] - 1
    reward = row['reward']
    model_2.update(context, action,reward)

In [49]:
pred_context = full_action_reward.iloc[-2][context_columns].values
model_2.action(pred_context)

  multivariates = [np.random.multivariate_normal(mus[j], covs[j]) for j in range(n_rows)]


2

In [50]:
# Save the model
model_2.save('model_2.pkl')