In [21]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm

In [8]:
# Загрузим наши данные
FLD="/home/nur/projects/analysis/dynamic_price/data/hw9"
transactions = pd.read_csv(os.path.join(FLD, 'transactions.csv'))
action_reward = pd.read_csv(os.path.join(FLD, 'actions_reward_transaction_df.csv'))
predictions_for_transactions = pd.read_csv(os.path.join(FLD, 'predictions_for_transactions.csv'))

transactions.dates = pd.to_datetime(transactions.dates)
predictions_for_transactions.dates = pd.to_datetime(predictions_for_transactions.dates) 
action_reward.dates = pd.to_datetime(action_reward.dates)

In [13]:
print(action_reward.action.value_counts())
action_reward.head(2)

1    3379
2    3328
3    3293
Name: action, dtype: int64


Unnamed: 0,tr_id,dates,action,reward
0,1003923,2019-01-01,2,0.0
1,1018719,2019-01-01,3,96246.08


In [12]:
predictions_for_transactions.head(2)

Unnamed: 0,tr_id,dates,model_1,model_2,model_3
0,1003923,2019-01-01,98073.0,96380.0,96656.0
1,1018719,2019-01-01,95755.08,95924.08,96246.08


In [10]:
transactions.head(2)

Unnamed: 0,tr_id,user_id,dates,sale_price
0,1018719,27185,2019-01-01,96246.08
1,1028713,42513,2019-01-01,98328.15


### Построим 1-ую модель

In [18]:
from space_bandits import LinearBandits

num_actions = 3 # three actions
num_features = 2 # two features

model = LinearBandits(num_actions, num_features)

In [19]:
action_reward['dates_int'] = action_reward.dates.astype(int)

In [20]:
# Обучение модели происходит через функцию update()
for index, row in tqdm(action_reward.iterrows(), total = action_reward.shape[0]):
    context = row[['dates_int', 'tr_id']]
    action = row['action'] - 1
    reward = row['reward']
    model.update(context, action, reward)

In [22]:
# Чтобы получить рекомендацию по выбору какой именно action, надо выбрать в зависимости от контекста используйте action()
pred_context = action_reward.iloc[-2][['dates_int', 'tr_id']].values
model.action(pred_context)

  multivariates = [np.random.multivariate_normal(mus[j], covs[j]) for j in range(n_rows)]


0

In [23]:
# Сохраните модель
model.save('model_1.pkl')

### Постройте 2-у модель

In [25]:
# Добавим в контекст данные о времени, year/week, данные о user и данные о возможных вариантах
full_action_reward = action_reward.merge(predictions_for_transactions, how='left', on=['tr_id', 'dates'])
full_action_reward = full_action_reward.merge(transactions, how='left', on=['tr_id', 'dates'])

In [26]:
# Заполним пропуски
full_action_reward.user_id = full_action_reward.user_id.fillna(-1)
full_action_reward.sale_price = full_action_reward.sale_price.fillna(0)

In [27]:
# Добавим year, week
full_action_reward['week'] = full_action_reward.dates.dt.week
full_action_reward['year'] = full_action_reward.dates.dt.year

  


In [29]:
# Добавим информацию о последней покупки для user'а
full_action_reward['previous purchase'] = full_action_reward.sort_values('dates').groupby('user_id').sale_price.shift(1)
full_action_reward['previous purchase'] = full_action_reward['previous purchase'].fillna(0)

In [30]:
# Проверим что мы действительно добавили информацию о последней покупке
full_action_reward[full_action_reward.user_id == 46652]

Unnamed: 0,tr_id,dates,action,reward,dates_int,model_1,model_2,model_3,user_id,sale_price,week,year,previous purchase
6297,63820317,2019-08-15,3,98994.73,1565827200000000000,98085.73,97723.73,98994.73,46652.0,98994.73,33,2019,0.0
9998,99981943,2019-12-30,1,96469.85,1577664000000000000,96469.85,98086.85,99745.85,46652.0,96469.85,1,2019,98994.73


In [18]:
full_action_reward.columns

Index(['tr_id', 'dates', 'action', 'reward', 'dates_int', 'model_1', 'model_2',
       'model_3', 'user_id', 'sale_price', 'week', 'year',
       'previous purchase'],
      dtype='object')

In [35]:
context_columns = ['tr_id','dates_int', 'model_1', 'model_2',
       'model_3', 'user_id', 'sale_price', 'week', 'year',
       'previous purchase']

In [36]:
# Создадим еще одну модель 
num_actions = 3 # three actions
num_features = 10 # ten features

model_2 = LinearBandits(num_actions, num_features)

In [37]:
# Обучим еще одну модель
for index, row in tqdm(full_action_reward.iterrows(), total=full_action_reward.shape[0]):
    context = row[context_columns]
    action = row['action'] - 1
    reward = row['reward']
    model_2.update(context, action,reward)

100%|██████████| 10000/10000 [01:28<00:00, 112.73it/s]


In [38]:
pred_context = full_action_reward.iloc[-2][context_columns].values
model_2.action(pred_context)

  multivariates = [np.random.multivariate_normal(mus[j], covs[j]) for j in range(n_rows)]


2

In [39]:
# Сохраните модель
model_2.save('model_2.pkl')

In [40]:
!explorer.exe .