In [1]:
from data_processing import *
from recommenders import *
from metrics import evaluate_model

# Data preparation

In [2]:
# Load the dataset handle, split it into the three raw order tables, then
# derive the training ratings and the prepared test set that the model cells
# below pass to fit() and predict() respectively.
zip_file = load_data()
(
    raw_orders,
    raw_order_product_train,
    raw_order_product_test,
) = raw_set_preparation(zip_file)
train_set_ratings, test_set_prepared = train_test_preparation(
    raw_orders,
    raw_order_product_train,
    raw_order_product_test,
)

# Most Popular Recommender

In [3]:
# Popularity baseline: fit on the training ratings, predict for the prepared
# test set, then report the @10 ranking metrics shown in the cell output.
mpr = MostPopularRecommender()
mpr.fit(train_set_ratings)
mpr_prediction = mpr.predict(test_set_prepared)
evaluate_model(test_set_prepared, mpr_prediction)

hit_rate@10: 0.457
precision_at_k@10: 0.073
recall_at_k@10: 0.070
average_precision_at_k@10: 0.220
ndcg_at_k@10: 0.087


# Truncated SVD

In [12]:
%%time
# The cell magic above times this whole cell: model construction plus fit.
# Prediction and evaluation are kept in the next cell so they are not timed here.
svd = TruncatedSVDRecommender()
svd.fit(train_set_ratings)

Wall time: 3min 30s


In [13]:
# Predict with the SVD model fitted in the previous cell and report the @10
# ranking metrics.
# NOTE(review): batch_size=10000 presumably bounds how many test rows are
# scored per step to limit memory use — confirm against predict().
svd_prediction = svd.predict(test_set_prepared, batch_size=10000)
evaluate_model(test_set_prepared, svd_prediction)

hit_rate@10: 0.741
precision_at_k@10: 0.171
recall_at_k@10: 0.199
average_precision_at_k@10: 0.439
ndcg_at_k@10: 0.211


# Alternating Least Squares

In [17]:
# ALS model with default constructor arguments: fit on the training ratings
# (the fit shows a progress bar in the cell output), predict for the prepared
# test set, then report the @10 ranking metrics.
als = ALSRecommender()
als.fit(train_set_ratings)
als_prediction = als.predict(test_set_prepared)
evaluate_model(test_set_prepared, als_prediction)

100%|██████████| 15/15 [03:19<00:00, 13.28s/it]
hit_rate@10: 0.665
precision_at_k@10: 0.134
recall_at_k@10: 0.170
average_precision_at_k@10: 0.312
ndcg_at_k@10: 0.150


# Stratified Most Popular

In [3]:
# Build the StarSpace input frame from the raw orders and the raw training
# order-product table; it is passed as starspace_df to the stratified
# recommender's fit() in the next cell.
starspace_prepared = starspace_preparation(
    raw_orders,
    raw_order_product_train,
)

In [4]:
# Stratified popularity model: fit on the training ratings together with the
# StarSpace-prepared frame, predict for the prepared test set, then report
# the @10 ranking metrics.
stratify_mpr = StratifyMostPopularRecommender()
stratify_mpr.fit(
    train_set_ratings,
    starspace_df=starspace_prepared,
)
stratify_mpr_prediction = stratify_mpr.predict(test_set_prepared)
evaluate_model(test_set_prepared, stratify_mpr_prediction)

Starspace is already installed


  if (await self.run_code(code, result,  async_=asy)):


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=500.0), HTML(value='')))


hit_rate@10: 0.560
precision_at_k@10: 0.099
recall_at_k@10: 0.111
average_precision_at_k@10: 0.314
ndcg_at_k@10: 0.126


# King of the hill ensemble
This approach relies heavily on pretrained values:
- predictions of each model on the validation set and on the full training set
- vector representations of users from StarSpace training

To train your own model, consider refactoring this class.

In [3]:
# Split the raw training data into training ratings and a validation set.
# Only valid_set is consumed by the ensemble cell that follows; train_rates
# is not referenced in the cells visible here.
train_rates, valid_set = train_validation_split(
    raw_orders,
    raw_order_product_train,
)

In [4]:
# King-of-the-hill ensemble: fit on the validation split only (per the
# markdown above, the class relies on pretrained per-model predictions and
# StarSpace user vectors), predict for the prepared test set, then report the
# @10 ranking metrics. The fit emits a long boosting log in the cell output.
koh = KingOfTheHillRecommender()
koh.fit(valid_set)
koh_prediction = koh.predict(test_set_prepared)
evaluate_model(test_set_prepared, koh_prediction)

HBox(children=(FloatProgress(value=0.0, max=206209.0), HTML(value='')))


Training until validation scores don't improve for 5 rounds
[4]	valid_0's multi_logloss: 0.709875
[8]	valid_0's multi_logloss: 0.708915
[12]	valid_0's multi_logloss: 0.708026
[16]	valid_0's multi_logloss: 0.707193
[20]	valid_0's multi_logloss: 0.706439
[24]	valid_0's multi_logloss: 0.705735
[28]	valid_0's multi_logloss: 0.705098
[32]	valid_0's multi_logloss: 0.704487
[36]	valid_0's multi_logloss: 0.70393
[40]	valid_0's multi_logloss: 0.703398
[44]	valid_0's multi_logloss: 0.702898
[48]	valid_0's multi_logloss: 0.702406
[52]	valid_0's multi_logloss: 0.701958
[56]	valid_0's multi_logloss: 0.70153
[60]	valid_0's multi_logloss: 0.701133
[64]	valid_0's multi_logloss: 0.700754
[68]	valid_0's multi_logloss: 0.70038
[72]	valid_0's multi_logloss: 0.700026
[76]	valid_0's multi_logloss: 0.699698
[80]	valid_0's multi_logloss: 0.699395
[84]	valid_0's multi_logloss: 0.699095
[88]	valid_0's multi_logloss: 0.698812
[92]	valid_0's multi_logloss: 0.698541
[96]	valid_0's multi_logloss: 0.698294
[100]	va