In [1]:
import os, sys

# Restrict MKL to a single thread. This runs before numpy is imported in the
# next cell, so the setting is in place when the numeric libraries load.
# NOTE(review): presumably done to avoid thread oversubscription when fitting
# the `implicit` ALS model — confirm against the implicit documentation.
os.environ['MKL_NUM_THREADS'] = '1'

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import catboost as catb
%matplotlib inline

from tqdm import tqdm
from scipy.sparse import csr_matrix
from implicit import als
from lightgbm import LGBMClassifier

# Put the notebook's parent directory on the import path (once) so that the
# project-local `src` package imported below can be resolved.
module_path = os.path.abspath(os.pardir)
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

from src.metrics import precision_at_k, recall_at_k
from src.utils import prefilter_items
from src.recommenders import CastomRecommender

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so it also hides pandas
# chained-assignment / deprecation warnings raised by later cells; consider
# narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [3]:
# Interaction logs: the training period, the held-out test period, and the
# full transaction history.
train = pd.read_csv('raw_data/retail_train.csv')
test = pd.read_csv('raw_data/retail_test1.csv')
transaction = pd.read_csv('raw_data/transaction_data.csv')

# Side tables. Their key columns are renamed to the `item_id` / `user_id`
# names that the train and test frames already use.
item_features = pd.read_csv('raw_data/product.csv')
item_features = item_features.rename(columns={'PRODUCT_ID': 'item_id'})

user_features = pd.read_csv('raw_data/hh_demographic.csv')
user_features = user_features.rename(columns={'household_key': 'user_id'})

In [4]:
# Inspect the training interactions (one row per purchased item in a basket).
# The displayed frame covers days 1-663 / weeks 1-95.
# NOTE(review): the last displayed row has quantity 22451 for a 43.98 sale —
# looks like an outlier or a volume-based quantity; verify before using
# `quantity` as a weight.
train

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc
0,2375,26984851472,1,1004906,1,1.39,364,-0.60,1631,1,0.0,0.0
1,2375,26984851472,1,1033142,1,0.82,364,0.00,1631,1,0.0,0.0
2,2375,26984851472,1,1036325,1,0.99,364,-0.30,1631,1,0.0,0.0
3,2375,26984851472,1,1082185,1,1.21,364,0.00,1631,1,0.0,0.0
4,2375,26984851472,1,8160430,1,1.50,364,-0.39,1631,1,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2396799,1613,41655820646,663,16102849,1,2.00,3262,-1.15,1231,95,0.0,0.0
2396800,1001,41655829421,663,13217063,1,1.69,3131,0.00,2231,95,0.0,0.0
2396801,1001,41655829421,663,13217800,1,1.69,3131,0.00,2231,95,0.0,0.0
2396802,1167,41656790510,663,6410462,22451,43.98,3385,-0.65,1059,95,0.0,0.0


In [5]:
# Inspect the product catalogue; `item_id` is the renamed PRODUCT_ID key and
# the remaining columns are categorical descriptors (manufacturer, department,
# brand, commodity, sub-commodity, size).
item_features

Unnamed: 0,item_id,MANUFACTURER,DEPARTMENT,BRAND,COMMODITY_DESC,SUB_COMMODITY_DESC,CURR_SIZE_OF_PRODUCT
0,25671,2,GROCERY,National,FRZN ICE,ICE - CRUSHED/CUBED,22 LB
1,26081,2,MISC. TRANS.,National,NO COMMODITY DESCRIPTION,NO SUBCOMMODITY DESCRIPTION,
2,26093,69,PASTRY,Private,BREAD,BREAD:ITALIAN/FRENCH,
3,26190,69,GROCERY,Private,FRUIT - SHELF STABLE,APPLE SAUCE,50 OZ
4,26355,69,GROCERY,Private,COOKIES/CONES,SPECIALTY COOKIES,14 OZ
...,...,...,...,...,...,...,...
92348,18293142,6384,DRUG GM,National,BOOKSTORE,PAPERBACK BOOKS,
92349,18293439,6393,DRUG GM,National,BOOKSTORE,CHILDRENS LOW END,
92350,18293696,6406,DRUG GM,National,BOOKSTORE,PAPERBACK BEST SELLER,
92351,18294080,6442,DRUG GM,National,BOOKSTORE,PAPERBACK BOOKS,


In [6]:
# Inspect the household demographics; `user_id` is the renamed household_key.
# The displayed index ends at 799 while user ids run up to 2498 in the visible
# rows, so demographics appear to be available only for a subset of users.
user_features

Unnamed: 0,AGE_DESC,MARITAL_STATUS_CODE,INCOME_DESC,HOMEOWNER_DESC,HH_COMP_DESC,HOUSEHOLD_SIZE_DESC,KID_CATEGORY_DESC,user_id
0,65+,A,35-49K,Homeowner,2 Adults No Kids,2,None/Unknown,1
1,45-54,A,50-74K,Homeowner,2 Adults No Kids,2,None/Unknown,7
2,25-34,U,25-34K,Unknown,2 Adults Kids,3,1,8
3,25-34,U,75-99K,Homeowner,2 Adults Kids,4,2,13
4,45-54,B,50-74K,Homeowner,Single Female,1,None/Unknown,16
...,...,...,...,...,...,...,...,...
796,35-44,U,50-74K,Homeowner,2 Adults No Kids,2,None/Unknown,2494
797,45-54,A,75-99K,Homeowner,Unknown,3,1,2496
798,45-54,U,35-49K,Unknown,Single Male,1,None/Unknown,2497
799,25-34,U,50-74K,Homeowner,2 Adults No Kids,2,None/Unknown,2498


In [7]:
# Inspect the full transaction log. Unlike train/test it keeps the original
# column names (household_key, PRODUCT_ID, upper-case fields) and its displayed
# rows run to day 711 / week 102, i.e. beyond the test period shown below.
# NOTE(review): using this frame for features would leak test-period data.
transaction

Unnamed: 0,household_key,BASKET_ID,DAY,PRODUCT_ID,QUANTITY,SALES_VALUE,STORE_ID,RETAIL_DISC,TRANS_TIME,WEEK_NO,COUPON_DISC,COUPON_MATCH_DISC
0,2375,26984851472,1,1004906,1,1.39,364,-0.60,1631,1,0.0,0.0
1,2375,26984851472,1,1033142,1,0.82,364,0.00,1631,1,0.0,0.0
2,2375,26984851472,1,1036325,1,0.99,364,-0.30,1631,1,0.0,0.0
3,2375,26984851472,1,1082185,1,1.21,364,0.00,1631,1,0.0,0.0
4,2375,26984851472,1,8160430,1,1.50,364,-0.39,1631,1,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2595727,1598,42305362535,711,92130,1,0.99,3228,0.00,1520,102,0.0,0.0
2595728,1598,42305362535,711,114102,1,8.89,3228,0.00,1520,102,0.0,0.0
2595729,1598,42305362535,711,133449,1,6.99,3228,0.00,1520,102,0.0,0.0
2595730,1598,42305362535,711,6923644,1,4.50,3228,-0.49,1520,102,0.0,0.0


In [8]:
# Inspect the held-out interactions; same columns as `train`, with the
# displayed rows covering days 664-684 / weeks 96-98.
test

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc
0,1340,41652823310,664,912987,1,8.49,446,0.0,52,96,0.0,0.0
1,588,41652838477,664,1024426,1,6.29,388,0.0,8,96,0.0,0.0
2,2070,41652857291,664,995242,5,9.10,311,-0.6,46,96,0.0,0.0
3,1602,41665647035,664,827939,1,7.99,334,0.0,1741,96,0.0,0.0
4,1602,41665647035,664,927712,1,0.59,334,-0.4,1741,96,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
88729,98,41944918665,684,945779,2,2.00,421,0.0,1705,98,0.0,0.0
88730,98,41944918665,684,993617,2,2.00,421,0.0,1705,98,0.0,0.0
88731,98,41944918665,684,1128647,2,2.00,421,0.0,1705,98,0.0,0.0
88732,98,41944918665,684,9526886,2,0.60,421,0.0,1705,98,0.0,0.0


In [9]:
# Build the project recommender from the training interactions plus the item
# and user side tables.
# NOTE(review): the two progress bars in this cell's output suggest that model
# fitting happens inside the constructor — confirm in src/recommenders.py.
recomender = CastomRecommender(data=train, items_features=item_features, users_features=user_features)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3907.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))




In [10]:
# Produce recommendations for the test data. Judging by the displayed output,
# the result has a `user_id`, the list of actual `purchases`, one list-valued
# column per recommendation strategy, and a `hit` flag.
test_recs = recomender.predict(data_test=test)
# Preview the first rows only; the cells are long lists.
test_recs.head()

Unnamed: 0,user_id,purchases,top_popular,top_purchases,top_purchases_by_user,own_recommender,als_recommender,basic_recommender,catb_recommender,hit
0,1,"[880007, 883616, 931136, 938004, 940947, 94726...","[1082185, 981760, 995242, 1029743, 840361, 109...","[1029743, 995242, 1106523, 981760, 1133018, 11...","[856942, 995242, 940947, 5577022, 9527290, 108...","[1081177, 1004906, 12810393, 6034857, 1006184,...","[995242, 1029743, 965766, 1024306, 940947, 979...","[1082185, 1029743, 856942, 995242, 1081177, 98...","[856942, 1082185, 9655212, 995242, 979707, 865...",1
1,2,"[820165, 820291, 826784, 826835, 829009, 85784...","[1082185, 981760, 995242, 1029743, 840361, 109...","[1029743, 995242, 1106523, 981760, 1133018, 11...","[826784, 1106523, 951590, 8090521, 901062, 980...","[1127831, 1106523, 1133018, 1081177, 5569230, ...","[1106523, 1133018, 5569230, 962229, 981760, 11...","[1082185, 1029743, 826784, 1106523, 1127831, 9...","[1133018, 1106523, 1082185, 6534178, 5569230, ...",1
2,3,"[827683, 908531, 989069, 1071377, 1080155, 109...","[1082185, 981760, 995242, 1029743, 840361, 109...","[1029743, 995242, 1106523, 981760, 1133018, 11...","[910032, 1053690, 951590, 1085604, 1092026, 11...","[1106523, 5569230, 1053690, 5568378, 1092026, ...","[1106523, 1133018, 951590, 1053690, 5569230, 5...","[1082185, 1029743, 910032, 1106523, 1106523, 9...","[1037840, 1106523, 1092026, 1053690, 5569230, ...",1
3,6,"[956902, 960791, 1037863, 1119051, 1137688, 84...","[1082185, 981760, 995242, 1029743, 840361, 109...","[1029743, 995242, 1106523, 981760, 1133018, 11...","[1037863, 1029743, 965267, 8203834, 993638, 84...","[1029743, 1044078, 5569230, 1096036, 1004906, ...","[930118, 1024306, 995242, 1127831, 1029743, 96...","[1082185, 1029743, 1037863, 930118, 1029743, 9...","[995242, 1082185, 878996, 5569230, 1119051, 10...",1
4,7,"[847270, 855557, 859987, 863407, 895454, 90663...","[1082185, 981760, 995242, 1029743, 840361, 109...","[1029743, 995242, 1106523, 981760, 1133018, 11...","[1122358, 828867, 1106523, 1072483, 893018, 92...","[1127831, 1106523, 1044078, 1133018, 5568378, ...","[1106523, 1058997, 1126899, 1133018, 1127831, ...","[1082185, 1029743, 1122358, 1106523, 1127831, ...","[1082185, 1106523, 1058997, 1126899, 1133018, ...",1


In [11]:
# Cut-offs for the two evaluation metrics.
k_precision = 5
k_recall = 200

precision_col = f"precision@{k_precision}"
recall_col = f"recall@{k_recall}"

# Every column other than the identifiers, the ground truth and the `hit` flag
# holds one list of recommended items per row.
recomends = test_recs.drop(columns=["user_id", "purchases", "hit"]).columns

# One row per recommender, one column per metric. Created as float so the
# table is numeric rather than object-typed.
metrics = pd.DataFrame(index=recomends, columns=[precision_col, recall_col], dtype=float)

for recomend in recomends:
    # Pair each row's recommendations with that row's actual purchases once and
    # reuse the pairs for both metrics.
    pairs = list(zip(test_recs[recomend], test_recs["purchases"]))

    # Assign through a single .loc call. The previous chained form
    # `metrics[col][row] = value` writes to a temporary under pandas
    # Copy-on-Write and leaves `metrics` unchanged, and the resulting warning
    # is hidden by the global warnings filter.
    # nanmean skips rows for which the metric is undefined (NaN).
    metrics.loc[recomend, precision_col] = np.nanmean(
        [precision_at_k(recom, bought, k=k_precision) for recom, bought in pairs]
    )
    metrics.loc[recomend, recall_col] = np.nanmean(
        [recall_at_k(recom, bought, k=k_recall) for recom, bought in pairs]
    )

In [12]:
# Show the evaluation table: one row per recommendation strategy, with mean
# precision@k and recall@k over the test rows.
metrics

Unnamed: 0,precision@5,recall@200
top_popular,0.17305,0.202535
top_purchases,0.126048,0.151067
top_purchases_by_user,0.303317,0.171033
own_recommender,0.139493,0.0915727
als_recommender,0.19957,0.187602
basic_recommender,0.272935,0.21291
catb_recommender,0.311521,0.237411
