In [1]:
import pandas as pd
import numpy as np

from src.metrics import precision_at_k, recall_at_k
from src.utils import split_data, prefilter_items
from src.recommenders import MainRecommender, SecondRecommender

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Raw transactions plus the item and household reference tables.
data = pd.read_csv('retail_train.csv')
item_features = pd.read_csv('product.csv')
user_features = pd.read_csv('hh_demographic.csv')

# Lower-case every header of the reference tables, then expose their key
# columns as `item_id` / `user_id` (the key names used on the transaction
# frames further down in this notebook).
item_features = (
    item_features
    .rename(columns=str.lower)
    .rename(columns={'product_id': 'item_id'})
)
user_features = (
    user_features
    .rename(columns=str.lower)
    .rename(columns={'household_key': 'user_id'})
)

In [3]:
# Split the raw transactions into a train/test pair for each of the two model
# levels; the call must return exactly four objects, in this order.
# NOTE(review): the split rule itself lives in src.utils.split_data and is not
# visible here (presumably time-based) — confirm there.
train_lvl_1, test_lvl_1, train_lvl_2, test_lvl_2 = split_data(data)

In [4]:
# Filter the level-1 training set before fitting. Only train_lvl_1 is replaced;
# the other three splits are left as returned by split_data.
# The input name is overwritten, so re-running this cell filters data that has
# already been filtered.
# NOTE(review): take_n_popular=2500 presumably keeps the 2500 most popular
# items — confirm in src.utils.prefilter_items.
train_lvl_1 = prefilter_items(train_lvl_1, item_features=item_features, take_n_popular=2500)

In [5]:
# Build the first-level recommender from the filtered level-1 training data.
# NOTE(review): the progress bars printed under this cell suggest the
# constructor itself performs the fitting — confirm in
# src.recommenders.MainRecommender.
recommender_lvl_1 = MainRecommender(train_lvl_1)



  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/2501 [00:00<?, ?it/s]

In [6]:
# Second-level model, constructed around the already-built level-1 recommender.
model = SecondRecommender(recommender_lvl_1)
# Derive the training inputs and targets from the level-2 training split and
# the item / household reference tables.
# NOTE(review): the exact shape and contents of X_train / y_train are defined
# in SecondRecommender.fit_transform — not visible here.
X_train, y_train = model.fit_transform(train_lvl_2, item_features, user_features)
# Fit the second-level model on the inputs/targets produced above.
model.fit(X_train, y_train)

In [7]:
# Build model inputs for the level-2 test split with the fitted second-level model.
X_test = model.transform(test_lvl_2)
# NOTE(review): presumably returns one row per user holding a list of
# recommended items; the effect of extend=True is defined in
# SecondRecommender.predict_user_list — confirm there.
pred_ex_lvl_2 = model.predict_user_list(X_test, extend=True)
# Positional rename: requires the returned frame to have exactly two columns
# (the assignment raises on a length mismatch). The first is used as the merge
# key `user_id` in the evaluation cell, the second as the recommendations.
pred_ex_lvl_2.columns =  ['user_id','pred_ex_item']

In [8]:
# Ground truth: the distinct items each user has in the level-2 test split.
result = test_lvl_2.groupby('user_id')['item_id'].unique().reset_index()
result.columns = ['user_id', 'actual_lvl_2']

# Inner join: users without a row in pred_ex_lvl_2 drop out of `result`, so the
# mean below is taken only over users that received recommendations.
result = result.merge(pred_ex_lvl_2, on='user_id')

# Score every row from its own columns in a single pass. Looking the row up
# again by user_id (a full-frame filter per row) is quadratic in the number of
# users and, if user_id were ever duplicated, would score the first match
# instead of the current row.
# dtype is pinned so an empty `result` still yields a numeric column (mean -> NaN).
result['precision'] = pd.Series(
    [
        precision_at_k(recommended, actual, k=5)
        for recommended, actual in zip(result['pred_ex_item'], result['actual_lvl_2'])
    ],
    index=result.index,
    dtype='float64',
)
print(f"Precision на валидационной выборке N=5: {result['precision'].mean()}")

Precision на валидационной выборке N=5: 0.4120192307692309


In [9]:
# Write the user id and recommendation columns of `result` to
# 'recommendations.csv' in the working directory, without the index column.
# NOTE(review): pred_ex_item presumably holds a list per row; to_csv would
# write it as its string representation, so a consumer has to parse it back
# into a list — confirm that is the expected format.
result[['user_id','pred_ex_item']].to_csv('recommendations.csv', index=False)