1. Перенесите функции prefilter_items и postfilter_items из вебинара в модуль src/utils.py
1. Реализуйте функции get_similar_items_recommendation, get_similar_users_recommendation (они разбирались на вебинаре) и переместите в src/recommenders.py
1. Проверьте, что все модули корректно импортируются

In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Sparse-matrix support

from scipy.sparse import csr_matrix

# Matrix factorization
from implicit.als import AlternatingLeastSquares
from implicit.nearest_neighbours import bm25_weight

# Helper functions from the first webinar, imported from the project's
# `src` package.
import os
import sys

# Put the parent directory on sys.path (only once) so that `src` resolves
# when the notebook is run from its own folder.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

from src.utils import prefilter_items, id_converter
from src.recommenders import get_similar_users_recommendation, get_similar_item

In [16]:
# Read the raw transaction log.
data = pd.read_csv('./input/transaction_data.csv')

# Lower-case every header, then switch to the generic user/item naming.
data = data.rename(columns=str.lower).rename(
    columns={'household_key': 'user_id', 'product_id': 'item_id'}
)

# Time-based hold-out: rows with week_no >= test_week go to the test set,
# everything earlier is used for training.
# NOTE(review): the boundary is inclusive, so the test set can span
# test_size_weeks + 1 distinct week numbers — confirm this is intended.
test_size_weeks = 3
current_week = data['week_no'].max()
test_week = current_week - test_size_weeks

data_train = data.loc[data['week_no'] < test_week]
data_test = data.loc[data['week_no'] >= test_week]

data_train.head(2)

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc
0,2375,26984851472,1,1004906,1,1.39,364,-0.6,1631,1,0.0,0.0
1,2375,26984851472,1,1033142,1,0.82,364,0.0,1631,1,0.0,0.0


In [17]:
# Product catalogue: headers are lower-cased and product_id is renamed to
# item_id.
item_features = (
    pd.read_csv('./input/product.csv')
    .rename(columns=str.lower)
    .rename(columns={'product_id': 'item_id'})
)

item_features.head(2)

Unnamed: 0,item_id,manufacturer,department,brand,commodity_desc,sub_commodity_desc,curr_size_of_product
0,25671,2,GROCERY,National,FRZN ICE,ICE - CRUSHED/CUBED,22 LB
1,26081,2,MISC. TRANS.,National,NO COMMODITY DESCRIPTION,NO SUBCOMMODITY DESCRIPTION,


In [18]:
# Count distinct items before filtering so the reduction can be reported.
n_items_before = data_train['item_id'].nunique()

# prefilter_items is imported from src.utils (not shown in this notebook).
# NOTE(review): n_popular=5000 presumably caps the number of items kept —
# confirm against the helper's implementation.
data_train_f = prefilter_items(data_train, item_features, current_week, n_popular=5000)

# Count distinct items after filtering and print both numbers.
n_items_after = data_train_f['item_id'].nunique()
print(f'Decreased # items from {n_items_before} to {n_items_after}')

Decreased # items from 90386 to 4910


In [19]:
# One row per user, holding the array of distinct item ids that appear for
# that user in the filtered training data.
# NOTE(review): despite the column name `actual`, these ids come from the
# training split, not from data_test — confirm this is intended.
result = (
    data_train_f.groupby('user_id')['item_id']
    .unique()
    .reset_index(name='actual')
)
result.head(2)

Unnamed: 0,user_id,actual
0,1,"[-1, 856942, 883616, 911454, 940947, 974327, 9..."
1,2,"[885023, 1106523, -1, 1123496, 916122, 940947,..."


In [20]:
# Users in rows, items in columns, summed quantity in the cells (0 where a
# user/item pair has no rows); values are cast to float.
user_item_matrix = data_train_f.pivot_table(
    index='user_id',
    columns='item_id',
    values='quantity',
    aggfunc='sum',
    fill_value=0,
).astype(float)

# Lookup tables returned by src.utils.id_converter.
# NOTE(review): judging by the names they map between raw ids and matrix
# positions in both directions — verify against the helper.
id_to_item_value, id_to_user_value, item_value_to_id, user_value_to_id = id_converter(user_item_matrix)

# Internal index of the item whose id is -1; it is later passed to the
# recommenders as an item to exclude.
f_items = item_value_to_id[-1]

In [21]:
# BM25 weighting is applied to the item-user (transposed) matrix; the
# result is transposed back so rows are users again.
user_item_matrix = bm25_weight(user_item_matrix.T).T

# CSR representation of the weighted user-item matrix.
sparse_uim = csr_matrix(user_item_matrix)

In [22]:
# Configure the ALS matrix-factorization model from `implicit`:
# 100 latent factors, 15 iterations, 4 threads.
als_model = AlternatingLeastSquares(factors=100,
                                    regularization=0.001,
                                    iterations=15,
                                    calculate_training_loss=True,
                                    num_threads=4)

# Train on the BM25-weighted sparse matrix, showing a progress bar.
# NOTE(review): sparse_uim is user x item; which orientation fit() expects
# depends on the installed implicit version — confirm.
als_model.fit(sparse_uim , show_progress=True)

  0%|          | 0/15 [00:00<?, ?it/s]

In [23]:
def _top5_als_items(user_id):
    """Return the raw item ids of the 5 ALS recommendations for one user.

    The user id is translated to its matrix row, the model is queried with
    that user's row of ``sparse_uim``, and the first element of the value
    returned by ``recommend`` is mapped back through ``id_to_item_value``.
    Items the user already has are not filtered out; the item at index
    ``f_items`` is excluded.
    """
    row = user_value_to_id[user_id]
    recommended = als_model.recommend(userid=row,
                                      user_items=sparse_uim[row],
                                      N=5,
                                      filter_already_liked_items=False,
                                      filter_items=[f_items, ],
                                      recalculate_user=True)[0]
    return [id_to_item_value[idx] for idx in recommended]


result['q_mean_als'] = result['user_id'].apply(_top5_als_items)

In [24]:
# Display the per-user table with the ALS recommendations added.
result

Unnamed: 0,user_id,actual,q_mean_als
0,1,"[-1, 856942, 883616, 911454, 940947, 974327, 9...","[8090541, 5577022, 856942, 9527558, 883616]"
1,2,"[885023, 1106523, -1, 1123496, 916122, 940947,...","[1103898, 7410217, 916122, 826835, 1139142]"
2,3,"[826385, 827656, 831063, 854405, 862799, 89962...","[1075979, 998206, 10456568, 1092937, 867389]"
3,4,"[-1, 9835509, 936470, 6391541, 846417, 998119,...","[936470, 7431408, 6391541, 891423, 5570830]"
4,5,"[932631, 969846, 994577, -1, 829621, 1004385, ...","[1112387, 969846, 1003031, 1065017, 1004385]"
...,...,...,...
2460,2496,"[865511, 907631, -1, 995876, 5569230, 8065410,...","[844179, 1044078, 1070702, 12810393, 12731432]"
2461,2497,"[870515, 1067606, 8090532, 8090537, -1, 823721...","[1079067, 1081177, 1038663, 7025204, 845208]"
2462,2498,"[-1, 1034956, 987044, 993044, 1051283, 1077745...","[1030981, 1022053, 1125123, 7144132, 1100379]"
2463,2499,"[883202, 899624, 5591170, 854042, -1, 1096635,...","[941797, 907631, 5570048, 944139, 890695]"


In [25]:
# Recommendations based on similar users, computed by the helper from
# src.recommenders and left-joined onto the per-user table by user_id.
sim_user_rec = get_similar_users_recommendation(
    result['user_id'],
    id_to_item_value,
    user_value_to_id,
    id_to_user_value,
    sparse_uim,
    f_items,
    als_model,
)
result = result.merge(sim_user_rec, how='left', on='user_id')

  0%|          | 0/4910 [00:00<?, ?it/s]

In [26]:
# For every user, pass the array stored in `actual` to get_similar_item
# (helper from src.recommenders) and keep what it returns in a new column.
# The column key is spelled with Latin letters only: it previously ended in
# the Cyrillic letter U+0441, which renders like "c" but made
# result['sim_item_rec'] raise KeyError.
result['sim_item_rec'] = result['actual'].apply(
    lambda x: get_similar_item(als_model, x, id_to_item_value, item_value_to_id, f_items)
)

In [27]:
# Display the final per-user table with all recommendation columns.
result

Unnamed: 0,user_id,actual,q_mean_als,sim_user_items_recommends,sim_item_reс
0,1,"[-1, 856942, 883616, 911454, 940947, 974327, 9...","[8090541, 5577022, 856942, 9527558, 883616]","[847186, 1006342, 1030577, 1091901, 1124029, 8...","[819969, 830503, 835098, 835530, 837005]"
1,2,"[885023, 1106523, -1, 1123496, 916122, 940947,...","[1103898, 7410217, 916122, 826835, 1139142]","[825343, 844179, 876948, 916122, 920109, 1044078]","[819400, 819978, 831628, 834117, 837270]"
2,3,"[826385, 827656, 831063, 854405, 862799, 89962...","[1075979, 998206, 10456568, 1092937, 867389]","[832678, 841762, 899624, 1059902, 1066783, 106...","[823704, 827667, 834117, 839419, 845462]"
3,4,"[-1, 9835509, 936470, 6391541, 846417, 998119,...","[936470, 7431408, 6391541, 891423, 5570830]","[824180, 838186, 947201, 1014292, 1027642, 105...","[827667, 866211, 886395, 899624, 908318]"
4,5,"[932631, 969846, 994577, -1, 829621, 1004385, ...","[1112387, 969846, 1003031, 1065017, 1004385]","[864893, 903529, 932631, 957951, 969846, 1007136]","[822049, 854852, 916122, 921438, 944137]"
...,...,...,...,...,...
2460,2496,"[865511, 907631, -1, 995876, 5569230, 8065410,...","[844179, 1044078, 1070702, 12810393, 12731432]","[826385, 829955, 831181, 866548, 893867, 959821]","[823721, 825343, 829291, 831628, 832678]"
2461,2497,"[870515, 1067606, 8090532, 8090537, -1, 823721...","[1079067, 1081177, 1038663, 7025204, 845208]","[833241, 862866, 912817, 956125, 1008814, 1016...","[819978, 820321, 823721, 823775, 825006]"
2462,2498,"[-1, 1034956, 987044, 993044, 1051283, 1077745...","[1030981, 1022053, 1125123, 7144132, 1100379]","[825882, 881249, 892048, 919766, 938381, 1018818]","[823704, 823721, 824180, 832678, 839243]"
2463,2499,"[883202, 899624, 5591170, 854042, -1, 1096635,...","[941797, 907631, 5570048, 944139, 890695]","[883665, 898869, 944139, 1028705, 1105301, 556...","[819978, 823721, 825006, 829001, 832678]"
