In [424]:
#!pip install implicit==0.4.4 --no-use-pep517
#!pip install implicit

In [597]:
import pandas as pd
import numpy as np
from lightgbm import LGBMClassifier

# Для работы с матрицами
from scipy.sparse import csr_matrix

# Матричная факторизация
from implicit.als import AlternatingLeastSquares
from implicit.nearest_neighbours import ItemItemRecommender  # нужен для одного трюка
from implicit.nearest_neighbours import bm25_weight, tfidf_weight


class MainRecommender:
    """Рекоммендации, которые можно получить из ALS

    Input
    -----
    user_item_matrix: pd.DataFrame
        Матрица взаимодействий user-item
    """

    def __init__(self, data, data_full, weighting=True):

        # Топ покупок каждого юзера
        self.top_purchases = data_full.groupby(['user_id', 'item_id'])['quantity'].count().reset_index()
        self.top_purchases.sort_values('quantity', ascending=False, inplace=True)
        #self.top_purchases = self.top_purchases[self.top_purchases['item_id'] != 999999]

        # Топ покупок по всему датасету
        self.overall_top_purchases = data_full.groupby('item_id')['quantity'].count().reset_index()
        self.overall_top_purchases.sort_values('quantity', ascending=False, inplace=True)
        #self.overall_top_purchases = self.overall_top_purchases[self.overall_top_purchases['item_id'] != 999999]
        self.overall_top_purchases = self.overall_top_purchases.item_id.tolist()
        
        self.user_item_matrix = self._prepare_matrix(data)  # pd.DataFrame
        self.id_to_itemid, self.id_to_userid, \
            self.itemid_to_id, self.userid_to_id = self._prepare_dicts(self.user_item_matrix)

        if weighting:
            self.user_item_matrix = bm25_weight(self.user_item_matrix.T).T

        self.model = self.fit(self.user_item_matrix)
               
        self.own_recommender = self.fit_own_recommender(self.user_item_matrix)
        
        self.data_ = data
        self.item_fact = self.model.item_factors
        self.user_fact = self.model.user_factors
        
        self.users_to_pred = data['user_id'].unique()

    @staticmethod
    def _prepare_matrix(data):
        """Готовит user-item матрицу"""
        user_item_matrix = pd.pivot_table(data,
                                          index='user_id', columns='item_id',
                                          values='quantity',  # Можно пробовать другие варианты
                                          aggfunc='count',
                                          fill_value=0
                                          )

        user_item_matrix = user_item_matrix.astype(float)  # необходимый тип матрицы для implicit

        return user_item_matrix

    @staticmethod
    def _prepare_dicts(user_item_matrix):
        """Подготавливает вспомогательные словари"""

        userids = user_item_matrix.index.values
        itemids = user_item_matrix.columns.values

        matrix_userids = np.arange(len(userids))
        matrix_itemids = np.arange(len(itemids))

        id_to_itemid = dict(zip(matrix_itemids, itemids))
        id_to_userid = dict(zip(matrix_userids, userids))

        itemid_to_id = dict(zip(itemids, matrix_itemids))
        userid_to_id = dict(zip(userids, matrix_userids))

        return id_to_itemid, id_to_userid, itemid_to_id, userid_to_id
    
    #@staticmethod
    def fit_2_level(self, item_features_st2, user_features_st2, recomender_my='own', N=5, N_inpt=200):
        
        data_train_st2 = self.data_
        
        if recomender_my == 'own':
            N_inpt = 200 # при таком соотношении датасет для обучения на второй стадии сбалансирован
            data_stage = self._get_recommendations(model=self.own_recommender, N=N_inpt)
        elif recomender_my =='als':
            N_inpt = 50
            data_stage = self._get_recommendations(model=self.model, N=N_inpt)
        else:
            data_stage = {}
            for i in range(len(self.users_to_pred)):
                data_stage[self.id_to_userid[i]] = self._extend_with_top_popular(recommendations=[], N=N_inpt)
        
        
        #формируем датафрейм по кандидатам(с первого уровня модели)
        temp = pd.DataFrame(columns=['user_id', 'item_id'])
        temp['user_id'] = data_stage.keys()
        temp['item_id'] = data_stage.values()
        
        # создаем series где индекс это порядок по user_id а значения это предложенные товары
        data_stage_2 = temp.copy()
        s = data_stage_2.apply(lambda x: pd.Series(x['item_id']), axis=1).stack().reset_index(level=1, drop=True)
        s.name = 'item_id'
        
        # создаем датафрейм на основе предыдущего сериес с user id и флагом покупки.
        data_stage_2 = data_stage_2.drop('item_id', axis=1).join(s)
        data_stage_2['flag'] = 1
        
        # создаем датафрейм покупок(истинный класс)
        data_targets = data_train_st2[['user_id', 'item_id']].copy()    
        data_targets['target'] = 1  # тут только покупки 
        
        # объединяем датафреймы, тем самым получая истинные метки класса
        data_targets = data_stage_2.merge(data_targets, on=['user_id', 'item_id'], how='left')
        
        
        # заполняем вторую метку класса и удаляем вспомогательный столбец
        data_targets['target'].fillna(0, inplace= True)
        data_targets.drop('flag', axis=1, inplace=True)
        
        # добавляем фичи пользователей и фичи юзверей
        data_targets = data_targets.merge(item_features_st2, on='item_id', how='left')
        data_targets = data_targets.merge(user_features_st2, on='user_id', how='left')
        
        #создаем финальный датасет
        X_train = data_targets.drop('target', axis=1)
        y_train = data_targets[['target']]

        print(data_targets['target'].mean()) 

        cat_feats = X_train.columns[2:].tolist()
        X_train[cat_feats] = X_train[cat_feats].astype('category')

        
        
        
        ''' 'boosting_type': 'gbdt',
         'max_depth': -1,
         'objective': 'binary',
         'num_leaves': 17,
         'learning_rate': 0.01,
         'max_bin': 512,
         'subsample_for_bin': 200,
         'subsample': 0.75,
         'subsample_freq': 1,
         'colsample_bytree': 0.8,
         'reg_alpha': 6,
         'reg_lambda': 7,
         'min_split_gain': 0.5,
         'min_child_weight': 1,
         'min_child_samples': 5,
         'scale_pos_weight': 1,
         'num_class': 1,
         'metric': 'binary_error',
         'verbosity': -1}'''
        
        lgb = LGBMClassifier(boosting_type='gbdt',
                             min_split_gain=0.5,
                             min_child_weight=1,
                             min_child_samples=5,
                             scale_pos_weight=1,
                             num_class=1,
                             metric='binary_error',
                             verbosity=-1,
                             max_bin=512,
                             subsample_for_bin=200,
                             subsample_freq=1,
                             colsample_bytree=0.8, 
                             learning_rate=0.01, 
                             n_estimators=25, 
                             num_leaves=17, 
                             objective='binary', 
                             reg_alpha=6, 
                             reg_lambda=7, 
                             seed=500, 
                             subsample=0.75,
                             categorical_column=cat_feats)
        
        self.lgb = lgb.fit(X_train, y_train)
        
        
        # предсказываем
        

        #train_preds = lgb.predict(X_train)
        
        
        ''' 
        

        #Сортировка
        #indices = (-train_preds).argpartition(N, axis=None)[:N]
        #users_lvl_2['candidates200'].loc[users_ids] = data_train_lvl_1['item_id'].unique()[indices]
        data_predict = data_targets.copy()
        data_predict['probobility'] = train_preds
        data_predict = data_predict.sort_values(by='probobility', ascending=False)

        finish_df = pd.DataFrame()
        for el in data_predict['user_id'].unique():
            k = data_predict[data_predict['user_id'] == el].sort_values(by='probobility', ascending=False)[:N].groupby('user_id')['item_id'].unique().reset_index()   
            finish_df = pd.concat([finish_df, k], ignore_index=True)

        finish_df.columns=['user_id', 'item_id']
        
        for i in range(finish_df.shape[0]):
            finish_df['item_id'][i] = self._extend_with_top_popular(finish_df['item_id'][i].tolist(), N=N)'''

               
        #return finish_df

    
    # так как нормальный результат показал только item2item, то и предсказывать будем его.
    def predict_2_level(self, users_to_pred, item_features_st2, user_features_st2, N=5, N_inpt=30):
           
        data_stage = self._get_recommendations(model=self.own_recommender, N=N_inpt)
        
        #формируем новый словарь для предсказаний
        data_stage_temp = {}
        for i in users_to_pred:
            if i not in data_stage.keys():
                data_stage_temp[i] = self._extend_with_top_popular([], N=N_inpt)
            else:
                data_stage_temp[i] = data_stage[i]
        del(data_stage)

        
        #формируем датафрейм по кандидатам(с первого уровня модели)
        temp = pd.DataFrame(columns=['user_id', 'item_id'])
        temp['user_id'] = data_stage_temp.keys()
        temp['item_id'] = data_stage_temp.values()
        
        # создаем series где индекс это порядок по user_id а значения это предложенные товары
        data_stage_2 = temp.copy()
        s = data_stage_2.apply(lambda x: pd.Series(x['item_id']), axis=1).stack().reset_index(level=1, drop=True)
        s.name = 'item_id'
        
        # создаем датафрейм на основе предыдущего сериес с user id 
        data_stage_2 = data_stage_2.drop('item_id', axis=1).join(s)
                
        # добавляем фичи пользователей и фичи юзверей
        data_stage_2 = data_stage_2.merge(item_features, on='item_id', how='left')
        X_ = data_stage_2.merge(user_features, on='user_id', how='left')        

        cat_feats = X_.columns[2:].tolist()
        X_[cat_feats] = X_[cat_feats].astype('category')
        
        # предсказываем                                
        train_preds = self.lgb.predict(X_)
        
        X_['pred']=train_preds
        df_preds = X_[X_['pred']==1].groupby('user_id')['item_id'].unique().reset_index()
        
        
        # Создаем словарь и проверяем колличество ответов
        preds_dict = {}
        for i in X_[X_['pred']==1].groupby('user_id')['item_id'].unique().reset_index().iloc:
            if len(i['item_id']) > N:
                recs = np.random.choice(i['item_id'], size=N, replace=False)
                preds_dict[i['user_id']] = recs.tolist()
            elif len(i['item_id']) < N:
                preds_dict[i['user_id']] = self._extend_with_top_popular(i['item_id'].tolist(), N=N)                
                
               
        return preds_dict

    
    
    @staticmethod
    def fit_own_recommender(user_item_matrix):
        """Обучает модель, которая рекомендует товары, среди товаров, купленных юзером"""

        own_recommender = ItemItemRecommender(K=1, num_threads=4)
        own_recommender.fit(csr_matrix(user_item_matrix).T.tocsr())

        return own_recommender

    @staticmethod
    def fit(user_item_matrix, n_factors=50, regularization=0.001, iterations=40, num_threads=4):
        """Обучает ALS"""

        model = AlternatingLeastSquares(factors=n_factors,
                                        regularization=regularization,
                                        iterations=iterations,
                                        num_threads=num_threads)
        model.fit(csr_matrix(user_item_matrix).T.tocsr())

        return model

    def _update_dict(self, user_id):
        """Если появился новыю user / item, то нужно обновить словари"""

        if user_id not in self.userid_to_id.keys():

            max_id = max(list(self.userid_to_id.values()))
            max_id += 1

            self.userid_to_id.update({user_id: max_id})
            self.id_to_userid.update({max_id: user_id})

    def _get_similar_item(self, item_id):
        """Находит товар, похожий на item_id"""
        recs = self.model.similar_items(self.itemid_to_id[item_id], N=2)  # Товар похож на себя -> рекомендуем 2 товара
        top_rec = recs[1][0]  # И берем второй (не товар из аргумента метода)
        return self.id_to_itemid[top_rec]

    def _extend_with_top_popular(self, recommendations, N=5):
        """Если кол-во рекоммендаций < N, то дополняем их топ-популярными"""

        if len(recommendations) < N:
            recommendations.extend(self.overall_top_purchases[:N])
            recommendations = recommendations[:N]

        return recommendations

    def _get_recommendations(self, model, user=False, N=5):
        """Рекомендации через стардартные библиотеки implicit"""

        self._update_dict(user_id=user)
        res, weights = model.recommend(userid=self.users_to_pred,
                                        user_items=csr_matrix(self.user_item_matrix).tocsr(),
                                        N=N,
                                        filter_already_liked_items=False,
                                        recalculate_user=True)
        
        
        #res = [self.id_to_itemid[k][i] for i in k for k in res]
        fin_res = []
        for i in range(len(res)):
            fin_res_=[]
            for j in range(len(res[i])):
                try:
                    fin_res_.append(self.id_to_itemid[res[i,j]])
                except:
                    c = 1+1#print(res[i,j])
            fin_res.append(fin_res_)
                # распарсить индексы в числа

                
        if N > 50:
            N = int(N * 1.1)
                
        user_dict = {}
        if user:
            local_id = self.userid_to_id[user]
            fin_res = fin_res[local_id]
            fin_res = self._extend_with_top_popular(fin_res, N=N)
            user_dict[user] = fin_res

            assert len(fin_res) == N, 'Количество рекомендаций != {}'.format(N)
            
        else:
            for i in range(len(fin_res)):
                fin_res[i] = self._extend_with_top_popular(fin_res[i], N=N)
                user_dict[self.id_to_userid[i]] = fin_res[i]
                
        return user_dict

    def get_als_recommendations(self, user=False, N=5):
        """Рекомендации через стардартные библиотеки implicit"""
        if user:
            self._update_dict(user_id=user)
        return self._get_recommendations(model=self.model, user=user, N=N)

    def get_own_recommendations(self, user=False, N=5):
        """Рекомендуем товары среди тех, которые юзер уже купил"""

        if user:
            self._update_dict(user_id=user)
        return self._get_recommendations(model=self.own_recommender, user=user, N=N)

    def get_similar_items_recommendation(self, user, N=5):
        """Рекомендуем товары, похожие на топ-N купленных юзером товаров"""
        self._update_dict(user_id=user)
        top_users_purchases = self.top_purchases[self.top_purchases['user_id'] == user].head(N)

        res = top_users_purchases['item_id'].apply(lambda x: self._get_similar_item(x)).tolist()
        res = self._extend_with_top_popular(res, N=N)

        assert len(res) == N, 'Количество рекомендаций != {}'.format(N)
        return res

    def get_similar_users_recommendation(self, user, N=5):
        """Рекомендуем топ-N товаров, среди купленных похожими юзерами"""

        res = []
        self._update_dict(user_id=user)

        # Находим топ-N похожих пользователей
        similar_users = self.model.similar_users(self.userid_to_id[user], N=N+1)
        similar_users = [rec[0] for rec in similar_users]
        similar_users = similar_users[1:]   # удалим юзера из запроса

        for user in similar_users:
            res.extend(self.get_own_recommendations(user, N=1))

        res = self._extend_with_top_popular(res, N=N)

        assert len(res) == N, 'Количество рекомендаций != {}'.format(N)
        return res

In [551]:
import pandas as pd
import numpy as np


def prefilter_items(data, take_n_popular=2000, item_features=None):
    # Уберем самые популярные товары (их и так купят)
    popularity = data.groupby('item_id')['user_id'].nunique().reset_index()
    popularity['user_id'] = popularity['user_id'] / data['user_id'].nunique()
    popularity.rename(columns={'user_id': 'share_unique_users'}, inplace=True)

    top_popular = popularity[popularity['share_unique_users'] > 0.5].item_id.tolist()
    data = data[~data['item_id'].isin(top_popular)]

    # Уберем самые НЕ популярные товары (их и так НЕ купят)
    top_notpopular = popularity[popularity['share_unique_users'] < 0.01].item_id.tolist()
    data = data[~data['item_id'].isin(top_notpopular)]

    # Уберем товары, которые не продавались за последние 12 месяцев
    data_12_last = data[(data['week_no'] > data['week_no'].max() - 4 * 12) & (data['quantity']==0)].item_id.tolist()
    data = data[~data['item_id'].isin(data_12_last)]

    # Уберем не интересные для рекоммендаций категории (department)
    if item_features is not None:
        department_size = pd.DataFrame(item_features.\
                                        groupby('department')['item_id'].nunique().\
                                        sort_values(ascending=False)).reset_index()

        department_size.columns = ['department', 'n_items']
        rare_departments = department_size[department_size['n_items'] < 150].department.tolist()
        items_in_rare_departments = item_features[item_features['department'].isin(rare_departments)].item_id.unique().tolist()

        data = data[~data['item_id'].isin(items_in_rare_departments)]


    # Уберем слишком дешевые товары (на них не заработаем). 1 покупка из рассылок стоит 60 руб.
    data['price'] = data['sales_value'] / (np.maximum(data['quantity'], 1))
    data = data[data['price'] > 1]

    # Уберем слишком дорогие товарыs
    data = data[data['price'] < 50]

    # Возбмем топ по популярности
    popularity = data.groupby('item_id')['user_id'].nunique().reset_index()
    popularity['user_id'] = popularity['user_id'] / data['user_id'].nunique()
    popularity.rename(columns={'user_id': 'share_unique_users'}, inplace=True)
    top = popularity.sort_values('share_unique_users', ascending=False).head(take_n_popular).item_id.tolist()
    
    # опять какой то херовый код из методички
    #popularity = data.groupby('item_id')['quantity'].sum().reset_index()
    #popularity.rename(columns={'quantity': 'n_sold'}, inplace=True)
    #top = popularity.sort_values('n_sold', ascending=False).head(take_n_popular).item_id.tolist()
    
    # Заведем фиктивный item_id (если юзер покупал товары из топ-5000, то он "купил" такой товар)
    #data.loc[~data['item_id'].isin(top), 'item_id'] = 999999
    data = data.loc[data['item_id'].isin(top)]
    # ...

    return data


def postfilter_items(user_id, recommednations):
    pass

---

In [427]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Для работы с матрицами
from scipy.sparse import csr_matrix

# Матричная факторизация
from implicit.als import AlternatingLeastSquares
from implicit.nearest_neighbours import ItemItemRecommender  # нужен для одного трюка
from implicit.nearest_neighbours import bm25_weight, tfidf_weight

# Функции из 1-ого вебинара
import os, sys

module_path = os.path.abspath(os.path.join(os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)
    
#from src.metrics import precision_at_k, recall_at_k
#from src.utils import prefilter_items

In [552]:
data = pd.read_csv('./data/retail_train.csv')

data.columns = [col.lower() for col in data.columns]
data.rename(columns={'household_key': 'user_id',
                    'product_id': 'item_id'},
           inplace=True)


test_size_weeks = 3

data_train_full = data[data['week_no'] < data['week_no'].max() - test_size_weeks]
data_test = data[data['week_no'] >= data['week_no'].max() - test_size_weeks]

data_train_full.head(2)

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc
0,2375,26984851472,1,1004906,1,1.39,364,-0.6,1631,1,0.0,0.0
1,2375,26984851472,1,1033142,1,0.82,364,0.0,1631,1,0.0,0.0


In [429]:
item_features = pd.read_csv('./data/product.csv')
user_features = pd.read_csv('./data/hh_demographic.csv')

# column processing
item_features.columns = [col.lower() for col in item_features.columns]
user_features.columns = [col.lower() for col in user_features.columns]

item_features.rename(columns={'product_id': 'item_id'}, inplace=True)
user_features.rename(columns={'household_key': 'user_id'}, inplace=True)

item_features.head(2)

Unnamed: 0,item_id,manufacturer,department,brand,commodity_desc,sub_commodity_desc,curr_size_of_product
0,25671,2,GROCERY,National,FRZN ICE,ICE - CRUSHED/CUBED,22 LB
1,26081,2,MISC. TRANS.,National,NO COMMODITY DESCRIPTION,NO SUBCOMMODITY DESCRIPTION,


In [430]:
result = data_test.groupby('user_id')['item_id'].unique().reset_index()
result.columns=['user_id', 'actual']
result

Unnamed: 0,user_id,actual
0,1,"[821867, 834484, 856942, 865456, 889248, 90795..."
1,3,"[835476, 851057, 872021, 878302, 879948, 90963..."
2,6,"[920308, 926804, 946489, 1006718, 1017061, 107..."
3,7,"[840386, 889774, 898068, 909714, 929067, 95347..."
4,8,"[835098, 872137, 910439, 924610, 992977, 10412..."
...,...,...
2037,2496,[6534178]
2038,2497,"[1016709, 9835695, 1132298, 16809501, 845294, ..."
2039,2498,"[15716530, 834484, 901776, 914190, 958382, 972..."
2040,2499,"[867188, 877580, 902396, 914190, 951590, 95813..."


In [553]:
n_items_before = data_train_full['item_id'].nunique()

data_train = prefilter_items(data_train_full, item_features=item_features, take_n_popular=2400)

n_items_after = data_train['item_id'].nunique()
print('Decreased # items from {} to {}'.format(n_items_before, n_items_after))
data_train.head(5)

Decreased # items from 86865 to 2400


Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc,price
6,2375,26984851516,1,1043142,1,1.57,364,-0.68,1642,1,0.0,0.0,1.57
7,2375,26984851516,1,1085983,1,2.99,364,-0.4,1642,1,0.0,0.0,2.99
11,1364,26984896261,1,842930,1,2.19,31742,0.0,1520,1,0.0,0.0,2.19
19,1130,26984905972,1,1048462,1,1.19,31642,-0.8,1340,1,0.0,0.0,1.19
25,98,26984951769,1,965138,2,3.0,337,-0.08,1937,1,0.0,0.0,1.5


In [598]:
recommender = MainRecommender(data_train, data_train_full)

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/2469 [00:00<?, ?it/s]

In [507]:
params = {'boosting_type': 'gbdt', 'max_depth': -1, 'objective': 'binary', 
          'num_leaves': 64, 'learning_rate': 0.05, 'max_bin': 512, 
          'subsample_for_bin': 200, 'subsample': 1, 'subsample_freq': 1,
          'colsample_bytree': 0.8, 'reg_alpha': 5, 'reg_lambda': 10, 
          'min_split_gain': 0.5, 'min_child_weight': 1, 
          'min_child_samples': 5, 'scale_pos_weight': 1, 'num_class': 1, 
          'metric': 'binary_error'}

In [512]:
data_train_st2 = data_train
        

data_stage = recommender.get_own_recommendations(N=200)

       
        

temp = pd.DataFrame(columns=['user_id', 'item_id'])
temp['user_id'] = data_stage.keys()
temp['item_id'] = data_stage.values()
        
        # создаем series где индекс это порядок по user_id а значения это предложенные товары
data_stage_2 = temp.copy()
s = data_stage_2.apply(lambda x: pd.Series(x['item_id']), axis=1).stack().reset_index(level=1, drop=True)
s.name = 'item_id'
        
        # создаем датафрейм на основе предыдущего сериес с user id и флагом покупки.
data_stage_2 = data_stage_2.drop('item_id', axis=1).join(s)
data_stage_2['flag'] = 1
        
        # создаем датафрейм покупок(истинный класс)
data_targets = data_train_st2[['user_id', 'item_id']].copy()    
data_targets['target'] = 1  # тут только покупки 
        
        # объединяем датафреймы, тем самым получая истинные метки класса
data_targets = data_stage_2.merge(data_targets, on=['user_id', 'item_id'], how='left')
        
        
        # заполняем вторую метку класса и удаляем вспомогательный столбец
data_targets['target'].fillna(0, inplace= True)
data_targets.drop('flag', axis=1, inplace=True)
        
        # добавляем фичи пользователей и фичи юзверей
data_targets = data_targets.merge(item_features, on='item_id', how='left')
data_targets = data_targets.merge(user_features, on='user_id', how='left')
        
        #создаем финальный датасет
X_train = data_targets.drop('target', axis=1)
y_train = data_targets[['target']]

print(data_targets['target'].mean()) 

cat_feats = X_train.columns[2:].tolist()
X_train[cat_feats] = X_train[cat_feats].astype('category')

lgb = LGBMClassifier(**params)

0.5852728172858677


In [514]:
lgb.get_params().keys()

dict_keys(['boosting_type', 'class_weight', 'colsample_bytree', 'importance_type', 'learning_rate', 'max_depth', 'min_child_samples', 'min_child_weight', 'min_split_gain', 'n_estimators', 'n_jobs', 'num_leaves', 'objective', 'random_state', 'reg_alpha', 'reg_lambda', 'silent', 'subsample', 'subsample_for_bin', 'subsample_freq', 'max_bin', 'scale_pos_weight', 'num_class', 'metric'])

In [516]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

In [520]:
grid_params = {'learning_rate': [0.01], 'n_estimators': [23, 24, 25],
                'num_leaves': [15, 16, 17], 'boosting_type': ['gbdt'], 
                'objective': ['binary'], 'seed': [500],
                'colsample_bytree': [0.8, 8.5, 9.0], 
                'subsample': [0.75, 0.8], 'reg_alpha': [6, 7, 8],
                'reg_lambda': [6, 7, 8]}

In [518]:
gkf = KFold(n_splits=5, shuffle=True, random_state=42)

In [521]:
# Create the grid
grid = GridSearchCV(lgb, param_grid=grid_params, verbose=1, cv=gkf, n_jobs=-1)
# Run the grid
grid.fit(X_train, y_train)

# Print the best parameters found
print(grid.best_params_)
print(grid.best_score_)

Fitting 5 folds for each of 486 candidates, totalling 2430 fits


1620 fits failed out of a total of 2430.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1620 fits failed with the following error:
Traceback (most recent call last):
  File "D:\work\conda\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "D:\work\conda\lib\site-packages\lightgbm\sklearn.py", line 967, in fit
    super().fit(X, _y, sample_weight=sample_weight, init_score=init_score, eval_set=valid_sets,
  File "D:\work\conda\lib\site-packages\lightgbm\sklearn.py", line 748, in fit
    self._Booster = train(
  File "D:\work\conda\lib\site-packages\lightgbm\engine.py", line 271, in train
    booster = Booster(params=params, train_set=train_set)
  

{'boosting_type': 'gbdt', 'colsample_bytree': 0.8, 'learning_rate': 0.01, 'n_estimators': 25, 'num_leaves': 17, 'objective': 'binary', 'reg_alpha': 6, 'reg_lambda': 7, 'seed': 500, 'subsample': 0.75}
0.7246329466990116


{'boosting_type': 'gbdt', 'colsample_bytree': 0.8, 'learning_rate': 0.01, 'n_estimators': 24, 'num_leaves': 16, 'objective': 'binary', 'reg_alpha': 6, 'reg_lambda': 6, 'seed': 500, 'subsample': 0.75}

0.7185256967726017


{'boosting_type': 'gbdt', 'colsample_bytree': 0.8, 'learning_rate': 0.01, 'n_estimators': 25, 'num_leaves': 17, 'objective': 'binary', 'reg_alpha': 6, 'reg_lambda': 7, 'seed': 500, 'subsample': 0.75}
0.7246329466990116


In [555]:
# Using parameters already set above, replace in the best from the grid search
best_params = {k: grid.best_params_.get(k, v) for k, v in params.items()}
best_params['verbosity'] = -1

In [556]:
best_params

{'boosting_type': 'gbdt',
 'max_depth': -1,
 'objective': 'binary',
 'num_leaves': 17,
 'learning_rate': 0.01,
 'max_bin': 512,
 'subsample_for_bin': 200,
 'subsample': 0.75,
 'subsample_freq': 1,
 'colsample_bytree': 0.8,
 'reg_alpha': 6,
 'reg_lambda': 7,
 'min_split_gain': 0.5,
 'min_child_weight': 1,
 'min_child_samples': 5,
 'scale_pos_weight': 1,
 'num_class': 1,
 'metric': 'binary_error',
 'verbosity': -1}

In [433]:
recommender.item_fact.shape

(2469, 50)

In [434]:
recommender.user_fact.shape

(2400, 50)

In [435]:
data_train.shape

(491041, 13)

In [436]:
data_train_full.shape

(2278490, 12)

In [560]:
res = recommender.get_als_recommendations(N=5)
res 

{1: [1137176, 942817, 951197, 871680, 923723],
 2: [9707240, 876932, 1120559, 1135566, 849129],
 3: [835431, 872342, 976047, 1089023, 1084551],
 4: [1042942, 932863, 5568197, 907418, 1091520],
 5: [821845, 983858, 864857, 1053530, 911409],
 6: [1131351, 1125382, 1115576, 1019881, 9837501],
 7: [833458, 871680, 1075074, 1109192, 1084551],
 8: [829950, 1084551, 5567876, 9524291, 12262992],
 9: [1120559, 1005172, 969725, 5591103, 1024032],
 10: [15596488, 7025203, 945372, 879876, 961747],
 12: [6514085, 909349, 1115228, 978354, 12132685],
 13: [1089023, 12262992, 978354, 821845, 1082185],
 14: [1131374, 5568447, 13842090, 831509, 978354],
 15: [978354, 998239, 1077373, 931757, 9268695],
 16: [951071, 1084551, 1035843, 829950, 838097],
 17: [1070845, 1123146, 1035880, 1124077, 955587],
 18: [861419, 1061203, 5587656, 1082544, 930955],
 19: [821845, 1048849, 1131374, 13381584, 1082185],
 20: [7462562, 10150013, 932340, 1069334, 1082185],
 21: [934697, 1070076, 883003, 968992, 5567876],
 22:

In [561]:
temp = pd.DataFrame(columns=['user_id', 'item_id'])
temp['user_id'] = res.keys()
temp['item_id'] = res.values()
temp

Unnamed: 0,user_id,item_id
0,1,"[1137176, 942817, 951197, 871680, 923723]"
1,2,"[9707240, 876932, 1120559, 1135566, 849129]"
2,3,"[835431, 872342, 976047, 1089023, 1084551]"
3,4,"[1042942, 932863, 5568197, 907418, 1091520]"
4,5,"[821845, 983858, 864857, 1053530, 911409]"
...,...,...
2464,2496,"[998239, 1093329, 1131374, 9362429, 1113274]"
2465,2497,"[1054088, 998239, 10149863, 7441210, 1082185]"
2466,2498,"[854496, 909349, 878128, 1082185, 6534178]"
2467,2499,"[1107824, 1080354, 1131374, 857130, 934676]"


In [562]:
result['als'] = 1
temp_list = []
for i in result['user_id'].values:#range(len(result)):
    try:
        temp_list.append(res[i])
    except:
        #temp_list.append(recommender.get_similar_users_recommendation(user=i, N=5))
        temp_list.append(recommender._extend_with_top_popular(recommendations=[], N=5))
        #temp_list.append(i)
    #result.loc[result['user_id']==i, 'als_recommendations'] = res
    #result[result['user_id']==i]['als_recommendations'] = res[i]

result['als'] = temp_list
result

Unnamed: 0,user_id,actual,als,own_recommendations,2stg_own,top_pop
0,1,"[821867, 834484, 856942, 865456, 889248, 90795...","[1137176, 942817, 951197, 871680, 923723]","[983897, 1075074, 1089023, 9655212, 977545]","[1056775, 9832160, 1135096, 9655212, 1061203]","[1082185, 6534178, 1029743, 995242, 1106523]"
1,3,"[835476, 851057, 872021, 878302, 879948, 90963...","[835431, 872342, 976047, 1089023, 1084551]","[929472, 904435, 933248, 10456568, 857697]","[921345, 1082544, 933248, 1084551, 10456568]","[1082185, 6534178, 1029743, 995242, 1106523]"
2,6,"[920308, 926804, 946489, 1006718, 1017061, 107...","[1131351, 1125382, 1115576, 1019881, 9837501]","[913671, 863447, 987234, 988439, 862866]","[953675, 1084036, 8203834, 996269, 834303]","[1082185, 6534178, 1029743, 995242, 1106523]"
3,7,"[840386, 889774, 898068, 909714, 929067, 95347...","[833458, 871680, 1075074, 1109192, 1084551]","[6602729, 877337, 1122358, 886965, 1001277]","[1001277, 1021715, 1128333, 9837501, 954778]","[1082185, 6534178, 1029743, 995242, 1106523]"
4,8,"[835098, 872137, 910439, 924610, 992977, 10412...","[829950, 1084551, 5567876, 9524291, 12262992]","[904148, 1093329, 1069334, 932340, 837579]","[916050, 7139529, 983897, 919755, 12301073]","[1082185, 6534178, 1029743, 995242, 1106523]"
...,...,...,...,...,...,...
2037,2496,[6534178],"[998239, 1093329, 1131374, 9362429, 1113274]","[995645, 5591083, 998239, 7441210, 1020823]","[904435, 995645, 937301, 1082544, 5591083]","[1082185, 6534178, 1029743, 995242, 1106523]"
2038,2497,"[1016709, 9835695, 1132298, 16809501, 845294, ...","[1054088, 998239, 10149863, 7441210, 1082185]","[998239, 942817, 961747, 1057855, 907945]","[9836195, 961747, 884896, 942817, 908181]","[1082185, 6534178, 1029743, 995242, 1106523]"
2039,2498,"[15716530, 834484, 901776, 914190, 958382, 972...","[854496, 909349, 878128, 1082185, 6534178]","[849257, 12130522, 1093329, 997128, 953677]","[849257, 963226, 1093329, 1005456, 12302069]","[1082185, 6534178, 1029743, 995242, 1106523]"
2040,2499,"[867188, 877580, 902396, 914190, 951590, 95813...","[1107824, 1080354, 1131374, 857130, 934676]","[885447, 924804, 5570048, 1057855, 833458]","[924804, 838186, 927291, 953104, 1085778]","[1082185, 6534178, 1029743, 995242, 1106523]"


In [440]:
res = recommender.get_own_recommendations(N=5)
res 

{1: [983897, 1075074, 1089023, 9655212, 977545],
 2: [1001266, 1001277, 9707240, 1122085, 1035843],
 3: [929472, 904435, 933248, 10456568, 857697],
 4: [1121367, 887003, 1091520, 12385373, 992151],
 5: [1053530, 1002499, 978354, 1131321, 935393],
 6: [913671, 863447, 987234, 988439, 862866],
 7: [6602729, 877337, 1122358, 886965, 1001277],
 8: [904148, 1093329, 1069334, 932340, 837579],
 9: [5569303, 1109192, 1128244, 1120559, 9707240],
 10: [1060872, 982493, 1121059, 15596488, 879876],
 12: [6463742, 939323, 6514085, 1137694, 887618],
 13: [15926775, 932340, 1042942, 6551861, 898466],
 14: [818981, 878445, 978354, 1127838, 990335],
 15: [1014509, 1035843, 1104343, 1053530, 1042616],
 16: [923149, 1100533, 1084551, 1035843, 9835695],
 17: [1117128, 1132968, 9707240, 997128, 862866],
 18: [909750, 9837501, 9677886, 978354, 961620],
 19: [1000236, 837751, 1048727, 1038746, 821970],
 20: [1111786, 945611, 884686, 1040842, 821556],
 21: [961791, 15596488, 1031923, 934697, 912817],
 22: [99

In [441]:
temp = pd.DataFrame(columns=['user_id', 'item_id'])
temp['user_id'] = res.keys()
temp['item_id'] = res.values()
temp

Unnamed: 0,user_id,item_id
0,1,"[983897, 1075074, 1089023, 9655212, 977545]"
1,2,"[1001266, 1001277, 9707240, 1122085, 1035843]"
2,3,"[929472, 904435, 933248, 10456568, 857697]"
3,4,"[1121367, 887003, 1091520, 12385373, 992151]"
4,5,"[1053530, 1002499, 978354, 1131321, 935393]"
...,...,...
2464,2496,"[995645, 5591083, 998239, 7441210, 1020823]"
2465,2497,"[998239, 942817, 961747, 1057855, 907945]"
2466,2498,"[849257, 12130522, 1093329, 997128, 953677]"
2467,2499,"[885447, 924804, 5570048, 1057855, 833458]"


In [442]:
result['own_recommendations'] = 1
temp_list = []
for i in result['user_id'].values:#range(len(result)):
    try:
        temp_list.append(res[i])
    except:
        #temp_list.append(recommender.get_similar_users_recommendation(user=i, N=5))
        temp_list.append(recommender._extend_with_top_popular(recommendations=[], N=5))
        #temp_list.append(i)
    #result.loc[result['user_id']==i, 'als_recommendations'] = res
    #result[result['user_id']==i]['als_recommendations'] = res[i]

result['own_recommendations'] = temp_list
result

Unnamed: 0,user_id,actual,als,own_recommendations
0,1,"[821867, 834484, 856942, 865456, 889248, 90795...","[923723, 1137176, 873203, 1054088, 951197]","[983897, 1075074, 1089023, 9655212, 977545]"
1,3,"[835476, 851057, 872021, 878302, 879948, 90963...","[1084551, 940278, 835431, 13381584, 1085778]","[929472, 904435, 933248, 10456568, 857697]"
2,6,"[920308, 926804, 946489, 1006718, 1017061, 107...","[1125382, 9837501, 852182, 1054088, 1115576]","[913671, 863447, 987234, 988439, 862866]"
3,7,"[840386, 889774, 898068, 909714, 929067, 95347...","[871680, 1075074, 1050583, 833458, 1001277]","[6602729, 877337, 1122358, 886965, 1001277]"
4,8,"[835098, 872137, 910439, 924610, 992977, 10412...","[829950, 1089023, 5567876, 1093329, 964221]","[904148, 1093329, 1069334, 932340, 837579]"
...,...,...,...,...
2037,2496,[6534178],"[998239, 949991, 1131374, 12262992, 978354]","[995645, 5591083, 998239, 7441210, 1020823]"
2038,2497,"[1016709, 9835695, 1132298, 16809501, 845294, ...","[1054088, 998239, 1038497, 10149863, 1082185]","[998239, 942817, 961747, 1057855, 907945]"
2039,2498,"[15716530, 834484, 901776, 914190, 958382, 972...","[854496, 878128, 923723, 1011841, 963226]","[849257, 12130522, 1093329, 997128, 953677]"
2040,2499,"[867188, 877580, 902396, 914190, 951590, 95813...","[1005456, 1107824, 991948, 1131374, 12132164]","[885447, 924804, 5570048, 1057855, 833458]"


In [599]:
recommender.fit_2_level(item_features_st2=item_features, user_features_st2=user_features)

0.5852728172858677


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [600]:
stg_2_ = recommender.predict_2_level(data_test['user_id'].unique(),item_features_st2=item_features, user_features_st2=user_features, N=5)

In [601]:
stg_2_

{1: [971053, 9858742, 991024, 9832160, 911454],
 3: [10456568, 5564906, 933248, 882830, 1115228],
 6: [862866, 1132911, 911409, 1058404, 821083],
 7: [871513, 931757, 13040302, 1072483, 821556],
 8: [935393, 7139529, 872342, 932340, 1069334],
 9: [6534030, 882190, 5591103, 889731, 5568197],
 13: [1006414, 1069334, 826571, 12188584, 1001266],
 14: [857130, 978354, 861419, 1001266, 995311],
 15: [9881593, 1088414, 1103752, 845078, 13417590],
 16: [1127831, 860776, 1100533, 923746, 1098066],
 17: [948670, 1068832, 963971, 843450, 997128],
 18: [1110632, 870929, 909761, 1085696, 961620],
 19: [1082172, 1046587, 936723, 877337, 1043751],
 20: [953675, 884686, 821556, 8293447, 1055425],
 22: [917381, 991874, 821970, 879045, 968992],
 23: [1138858, 959737, 1091520, 934399, 1139786],
 24: [1082172, 1041453, 5587656, 9803601, 1098927],
 25: [15452812, 890955, 15926927, 929472, 867648],
 26: [823704, 906838, 5591154, 965543, 930666],
 27: [5572828, 971852, 1098910, 951071, 868745],
 28: [992730,

In [585]:
temp = pd.DataFrame(columns=['user_id', 'item_id'])
temp['user_id'] = stg_2_.keys()
temp['item_id'] = stg_2_.values()
temp

Unnamed: 0,user_id,item_id
0,1,"[1075074, 934369, 983897, 977545, 1137176]"
1,3,"[920844, 904435, 929472, 933248, 920978]"
2,6,"[1084036, 913671, 1098844, 821083, 1132968]"
3,7,"[877337, 6602729, 886965, 1122358, 1074172]"
4,8,"[985579, 823565, 1069334, 935393, 7139529]"
...,...,...
2031,2496,"[865196, 1082544, 998239, 5580577, 1020823]"
2032,2497,"[944852, 908181, 998239, 961747, 1113274]"
2033,2498,"[12130522, 997128, 987044, 1077745, 5568447]"
2034,2499,"[5570048, 924804, 953675, 15741861, 6463949]"


In [594]:
result['2stg_own'] = 1
temp_list = []
for i in result['user_id'].values:#range(len(result)):
    try:
        temp_list.append(stg_2_[i])
    except:
        #temp_list.append(recommender.get_similar_users_recommendation(user=i, N=5))
        temp_list.append(recommender._extend_with_top_popular(recommendations=[], N=5))
        #temp_list.append(i)
    #result.loc[result['user_id']==i, 'als_recommendations'] = res
    #result[result['user_id']==i]['als_recommendations'] = res[i]

result['2stg_own'] = temp_list
result

Unnamed: 0,user_id,actual,als,own_recommendations,2stg_own,top_pop
0,1,"[821867, 834484, 856942, 865456, 889248, 90795...","[1137176, 942817, 951197, 871680, 923723]","[983897, 1075074, 1089023, 9655212, 977545]","[1082185, 6534178, 1029743, 995242, 1106523]","[1082185, 6534178, 1029743, 995242, 1106523]"
1,3,"[835476, 851057, 872021, 878302, 879948, 90963...","[835431, 872342, 976047, 1089023, 1084551]","[929472, 904435, 933248, 10456568, 857697]","[929472, 904435, 933248, 10456568, 1082185]","[1082185, 6534178, 1029743, 995242, 1106523]"
2,6,"[920308, 926804, 946489, 1006718, 1017061, 107...","[1131351, 1125382, 1115576, 1019881, 9837501]","[913671, 863447, 987234, 988439, 862866]","[1082185, 6534178, 1029743, 995242, 1106523]","[1082185, 6534178, 1029743, 995242, 1106523]"
3,7,"[840386, 889774, 898068, 909714, 929067, 95347...","[833458, 871680, 1075074, 1109192, 1084551]","[6602729, 877337, 1122358, 886965, 1001277]","[1082185, 6534178, 1029743, 995242, 1106523]","[1082185, 6534178, 1029743, 995242, 1106523]"
4,8,"[835098, 872137, 910439, 924610, 992977, 10412...","[829950, 1084551, 5567876, 9524291, 12262992]","[904148, 1093329, 1069334, 932340, 837579]","[1082185, 6534178, 1029743, 995242, 1106523]","[1082185, 6534178, 1029743, 995242, 1106523]"
...,...,...,...,...,...,...
2037,2496,[6534178],"[998239, 1093329, 1131374, 9362429, 1113274]","[995645, 5591083, 998239, 7441210, 1020823]","[1082185, 6534178, 1029743, 995242, 1106523]","[1082185, 6534178, 1029743, 995242, 1106523]"
2038,2497,"[1016709, 9835695, 1132298, 16809501, 845294, ...","[1054088, 998239, 10149863, 7441210, 1082185]","[998239, 942817, 961747, 1057855, 907945]","[1082185, 6534178, 1029743, 995242, 1106523]","[1082185, 6534178, 1029743, 995242, 1106523]"
2039,2498,"[15716530, 834484, 901776, 914190, 958382, 972...","[854496, 909349, 878128, 1082185, 6534178]","[849257, 12130522, 1093329, 997128, 953677]","[1082185, 6534178, 1029743, 995242, 1106523]","[1082185, 6534178, 1029743, 995242, 1106523]"
2040,2499,"[867188, 877580, 902396, 914190, 951590, 95813...","[1107824, 1080354, 1131374, 857130, 934676]","[885447, 924804, 5570048, 1057855, 833458]","[1082185, 6534178, 1029743, 995242, 1106523]","[1082185, 6534178, 1029743, 995242, 1106523]"


In [488]:
result['top_pop'] = 1
temp_list = []
for i in result['user_id'].values:#range(len(result)):


    temp_list.append(recommender._extend_with_top_popular(recommendations=[], N=5))

result['top_pop'] = temp_list
result

Unnamed: 0,user_id,actual,als,own_recommendations,2stg_own,top_pop
0,1,"[821867, 834484, 856942, 865456, 889248, 90795...","[923723, 1137176, 873203, 1054088, 951197]","[983897, 1075074, 1089023, 9655212, 977545]","[855488, 831509, 9655212, 983897, 1061203]","[1082185, 6534178, 1029743, 995242, 1106523]"
1,3,"[835476, 851057, 872021, 878302, 879948, 90963...","[1084551, 940278, 835431, 13381584, 1085778]","[929472, 904435, 933248, 10456568, 857697]","[1076161, 10456568, 7168417, 898342, 882830]","[1082185, 6534178, 1029743, 995242, 1106523]"
2,6,"[920308, 926804, 946489, 1006718, 1017061, 107...","[1125382, 9837501, 852182, 1054088, 1115576]","[913671, 863447, 987234, 988439, 862866]","[878715, 1108094, 10456152, 9859112, 1138854]","[1082185, 6534178, 1029743, 995242, 1106523]"
3,7,"[840386, 889774, 898068, 909714, 929067, 95347...","[871680, 1075074, 1050583, 833458, 1001277]","[6602729, 877337, 1122358, 886965, 1001277]","[898342, 1024032, 842707, 1072953, 5591154]","[1082185, 6534178, 1029743, 995242, 1106523]"
4,8,"[835098, 872137, 910439, 924610, 992977, 10412...","[829950, 1089023, 5567876, 1093329, 964221]","[904148, 1093329, 1069334, 932340, 837579]","[983897, 942817, 1076306, 829722, 887618]","[1082185, 6534178, 1029743, 995242, 1106523]"
...,...,...,...,...,...,...
2037,2496,[6534178],"[998239, 949991, 1131374, 12262992, 978354]","[995645, 5591083, 998239, 7441210, 1020823]","[831509, 1082544, 971053, 1054088, 900418]","[1082185, 6534178, 1029743, 995242, 1106523]"
2038,2497,"[1016709, 9835695, 1132298, 16809501, 845294, ...","[1054088, 998239, 1038497, 10149863, 1082185]","[998239, 942817, 961747, 1057855, 907945]","[1082544, 950935, 849202, 942817, 849257]","[1082185, 6534178, 1029743, 995242, 1106523]"
2039,2498,"[15716530, 834484, 901776, 914190, 958382, 972...","[854496, 878128, 923723, 1011841, 963226]","[849257, 12130522, 1093329, 997128, 953677]","[7168417, 898342, 1013531, 838097, 10456152]","[1082185, 6534178, 1029743, 995242, 1106523]"
2040,2499,"[867188, 877580, 902396, 914190, 951590, 95813...","[1005456, 1107824, 991948, 1131374, 12132164]","[885447, 924804, 5570048, 1057855, 833458]","[833458, 842707, 825006, 971949, 919902]","[1082185, 6534178, 1029743, 995242, 1106523]"


In [489]:
def precision_at_k(recommended_list, bought_list, k=5):
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    

    
    if k < len(recommended_list):
        recommended_list = recommended_list[:k]

    flags = np.isin(bought_list, recommended_list)
    precision = flags.sum() / len(recommended_list)

    return precision

In [490]:
def recall_at_k(recommended_list, bought_list, k=5):

    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)

    if k < len(recommended_list):
        recommended_list = recommended_list[:k]

    flags = np.isin(bought_list, recommended_list)
    recall = flags.sum() / len(bought_list)

    return recall

In [501]:
dict_loss = {}

In [602]:
dict_loss['own_recommendations'] = {'recall_at_k':result.apply(lambda row: recall_at_k(row['own_recommendations'], row['actual']), axis=1).mean(),
                                   'precision_at_k':result.apply(lambda row: precision_at_k(row['own_recommendations'], row['actual']), axis=1).mean()}
dict_loss['als'] = {'recall_at_k':result.apply(lambda row: recall_at_k(row['als'], row['actual']), axis=1).mean(),
                                   'precision_at_k':result.apply(lambda row: precision_at_k(row['als'], row['actual']), axis=1).mean()}
dict_loss['2stg_own'] = {'recall_at_k':result.apply(lambda row: recall_at_k(row['2stg_own'], row['actual']), axis=1).mean(),
                                   'precision_at_k':result.apply(lambda row: precision_at_k(row['2stg_own'], row['actual']), axis=1).mean()}
dict_loss['top_pop'] = {'recall_at_k':result.apply(lambda row: recall_at_k(row['top_pop'], row['actual']), axis=1).mean(),
                                   'precision_at_k':result.apply(lambda row: precision_at_k(row['top_pop'], row['actual']), axis=1).mean()}
#dict_loss['2stg_als'] = {'recall_at_k':result.apply(lambda row: recall_at_k(row['2stg_als'], row['actual']), axis=1).mean(),
#                                   'precision_at_k':result.apply(lambda row: precision_at_k(row['2stg_als'], row['actual']), axis=1).mean()}
#dict_loss['2stg_top'] = {'recall_at_k':result.apply(lambda row: recall_at_k(row['2stg_top'], row['actual']), axis=1).mean(),
#                                   'precision_at_k':result.apply(lambda row: precision_at_k(row['2stg_top'], row['actual']), axis=1).mean()}

In [603]:
dict_loss 

{'own_recommendations': {'recall_at_k': 0.01087187022329594,
  'precision_at_k': 0.09764936336924537},
 'als': {'recall_at_k': 0.004926521669930625,
  'precision_at_k': 0.04054848188050958},
 '2stg_own': {'recall_at_k': 0.029240860652367258,
  'precision_at_k': 0.20088148873653008},
 'top_pop': {'recall_at_k': 0.030878660673682164,
  'precision_at_k': 0.2055827619980383}}

In [52]:
n_items_before = data_train_full['item_id'].nunique()

data_train = prefilter_items(data_train_full, item_features=item_features, take_n_popular=10000)

n_items_after = data_train['item_id'].nunique()
print('Decreased # items from {} to {}'.format(n_items_before, n_items_after))
data_train.head(5)

Decreased # items from 86865 to 6288


Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc,price
7,2375,26984851516,1,1085983,1,2.99,364,-0.4,1642,1,0.0,0.0,2.99
11,1364,26984896261,1,842930,1,2.19,31742,0.0,1520,1,0.0,0.0,2.19
14,1364,26984896261,1,937406,1,2.5,31742,-0.99,1520,1,0.0,0.0,2.5
35,1172,26985025264,1,1000493,1,4.44,396,-0.89,946,1,0.0,0.0,4.44
39,1172,26985025264,1,1075214,1,5.99,396,-3.0,946,1,0.0,0.0,5.99


In [53]:
user_item_matrix = pd.pivot_table(data_train, 
                                  index='user_id', columns='item_id', 
                                  values='quantity', # Можно пробоват ьдругие варианты
                                  aggfunc='count', 
                                  fill_value=0
                                 )

user_item_matrix = user_item_matrix.astype(float) # необходимый тип матрицы для implicit
user_item_matrix.head(2)

item_id,818981,818996,819330,819400,819487,819590,819849,819927,819978,819982,...,15926928,15972074,15972298,15972790,16053142,16100266,16729296,16729299,16729363,17104444
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [54]:
userids = user_item_matrix.index.values
itemids = user_item_matrix.columns.values

matrix_userids = np.arange(len(userids))
matrix_itemids = np.arange(len(itemids))

id_to_itemid = dict(zip(matrix_itemids, itemids))
id_to_userid = dict(zip(matrix_userids, userids))

itemid_to_id = dict(zip(itemids, matrix_itemids))
userid_to_id = dict(zip(userids, matrix_userids))

In [55]:
user_feat = pd.DataFrame(user_item_matrix.index)
user_feat = user_feat.merge(user_features, on='user_id', how='left')
user_feat.set_index('user_id', inplace=True)

item_feat = pd.DataFrame(user_item_matrix.columns)
item_feat = item_feat.merge(item_features, on='item_id', how='left')
item_feat.set_index('item_id', inplace=True)

user_feat.head(2)

Unnamed: 0_level_0,age_desc,marital_status_code,income_desc,homeowner_desc,hh_comp_desc,household_size_desc,kid_category_desc
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,65+,A,35-49K,Homeowner,2 Adults No Kids,2.0,None/Unknown
2,,,,,,,


In [56]:
from lightfm import LightFM
from scipy.sparse import csr_matrix, coo_matrix

In [57]:
sparse_user_item = csr_matrix(user_item_matrix).tocsr()

In [58]:
user_feat_lightfm = pd.get_dummies(user_feat, columns=user_feat.columns.tolist())
item_feat_lightfm = pd.get_dummies(item_feat, columns=item_feat.columns.tolist())

In [62]:
model = LightFM(no_components=100,
                loss='bpr', # 'warp'
                learning_rate=0.05, 
                item_alpha=0.1, user_alpha=0.1, 
                random_state=42)

model.fit((sparse_user_item > 0) * 1,  # user-item matrix из 0 и 1
          sample_weight=coo_matrix(user_item_matrix),
          user_features=csr_matrix(user_feat_lightfm.values).tocsr(),
          item_features=csr_matrix(item_feat_lightfm.values).tocsr(),
          epochs=15, 
          num_threads=4) 

<lightfm.lightfm.LightFM at 0x186f5f5fdc0>

In [63]:
from lightfm.evaluation import precision_at_k as precision_at_k_lightfm

In [64]:
train_precision = precision_at_k_lightfm(model, sparse_user_item, 
                                 user_features=csr_matrix(user_feat_lightfm.values).tocsr(),
                                 item_features=csr_matrix(item_feat_lightfm.values).tocsr(),
                                 k=5).mean()

train_precision

0.012140835

In [38]:
test_item_ids = np.array(range(len(data_train['item_id'].unique())))

In [39]:
len(data_train['user_id'].unique())

2469

In [40]:
csr_matrix(user_feat_lightfm.values).tocsr().shape

(2469, 41)

In [41]:
csr_matrix(item_feat_lightfm.values).tocsr().shape

(2400, 1547)

In [42]:
len(test_item_ids)

2400

In [43]:
temp_list = []
for i in data_train['user_id'].unique():

        predictions = model.predict(user_ids=int(userid_to_id[i]), item_ids=test_item_ids,
                                user_features=csr_matrix(user_feat_lightfm.values).tocsr(),
                                item_features=csr_matrix(item_feat_lightfm.values).tocsr(),
                                num_threads=4)
        num_largest = 5
        indices = (-predictions).argpartition(num_largest, axis=None)[:num_largest]
        
        temp_list.append(data_train['item_id'].unique()[indices])
        print(i)
        print(predictions)
        
temp_list

2375
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1364
[-131.83076 -131.8343  -131.8343  ... -131.84618 -131.81294 -131.91458]
1130
[-62.806763 -62.810307 -62.810307 ... -62.822174 -62.788944 -62.89058 ]
98
[-93.32231 -93.32585 -93.32585 ... -93.33772 -93.30449 -93.40613]
1172
[-109.56726 -109.5708  -109.5708  ... -109.58267 -109.54944 -109.65108]
1060
[-118.5735  -118.57704 -118.57704 ... -118.58891 -118.55568 -118.65732]
212
[-128.58357 -128.58711 -128.58711 ... -128.59898 -128.56575 -128.66739]
2052
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
718
[-121.74857  -121.75212  -121.75212  ... -121.763985 -121.73076
 -121.83239 ]
240
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2305
[-156.8387  -156.84224 -156.84224 ... -156.85411 -156.82088 -156.92252]
1916
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
271
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
255
[4.920853  4.917309  4.917309  

365
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2102
[-164.01396 -164.0175  -164.0175  ... -164.02937 -163.99614 -164.09778]
1689
[-136.34676 -136.3503  -136.3503  ... -136.36217 -136.32893 -136.43057]
1380
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1107
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1083
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1432
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
454
[-119.15524  -119.15878  -119.15878  ... -119.170654 -119.13742
 -119.23906 ]
2247
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2448
[-189.65913 -189.66267 -189.66267 ... -189.67455 -189.64131 -189.74295]
451
[-108.24627  -108.24982  -108.24982  ... -108.26168  -108.228455
 -108.330086]
2104
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1184
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2257
[4.920853  4.917309  4.917309  .

773
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2050
[-117.6694   -117.67294  -117.67294  ... -117.684814 -117.65158
 -117.75322 ]
1769
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1353
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1492
[-166.63644 -166.63998 -166.63998 ... -166.65186 -166.61862 -166.72026]
2002
[-166.96538 -166.96892 -166.96892 ... -166.98079 -166.94756 -167.0492 ]
1309
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2048
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
273
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1856
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2331
[-122.17545  -122.179    -122.179    ... -122.190865 -122.15764
 -122.25927 ]
1760
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
655
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1263
[-101.9654   -101.96895  -101.96895  ..

1355
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
178
[-101.49565 -101.49919 -101.49919 ... -101.51106 -101.47783 -101.57947]
669
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1642
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
874
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1971
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1771
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
368
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
22
[-183.84444 -183.84798 -183.84798 ... -183.85985 -183.82661 -183.92825]
1873
[-124.532394 -124.535934 -124.535934 ... -124.547806 -124.51457
 -124.61621 ]
1297
[-160.61989 -160.62343 -160.62343 ... -160.6353  -160.60207 -160.7037 ]
420
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
251
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1485
[-166.30124 -166.30478 -166.30478 ... -166.31665 -

1300
[-125.576904 -125.580444 -125.580444 ... -125.592316 -125.55908
 -125.66072 ]
401
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1868
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1932
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1320
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1928
[-103.859146 -103.862686 -103.862686 ... -103.87456  -103.841324
 -103.94296 ]
2001
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
484
[-135.23221 -135.23575 -135.23575 ... -135.24762 -135.21439 -135.31602]
476
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1111
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
963
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
193
[-183.84444 -183.84798 -183.84798 ... -183.85985 -183.82661 -183.92825]
2186
[-128.29236 -128.2959  -128.2959  ... -128.30777 -128.27454 -128.37617]
309
[-162.97218 -162.97572 -162.97572 .

2062
[-136.07927 -136.08281 -136.08281 ... -136.09468 -136.06145 -136.16309]
2220
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1613
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2309
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
115
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
386
[-98.44883  -98.45238  -98.45238  ... -98.46424  -98.431015 -98.532646]
185
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
323
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1519
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
167
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1319
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1787
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2157
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2069
[-127.4453   -127.44884  -127.44884  ... -127.46071  -127.427475


222
[-149.16423 -149.16777 -149.16777 ... -149.17964 -149.14641 -149.24805]
2452
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1467
[-73.94183  -73.94537  -73.94537  ... -73.957245 -73.92401  -74.02565 ]
470
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
307
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
607
[-127.857834 -127.861374 -127.861374 ... -127.873245 -127.84001
 -127.94165 ]
469
[-119.92271  -119.926254 -119.926254 ... -119.93812  -119.90489
 -120.00652 ]
1233
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1022
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1506
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2063
[-169.03499 -169.03853 -169.03853 ... -169.0504  -169.01717 -169.1188 ]
2416
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2321
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1015
[-170.51375 -170.51729 -170.51729 .

1206
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
586
[-69.391525 -69.395065 -69.395065 ... -69.40694  -69.3737   -69.47534 ]
2316
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
744
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2013
[-117.174286 -117.177826 -117.177826 ... -117.1897   -117.15646
 -117.2581  ]
714
[-73.62932  -73.63286  -73.63286  ... -73.64473  -73.611496 -73.713135]
1532
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
552
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1156
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1726
[-152.66917 -152.67271 -152.67271 ... -152.68459 -152.65135 -152.75299]
1034
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
894
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
535
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
943
[4.920853  4.917309  4.917309  ... 4.9054403 4.93

1315
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
598
[-111.83775  -111.84129  -111.84129  ... -111.853165 -111.81993
 -111.92157 ]
1674
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2046
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
298
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1931
[-170.51375 -170.51729 -170.51729 ... -170.52916 -170.49593 -170.59756]
1648
[-113.64551  -113.64905  -113.64905  ... -113.66092  -113.627686
 -113.729324]
2025
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1518
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1958
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
697
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
579
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1902
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1904
[4.920853  4.917309  4.917309  ... 4.9054403 

1357
[-168.0949  -168.09843 -168.09843 ... -168.1103  -168.07707 -168.17871]
1154
[-176.86302 -176.86656 -176.86656 ... -176.87843 -176.8452  -176.94684]
930
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
55
[-171.98956 -171.9931  -171.9931  ... -172.00497 -171.97174 -172.07338]
955
[-117.46551  -117.469055 -117.469055 ... -117.48092  -117.44769
 -117.549324]
1200
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
699
[-189.65913 -189.66267 -189.66267 ... -189.67455 -189.64131 -189.74295]
945
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
110
[-160.85298 -160.85652 -160.85652 ... -160.8684  -160.83516 -160.9368 ]
569
[-117.884995 -117.888535 -117.888535 ... -117.900406 -117.86717
 -117.96881 ]
1970
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
831
[-100.09088 -100.09442 -100.09442 ... -100.10629 -100.07306 -100.1747 ]
885
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
491
[-141.90965 -141.9132  

1120
[-50.27645  -50.279995 -50.279995 ... -50.291862 -50.258633 -50.360268]
1719
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1903
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
648
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
703
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1113
[-132.67412 -132.67766 -132.67766 ... -132.68953 -132.6563  -132.75793]
1062
[-164.16994 -164.17348 -164.17348 ... -164.18535 -164.15211 -164.25375]
1796
[-121.680336 -121.68388  -121.68388  ... -121.69575  -121.66252
 -121.76415 ]
1773
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1997
[-126.021866 -126.025406 -126.025406 ... -126.03728  -126.00404
 -126.10568 ]
1781
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1790
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2015
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2184
[-128.93475 -128.9383  -128.9383

1883
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
285
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
461
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
692
[-155.62373 -155.62727 -155.62727 ... -155.63914 -155.60591 -155.70755]
199
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
369
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
809
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
929
[-64.771194 -64.774734 -64.774734 ... -64.786606 -64.75337  -64.85501 ]
1169
[-166.86003 -166.86357 -166.86357 ... -166.87544 -166.84221 -166.94385]
264
[-181.37791 -181.38145 -181.38145 ... -181.39333 -181.36009 -181.46173]
1214
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
234
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1086
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
705
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.8

2155
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
295
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1244
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1991
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1664
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1250
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1311
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
873
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
160
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2272
[-167.4835  -167.48705 -167.48705 ... -167.49892 -167.46568 -167.56732]
694
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
566
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
290
[-140.58533 -140.58887 -140.58887 ... -140.60074 -140.5675  -140.66914]
1890
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]


548
[-160.84416 -160.8477  -160.8477  ... -160.85957 -160.82634 -160.92798]
37
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2467
[-122.78026  -122.7838   -122.7838   ... -122.79567  -122.762436
 -122.864075]
88
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1969
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1134
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
936
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1743
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
489
[-115.11937  -115.12291  -115.12291  ... -115.13478  -115.10155
 -115.203186]
1711
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1344
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1183
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
574
[-122.78026  -122.7838   -122.7838   ... -122.79567  -122.762436
 -122.864075]
1750
[4.920853  4.917309  4.917309  ... 4

2056
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1867
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
2286
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
995
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1346
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
1762
[-120.014435 -120.017975 -120.017975 ... -120.02985  -119.99661
 -120.09825 ]
2279
[-142.83406 -142.8376  -142.8376  ... -142.84947 -142.81624 -142.91788]
547
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
868
[-171.98956 -171.9931  -171.9931  ... -172.00497 -171.97174 -172.07338]
908
[-75.5311   -75.53464  -75.53464  ... -75.54651  -75.513275 -75.614914]
1690
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
112
[4.920853  4.917309  4.917309  ... 4.9054403 4.938671  4.837036 ]
644
[-168.42361 -168.42715 -168.42715 ... -168.43903 -168.40579 -168.50743]
2145
[4.920853  4.917309  4.917309  ... 4.90544

KeyboardInterrupt: 

In [46]:
predictions = model.predict(user_ids=int(8), item_ids=test_item_ids,
                                user_features=csr_matrix(user_feat_lightfm.values).tocsr(),
                                item_features=csr_matrix(item_feat_lightfm.values).tocsr(),
                                num_threads=4)
num_largest = 5
indices = (-predictions).argpartition(num_largest, axis=None)[:num_largest]
        
data_train['item_id'].unique()[indices]

array([1118757, 1000806, 1106301,  844991, 1130111], dtype=int64)

In [44]:
result['LightFM'] = 1
temp_list = []
for i in result['user_id'].values:
    try:
        predictions = model.predict(user_ids=int(i), item_ids=test_item_ids,
                                user_features=csr_matrix(user_feat_lightfm.values).tocsr(),
                                item_features=csr_matrix(item_feat_lightfm.values).tocsr(),
                                num_threads=4)
        num_largest = 5
        indices = (-predictions).argpartition(num_largest, axis=None)[:num_largest]
        
        temp_list.append(data_train['item_id'].unique()[indices])
    except:
        #temp_list.append(recommender.get_similar_users_recommendation(user=i, N=5))
        temp_list.append(recommender._extend_with_top_popular(recommendations=[], N=5))
        #temp_list.append(i)
    #result.loc[result['user_id']==i, 'als_recommendations'] = res
    #result[result['user_id']==i]['als_recommendations'] = res[i]

result['LightFM'] = temp_list
result

Unnamed: 0,user_id,actual,als,own_recommendations,2stg_own,top_pop,2stg_als,2stg_top,LightFM
0,1,"[821867, 834484, 856942, 865456, 889248, 90795...","[977867, 1089023, 7025275, 1040346, 8090546]","[1089023, 878285, 991024, 5577022, 856942]","[1053282, 1041796, 940947, 1082185, 6534178]","[1082185, 6534178, 1029743, 995242, 1106523]","[839818, 856942, 1118787, 1082185, 6534178]","[1082185, 6533765, 5569471, 1070015, 953476]","[1118757, 1000806, 1106301, 844991, 1130111]"
1,3,"[835476, 851057, 872021, 878302, 879948, 90963...","[1048383, 1060363, 12262830, 927360, 918368]","[1075979, 904435, 1136486, 1020210, 933248]","[1053690, 965138, 847573, 937791, 1082185]","[1082185, 6534178, 1029743, 995242, 1106523]","[1043730, 1000707, 15831255, 831557, 10344586]","[1053690, 962229, 1082185, 6534178, 1029743]","[1118757, 1000806, 1106301, 844991, 1130111]"
2,6,"[920308, 926804, 946489, 1006718, 1017061, 107...","[1131488, 1084036, 985866, 12132648, 1098844]","[863447, 862866, 996269, 12384953, 1098844]","[863447, 8203834, 871611, 1082185, 6534178]","[1082185, 6534178, 1029743, 995242, 1106523]","[863447, 1082185, 6534178, 1029743, 995242]","[962229, 1053690, 916122, 6533765, 909894]","[1118757, 1000806, 1130111, 844991, 1106301]"
3,7,"[840386, 889774, 898068, 909714, 929067, 95347...","[849297, 908145, 1081710, 909811, 918368]","[1056973, 881249, 1139835, 1127232, 1132198]","[5568845, 9836460, 1122358, 1082185, 6534178]","[1082185, 6534178, 1029743, 995242, 1106523]","[968936, 1137775, 961620, 1130777, 865178]","[859075, 844179, 1068719, 1096036, 1101010]","[1118757, 1000806, 1106301, 844991, 1130111]"
4,8,"[835098, 872137, 910439, 924610, 992977, 10412...","[1048383, 10122036, 1096099, 821845, 1082185]","[5567231, 932340, 987628, 1103902, 878636]","[981086, 837579, 5577022, 969932, 1082185]","[1082185, 6534178, 1029743, 995242, 1106523]","[855557, 981086, 942817, 1082185, 6534178]","[893018, 862349, 962229, 1053690, 1082185]","[1118757, 1000806, 1106301, 844991, 1130111]"
...,...,...,...,...,...,...,...,...,...
2037,2496,[6534178],"[9677949, 5592094, 957013, 1060529, 995876]","[937301, 865196, 5591083, 996269, 7441210]","[828106, 8203782, 1062966, 5591083, 1082185]","[1082185, 6534178, 1029743, 995242, 1106523]","[995876, 7441210, 865178, 1082185, 6534178]","[995965, 844179, 859075, 1082185, 6534178]","[1082185, 6534178, 1029743, 995242, 1106523]"
2038,2497,"[1016709, 9835695, 1132298, 16809501, 845294, ...","[8290587, 1112598, 10122036, 868683, 1082185]","[1119089, 944852, 977867, 942817, 907945]","[942817, 998556, 831063, 912914, 12731517]","[1082185, 6534178, 1029743, 995242, 1106523]","[849202, 5570048, 844179, 1082185, 6534178]","[844179, 893018, 859075, 1053690, 1082185]","[1082185, 6534178, 1029743, 995242, 1106523]"
2039,2498,"[15716530, 834484, 901776, 914190, 958382, 972...","[854754, 10122036, 909396, 1082185, 6534178]","[1121603, 1100379, 1093329, 953677, 878636]","[846550, 1130029, 12132648, 821083, 945998]","[1082185, 6534178, 1029743, 995242, 1106523]","[1107039, 991575, 844179, 1100379, 1082185]","[987562, 1053690, 9526410, 844179, 859075]","[1082185, 6534178, 1029743, 995242, 1106523]"
2040,2499,"[867188, 877580, 902396, 914190, 951590, 95813...","[5570408, 6773196, 982799, 852486, 1082185]","[924804, 962991, 5567231, 5570048, 858302]","[7025275, 985911, 888835, 9337729, 1082185]","[1082185, 6534178, 1029743, 995242, 1106523]","[844179, 5570048, 1013641, 880150, 1082185]","[995965, 844179, 1053690, 893018, 1082185]","[1082185, 6534178, 1029743, 995242, 1106523]"


In [570]:
pd.DataFrame.from_dict(dict_loss)

Unnamed: 0,own_recommendations,als,2stg_own,top_pop
recall_at_k,0.010872,0.004927,0.006921,0.030879
precision_at_k,0.097649,0.040548,0.060137,0.205583


In [None]:
крайне странные результаты.

-----